diff --git a/cime_config/testdefs/testlist_clm.xml b/cime_config/testdefs/testlist_clm.xml index 4761a2111f..d8ef6357cd 100644 --- a/cime_config/testdefs/testlist_clm.xml +++ b/cime_config/testdefs/testlist_clm.xml @@ -3308,8 +3308,8 @@ - - + + diff --git a/doc/ChangeLog b/doc/ChangeLog index fcdf87d053..d26715f699 100644 --- a/doc/ChangeLog +++ b/doc/ChangeLog @@ -1,4 +1,75 @@ =============================================================== +Tag name: ctsm5.1.dev169 +Originator(s): samrabin (Sam Rabin, UCAR/TSS, samrabin@ucar.edu) +Date: Thu 22 Feb 2024 09:42:57 AM MST +One-line Summary: Merge b4b-dev + +Purpose and description of changes +---------------------------------- + +Brings in 3 PRs from b4b-dev to master: +- Do not crash "make all" even if pylint isn't clean (ESCOMP/CTSM#2353; Sam Rabin) +- Resolve pylint issues (ESCOMP/CTSM#2354; Sam Rabin) +- Move FSURDATMODIFYCTSM test to Derecho (ESCOMP/CTSM#2364; Sam Rabin) + +Significant changes to scientifically-supported configurations +-------------------------------------------------------------- + +Does this tag change answers significantly for any of the following physics configurations? +(Details of any changes will be given in the "Answer changes" section below.) + +[ ] clm5_1 + +[ ] clm5_0 + +[ ] ctsm5_0-nwp + +[ ] clm4_5 + + +Bugs fixed +---------- + +CTSM issues fixed: +- Fixes ESCOMP/CTSM#2255: make lint is not clean in ctsm5.1.dev152 +- Fixes ESCOMP/CTSM#2316: "make all" doesn't run black if lint fails +- FIXES ESCOMP/CTSM#2362: FSURDATMODIFYCTSM test should be moved to Derecho or Izumi + + +Notes of particular relevance for developers: +--------------------------------------------- + +Changes to tests or testing: +- FSURDATMODIFYCTSM test changed from derecho_intel (didn't work in debug mode) to derecho_gnu. I.e., from + FSURDATMODIFYCTSM_D_Mmpi-serial_Ld1.5x5_amazon.I2000Clm50SpRs.derecho_intel + to + FSURDATMODIFYCTSM_D_Mmpi-serial_Ld1.5x5_amazon.I2000Clm50SpRs.derecho_gnu + + +Testing summary: +---------------- + + [PASS means all tests PASS; OK means tests PASS other than expected fails.] + + regular tests (aux_clm: https://github.com/ESCOMP/CTSM/wiki/System-Testing-Guide#pre-merge-system-testing): + + derecho ----- OK + izumi ------- OK + + any other testing (give details below): + - "make all" in python/ is clean. + + +Other details +------------- + +Pull Requests that document the changes (include PR ids): +- ESCOMP/CTSM#2353: Do not crash "make all" even if pylint isn't clean (https://github.com/ESCOMP/CTSM/pull/2353) +- ESCOMP/CTSM#2354: Resolve pylint issues (https://github.com/ESCOMP/CTSM/pull/2354) +- ESCOMP/CTSM#2364: Move FSURDATMODIFYCTSM test to Derecho (https://github.com/ESCOMP/CTSM/pull/2364) + +=============================================================== +=============================================================== Tag name: ctsm5.1.dev168 Originator(s): slevis (Samuel Levis,UCAR/TSS,303-665-1310) Date: Fri 16 Feb 2024 01:27:41 PM MST diff --git a/doc/ChangeSum b/doc/ChangeSum index 56a460ea85..18ae34626f 100644 --- a/doc/ChangeSum +++ b/doc/ChangeSum @@ -1,5 +1,6 @@ Tag Who Date Summary ============================================================================================================================ + ctsm5.1.dev169 samrabin 02/22/2024 Merge b4b-dev ctsm5.1.dev168 slevis 02/16/2024 Remove a source of negative snocan in CanopyFluxesMod ctsm5.1.dev167 samrabin 02/08/2024 Delete _FillValue and history from parameter files ctsm5.1.dev166 multiple 01/24/2024 BFB merge tag diff --git a/python/Makefile b/python/Makefile index 271e977046..b43e1c5e53 100644 --- a/python/Makefile +++ b/python/Makefile @@ -19,7 +19,7 @@ ifneq ($(verbose), not-set) endif PYLINT=pylint -PYLINT_ARGS=-j 4 --rcfile=ctsm/.pylintrc +PYLINT_ARGS=-j 4 --rcfile=ctsm/.pylintrc --fail-under=0 PYLINT_SRC = \ ctsm # NOTE: These don't pass pylint checking and should be added when we put into effort to get them to pass @@ -27,7 +27,7 @@ PYLINT_SRC = \ # ../cime_config/buildlib \ # ../cime_config/buildnml -all: test lint black +all: test black lint @echo @echo @echo "Successfully ran all standard tests" diff --git a/python/ctsm/.pylintrc b/python/ctsm/.pylintrc index 2087913e8a..ceff04c7d8 100644 --- a/python/ctsm/.pylintrc +++ b/python/ctsm/.pylintrc @@ -436,7 +436,10 @@ good-names=i, _, # --- default list is above here, our own list is below here --- # Allow logger as a global name in each module, because this seems to follow general recommended convention: - logger + logger, +# Allow these names, which are commonly used in matplotlib instructions + ax, + im # Include a hint for the correct naming format with invalid-name. include-naming-hint=no diff --git a/python/ctsm/crop_calendars/check_constant_vars.py b/python/ctsm/crop_calendars/check_constant_vars.py new file mode 100644 index 0000000000..aa25a412fe --- /dev/null +++ b/python/ctsm/crop_calendars/check_constant_vars.py @@ -0,0 +1,385 @@ +""" +For variables that should stay constant, make sure they are +""" + +import numpy as np +from ctsm.crop_calendars.cropcal_module import import_rx_dates + + +def check_one_constant_var_setup(this_ds, case, var): + """ + Various setup steps for check_one_constant_var() + """ + if "gs" in this_ds[var].dims: + time_coord = "gs" + elif "time" in this_ds[var].dims: + time_coord = "time" + else: + raise RuntimeError(f"Which of these is the time coordinate? {this_ds[var].dims}") + i_time_coord = this_ds[var].dims.index(time_coord) + + this_da = this_ds[var] + ra_sp = np.moveaxis(this_da.copy().values, i_time_coord, 0) + incl_patches = [] + bad_patches = np.array([]) + str_list = [] + + # Read prescription file, if needed + rx_ds = None + if isinstance(case, dict): + if var == "GDDHARV" and "rx_gdds_file" in case: + rx_ds = import_rx_dates( + "gdd", case["rx_gdds_file"], this_ds, set_neg1_to_nan=False + ).squeeze() + + return time_coord, this_da, ra_sp, incl_patches, str_list, rx_ds, bad_patches + + +def loop_through_bad_patches( + verbose, + emojus, + var, + everything_ok, + str_list, + rx_ds, + time_1, + t1_yr, + t1_vals, + timestep, + t_yr, + t_vals, + bad_patches_this_time, + found_in_rx, + vary_patches, + vary_lons, + vary_lats, + vary_crops, + vary_crops_int, + any_bad, +): + """ + Loop through and check any patches that were "bad" according to check_constant_vars(). + + This is pretty inefficient, but it works. + """ + patch = None # In case bad_patches_this_time is empty + for i, patch in enumerate(bad_patches_this_time): + this_patch = vary_patches[i] + this_lon = vary_lons[i] + this_lat = vary_lats[i] + this_crop = vary_crops[i] + this_crop_int = vary_crops_int[i] + + # If prescribed input had missing value (-1), it's fine for it to vary. + if rx_ds: + rx_var = f"gs1_{this_crop_int}" + if this_lon in rx_ds.lon.values and this_lat in rx_ds.lat.values: + rx_vals = rx_ds[rx_var].sel(lon=this_lon, lat=this_lat).values + n_unique = len(np.unique(rx_vals)) + if n_unique == 1: + found_in_rx[i] = True + if rx_vals == -1: + continue + elif n_unique > 1: + raise RuntimeError( + f"How does lon {this_lon} lat {this_lat} {this_crop} have " + + f"time-varying {var}?" + ) + else: + raise RuntimeError(f"lon {this_lon} lat {this_lat} {this_crop} not in rx dataset?") + + # Print info (or save to print later) + any_bad = True + if verbose: + this_str = ( + f" Patch {this_patch} (lon {this_lon} lat {this_lat}) " + + f"{this_crop} ({this_crop_int})" + ) + if rx_ds and not found_in_rx[i]: + this_str = this_str.replace("(lon", "* (lon") + if not np.isnan(t1_vals[patch]): + t1_val_print = int(t1_vals[patch]) + else: + t1_val_print = "NaN" + if not np.isnan(t_vals[patch]): + t_val_print = int(t_vals[patch]) + else: + t_val_print = "NaN" + if var == "SDATES": + str_list.append( + f"{this_str}: Sowing {t1_yr} jday {t1_val_print}, {t_yr} " + + f"jday {t_val_print}" + ) + else: + str_list.append( + f"{this_str}: {t1_yr} {var} {t1_val_print}, {t_yr} {var} " + f"{t_val_print}" + ) + else: + if everything_ok: + print(f"{emojus} CLM output {var} unexpectedly vary over time:") + everything_ok = False + print(f"{var} timestep {timestep} does not match timestep {time_1}") + break + return any_bad, patch + + +def ensure_all_patches_checked(this_ds, this_da, ra_sp, incl_patches): + """ + In check_one_constant_var(), make sure every patch was checked once (or is all-NaN except + possibly final season) + """ + incl_patches = np.sort(incl_patches) + if not np.array_equal(incl_patches, np.unique(incl_patches)): + raise RuntimeError("Patch(es) checked more than once!") + incl_patches = list(incl_patches) + incl_patches += list( + np.where( + np.all( + np.isnan( + ra_sp[ + :-1, + ] + ), + axis=0, + ) + )[0] + ) + incl_patches = np.sort(incl_patches) + if not np.array_equal(incl_patches, np.unique(incl_patches)): + raise RuntimeError("Patch(es) checked but also all-NaN??") + if not np.array_equal(incl_patches, np.arange(this_ds.dims["patch"])): + for patch in np.arange(this_ds.dims["patch"]): + if patch not in incl_patches: + raise RuntimeError( + f"Not all patches checked! E.g., {patch}: {this_da.isel(patch=patch).values}" + ) + + +def check_one_constant_var_loop_through_timesteps( + this_ds, + ignore_nan, + verbose, + emojus, + var, + everything_ok, + time_coord, + this_da, + str_list, + rx_ds, + time_1, + these_patches, + t1_yr, + t1_vals, + any_bad, + any_bad_before_checking_rx, + bad_patches, +): + """ + In check_one_constant_var(), loop through timesteps + """ + found_in_rx = None + for timestep in np.arange(time_1 + 1, this_ds.dims[time_coord]): + t_yr = this_ds[time_coord].values[timestep] + t_vals = np.squeeze(this_da.isel({time_coord: timestep, "patch": these_patches}).values) + ok_p = t1_vals == t_vals + + # If allowed, ignore where either t or t1 is NaN. Should only be used for runs where + # land use varies over time. + if ignore_nan: + ok_p = np.squeeze(np.bitwise_or(ok_p, np.isnan(t1_vals + t_vals))) + + if not np.all(ok_p): + any_bad_before_checking_rx = True + bad_patches_this_time = list(np.where(np.bitwise_not(ok_p))[0]) + bad_patches = np.concatenate( + (bad_patches, np.array(these_patches)[bad_patches_this_time]) + ) + if rx_ds: + found_in_rx = np.array([False for x in bad_patches]) + vary_patches = list(np.array(these_patches)[bad_patches_this_time]) + vary_lons = this_ds.patches1d_lon.values[bad_patches_this_time] + vary_lats = this_ds.patches1d_lat.values[bad_patches_this_time] + vary_crops = this_ds.patches1d_itype_veg_str.values[bad_patches_this_time] + vary_crops_int = this_ds.patches1d_itype_veg.values[bad_patches_this_time] + + any_bad_any_crop = False + for crop_int in np.unique(vary_crops_int): + rx_var = f"gs1_{crop_int}" + vary_lons_this_crop = vary_lons[np.where(vary_crops_int == crop_int)] + vary_lats_this_crop = vary_lats[np.where(vary_crops_int == crop_int)] + these_rx_vals = np.diag( + rx_ds[rx_var].sel(lon=vary_lons_this_crop, lat=vary_lats_this_crop).values + ) + if len(these_rx_vals) != len(vary_lats_this_crop): + raise RuntimeError( + f"Expected {len(vary_lats_this_crop)} rx values; got " + + f"{len(these_rx_vals)}" + ) + if not np.any(these_rx_vals != -1): + continue + any_bad_any_crop = True + break + if not any_bad_any_crop: + continue + + # Loop through and check any patches that were "bad" + any_bad = loop_through_bad_patches( + verbose, + emojus, + var, + everything_ok, + str_list, + rx_ds, + time_1, + t1_yr, + t1_vals, + timestep, + t_yr, + t_vals, + bad_patches_this_time, + found_in_rx, + vary_patches, + vary_lons, + vary_lats, + vary_crops, + vary_crops_int, + any_bad, + ) + + return any_bad_before_checking_rx, bad_patches, found_in_rx, any_bad + + +def check_one_constant_var( + this_ds, case, ignore_nan, verbose, emojus, var, any_bad, any_bad_before_checking_rx +): + """ + Ensure that a variable that should be constant actually is + """ + everything_ok = True + + ( + time_coord, + this_da, + ra_sp, + incl_patches, + str_list, + rx_ds, + bad_patches, + ) = check_one_constant_var_setup(this_ds, case, var) + + for time_1 in np.arange(this_ds.dims[time_coord] - 1): + condn = ~np.isnan(ra_sp[time_1, ...]) + if time_1 > 0: + condn = np.bitwise_and(condn, np.all(np.isnan(ra_sp[:time_1, ...]), axis=0)) + these_patches = np.where(condn)[0] + if these_patches.size == 0: + continue + these_patches = list(np.where(condn)[0]) + incl_patches += these_patches + + t1_yr = this_ds[time_coord].values[time_1] + t1_vals = np.squeeze(this_da.isel({time_coord: time_1, "patch": these_patches}).values) + + ( + any_bad_before_checking_rx, + bad_patches, + found_in_rx, + any_bad, + ) = check_one_constant_var_loop_through_timesteps( + this_ds, + ignore_nan, + verbose, + emojus, + var, + everything_ok, + time_coord, + this_da, + str_list, + rx_ds, + time_1, + these_patches, + t1_yr, + t1_vals, + any_bad, + any_bad_before_checking_rx, + bad_patches, + ) + + if verbose and any_bad: + print(f"{emojus} CLM output {var} unexpectedly vary over time:") + str_list.sort() + if found_in_rx is None: + raise RuntimeError("Somehow any_bad True but found_in_rx None") + if rx_ds and np.any(~found_in_rx): # pylint: disable=invalid-unary-operand-type + str_list = [ + "*: Not found in prescribed input file (maybe minor lon/lat mismatch)" + ] + str_list + elif not rx_ds: + str_list = ["(No rx file checked)"] + str_list + print("\n".join(str_list)) + + # Make sure every patch was checked once (or is all-NaN except possibly final season) + ensure_all_patches_checked(this_ds, this_da, ra_sp, incl_patches) + + if not any_bad: + if any_bad_before_checking_rx: + print( + f"✅ CLM output {var} do not vary through {this_ds.dims[time_coord]} growing " + + "seasons of output (except for patch(es) with missing rx)." + ) + else: + print( + f"✅ CLM output {var} do not vary through {this_ds.dims[time_coord]} growing " + + "seasons of output." + ) + + return any_bad, any_bad_before_checking_rx, bad_patches + + +def check_constant_vars( + this_ds, case, ignore_nan, const_growing_seasons=None, verbose=True, throw_error=True +): + """ + For variables that should stay constant, make sure they are + """ + if isinstance(case, str): + const_vars = [case] + elif isinstance(case, list): + const_vars = case + elif isinstance(case, dict): + const_vars = case["const_vars"] + else: + raise TypeError(f"case must be str or dict, not {type(case)}") + + if not const_vars: + return None + + if const_growing_seasons: + gs_0 = this_ds.gs.values[0] + gs_n = this_ds.gs.values[-1] + if const_growing_seasons.start > gs_0 or const_growing_seasons.stop < gs_n: + print( + f"❗ Only checking const_vars over {const_growing_seasons.start}-" + + f"{const_growing_seasons.stop} (run includes {gs_0}-{gs_n})" + ) + this_ds = this_ds.sel(gs=const_growing_seasons) + + any_bad = False + any_bad_before_checking_rx = False + if throw_error: + emojus = "❌" + else: + emojus = "❗" + if not isinstance(const_vars, list): + const_vars = [const_vars] + + for var in const_vars: + any_bad, any_bad_before_checking_rx, bad_patches = check_one_constant_var( + this_ds, case, ignore_nan, verbose, emojus, var, any_bad, any_bad_before_checking_rx + ) + + if any_bad and throw_error: + raise RuntimeError("Stopping due to failed check_constant_vars().") + + bad_patches = np.unique(bad_patches) + return [int(p) for p in bad_patches] diff --git a/python/ctsm/crop_calendars/check_rx_obeyed.py b/python/ctsm/crop_calendars/check_rx_obeyed.py new file mode 100644 index 0000000000..99b8d80bde --- /dev/null +++ b/python/ctsm/crop_calendars/check_rx_obeyed.py @@ -0,0 +1,216 @@ +""" +Check that prescribed crop calendars were obeyed +""" + +import numpy as np + +import ctsm.crop_calendars.cropcal_utils as utils +from ctsm.crop_calendars.cropcal_constants import DEFAULT_GDD_MIN + + +def get_pct_harv_at_mature(harvest_reason_da): + """ + Get percentage of harvests that happened at maturity + """ + n_harv_at_mature = len(np.where(harvest_reason_da.values == 1)[0]) + with np.errstate(invalid="ignore"): + harv_reason_gt_0 = harvest_reason_da.values > 0 + n_harv = len(np.where(harv_reason_gt_0)[0]) + if n_harv == 0: + return np.nan + pct_harv_at_mature = n_harv_at_mature / n_harv * 100 + pct_harv_at_mature = np.format_float_positional( + pct_harv_at_mature, precision=2, unique=False, fractional=False, trim="k" + ) # Round to 2 significant digits + return pct_harv_at_mature + + +def check_rx_obeyed_handle_gdharv(output_var, gdd_min, ds_thisveg, rx_array): + """ + In check_rx_obeyed(), account for the GDD harvest threshold minimum set in PlantCrop() + """ + if gdd_min is None: + gdd_min = DEFAULT_GDD_MIN + print( + f"gdd_min not provided when doing check_rx_obeyed() for {output_var}; using " + + f"default {gdd_min}" + ) + with np.errstate(invalid="ignore"): + rx_array[(rx_array >= 0) & (rx_array < gdd_min)] = gdd_min + + # ...harvest reason + # 0: Should never happen in any simulation + # 1: Harvesting at maturity + # 2: Harvesting at max season length (mxmat) + # 3: Crop was incorrectly planted in last time step of Dec. 31 + # 4: Today was supposed to be the planting day, but the previous crop still hasn't been + # harvested. + # 5: Harvest the day before the next sowing date this year. + # 6: Same as #5. + # 7: Harvest the day before the next sowing date (today is Dec. 31 and the sowing date + # is Jan. 1) + harvest_reason_da = ds_thisveg["HARVEST_REASON"] + unique_harvest_reasons = np.unique( + harvest_reason_da.values[np.where(~np.isnan(harvest_reason_da.values))] + ) + pct_harv_at_mature = get_pct_harv_at_mature(harvest_reason_da) + return gdd_min, unique_harvest_reasons, pct_harv_at_mature + + +def check_rx_obeyed_setup(dates_ds, which_ds, output_var, verbose): + """ + Various setup steps for check_rx_obeyed() + """ + all_ok = 2 + diff_str_list = [] + gdd_tolerance = 1 + + if "GDDHARV" in output_var and verbose: + harvest_reason_da = dates_ds["HARVEST_REASON"] + unique_harvest_reasons = np.unique( + harvest_reason_da.values[np.where(~np.isnan(harvest_reason_da.values))] + ) + pct_harv_at_mature = get_pct_harv_at_mature(harvest_reason_da) + print( + f"{which_ds} harvest reasons: {unique_harvest_reasons} ({pct_harv_at_mature}% harv at " + + "maturity)" + ) + + return all_ok, diff_str_list, gdd_tolerance + + +def get_extreme_info(diff_array, rx_array, mxn, dims, gs_da, patches1d_lon, patches1d_lat): + """ + Get information about extreme gridcells (for debugging) + """ + if mxn == np.min: # pylint: disable=comparison-with-callable + diff_array = np.ma.masked_array(diff_array, mask=np.abs(diff_array) == 0) + themxn = mxn(diff_array) + + # Find the first patch-gs that has the mxn value + matching_indices = np.where(diff_array == themxn) + first_indices = [x[0] for x in matching_indices] + + # Get the lon, lat, and growing season of that patch-gs + patch_index = first_indices[dims.index("patch")] + this_lon = patches1d_lon.values[patch_index] + this_lat = patches1d_lat.values[patch_index] + season_index = first_indices[dims.index("gs")] + this_gs = gs_da.values[season_index] + + # Get the prescribed value for this patch-gs + this_rx = rx_array[patch_index][0] + + return round(themxn, 3), round(this_lon, 3), round(this_lat, 3), this_gs, round(this_rx) + + +def check_rx_obeyed( + vegtype_list, rx_ds, dates_ds, which_ds, output_var, gdd_min=None, verbose=False +): + """ + Check that prescribed crop calendars were obeyed + """ + all_ok, diff_str_list, gdd_tolerance = check_rx_obeyed_setup( + dates_ds, which_ds, output_var, verbose + ) + + for vegtype_str in vegtype_list: + thisveg_patches = np.where(dates_ds.patches1d_itype_veg_str == vegtype_str)[0] + if thisveg_patches.size == 0: + continue + ds_thisveg = dates_ds.isel(patch=thisveg_patches) + + vegtype_int = utils.vegtype_str2int(vegtype_str)[0] + rx_da = rx_ds[f"gs1_{vegtype_int}"] + rx_array = rx_da.values[ + ds_thisveg.patches1d_jxy.values.astype(int) - 1, + ds_thisveg.patches1d_ixy.values.astype(int) - 1, + ] + rx_array = np.expand_dims(rx_array, axis=1) + sim_array = ds_thisveg[output_var].values + sim_array_dims = ds_thisveg[output_var].dims + + # Ignore patches without prescribed value + with np.errstate(invalid="ignore"): + rx_array[np.where(rx_array < 0)] = np.nan + + # Account for... + if "GDDHARV" in output_var: + # ...GDD harvest threshold minimum set in PlantCrop() + gdd_min, unique_harvest_reasons, pct_harv_at_mature = check_rx_obeyed_handle_gdharv( + output_var, gdd_min, ds_thisveg, rx_array + ) + + if np.any(sim_array != rx_array): + diff_array = sim_array - rx_array + + # Allow negative GDDHARV values when harvest occurred because sowing was scheduled for + # the next day + if output_var == "GDDHARV_PERHARV": + diff_array = np.ma.masked_array( + diff_array, + mask=(diff_array < 0) & (ds_thisveg["HARVEST_REASON_PERHARV"].values == 5), + ) + elif output_var == "GDDHARV": + with np.errstate(invalid="ignore"): + diff_lt_0 = diff_array < 0 + harv_reason_5 = ds_thisveg["HARVEST_REASON"].values == 5 + diff_array = np.ma.masked_array(diff_array, mask=diff_lt_0 & harv_reason_5) + + with np.errstate(invalid="ignore"): + abs_gt_0 = abs(diff_array) > 0 + if np.any(np.abs(diff_array[abs_gt_0]) > 0): + min_diff, min_lon, min_lat, min_gs, min_rx = get_extreme_info( + diff_array, + rx_array, + np.nanmin, + sim_array_dims, + dates_ds.gs, + ds_thisveg.patches1d_lon, + ds_thisveg.patches1d_lat, + ) + max_diff, max_lon, max_lat, max_gs, max_rx = get_extreme_info( + diff_array, + rx_array, + np.nanmax, + sim_array_dims, + dates_ds.gs, + ds_thisveg.patches1d_lon, + ds_thisveg.patches1d_lat, + ) + + diffs_eg_txt = ( + f"{vegtype_str} ({vegtype_int}): diffs range {min_diff} (lon {min_lon}, lat " + + f"{min_lat}, gs {min_gs}, rx ~{min_rx}) to {max_diff} (lon {max_lon}, lat " + + f"{max_lat}, gs {max_gs}, rx ~{max_rx})" + ) + if "GDDHARV" in output_var: + diffs_eg_txt += ( + f"; harvest reasons: {unique_harvest_reasons} ({pct_harv_at_mature}" + + "% harvested at maturity)" + ) + if "GDDHARV" in output_var and np.nanmax(abs(diff_array)) <= gdd_tolerance: + if all_ok > 0: + all_ok = 1 + diff_str_list.append(f" {diffs_eg_txt}") + else: + all_ok = 0 + if verbose: + print( + f"❌ {which_ds}: Prescribed {output_var} *not* always obeyed. E.g., " + + f"{diffs_eg_txt}" + ) + else: + break + + if all_ok == 2: + print(f"✅ {which_ds}: Prescribed {output_var} always obeyed") + elif all_ok == 1: + # print(f"🟨 {which_ds}: Prescribed {output_var} *not* always obeyed, but acceptable:") + # for x in diff_str_list: print(x) + print( + f"🟨 {which_ds}: Prescribed {output_var} *not* always obeyed, but acceptable (diffs <= " + + f"{gdd_tolerance})" + ) + elif not verbose: + print(f"❌ {which_ds}: Prescribed {output_var} *not* always obeyed. E.g., {diffs_eg_txt}") diff --git a/python/ctsm/crop_calendars/check_rxboth_run.py b/python/ctsm/crop_calendars/check_rxboth_run.py index 6dae071937..ae4decde30 100644 --- a/python/ctsm/crop_calendars/check_rxboth_run.py +++ b/python/ctsm/crop_calendars/check_rxboth_run.py @@ -1,12 +1,32 @@ -# %% Setup - +""" +Check the results of a run with prescribed sowing dates and maturity requirements +""" +import sys +import argparse +import glob +import os import numpy as np -import sys, argparse -import cropcal_module as cc -import glob, os + +# Import the CTSM Python utilities. +# sys.path.insert() is necessary for RXCROPMATURITY to work. The fact that it's calling this script +# in the RUN phase seems to require the python/ directory to be manually added to path. +_CTSM_PYTHON = os.path.join( + os.path.dirname(os.path.realpath(__file__)), os.pardir, os.pardir, os.pardir, "python" +) +sys.path.insert(1, _CTSM_PYTHON) +import ctsm.crop_calendars.cropcal_module as cc # pylint: disable=wrong-import-position +from ctsm.crop_calendars.check_rx_obeyed import ( # pylint: disable=wrong-import-position + check_rx_obeyed, +) +from ctsm.crop_calendars.check_constant_vars import ( # pylint: disable=wrong-import-position + check_constant_vars, +) def main(argv): + """ + Main method: Check the results of a run with prescribed sowing dates and maturity requirements + """ # Set arguments parser = argparse.ArgumentParser(description="ADD DESCRIPTION HERE") parser.add_argument( @@ -40,7 +60,7 @@ def main(argv): args = parser.parse_args(argv) # Note that _PERHARV will be stripped off upon import - myVars = [ + my_vars = [ "GRAINC_TO_FOOD_PERHARV", "GRAINC_TO_FOOD_ANN", "SDATES", @@ -60,18 +80,18 @@ def main(argv): # These should be constant in a Prescribed Calendars (rxboth) run, as long as the inputs were # static. case = { - "constantVars": ["SDATES", "GDDHARV"], + "const_vars": ["SDATES", "GDDHARV"], "rx_sdates_file": args.rx_sdates_file, "rx_gdds_file": args.rx_gdds_file, } case["ds"] = cc.import_output( annual_outfiles, - myVars=myVars, - y1=args.first_usable_year, - yN=args.last_usable_year, + my_vars=my_vars, + year_1=args.first_usable_year, + year_n=args.last_usable_year, ) - cc.check_constant_vars(case["ds"], case, ignore_nan=True, verbose=True, throw_error=True) + check_constant_vars(case["ds"], case, ignore_nan=True, verbose=True, throw_error=True) # Import GGCMI sowing and harvest dates, and check sims casename = "Prescribed Calendars" @@ -84,24 +104,31 @@ def main(argv): # Equalize lons/lats lonlat_tol = 1e-4 - for v in ["rx_sdates_ds", "rx_gdds_ds"]: - if v in case: - for l in ["lon", "lat"]: - max_diff_orig = np.max(np.abs(case[v][l].values - case["ds"][l].values)) + for ds_name in ["rx_sdates_ds", "rx_gdds_ds"]: + if ds_name in case: + for coord_name in ["lon", "lat"]: + max_diff_orig = np.max( + np.abs(case[ds_name][coord_name].values - case["ds"][coord_name].values) + ) if max_diff_orig > lonlat_tol: raise RuntimeError( - f"{v} {l} values differ too much ({max_diff_orig} > {lonlat_tol})" + f"{ds_name} {coord_name} values differ too much ({max_diff_orig} > " + + f"{lonlat_tol})" + ) + if max_diff_orig > 0: + case[ds_name] = case[ds_name].assign_coords( + {coord_name: case["ds"][coord_name].values} + ) + max_diff = np.max( + np.abs(case[ds_name][coord_name].values - case["ds"][coord_name].values) ) - elif max_diff_orig > 0: - case[v] = case[v].assign_coords({l: case["ds"][l].values}) - max_diff = np.max(np.abs(case[v][l].values - case["ds"][l].values)) - print(f"{v} {l} max_diff {max_diff_orig} → {max_diff}") + print(f"{ds_name} {coord_name} max_diff {max_diff_orig} → {max_diff}") else: - print(f"{v} {l} max_diff {max_diff_orig}") + print(f"{ds_name} {coord_name} max_diff {max_diff_orig}") # Check if case["rx_sdates_file"]: - cc.check_rx_obeyed( + check_rx_obeyed( case["ds"].vegtype_str.values, case["rx_sdates_ds"].isel(time=0), case["ds"], @@ -109,7 +136,7 @@ def main(argv): "SDATES", ) if case["rx_gdds_file"]: - cc.check_rx_obeyed( + check_rx_obeyed( case["ds"].vegtype_str.values, case["rx_gdds_ds"].isel(time=0), case["ds"], diff --git a/python/ctsm/crop_calendars/convert_axis_time2gs.py b/python/ctsm/crop_calendars/convert_axis_time2gs.py new file mode 100644 index 0000000000..d48514370d --- /dev/null +++ b/python/ctsm/crop_calendars/convert_axis_time2gs.py @@ -0,0 +1,622 @@ +""" +Convert time*mxharvests axes to growingseason axis +""" +import warnings +import sys +import numpy as np +import xarray as xr + +try: + import pandas as pd +except ModuleNotFoundError: + pass + + +def pym_to_pg(pym_array, quiet=False): + """ + In convert_axis_time2gs(), convert year x month array to growingseason axis + """ + pg_array = np.reshape(pym_array, (pym_array.shape[0], -1)) + ok_pg = pg_array[~np.isnan(pg_array)] + if not quiet: + print( + f"{ok_pg.size} included; unique N seasons = " + + f"{np.unique(np.sum(~np.isnan(pg_array), axis=1))}" + ) + return pg_array + + +def ignore_lastyear_complete_season(pg_array, excl, mxharvests): + """ + Helper function for convert_axis_time2gs() + """ + tmp_l = pg_array[:, :-mxharvests] + tmp_r = pg_array[:, -mxharvests:] + tmp_r[np.where(excl)] = np.nan + pg_array = np.concatenate((tmp_l, tmp_r), axis=1) + return pg_array + + +def convert_axis_time2gs_setup(this_ds, verbose): + """ + Various setup steps for convert_axis_time2gs_setup() + """ + # How many non-NaN patch-seasons do we expect to have once we're done organizing things? + n_patch = this_ds.dims["patch"] + # Because some patches will be planted in the last year but not complete, we have to ignore any + # finalyear-planted seasons that do complete. + n_gs = this_ds.dims["time"] - 1 + expected_valid = n_patch * n_gs + + mxharvests = this_ds.dims["mxharvests"] + + if verbose: + print( + f"Start: discrepancy of {np.sum(~np.isnan(this_ds.HDATES.values)) - expected_valid} " + + "patch-seasons" + ) + + # Set all non-positive date values to NaN. These are seasons that were never harvested + # (or never started): "non-seasons." + if this_ds.HDATES.dims != ("time", "mxharvests", "patch"): + raise RuntimeError( + "This code relies on HDATES dims ('time', 'mxharvests', 'patch'), not " + + f"{this_ds.HDATES.dims}" + ) + hdates_ymp = this_ds.HDATES.copy().where(this_ds.HDATES > 0).values + hdates_pym = np.transpose(hdates_ymp.copy(), (2, 0, 1)) + sdates_ymp = this_ds.SDATES_PERHARV.copy().where(this_ds.SDATES_PERHARV > 0).values + sdates_pym = np.transpose(sdates_ymp.copy(), (2, 0, 1)) + with np.errstate(invalid="ignore"): + hdates_pym[hdates_pym <= 0] = np.nan + return n_patch, n_gs, expected_valid, mxharvests, hdates_ymp, hdates_pym, sdates_ymp, sdates_pym + + +def set_up_ds_with_gs_axis(ds_in): + """ + Set up empty Dataset with time axis as "gs" (growing season) instead of what CLM puts out. + + Includes all the same variables as the input dataset, minus any that had dimensions mxsowings or + mxharvests. + """ + # Get the data variables to include in the new dataset + data_vars = {} + for var in ds_in.data_vars: + if not any(x in ["mxsowings", "mxharvests"] for x in ds_in[var].dims): + data_vars[var] = ds_in[var] + # Set up the new dataset + gs_years = [t.year - 1 for t in ds_in.time.values[:-1]] + coords = ds_in.coords + coords["gs"] = gs_years + ds_out = xr.Dataset(data_vars=data_vars, coords=coords, attrs=ds_in.attrs) + return ds_out + + +def print_onepatch_wrong_n_gs( + patch_index, + this_ds_orig, + sdates_ymp, + hdates_ymp, + sdates_pym, + hdates_pym, + sdates_pym2, + hdates_pym2, + sdates_pym3, + hdates_pym3, + sdates_pg, + hdates_pg, + sdates_pg2, + hdates_pg2, +): + """ + Print information about a patch (for debugging) + """ + + print( + f"patch {patch_index}: {this_ds_orig.patches1d_itype_veg_str.values[patch_index]}, lon " + f"{this_ds_orig.patches1d_lon.values[patch_index]} lat " + f"{this_ds_orig.patches1d_lat.values[patch_index]}" + ) + + print("Original SDATES (per sowing):") + print(this_ds_orig.SDATES.values[:, :, patch_index]) + + print("Original HDATES (per harvest):") + print(this_ds_orig.HDATES.values[:, :, patch_index]) + + if "pandas" in sys.modules: + + def print_pandas_ymp(msg, cols, arrs_tuple): + print(f"{msg} ({np.sum(~np.isnan(arrs_tuple[0]))})") + mxharvests = arrs_tuple[0].shape[1] + arrs_list2 = [] + cols2 = [] + for harvest_index in np.arange(mxharvests): + for i, array in enumerate(arrs_tuple): + arrs_list2.append(array[:, harvest_index]) + cols2.append(cols[i] + str(harvest_index)) + arrs_tuple2 = tuple(arrs_list2) + dataframe = pd.DataFrame(np.stack(arrs_tuple2, axis=1)) + dataframe.columns = cols2 + print(dataframe) + + print_pandas_ymp( + "Original", + ["sdate", "hdate"], + ( + this_ds_orig.SDATES_PERHARV.values[:, :, patch_index], + this_ds_orig.HDATES.values[:, :, patch_index], + ), + ) + + print_pandas_ymp( + "Masked", + ["sdate", "hdate"], + (sdates_ymp[:, :, patch_index], hdates_ymp[:, :, patch_index]), + ) + + print_pandas_ymp( + 'After "Ignore harvests from before this output began"', + ["sdate", "hdate"], + ( + np.transpose(sdates_pym, (1, 2, 0))[:, :, patch_index], + np.transpose(hdates_pym, (1, 2, 0))[:, :, patch_index], + ), + ) + + print_pandas_ymp( + 'After "In years with no sowing, pretend the first no-harvest is meaningful"', + ["sdate", "hdate"], + ( + np.transpose(sdates_pym2, (1, 2, 0))[:, :, patch_index], + np.transpose(hdates_pym2, (1, 2, 0))[:, :, patch_index], + ), + ) + + print_pandas_ymp( + ( + 'After "In years with sowing that are followed by inactive years, check whether the' + " last sowing was harvested before the patch was deactivated. If not, pretend the" + ' LAST no-harvest is meaningful."' + ), + ["sdate", "hdate"], + ( + np.transpose(sdates_pym3, (1, 2, 0))[:, :, patch_index], + np.transpose(hdates_pym3, (1, 2, 0))[:, :, patch_index], + ), + ) + + def print_pandas_pg(msg, cols, arrs_tuple): + print(f"{msg} ({np.sum(~np.isnan(arrs_tuple[0]))})") + arrs_list = list(arrs_tuple) + for i, array in enumerate(arrs_tuple): + arrs_list[i] = np.reshape(array, (-1)) + arrs_tuple2 = tuple(arrs_list) + dataframe = pd.DataFrame(np.stack(arrs_tuple2, axis=1)) + dataframe.columns = cols + print(dataframe) + + print_pandas_pg( + "Same, but converted to gs axis", + ["sdate", "hdate"], + (sdates_pg[patch_index, :], hdates_pg[patch_index, :]), + ) + + print_pandas_pg( + ( + 'After "Ignore any harvests that were planted in the final year, because some cells' + ' will have incomplete growing seasons for the final year"' + ), + ["sdate", "hdate"], + (sdates_pg2[patch_index, :], hdates_pg2[patch_index, :]), + ) + else: + print("Couldn't import pandas, so not displaying example bad patch ORIGINAL.") + + def print_nopandas(array_1, array_2, msg): + print(msg) + if array_1.ndim == 1: + # I don't know why these aren't side-by-side! + print(np.stack((array_1, array_2), axis=1)) + else: + print(np.concatenate((array_1, array_2), axis=1)) + + print_nopandas(sdates_ymp[:, :, patch_index], hdates_ymp[:, :, patch_index], "Masked:") + + print_nopandas( + np.transpose(sdates_pym, (1, 2, 0))[:, :, patch_index], + np.transpose(hdates_pym, (1, 2, 0))[:, :, patch_index], + 'After "Ignore harvests from before this output began"', + ) + + print_nopandas( + np.transpose(sdates_pym2, (1, 2, 0))[:, :, patch_index], + np.transpose(hdates_pym2, (1, 2, 0))[:, :, patch_index], + 'After "In years with no sowing, pretend the first no-harvest is meaningful"', + ) + + print_nopandas( + np.transpose(sdates_pym3, (1, 2, 0))[:, :, patch_index], + np.transpose(hdates_pym3, (1, 2, 0))[:, :, patch_index], + ( + 'After "In years with sowing that are followed by inactive years, check whether the' + " last sowing was harvested before the patch was deactivated. If not, pretend the" + ' LAST [easier to implement!] no-harvest is meaningful."' + ), + ) + + print_nopandas( + sdates_pg[patch_index, :], hdates_pg[patch_index, :], "Same, but converted to gs axis" + ) + + print_nopandas( + sdates_pg2[patch_index, :], + hdates_pg2[patch_index, :], + ( + 'After "Ignore any harvests that were planted in the final year, because some cells' + ' will have incomplete growing seasons for the final year"' + ), + ) + + print("\n\n") + + +def handle_years_with_no_sowing(this_ds, mxharvests, hdates_pym, sdates_pym): + """ + In years with no sowing, pretend the first no-harvest is meaningful, unless that was + intentionally ignored earlier in convert_axis_time2gs(). + """ + sdates_orig_ymp = this_ds.SDATES.copy().values + sdates_orig_pym = np.transpose(sdates_orig_ymp.copy(), (2, 0, 1)) + hdates_pym2 = hdates_pym.copy() + sdates_pym2 = sdates_pym.copy() + with np.errstate(invalid="ignore"): + sdates_gt_0 = sdates_orig_pym > 0 + nosow_py = np.all(~sdates_gt_0, axis=2) + nosow_py_1st = nosow_py & np.isnan(hdates_pym[:, :, 0]) + where_nosow_py_1st = np.where(nosow_py_1st) + hdates_pym2[where_nosow_py_1st[0], where_nosow_py_1st[1], 0] = -np.inf + sdates_pym2[where_nosow_py_1st[0], where_nosow_py_1st[1], 0] = -np.inf + for harvest_index in np.arange(mxharvests - 1): + if harvest_index == 0: + continue + if harvest_index == 1: + print("Warning: Untested with mxharvests > 2") + where_nosow_py = np.where( + nosow_py + & ~np.any(np.isnan(hdates_pym[:, :, 0:harvest_index]), axis=2) + & np.isnan(hdates_pym[:, :, harvest_index]) + ) + hdates_pym2[where_nosow_py[0], where_nosow_py[1], harvest_index + 1] = -np.inf + sdates_pym2[where_nosow_py[0], where_nosow_py[1], harvest_index + 1] = -np.inf + return sdates_orig_pym, hdates_pym2, sdates_pym2 + + +def handle_years_with_sowing_then_inactive( + verbose, + n_patch, + n_gs, + expected_valid, + mxharvests, + inactive_py, + sdates_orig_pym, + hdates_pym2, + sdates_pym2, +): + """ + In years with sowing that are followed by inactive years, check whether the last sowing was + harvested before the patch was deactivated. If not, pretend the LAST [easier to implement!] + no-harvest is meaningful. + """ + sdates_orig_masked_pym = sdates_orig_pym.copy() + with np.errstate(invalid="ignore"): + sdates_le_0 = sdates_orig_masked_pym <= 0 + sdates_orig_masked_pym[np.where(sdates_le_0)] = np.nan + with warnings.catch_warnings(): + warnings.filterwarnings(action="ignore", message="All-NaN slice encountered") + last_sdate_first_n_gs_py = np.nanmax(sdates_orig_masked_pym[:, :-1, :], axis=2) + last_hdate_first_n_gs_py = np.nanmax(hdates_pym2[:, :-1, :], axis=2) + with np.errstate(invalid="ignore"): + hdate_lt_sdate = last_hdate_first_n_gs_py < last_sdate_first_n_gs_py + last_sowing_not_harvested_sameyear_first_n_gs_py = hdate_lt_sdate | np.isnan( + last_hdate_first_n_gs_py + ) + inactive_last_n_gs_py = inactive_py[:, 1:] + last_sowing_never_harvested_first_n_gs_py = ( + last_sowing_not_harvested_sameyear_first_n_gs_py & inactive_last_n_gs_py + ) + last_sowing_never_harvested_py = np.concatenate( + (last_sowing_never_harvested_first_n_gs_py, np.full((n_patch, 1), False)), axis=1 + ) + last_sowing_never_harvested_pym = np.concatenate( + ( + np.full((n_patch, n_gs + 1, mxharvests - 1), False), + np.expand_dims(last_sowing_never_harvested_py, axis=2), + ), + axis=2, + ) + where_last_sowing_never_harvested_pym = last_sowing_never_harvested_pym + hdates_pym3 = hdates_pym2.copy() + sdates_pym3 = sdates_pym2.copy() + hdates_pym3[where_last_sowing_never_harvested_pym] = -np.inf + sdates_pym3[where_last_sowing_never_harvested_pym] = -np.inf + + hdates_pg = pym_to_pg(hdates_pym3.copy(), quiet=~verbose) + sdates_pg = pym_to_pg(sdates_pym3.copy(), quiet=True) + if verbose: + print( + "After 'In years with no sowing, pretend the first no-harvest is meaningful: " + + f"discrepancy of {np.sum(~np.isnan(hdates_pg)) - expected_valid} patch-seasons" + ) + + return hdates_pym3, sdates_pym3, hdates_pg, sdates_pg + + +def ignore_harvests_planted_in_final_year( + this_ds, verbose, n_gs, expected_valid, mxharvests, hdates_pg, sdates_pg +): + """ + Ignore any harvests that were planted in the final year, because some cells will have + incomplete growing seasons for the final year. + """ + with np.errstate(invalid="ignore"): + hdates_ge_sdates = hdates_pg[:, -mxharvests:] >= sdates_pg[:, -mxharvests:] + lastyear_complete_season = hdates_ge_sdates | np.isinf(hdates_pg[:, -mxharvests:]) + + hdates_pg2 = ignore_lastyear_complete_season( + hdates_pg.copy(), lastyear_complete_season, mxharvests + ) + sdates_pg2 = ignore_lastyear_complete_season( + sdates_pg.copy(), lastyear_complete_season, mxharvests + ) + is_valid = ~np.isnan(hdates_pg2) + is_fake = np.isneginf(hdates_pg2) + is_fake = np.reshape(is_fake[is_valid], (this_ds.dims["patch"], n_gs)) + discrepancy = np.sum(is_valid) - expected_valid + unique_n_seasons = np.unique(np.sum(is_valid, axis=1)) + if verbose: + print( + "After 'Ignore any harvests that were planted in the final year, because other cells " + + "will have incomplete growing seasons for the final year': discrepancy of " + + f"{discrepancy} patch-seasons" + ) + if "pandas" in sys.modules: + bincount = np.bincount(np.sum(is_valid, axis=1)) + bincount = bincount[bincount > 0] + dataframe = pd.DataFrame({"Ngs": unique_n_seasons, "Count": bincount}) + print(dataframe) + else: + print(f"unique N seasons = {unique_n_seasons}") + print(" ") + return hdates_pg2, sdates_pg2, is_valid, is_fake, discrepancy, unique_n_seasons + + +def create_dataset( + this_ds, + my_vars, + n_gs, + hdates_ymp, + hdates_pym, + sdates_ymp, + sdates_pym, + hdates_pym2, + sdates_pym2, + hdates_pym3, + sdates_pym3, + hdates_pg, + sdates_pg, + hdates_pg2, + sdates_pg2, + is_valid, + is_fake, + discrepancy, + unique_n_seasons, +): + """ + Create Dataset with time axis as "gs" (growing season) instead of what CLM puts out + """ + if discrepancy == 0: + this_ds_gs = set_up_ds_with_gs_axis(this_ds) + for var in this_ds.data_vars: + if this_ds[var].dims != ("time", "mxharvests", "patch") or ( + my_vars and var not in my_vars + ): + continue + + # Set invalid values to NaN + da_yhp = this_ds[var].copy() + da_yhp = da_yhp.where(~np.isneginf(da_yhp)) + + # Remove the nans and reshape to patches*growingseasons + da_pyh = da_yhp.transpose("patch", "time", "mxharvests") + ar_pg = np.reshape(da_pyh.values, (this_ds.dims["patch"], -1)) + ar_valid_pg = np.reshape(ar_pg[is_valid], (this_ds.dims["patch"], n_gs)) + # Change -infs to nans + ar_valid_pg[is_fake] = np.nan + # Save as DataArray to new Dataset, stripping _PERHARV from variable name + newname = var.replace("_PERHARV", "") + if newname in this_ds_gs: + raise RuntimeError(f"{newname} already in dataset!") + da_pg = xr.DataArray( + data=ar_valid_pg, + coords=[this_ds_gs.coords["patch"], this_ds_gs.coords["gs"]], + name=newname, + attrs=da_yhp.attrs, + ) + this_ds_gs[newname] = da_pg + this_ds_gs[newname].attrs["units"] = this_ds[var].attrs["units"] + else: + # Print details about example bad patch(es) + if min(unique_n_seasons) < n_gs: + print(f"Too few seasons (min {min(unique_n_seasons)} < {n_gs})") + patch_index = np.where(np.sum(~np.isnan(hdates_pg2), axis=1) == min(unique_n_seasons))[ + 0 + ][0] + print_onepatch_wrong_n_gs( + patch_index, + this_ds, + sdates_ymp, + hdates_ymp, + sdates_pym, + hdates_pym, + sdates_pym2, + hdates_pym2, + sdates_pym3, + hdates_pym3, + sdates_pg, + hdates_pg, + sdates_pg2, + hdates_pg2, + ) + if max(unique_n_seasons) > n_gs: + print(f"Too many seasons (max {max(unique_n_seasons)} > {n_gs})") + patch_index = np.where(np.sum(~np.isnan(hdates_pg2), axis=1) == max(unique_n_seasons))[ + 0 + ][0] + print_onepatch_wrong_n_gs( + patch_index, + this_ds, + sdates_ymp, + hdates_ymp, + sdates_pym, + hdates_pym, + sdates_pym2, + hdates_pym2, + sdates_pym3, + hdates_pym3, + sdates_pg, + hdates_pg, + sdates_pg2, + hdates_pg2, + ) + raise RuntimeError( + "Can't convert time*mxharvests axes to growingseason axis: discrepancy of " + + f"{discrepancy} patch-seasons" + ) + + # Preserve units + for var_1 in this_ds_gs: + var_0 = var_1 + if var_0 not in this_ds: + var_0 += "_PERHARV" + if var_0 not in this_ds: + continue + if "units" in this_ds[var_0].attrs: + this_ds_gs[var_1].attrs["units"] = this_ds[var_0].attrs["units"] + return this_ds_gs + + +def convert_axis_time2gs(this_ds, verbose=False, my_vars=None, incl_orig=False): + """ + Convert time*mxharvests axes to growingseason axis + """ + + ( + n_patch, + n_gs, + expected_valid, + mxharvests, + hdates_ymp, + hdates_pym, + sdates_ymp, + sdates_pym, + ) = convert_axis_time2gs_setup(this_ds, verbose) + + # Find years where patch was inactive + inactive_py = np.transpose( + np.isnan(this_ds.HDATES).all(dim="mxharvests").values + & np.isnan(this_ds.SDATES_PERHARV).all(dim="mxharvests").values + ) + # Find seasons that were planted while the patch was inactive + with np.errstate(invalid="ignore"): + sown_inactive_py = inactive_py[:, :-1] & (hdates_pym[:, 1:, 0] < sdates_pym[:, 1:, 0]) + sown_inactive_py = np.concatenate((np.full((n_patch, 1), False), sown_inactive_py), axis=1) + + # "Ignore harvests from seasons sown (a) before this output began or (b) when the crop was + # inactive" + with np.errstate(invalid="ignore"): + first_season_before_first_year_p = hdates_pym[:, 0, 0] < sdates_pym[:, 0, 0] + first_season_before_first_year_py = np.full(hdates_pym.shape[:-1], fill_value=False) + first_season_before_first_year_py[:, 0] = first_season_before_first_year_p + sown_prerun_or_inactive_py = first_season_before_first_year_py | sown_inactive_py + sown_prerun_or_inactive_pym = np.concatenate( + ( + np.expand_dims(sown_prerun_or_inactive_py, axis=2), + np.full((n_patch, n_gs + 1, mxharvests - 1), False), + ), + axis=2, + ) + where_sown_prerun_or_inactive_pym = np.where(sown_prerun_or_inactive_pym) + hdates_pym[where_sown_prerun_or_inactive_pym] = np.nan + sdates_pym[where_sown_prerun_or_inactive_pym] = np.nan + if verbose: + print( + "After 'Ignore harvests from before this output began: discrepancy of " + + f"{np.sum(~np.isnan(hdates_pym)) - expected_valid} patch-seasons'" + ) + + # We need to keep some non-seasons---it's possible that "the yearY growing season" never + # happened (sowing conditions weren't met), but we still need something there so that we can + # make an array of dimension Npatch*Ngs. We do this by changing those non-seasons from NaN to + # -Inf before doing the filtering and reshaping, after which we'll convert them back to NaNs. + + # "In years with no sowing, pretend the first no-harvest is meaningful, unless that was + # intentionally ignored above." + sdates_orig_pym, hdates_pym2, sdates_pym2 = handle_years_with_no_sowing( + this_ds, mxharvests, hdates_pym, sdates_pym + ) + + # "In years with sowing that are followed by inactive years, check whether the last sowing was + # harvested before the patch was deactivated. If not, pretend the LAST [easier to implement!] + # no-harvest is meaningful." + hdates_pym3, sdates_pym3, hdates_pg, sdates_pg = handle_years_with_sowing_then_inactive( + verbose, + n_patch, + n_gs, + expected_valid, + mxharvests, + inactive_py, + sdates_orig_pym, + hdates_pym2, + sdates_pym2, + ) + + # "Ignore any harvests that were planted in the final year, because some cells will have + # incomplete growing seasons for the final year." + ( + hdates_pg2, + sdates_pg2, + is_valid, + is_fake, + discrepancy, + unique_n_seasons, + ) = ignore_harvests_planted_in_final_year( + this_ds, verbose, n_gs, expected_valid, mxharvests, hdates_pg, sdates_pg + ) + + # Create Dataset with time axis as "gs" (growing season) instead of what CLM puts out + this_ds_gs = create_dataset( + this_ds, + my_vars, + n_gs, + hdates_ymp, + hdates_pym, + sdates_ymp, + sdates_pym, + hdates_pym2, + sdates_pym2, + hdates_pym3, + sdates_pym3, + hdates_pg, + sdates_pg, + hdates_pg2, + sdates_pg2, + is_valid, + is_fake, + discrepancy, + unique_n_seasons, + ) + + if incl_orig: + return this_ds_gs, this_ds + return this_ds_gs diff --git a/python/ctsm/crop_calendars/cropcal_constants.py b/python/ctsm/crop_calendars/cropcal_constants.py new file mode 100644 index 0000000000..f015ac7db1 --- /dev/null +++ b/python/ctsm/crop_calendars/cropcal_constants.py @@ -0,0 +1,26 @@ +""" +Constants used in crop calendar scripts +""" + +# Define conversion multipliers, {from: {to1, to2, ...}, ...} +multiplier_dict = { + # Mass + "g": { + "Mt": 1e-12, + }, + "t": { + "Mt": 1e-6, + }, + # Volume + "m3": { + "km3": 1e-9, + }, + # Yield + "g/m2": { + "t/ha": 1e-6 * 1e4, + }, +} + +# Minimum harvest threshold allowed in PlantCrop() +# Was 50 before cropcal runs 2023-01-28 +DEFAULT_GDD_MIN = 1.0 diff --git a/python/ctsm/crop_calendars/cropcal_figs_module.py b/python/ctsm/crop_calendars/cropcal_figs_module.py index 8d7f472fec..d820460175 100644 --- a/python/ctsm/crop_calendars/cropcal_figs_module.py +++ b/python/ctsm/crop_calendars/cropcal_figs_module.py @@ -1,5 +1,11 @@ +""" +Functions for making crop calendar figures +""" + import numpy as np +# It's fine if these can't be imported. The script using these will handle it. +# pylint: disable=import-error import cartopy.crs as ccrs import matplotlib.pyplot as plt import matplotlib.colors as mcolors @@ -23,6 +29,9 @@ # Cases (line and scatter plots) def cropcal_colors_cases(casename): + """ + Define colors for each case + """ case_color_dict = { "clm default": [x / 255 for x in [92, 219, 219]], "prescribed calendars": [x / 255 for x in [250, 102, 240]], @@ -32,11 +41,8 @@ def cropcal_colors_cases(casename): case_color_dict["5.0 lu"] = case_color_dict["clm default"] case_color_dict["5.2 lu"] = case_color_dict["prescribed calendars"] - case_color = None casename_for_colors = casename.lower().replace(" (0)", "").replace(" (1)", "") - if casename_for_colors in case_color_dict: - case_color = case_color_dict[casename_for_colors] - return case_color + return case_color_dict.get(casename_for_colors, None) def make_map( @@ -65,6 +71,9 @@ def make_map( vmin=None, vrange=None, ): + """ + Make map + """ if underlay is not None: if underlay_color is None: underlay_color = cropcal_colors["underlay"] @@ -147,23 +156,25 @@ def make_map( # Need to do this for subplot row labels set_ticks(-1, fontsize, "y") plt.yticks([]) - for x in ax.spines: - ax.spines[x].set_visible(False) + for spine in ax.spines: + ax.spines[spine].set_visible(False) if show_cbar: return im, cbar - else: - return im, None + return im, None def deal_with_ticklabels(cbar, cbar_max, ticklabels, ticklocations, units, im): + """ + Handle settings related to ticklabels + """ if ticklocations is not None: cbar.set_ticks(ticklocations) if units is not None and units.lower() == "month": cbar.set_ticklabels( ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"] ) - units == "Month" + units = "Month" elif ticklabels is not None: cbar.set_ticklabels(ticklabels) if isinstance(im, mplcol.QuadMesh): @@ -173,7 +184,7 @@ def deal_with_ticklabels(cbar, cbar_max, ticklabels, ticklocations, units, im): if cbar_max is not None and clim_max > cbar_max: if ticklabels is not None: raise RuntimeError( - "How to handle this now that you are specifying ticklocations separate from ticklabels?" + "How to handle this now that ticklocations is specified separately from ticklabels?" ) ticks = cbar.get_ticks() if ticks[-2] > cbar_max: @@ -182,24 +193,28 @@ def deal_with_ticklabels(cbar, cbar_max, ticklabels, ticklocations, units, im): ) ticklabels = ticks.copy() ticklabels[-1] = cbar_max - for i, x in enumerate(ticklabels): - if x == int(x): - ticklabels[i] = str(int(x)) + for i, ticklabel in enumerate(ticklabels): + if ticklabel == int(ticklabel): + ticklabels[i] = str(int(ticklabel)) cbar.set_ticks( ticks - ) # Calling this before set_xticklabels() avoids "UserWarning: FixedFormatter should only be used together with FixedLocator" (https://stackoverflow.com/questions/63723514/userwarning-fixedformatter-should-only-be-used-together-with-fixedlocator) + ) # Calling this before set_xticklabels() avoids "UserWarning: FixedFormatter should only + # be used together with FixedLocator" (https://stackoverflow.com/questions/63723514) cbar.set_ticklabels(ticklabels) def set_ticks(lonlat_bin_width, fontsize, x_or_y): + """ + Plot tick marks + """ if x_or_y == "x": ticks = np.arange(-180, 181, lonlat_bin_width) else: ticks = np.arange(-60, 91, lonlat_bin_width) ticklabels = [str(x) for x in ticks] - for i, x in enumerate(ticks): - if x % 2: + for i, tick in enumerate(ticks): + if tick % 2: ticklabels[i] = "" if x_or_y == "x": diff --git a/python/ctsm/crop_calendars/cropcal_module.py b/python/ctsm/crop_calendars/cropcal_module.py index 76c295974d..3fe6942f94 100644 --- a/python/ctsm/crop_calendars/cropcal_module.py +++ b/python/ctsm/crop_calendars/cropcal_module.py @@ -1,46 +1,23 @@ -import numpy as np -import xarray as xr -import warnings -import sys +""" +Helper functions for various crop calendar stuff +""" + import os import glob +import numpy as np +import xarray as xr -# Import the CTSM Python utilities. -# sys.path.insert() is necessary for RXCROPMATURITY to work. The fact that it's calling this script in the RUN phase seems to require the python/ directory to be manually added to path. -_CTSM_PYTHON = os.path.join( - os.path.dirname(os.path.realpath(__file__)), os.pardir, os.pardir, os.pardir, "python" -) -sys.path.insert(1, _CTSM_PYTHON) import ctsm.crop_calendars.cropcal_utils as utils - -try: - import pandas as pd -except: - pass - - -# Define conversion multipliers, {from: {to1, to2, ...}, ...} -multiplier_dict = { - # Mass - "g": { - "Mt": 1e-12, - }, - "t": { - "Mt": 1e-6, - }, - # Volume - "m3": { - "km3": 1e-9, - }, - # Yield - "g/m2": { - "t/ha": 1e-6 * 1e4, - }, -} +from ctsm.crop_calendars.convert_axis_time2gs import convert_axis_time2gs +from ctsm.crop_calendars.check_rx_obeyed import check_rx_obeyed +from ctsm.crop_calendars.cropcal_constants import DEFAULT_GDD_MIN +from ctsm.crop_calendars.import_ds import import_ds -# After importing a file, restrict it to years of interest. -def check_and_trim_years(y1, yN, ds_in): +def check_and_trim_years(year_1, year_n, ds_in): + """ + After importing a file, restrict it to years of interest. + """ ### In annual outputs, file with name Y is actually results from year Y-1. ### Note that time values refer to when it was SAVED. So 1981-01-01 is for year 1980. @@ -49,65 +26,80 @@ def get_year_from_cftime(cftime_date): return cftime_date.year - 1 # Check that all desired years are included - if get_year_from_cftime(ds_in.time.values[0]) > y1: + if get_year_from_cftime(ds_in.time.values[0]) > year_1: raise RuntimeError( - f"Requested y1 is {y1} but first year in outputs is {get_year_from_cftime(ds_in.time.values[0])}" + f"Requested year_1 is {year_1} but first year in outputs is " + + f"{get_year_from_cftime(ds_in.time.values[0])}" ) - elif get_year_from_cftime(ds_in.time.values[-1]) < y1: + if get_year_from_cftime(ds_in.time.values[-1]) < year_1: raise RuntimeError( - f"Requested yN is {yN} but last year in outputs is {get_year_from_cftime(ds_in.time.values[-1])}" + f"Requested year_n is {year_n} but last year in outputs is " + + f"{get_year_from_cftime(ds_in.time.values[-1])}" ) # Remove years outside range of interest ### Include an extra year at the end to finish out final seasons. - ds_in = utils.safer_timeslice(ds_in, slice(f"{y1+1}-01-01", f"{yN+2}-01-01")) + ds_in = utils.safer_timeslice(ds_in, slice(f"{year_1+1}-01-01", f"{year_n+2}-01-01")) # Make sure you have the expected number of timesteps (including extra year) - Nyears_expected = yN - y1 + 2 - if ds_in.dims["time"] != Nyears_expected: + n_years_expected = year_n - year_1 + 2 + if ds_in.dims["time"] != n_years_expected: raise RuntimeError( - f"Expected {Nyears_expected} timesteps in output but got {ds_in.dims['time']}" + f"Expected {n_years_expected} timesteps in output but got {ds_in.dims['time']}" ) return ds_in -def open_lu_ds(filename, y1, yN, existing_ds, ungrid=True): +def open_lu_ds(filename, year_1, year_n, existing_ds, ungrid=True): + """ + Open land-use dataset + """ # Open and trim to years of interest - dsg = xr.open_dataset(filename).sel(time=slice(y1, yN)) + this_ds_gridded = xr.open_dataset(filename).sel(time=slice(year_1, year_n)) # Assign actual lon/lat coordinates - dsg = dsg.assign_coords( + this_ds_gridded = this_ds_gridded.assign_coords( lon=("lsmlon", existing_ds.lon.values), lat=("lsmlat", existing_ds.lat.values) ) - dsg = dsg.swap_dims({"lsmlon": "lon", "lsmlat": "lat"}) - - if "AREA" in dsg: - dsg["AREA_CFT"] = dsg.AREA * 1e6 * dsg.LANDFRAC_PFT * dsg.PCT_CROP / 100 * dsg.PCT_CFT / 100 - dsg["AREA_CFT"].attrs = {"units": "m2"} - dsg["AREA_CFT"].load() + this_ds_gridded = this_ds_gridded.swap_dims({"lsmlon": "lon", "lsmlat": "lat"}) + + if "AREA" in this_ds_gridded: + this_ds_gridded["AREA_CFT"] = ( + this_ds_gridded.AREA + * 1e6 + * this_ds_gridded.LANDFRAC_PFT + * this_ds_gridded.PCT_CROP + / 100 + * this_ds_gridded.PCT_CFT + / 100 + ) + this_ds_gridded["AREA_CFT"].attrs = {"units": "m2"} + this_ds_gridded["AREA_CFT"].load() else: print("Warning: AREA missing from Dataset, so AREA_CFT will not be created") if not ungrid: - return dsg + return this_ds_gridded # Un-grid query_ilons = [int(x) - 1 for x in existing_ds["patches1d_ixy"].values] query_ilats = [int(x) - 1 for x in existing_ds["patches1d_jxy"].values] - query_ivts = [list(dsg.cft.values).index(x) for x in existing_ds["patches1d_itype_veg"].values] + query_ivts = [ + list(this_ds_gridded.cft.values).index(x) for x in existing_ds["patches1d_itype_veg"].values + ] - ds = xr.Dataset(attrs=dsg.attrs) - for v in ["AREA", "LANDFRAC_PFT", "PCT_CFT", "PCT_CROP", "AREA_CFT"]: - if v not in dsg: + this_ds = xr.Dataset(attrs=this_ds_gridded.attrs) + for var in ["AREA", "LANDFRAC_PFT", "PCT_CFT", "PCT_CROP", "AREA_CFT"]: + if var not in this_ds_gridded: continue - if "time" in dsg[v].dims: + if "time" in this_ds_gridded[var].dims: new_coords = existing_ds["GRAINC_TO_FOOD_ANN"].coords else: new_coords = existing_ds["patches1d_lon"].coords - if "cft" in dsg[v].dims: - ds[v] = ( - dsg[v] + if "cft" in this_ds_gridded[var].dims: + this_ds[var] = ( + this_ds_gridded[var] .isel( lon=xr.DataArray(query_ilons, dims="patch"), lat=xr.DataArray(query_ilats, dims="patch"), @@ -117,8 +109,8 @@ def open_lu_ds(filename, y1, yN, existing_ds, ungrid=True): .assign_coords(new_coords) ) else: - ds[v] = ( - dsg[v] + this_ds[var] = ( + this_ds_gridded[var] .isel( lon=xr.DataArray(query_ilons, dims="patch"), lat=xr.DataArray(query_ilats, dims="patch"), @@ -126,715 +118,79 @@ def open_lu_ds(filename, y1, yN, existing_ds, ungrid=True): ) .assign_coords(new_coords) ) - for v in existing_ds: - if "patches1d_" in v or "grid1d_" in v: - ds[v] = existing_ds[v] - ds["lon"] = dsg["lon"] - ds["lat"] = dsg["lat"] + for var in existing_ds: + if "patches1d_" in var or "grid1d_" in var: + this_ds[var] = existing_ds[var] + this_ds["lon"] = this_ds_gridded["lon"] + this_ds["lat"] = this_ds_gridded["lat"] # Which crops are irrigated? - is_irrigated = np.full_like(ds["patches1d_itype_veg"], False) - for vegtype_str in np.unique(ds["patches1d_itype_veg_str"].values): + is_irrigated = np.full_like(this_ds["patches1d_itype_veg"], False) + for vegtype_str in np.unique(this_ds["patches1d_itype_veg_str"].values): if "irrigated" not in vegtype_str: continue vegtype_int = utils.ivt_str2int(vegtype_str) - is_this_vegtype = np.where(ds["patches1d_itype_veg"].values == vegtype_int)[0] + is_this_vegtype = np.where(this_ds["patches1d_itype_veg"].values == vegtype_int)[0] is_irrigated[is_this_vegtype] = True - ["irrigated" in x for x in ds["patches1d_itype_veg_str"].values] - ds["IRRIGATED"] = xr.DataArray( + this_ds["IRRIGATED"] = xr.DataArray( data=is_irrigated, - coords=ds["patches1d_itype_veg_str"].coords, + coords=this_ds["patches1d_itype_veg_str"].coords, attrs={"long_name": "Is patch irrigated?"}, ) # How much area is irrigated? - ds["IRRIGATED_AREA_CFT"] = ds["IRRIGATED"] * ds["AREA_CFT"] - ds["IRRIGATED_AREA_CFT"].attrs = { + this_ds["IRRIGATED_AREA_CFT"] = this_ds["IRRIGATED"] * this_ds["AREA_CFT"] + this_ds["IRRIGATED_AREA_CFT"].attrs = { "long name": "CFT area (irrigated types only)", "units": "m^2", } - ds["IRRIGATED_AREA_GRID"] = ( - ds["IRRIGATED_AREA_CFT"] - .groupby(ds["patches1d_gi"]) + this_ds["IRRIGATED_AREA_GRID"] = ( + this_ds["IRRIGATED_AREA_CFT"] + .groupby(this_ds["patches1d_gi"]) .sum() .rename({"patches1d_gi": "gridcell"}) ) - ds["IRRIGATED_AREA_GRID"].attrs = {"long name": "Irrigated area in gridcell", "units": "m^2"} - - return ds - - -def check_constant_vars( - this_ds, case, ignore_nan, constantGSs=None, verbose=True, throw_error=True -): - if isinstance(case, str): - constantVars = [case] - elif isinstance(case, list): - constantVars = case - elif isinstance(case, dict): - constantVars = case["constantVars"] - else: - raise TypeError(f"case must be str or dict, not {type(case)}") - - if not constantVars: - return None - - if constantGSs: - gs0 = this_ds.gs.values[0] - gsN = this_ds.gs.values[-1] - if constantGSs.start > gs0 or constantGSs.stop < gsN: - print( - f"❗ Only checking constantVars over {constantGSs.start}-{constantGSs.stop} (run includes {gs0}-{gsN})" - ) - this_ds = this_ds.sel(gs=constantGSs) - - any_bad = False - any_bad_before_checking_rx = False - if throw_error: - emojus = "❌" - else: - emojus = "❗" - if not isinstance(constantVars, list): - constantVars = [constantVars] - - for v in constantVars: - ok = True - - if "gs" in this_ds[v].dims: - time_coord = "gs" - elif "time" in this_ds[v].dims: - time_coord = "time" - else: - raise RuntimeError(f"Which of these is the time coordinate? {this_ds[v].dims}") - i_time_coord = this_ds[v].dims.index(time_coord) - - this_da = this_ds[v] - ra_sp = np.moveaxis(this_da.copy().values, i_time_coord, 0) - incl_patches = [] - bad_patches = np.array([]) - strList = [] - - # Read prescription file, if needed - rx_ds = None - if isinstance(case, dict): - if v == "GDDHARV" and "rx_gdds_file" in case: - rx_ds = import_rx_dates( - "gdd", case["rx_gdds_file"], this_ds, set_neg1_to_nan=False - ).squeeze() - - for t1 in np.arange(this_ds.dims[time_coord] - 1): - condn = ~np.isnan(ra_sp[t1, ...]) - if t1 > 0: - condn = np.bitwise_and(condn, np.all(np.isnan(ra_sp[:t1, ...]), axis=0)) - thesePatches = np.where(condn)[0] - if thesePatches.size == 0: - continue - thesePatches = list(np.where(condn)[0]) - incl_patches += thesePatches - # print(f't1 {t1}: {thesePatches}') - - t1_yr = this_ds[time_coord].values[t1] - t1_vals = np.squeeze(this_da.isel({time_coord: t1, "patch": thesePatches}).values) - - for t in np.arange(t1 + 1, this_ds.dims[time_coord]): - t_yr = this_ds[time_coord].values[t] - t_vals = np.squeeze(this_da.isel({time_coord: t, "patch": thesePatches}).values) - ok_p = t1_vals == t_vals - - # If allowed, ignore where either t or t1 is NaN. Should only be used for runs where land use varies over time. - if ignore_nan: - ok_p = np.squeeze(np.bitwise_or(ok_p, np.isnan(t1_vals + t_vals))) - - if not np.all(ok_p): - any_bad_before_checking_rx = True - bad_patches_thisT = list(np.where(np.bitwise_not(ok_p))[0]) - bad_patches = np.concatenate( - (bad_patches, np.array(thesePatches)[bad_patches_thisT]) - ) - if rx_ds: - found_in_rx = np.array([False for x in bad_patches]) - varyPatches = list(np.array(thesePatches)[bad_patches_thisT]) - varyLons = this_ds.patches1d_lon.values[bad_patches_thisT] - varyLats = this_ds.patches1d_lat.values[bad_patches_thisT] - varyCrops = this_ds.patches1d_itype_veg_str.values[bad_patches_thisT] - varyCrops_int = this_ds.patches1d_itype_veg.values[bad_patches_thisT] - - any_bad_anyCrop = False - for c in np.unique(varyCrops_int): - rx_var = f"gs1_{c}" - varyLons_thisCrop = varyLons[np.where(varyCrops_int == c)] - varyLats_thisCrop = varyLats[np.where(varyCrops_int == c)] - theseRxVals = np.diag( - rx_ds[rx_var].sel(lon=varyLons_thisCrop, lat=varyLats_thisCrop).values - ) - if len(theseRxVals) != len(varyLats_thisCrop): - raise RuntimeError( - f"Expected {len(varyLats_thisCrop)} rx values; got {len(theseRxVals)}" - ) - if not np.any(theseRxVals != -1): - continue - any_bad_anyCrop = True - break - if not any_bad_anyCrop: - continue - - # This bit is pretty inefficient, but I'm not going to optimize it until I actually need to use it. - for i, p in enumerate(bad_patches_thisT): - thisPatch = varyPatches[i] - thisLon = varyLons[i] - thisLat = varyLats[i] - thisCrop = varyCrops[i] - thisCrop_int = varyCrops_int[i] - - # If prescribed input had missing value (-1), it's fine for it to vary. - if rx_ds: - rx_var = f"gs1_{thisCrop_int}" - if thisLon in rx_ds.lon.values and thisLat in rx_ds.lat.values: - rx = rx_ds[rx_var].sel(lon=thisLon, lat=thisLat).values - Nunique = len(np.unique(rx)) - if Nunique == 1: - found_in_rx[i] = True - if rx == -1: - continue - elif Nunique > 1: - raise RuntimeError( - f"How does lon {thisLon} lat {thisLat} {thisCrop} have time-varying {v}?" - ) - else: - raise RuntimeError( - "lon {thisLon} lat {thisLat} {thisCrop} not in rx dataset?" - ) - - # Print info (or save to print later) - any_bad = True - if verbose: - thisStr = f" Patch {thisPatch} (lon {thisLon} lat {thisLat}) {thisCrop} ({thisCrop_int})" - if rx_ds and not found_in_rx[i]: - thisStr = thisStr.replace("(lon", "* (lon") - if not np.isnan(t1_vals[p]): - t1_val_print = int(t1_vals[p]) - else: - t1_val_print = "NaN" - if not np.isnan(t_vals[p]): - t_val_print = int(t_vals[p]) - else: - t_val_print = "NaN" - if v == "SDATES": - strList.append( - f"{thisStr}: Sowing {t1_yr} jday {t1_val_print}, {t_yr} jday {t_val_print}" - ) - else: - strList.append( - f"{thisStr}: {t1_yr} {v} {t1_val_print}, {t_yr} {v} {t_val_print}" - ) - else: - if ok: - print(f"{emojus} CLM output {v} unexpectedly vary over time:") - ok = False - print(f"{v} timestep {t} does not match timestep {t1}") - break - if verbose and any_bad: - print(f"{emojus} CLM output {v} unexpectedly vary over time:") - strList.sort() - if rx_ds and np.any(~found_in_rx): - strList = [ - "*: Not found in prescribed input file (maybe minor lon/lat mismatch)" - ] + strList - elif not rx_ds: - strList = ["(No rx file checked)"] + strList - print("\n".join(strList)) - - # Make sure every patch was checked once (or is all-NaN except possibly final season) - incl_patches = np.sort(incl_patches) - if not np.array_equal(incl_patches, np.unique(incl_patches)): - raise RuntimeError("Patch(es) checked more than once!") - incl_patches = list(incl_patches) - incl_patches += list( - np.where( - np.all( - np.isnan( - ra_sp[ - :-1, - ] - ), - axis=0, - ) - )[0] - ) - incl_patches = np.sort(incl_patches) - if not np.array_equal(incl_patches, np.unique(incl_patches)): - raise RuntimeError("Patch(es) checked but also all-NaN??") - if not np.array_equal(incl_patches, np.arange(this_ds.dims["patch"])): - for p in np.arange(this_ds.dims["patch"]): - if p not in incl_patches: - break - raise RuntimeError( - f"Not all patches checked! E.g., {p}: {this_da.isel(patch=p).values}" - ) - - if not any_bad: - if any_bad_before_checking_rx: - print( - f"✅ CLM output {v} do not vary through {this_ds.dims[time_coord]} growing seasons of output (except for patch(es) with missing rx)." - ) - else: - print( - f"✅ CLM output {v} do not vary through {this_ds.dims[time_coord]} growing seasons of output." - ) - - if any_bad and throw_error: - raise RuntimeError("Stopping due to failed check_constant_vars().") - - bad_patches = np.unique(bad_patches) - return [int(p) for p in bad_patches] - - -def check_rx_obeyed( - vegtype_list, rx_ds, dates_ds, which_ds, output_var, gdd_min=None, verbose=False -): - all_ok = 2 - diff_str_list = [] - gdd_tolerance = 1 - - if "GDDHARV" in output_var and verbose: - harvest_reason_da = dates_ds["HARVEST_REASON"] - unique_harvest_reasons = np.unique( - harvest_reason_da.values[np.where(~np.isnan(harvest_reason_da.values))] - ) - pct_harv_at_mature = get_pct_harv_at_mature(harvest_reason_da) - print( - f"{which_ds} harvest reasons: {unique_harvest_reasons} ({pct_harv_at_mature}% harv at maturity)" - ) - - for vegtype_str in vegtype_list: - thisVeg_patches = np.where(dates_ds.patches1d_itype_veg_str == vegtype_str)[0] - if thisVeg_patches.size == 0: - continue - ds_thisVeg = dates_ds.isel(patch=thisVeg_patches) - patch_inds_lon_thisVeg = ds_thisVeg.patches1d_ixy.values.astype(int) - 1 - patch_inds_lat_thisVeg = ds_thisVeg.patches1d_jxy.values.astype(int) - 1 - patch_lons_thisVeg = ds_thisVeg.patches1d_lon - patch_lats_thisVeg = ds_thisVeg.patches1d_lat - - vegtype_int = utils.vegtype_str2int(vegtype_str)[0] - rx_da = rx_ds[f"gs1_{vegtype_int}"] - rx_array = rx_da.values[patch_inds_lat_thisVeg, patch_inds_lon_thisVeg] - rx_array = np.expand_dims(rx_array, axis=1) - sim_array = ds_thisVeg[output_var].values - sim_array_dims = ds_thisVeg[output_var].dims - - # Ignore patches without prescribed value - with np.errstate(invalid="ignore"): - rx_array[np.where(rx_array < 0)] = np.nan - - # Account for... - if "GDDHARV" in output_var: - # ...GDD harvest threshold minimum set in PlantCrop() - if gdd_min == None: - gdd_min = default_gdd_min() - print( - f"gdd_min not provided when doing check_rx_obeyed() for {output_var}; using default {gdd_min}" - ) - with np.errstate(invalid="ignore"): - rx_array[(rx_array >= 0) & (rx_array < gdd_min)] = gdd_min - - # ...harvest reason - # 0: Should never happen in any simulation - # 1: Harvesting at maturity - # 2: Harvesting at max season length (mxmat) - # 3: Crop was incorrectly planted in last time step of Dec. 31 - # 4: Today was supposed to be the planting day, but the previous crop still hasn't been harvested. - # 5: Harvest the day before the next sowing date this year. - # 6: Same as #5. - # 7: Harvest the day before the next sowing date (today is Dec. 31 and the sowing date is Jan. 1) - harvest_reason_da = ds_thisVeg["HARVEST_REASON"] - unique_harvest_reasons = np.unique( - harvest_reason_da.values[np.where(~np.isnan(harvest_reason_da.values))] - ) - pct_harv_at_mature = get_pct_harv_at_mature(harvest_reason_da) - - if np.any(sim_array != rx_array): - diff_array = sim_array - rx_array - - # Allow negative GDDHARV values when harvest occurred because sowing was scheduled for the next day - if output_var == "GDDHARV_PERHARV": - diff_array = np.ma.masked_array( - diff_array, - mask=(diff_array < 0) & (ds_thisVeg["HARVEST_REASON_PERHARV"].values == 5), - ) - elif output_var == "GDDHARV": - with np.errstate(invalid="ignore"): - diff_lt_0 = diff_array < 0 - harv_reason_5 = ds_thisVeg["HARVEST_REASON"].values == 5 - diff_array = np.ma.masked_array(diff_array, mask=diff_lt_0 & harv_reason_5) - - with np.errstate(invalid="ignore"): - abs_gt_0 = abs(diff_array) > 0 - if np.any(np.abs(diff_array[abs_gt_0]) > 0): - min_diff, minLon, minLat, minGS, minRx = get_extreme_info( - diff_array, - rx_array, - np.nanmin, - sim_array_dims, - dates_ds.gs, - patch_lons_thisVeg, - patch_lats_thisVeg, - ) - max_diff, maxLon, maxLat, maxGS, maxRx = get_extreme_info( - diff_array, - rx_array, - np.nanmax, - sim_array_dims, - dates_ds.gs, - patch_lons_thisVeg, - patch_lats_thisVeg, - ) - - diffs_eg_txt = f"{vegtype_str} ({vegtype_int}): diffs range {min_diff} (lon {minLon}, lat {minLat}, gs {minGS}, rx ~{minRx}) to {max_diff} (lon {maxLon}, lat {maxLat}, gs {maxGS}, rx ~{maxRx})" - if "GDDHARV" in output_var: - diffs_eg_txt += f"; harvest reasons: {unique_harvest_reasons} ({pct_harv_at_mature}% harvested at maturity)" - if "GDDHARV" in output_var and np.nanmax(abs(diff_array)) <= gdd_tolerance: - if all_ok > 0: - all_ok = 1 - diff_str_list.append(f" {diffs_eg_txt}") - else: - all_ok = 0 - if verbose: - print( - f"❌ {which_ds}: Prescribed {output_var} *not* always obeyed. E.g., {diffs_eg_txt}" - ) - else: - break + this_ds["IRRIGATED_AREA_GRID"].attrs = { + "long name": "Irrigated area in gridcell", + "units": "m^2", + } - if all_ok == 2: - print(f"✅ {which_ds}: Prescribed {output_var} always obeyed") - elif all_ok == 1: - # print(f"🟨 {which_ds}: Prescribed {output_var} *not* always obeyed, but acceptable:") - # for x in diff_str_list: print(x) - print( - f"🟨 {which_ds}: Prescribed {output_var} *not* always obeyed, but acceptable (diffs <= {gdd_tolerance})" - ) - elif not verbose: - print(f"❌ {which_ds}: Prescribed {output_var} *not* always obeyed. E.g., {diffs_eg_txt}") + return this_ds -# Make sure that, e.g., GDDACCUM_PERHARV is always <= HUI_PERHARV -def check_v0_le_v1(this_ds, vars, msg_txt=" ", both_nan_ok=False, throw_error=False): - v0 = vars[0] - v1 = vars[1] - gdd_lt_hui = this_ds[v0] <= this_ds[v1] +def check_v0_le_v1(this_ds, var_list, msg_txt=" ", both_nan_ok=False, throw_error=False): + """ + Make sure that, e.g., GDDACCUM_PERHARV is always <= HUI_PERHARV + """ + var0 = var_list[0] + var1 = var_list[1] + gdd_lt_hui = this_ds[var0] <= this_ds[var1] if both_nan_ok: - gdd_lt_hui = gdd_lt_hui | (np.isnan(this_ds[v0]) & np.isnan(this_ds[v1])) + gdd_lt_hui = gdd_lt_hui | (np.isnan(this_ds[var0]) & np.isnan(this_ds[var1])) if np.all(gdd_lt_hui): - print(f"✅{msg_txt}{v0} always <= {v1}") + print(f"✅{msg_txt}{var0} always <= {var1}") else: - msg = f"❌{msg_txt}{v0} *not* always <= {v1}" + msg = f"❌{msg_txt}{var0} *not* always <= {var1}" gdd_lt_hui_vals = gdd_lt_hui.values - p = np.where(~gdd_lt_hui_vals)[0][0] + patch_index = np.where(~gdd_lt_hui_vals)[0][0] msg = ( msg - + f"\ne.g., patch {p}: {this_ds.patches1d_itype_veg_str.values[p]}, lon {this_ds.patches1d_lon.values[p]} lat {this_ds.patches1d_lat.values[p]}:" + + f"\ne.g., patch {patch_index}: {this_ds.patches1d_itype_veg_str.values[patch_index]}," + + f" lon {this_ds.patches1d_lon.values[patch_index]} lat " + + f"{this_ds.patches1d_lat.values[patch_index]}:" ) - msg = msg + f"\n{this_ds[v0].values[p,:]}" - msg = msg + f"\n{this_ds[v1].values[p,:]}" + msg = msg + f"\n{this_ds[var0].values[patch_index,:]}" + msg = msg + f"\n{this_ds[var1].values[patch_index,:]}" if throw_error: print(msg) else: raise RuntimeError(msg) -# Convert time*mxharvests axes to growingseason axis -def convert_axis_time2gs(this_ds, verbose=False, myVars=None, incl_orig=False): - # How many non-NaN patch-seasons do we expect to have once we're done organizing things? - Npatch = this_ds.dims["patch"] - # Because some patches will be planted in the last year but not complete, we have to ignore any finalyear-planted seasons that do complete. - Ngs = this_ds.dims["time"] - 1 - expected_valid = Npatch * Ngs - - mxharvests = this_ds.dims["mxharvests"] - - if verbose: - print( - f"Start: discrepancy of {np.sum(~np.isnan(this_ds.HDATES.values)) - expected_valid} patch-seasons" - ) - - # Set all non-positive date values to NaN. These are seasons that were never harvested (or never started): "non-seasons." - if this_ds.HDATES.dims != ("time", "mxharvests", "patch"): - raise RuntimeError( - f"This code relies on HDATES dims ('time', 'mxharvests', 'patch'), not {this_ds.HDATES.dims}" - ) - hdates_ymp = this_ds.HDATES.copy().where(this_ds.HDATES > 0).values - hdates_pym = np.transpose(hdates_ymp.copy(), (2, 0, 1)) - sdates_ymp = this_ds.SDATES_PERHARV.copy().where(this_ds.SDATES_PERHARV > 0).values - sdates_pym = np.transpose(sdates_ymp.copy(), (2, 0, 1)) - with np.errstate(invalid="ignore"): - hdates_pym[hdates_pym <= 0] = np.nan - - # Find years where patch was inactive - inactive_py = np.transpose( - np.isnan(this_ds.HDATES).all(dim="mxharvests").values - & np.isnan(this_ds.SDATES_PERHARV).all(dim="mxharvests").values - ) - # Find seasons that were planted while the patch was inactive - with np.errstate(invalid="ignore"): - sown_inactive_py = inactive_py[:, :-1] & (hdates_pym[:, 1:, 0] < sdates_pym[:, 1:, 0]) - sown_inactive_py = np.concatenate((np.full((Npatch, 1), False), sown_inactive_py), axis=1) - - # "Ignore harvests from seasons sown (a) before this output began or (b) when the crop was inactive" - with np.errstate(invalid="ignore"): - first_season_before_first_year_p = hdates_pym[:, 0, 0] < sdates_pym[:, 0, 0] - first_season_before_first_year_py = np.full(hdates_pym.shape[:-1], fill_value=False) - first_season_before_first_year_py[:, 0] = first_season_before_first_year_p - sown_prerun_or_inactive_py = first_season_before_first_year_py | sown_inactive_py - sown_prerun_or_inactive_pym = np.concatenate( - ( - np.expand_dims(sown_prerun_or_inactive_py, axis=2), - np.full((Npatch, Ngs + 1, mxharvests - 1), False), - ), - axis=2, - ) - where_sown_prerun_or_inactive_pym = np.where(sown_prerun_or_inactive_pym) - hdates_pym[where_sown_prerun_or_inactive_pym] = np.nan - sdates_pym[where_sown_prerun_or_inactive_pym] = np.nan - if verbose: - print( - f'After "Ignore harvests from before this output began: discrepancy of {np.sum(~np.isnan(hdates_pym)) - expected_valid} patch-seasons' - ) - - # We need to keep some non-seasons---it's possible that "the yearY growing season" never happened (sowing conditions weren't met), but we still need something there so that we can make an array of dimension Npatch*Ngs. We do this by changing those non-seasons from NaN to -Inf before doing the filtering and reshaping, after which we'll convert them back to NaNs. - - # "In years with no sowing, pretend the first no-harvest is meaningful, unless that was intentionally ignored above." - sdates_orig_ymp = this_ds.SDATES.copy().values - sdates_orig_pym = np.transpose(sdates_orig_ymp.copy(), (2, 0, 1)) - hdates_pym2 = hdates_pym.copy() - sdates_pym2 = sdates_pym.copy() - with np.errstate(invalid="ignore"): - sdates_gt_0 = sdates_orig_pym > 0 - nosow_py = np.all(~sdates_gt_0, axis=2) - nosow_py_1st = nosow_py & np.isnan(hdates_pym[:, :, 0]) - where_nosow_py_1st = np.where(nosow_py_1st) - hdates_pym2[where_nosow_py_1st[0], where_nosow_py_1st[1], 0] = -np.inf - sdates_pym2[where_nosow_py_1st[0], where_nosow_py_1st[1], 0] = -np.inf - for h in np.arange(mxharvests - 1): - if h == 0: - continue - elif h == 1: - print("Warning: Untested with mxharvests > 2") - where_nosow_py = np.where( - nosow_py - & ~np.any(np.isnan(hdates_pym[:, :, 0:h]), axis=2) - & np.isnan(hdates_pym[:, :, h]) - ) - hdates_pym2[where_nosow_py[0], where_nosow_py[1], h + 1] = -np.inf - sdates_pym2[where_nosow_py[0], where_nosow_py[1], h + 1] = -np.inf - - # "In years with sowing that are followed by inactive years, check whether the last sowing was harvested before the patch was deactivated. If not, pretend the LAST [easier to implement!] no-harvest is meaningful." - sdates_orig_masked_pym = sdates_orig_pym.copy() - with np.errstate(invalid="ignore"): - sdates_le_0 = sdates_orig_masked_pym <= 0 - sdates_orig_masked_pym[np.where(sdates_le_0)] = np.nan - with warnings.catch_warnings(): - warnings.filterwarnings(action="ignore", message="All-NaN slice encountered") - last_sdate_firstNgs_py = np.nanmax(sdates_orig_masked_pym[:, :-1, :], axis=2) - last_hdate_firstNgs_py = np.nanmax(hdates_pym2[:, :-1, :], axis=2) - with np.errstate(invalid="ignore"): - hdate_lt_sdate = last_hdate_firstNgs_py < last_sdate_firstNgs_py - last_sowing_not_harvested_sameyear_firstNgs_py = hdate_lt_sdate | np.isnan( - last_hdate_firstNgs_py - ) - inactive_lastNgs_py = inactive_py[:, 1:] - last_sowing_never_harvested_firstNgs_py = ( - last_sowing_not_harvested_sameyear_firstNgs_py & inactive_lastNgs_py - ) - last_sowing_never_harvested_py = np.concatenate( - (last_sowing_never_harvested_firstNgs_py, np.full((Npatch, 1), False)), axis=1 - ) - last_sowing_never_harvested_pym = np.concatenate( - ( - np.full((Npatch, Ngs + 1, mxharvests - 1), False), - np.expand_dims(last_sowing_never_harvested_py, axis=2), - ), - axis=2, - ) - where_last_sowing_never_harvested_pym = last_sowing_never_harvested_pym - hdates_pym3 = hdates_pym2.copy() - sdates_pym3 = sdates_pym2.copy() - hdates_pym3[where_last_sowing_never_harvested_pym] = -np.inf - sdates_pym3[where_last_sowing_never_harvested_pym] = -np.inf - - # Convert to growingseason axis - def pym_to_pg(pym, quiet=False): - pg = np.reshape(pym, (pym.shape[0], -1)) - ok_pg = pg[~np.isnan(pg)] - if not quiet: - print( - f"{ok_pg.size} included; unique N seasons = {np.unique(np.sum(~np.isnan(pg), axis=1))}" - ) - return pg - - hdates_pg = pym_to_pg(hdates_pym3.copy(), quiet=~verbose) - sdates_pg = pym_to_pg(sdates_pym3.copy(), quiet=True) - if verbose: - print( - f'After "In years with no sowing, pretend the first no-harvest is meaningful: discrepancy of {np.sum(~np.isnan(hdates_pg)) - expected_valid} patch-seasons' - ) - - # "Ignore any harvests that were planted in the final year, because some cells will have incomplete growing seasons for the final year." - with np.errstate(invalid="ignore"): - hdates_ge_sdates = hdates_pg[:, -mxharvests:] >= sdates_pg[:, -mxharvests:] - lastyear_complete_season = hdates_ge_sdates | np.isinf(hdates_pg[:, -mxharvests:]) - - def ignore_lastyear_complete_season(pg, excl, mxharvests): - tmp_L = pg[:, :-mxharvests] - tmp_R = pg[:, -mxharvests:] - tmp_R[np.where(excl)] = np.nan - pg = np.concatenate((tmp_L, tmp_R), axis=1) - return pg - - hdates_pg2 = ignore_lastyear_complete_season( - hdates_pg.copy(), lastyear_complete_season, mxharvests - ) - sdates_pg2 = ignore_lastyear_complete_season( - sdates_pg.copy(), lastyear_complete_season, mxharvests - ) - is_valid = ~np.isnan(hdates_pg2) - is_fake = np.isneginf(hdates_pg2) - is_fake = np.reshape(is_fake[is_valid], (this_ds.dims["patch"], Ngs)) - discrepancy = np.sum(is_valid) - expected_valid - unique_Nseasons = np.unique(np.sum(is_valid, axis=1)) - if verbose: - print( - f'After "Ignore any harvests that were planted in the final year, because other cells will have incomplete growing seasons for the final year": discrepancy of {discrepancy} patch-seasons' - ) - if "pandas" in sys.modules: - bc = np.bincount(np.sum(is_valid, axis=1)) - bc = bc[bc > 0] - df = pd.DataFrame({"Ngs": unique_Nseasons, "Count": bc}) - print(df) - else: - print(f"unique N seasons = {unique_Nseasons}") - print(" ") - - # Create Dataset with time axis as "gs" (growing season) instead of what CLM puts out - if discrepancy == 0: - this_ds_gs = set_up_ds_with_gs_axis(this_ds) - for v in this_ds.data_vars: - if this_ds[v].dims != ("time", "mxharvests", "patch") or (myVars and v not in myVars): - continue - - # Set invalid values to NaN - da_yhp = this_ds[v].copy() - da_yhp = da_yhp.where(~np.isneginf(da_yhp)) - - # Remove the nans and reshape to patches*growingseasons - da_pyh = da_yhp.transpose("patch", "time", "mxharvests") - ar_pg = np.reshape(da_pyh.values, (this_ds.dims["patch"], -1)) - ar_valid_pg = np.reshape(ar_pg[is_valid], (this_ds.dims["patch"], Ngs)) - # Change -infs to nans - ar_valid_pg[is_fake] = np.nan - # Save as DataArray to new Dataset, stripping _PERHARV from variable name - newname = v.replace("_PERHARV", "") - if newname in this_ds_gs: - raise RuntimeError(f"{newname} already in dataset!") - da_pg = xr.DataArray( - data=ar_valid_pg, - coords=[this_ds_gs.coords["patch"], this_ds_gs.coords["gs"]], - name=newname, - attrs=da_yhp.attrs, - ) - this_ds_gs[newname] = da_pg - this_ds_gs[newname].attrs["units"] = this_ds[v].attrs["units"] - else: - # Print details about example bad patch(es) - if min(unique_Nseasons) < Ngs: - print(f"Too few seasons (min {min(unique_Nseasons)} < {Ngs})") - p = np.where(np.sum(~np.isnan(hdates_pg2), axis=1) == min(unique_Nseasons))[0][0] - print_onepatch_wrongNgs( - p, - this_ds, - sdates_ymp, - hdates_ymp, - sdates_pym, - hdates_pym, - sdates_pym2, - hdates_pym2, - sdates_pym3, - hdates_pym3, - sdates_pg, - hdates_pg, - sdates_pg2, - hdates_pg2, - ) - if max(unique_Nseasons) > Ngs: - print(f"Too many seasons (max {max(unique_Nseasons)} > {Ngs})") - p = np.where(np.sum(~np.isnan(hdates_pg2), axis=1) == max(unique_Nseasons))[0][0] - print_onepatch_wrongNgs( - p, - this_ds, - sdates_ymp, - hdates_ymp, - sdates_pym, - hdates_pym, - sdates_pym2, - hdates_pym2, - sdates_pym3, - hdates_pym3, - sdates_pg, - hdates_pg, - sdates_pg2, - hdates_pg2, - ) - raise RuntimeError( - f"Can't convert time*mxharvests axes to growingseason axis: discrepancy of {discrepancy} patch-seasons" - ) - - # Preserve units - for v1 in this_ds_gs: - v0 = v1 - if v0 not in this_ds: - v0 += "_PERHARV" - if v0 not in this_ds: - continue - if "units" in this_ds[v0].attrs: - this_ds_gs[v1].attrs["units"] = this_ds[v0].attrs["units"] - - if incl_orig: - return this_ds_gs, this_ds - else: - return this_ds_gs - - -# Minimum harvest threshold allowed in PlantCrop() -# Was 50 before cropcal runs 2023-01-28 -def default_gdd_min(): - return 1.0 - - -# Get information about extreme gridcells (for debugging) -def get_extreme_info(diff_array, rx_array, mxn, dims, gs, patches1d_lon, patches1d_lat): - if mxn == np.min: - diff_array = np.ma.masked_array(diff_array, mask=(np.abs(diff_array) == 0)) - themxn = mxn(diff_array) - - # Find the first patch-gs that has the mxn value - matching_indices = np.where(diff_array == themxn) - first_indices = [x[0] for x in matching_indices] - - # Get the lon, lat, and growing season of that patch-gs - p = first_indices[dims.index("patch")] - thisLon = patches1d_lon.values[p] - thisLat = patches1d_lat.values[p] - s = first_indices[dims.index("gs")] - thisGS = gs.values[s] - - # Get the prescribed value for this patch-gs - thisRx = rx_array[p][0] - - return round(themxn, 3), round(thisLon, 3), round(thisLat, 3), thisGS, round(thisRx) - - -# Get growing season lengths from a DataArray of hdate-sdate def get_gs_len_da(this_da): + """ + Get growing season lengths from a DataArray of hdate-sdate + """ tmp = this_da.values with np.errstate(invalid="ignore"): tmp_lt_0 = tmp < 0 @@ -844,21 +200,10 @@ def get_gs_len_da(this_da): return this_da -def get_pct_harv_at_mature(harvest_reason_da): - Nharv_at_mature = len(np.where(harvest_reason_da.values == 1)[0]) - with np.errstate(invalid="ignore"): - harv_reason_gt_0 = harvest_reason_da.values > 0 - Nharv = len(np.where(harv_reason_gt_0)[0]) - if Nharv == 0: - return np.nan - pct_harv_at_mature = Nharv_at_mature / Nharv * 100 - pct_harv_at_mature = np.format_float_positional( - pct_harv_at_mature, precision=2, unique=False, fractional=False, trim="k" - ) # Round to 2 significant digits - return pct_harv_at_mature - - def import_max_gs_length(paramfile_dir, my_clm_ver, my_clm_subver): + """ + Import maximum growing season length + """ # Get parameter file pattern = os.path.join(paramfile_dir, f"*{my_clm_ver}_params.{my_clm_subver}.nc") paramfile = glob.glob(pattern) @@ -886,8 +231,12 @@ def import_max_gs_length(paramfile_dir, my_clm_ver, my_clm_subver): return mxmat_dict -# E.g. import_rx_dates("sdate", sdates_rx_file, dates_ds0_orig) -def import_rx_dates(var_prefix, date_inFile, dates_ds, set_neg1_to_nan=True): +def import_rx_dates(var_prefix, date_infile, dates_ds, set_neg1_to_nan=True): + """ + Import prescribed sowing/harvest dates + + E.g. import_rx_dates("sdate", sdates_rx_file, dates_ds0_orig) + """ # Get run info: # Max number of growing seasons per year if "mxsowings" in dates_ds: @@ -896,53 +245,112 @@ def import_rx_dates(var_prefix, date_inFile, dates_ds, set_neg1_to_nan=True): mxsowings = 1 # Which vegetation types were simulated? - itype_veg_toImport = np.unique(dates_ds.patches1d_itype_veg) + itype_veg_to_import = np.unique(dates_ds.patches1d_itype_veg) - date_varList = [] - for i in itype_veg_toImport: - for g in np.arange(mxsowings): - thisVar = f"{var_prefix}{g+1}_{i}" - date_varList = date_varList + [thisVar] + date_varlist = [] + for i in itype_veg_to_import: + for j in np.arange(mxsowings): + this_var = f"{var_prefix}{j+1}_{i}" + date_varlist = date_varlist + [this_var] - ds = utils.import_ds(date_inFile, myVars=date_varList) + this_ds = import_ds(date_infile, my_vars=date_varlist) did_warn = False - for v in ds: - v_new = v.replace(var_prefix, "gs") - ds = ds.rename({v: v_new}) + for var in this_ds: + v_new = var.replace(var_prefix, "gs") + this_ds = this_ds.rename({var: v_new}) # Set -1 prescribed GDD values to NaN. Only warn the first time. - if set_neg1_to_nan and var_prefix == "gdd" and v_new != v and np.any(ds[v_new].values < 0): - if np.any((ds[v_new].values < 0) & (ds[v_new].values != -1)): - raise RuntimeError(f"Unexpected negative value in {v}") + if ( + set_neg1_to_nan + and var_prefix == "gdd" + and v_new != var + and np.any(this_ds[v_new].values < 0) + ): + if np.any((this_ds[v_new].values < 0) & (this_ds[v_new].values != -1)): + raise RuntimeError(f"Unexpected negative value in {var}") if not did_warn: - print(f"Setting -1 rx GDD values to NaN") + print("Setting -1 rx GDD values to NaN") did_warn = True - ds[v_new] = ds[v_new].where(ds[v_new] != -1) + this_ds[v_new] = this_ds[v_new].where(this_ds[v_new] != -1) + + return this_ds - return ds + +def check_no_negative(this_ds_in, varlist_no_negative, which_file, verbose): + """ + In import_output(), check that there are no unexpected negative values. + """ + tiny_neg_ok = 1e-12 + this_ds = this_ds_in.copy() + for var in this_ds: + if not any(x in var for x in varlist_no_negative): + continue + the_min = np.nanmin(this_ds[var].values) + if the_min < 0: + if np.abs(the_min) <= tiny_neg_ok: + if verbose: + print( + f"Tiny negative value(s) in {var} (abs <= {tiny_neg_ok}) being set to 0" + + f" ({which_file})" + ) + else: + print( + f"WARNING: Unexpected negative value(s) in {var}; minimum {the_min} " + + f"({which_file})" + ) + values = this_ds[var].copy().values + with np.errstate(invalid="ignore"): + do_setto_0 = (values < 0) & (values >= -tiny_neg_ok) + values[np.where(do_setto_0)] = 0 + this_ds[var] = xr.DataArray( + values, + coords=this_ds[var].coords, + dims=this_ds[var].dims, + attrs=this_ds[var].attrs, + ) + + elif verbose: + print(f"No negative value(s) in {var}; min {the_min} ({which_file})") + return this_ds + + +def check_no_zeros(this_ds, varlist_no_zero, which_file, verbose): + """ + In import_output(), check that there are no unexpected zeros. + """ + for var in this_ds: + if not any(x in var for x in varlist_no_zero): + continue + if np.any(this_ds[var].values == 0): + print(f"WARNING: Unexpected zero(s) in {var} ({which_file})") + elif verbose: + print(f"No zero value(s) in {var} ({which_file})") def import_output( filename, - myVars, - y1=None, - yN=None, - myVegtypes=utils.define_mgdcrop_list(), + my_vars, + year_1=None, + year_n=None, + my_vegtypes=utils.define_mgdcrop_list(), sdates_rx_ds=None, gdds_rx_ds=None, verbose=False, ): + """ + Import CLM output + """ # Import - this_ds = utils.import_ds(filename, myVars=myVars, myVegtypes=myVegtypes) + this_ds = import_ds(filename, my_vars=my_vars, my_vegtypes=my_vegtypes) # Trim to years of interest (do not include extra year needed for finishing last growing season) - if y1 and yN: - this_ds = check_and_trim_years(y1, yN, this_ds) + if year_1 and year_n: + this_ds = check_and_trim_years(year_1, year_n, this_ds) else: # Assume including all growing seasons except last complete one are "of interest" - y1 = this_ds.time.values[0].year - yN = this_ds.time.values[-1].year - 2 - this_ds = check_and_trim_years(y1, yN, this_ds) + year_1 = this_ds.time.values[0].year + year_n = this_ds.time.values[-1].year - 2 + this_ds = check_and_trim_years(year_1, year_n, this_ds) # What vegetation types are included? vegtype_list = [ @@ -954,82 +362,24 @@ def import_output( all_nan = np.full(this_ds[date_vars[0]].shape, True) all_nonpos = np.full(this_ds[date_vars[0]].shape, True) all_pos = np.full(this_ds[date_vars[0]].shape, True) - for v in date_vars: - all_nan = all_nan & np.isnan(this_ds[v].values) + for var in date_vars: + all_nan = all_nan & np.isnan(this_ds[var].values) with np.errstate(invalid="ignore"): - all_nonpos = all_nonpos & (this_ds[v].values <= 0) - all_pos = all_pos & (this_ds[v].values > 0) + all_nonpos = all_nonpos & (this_ds[var].values <= 0) + all_pos = all_pos & (this_ds[var].values > 0) if np.any(np.bitwise_not(all_nan | all_nonpos | all_pos)): raise RuntimeError("Inconsistent missing/present values on mxharvests axis") - # When doing transient runs, it's somehow possible for crops in newly-active patches to be *already alive*. They even have a sowing date (idop)! This will of course not show up in SDATES, but it does show up in SDATES_PERHARV. - # I could put the SDATES_PERHARV dates into where they "should" be, but instead I'm just going to invalidate those "seasons." - # - # In all but the last calendar year, which patches had no sowing? - no_sowing_yp = np.all(np.isnan(this_ds.SDATES.values[:-1, :, :]), axis=1) - # In all but the first calendar year, which harvests' jdays are < their sowings' jdays? (Indicates sowing the previous calendar year.) - with np.errstate(invalid="ignore"): - hsdate1_gt_hdate1_yp = ( - this_ds.SDATES_PERHARV.values[1:, 0, :] > this_ds.HDATES.values[1:, 0, :] - ) - # Where both, we have the problem. - falsely_alive_yp = no_sowing_yp & hsdate1_gt_hdate1_yp - if np.any(falsely_alive_yp): - print( - f"Warning: {np.sum(falsely_alive_yp)} patch-seasons being ignored: Seemingly sown the year before harvest, but no sowings occurred that year." - ) - falsely_alive_yp = np.concatenate( - (np.full((1, this_ds.dims["patch"]), False), falsely_alive_yp), axis=0 - ) - falsely_alive_y1p = np.expand_dims(falsely_alive_yp, axis=1) - dummy_false_y1p = np.expand_dims(np.full_like(falsely_alive_yp, False), axis=1) - falsely_alive_yhp = np.concatenate((falsely_alive_y1p, dummy_false_y1p), axis=1) - for v in this_ds.data_vars: - if this_ds[v].dims != ("time", "mxharvests", "patch"): - continue - this_ds[v] = this_ds[v].where(~falsely_alive_yhp) - - def check_no_negative(this_ds_in, varList_no_negative, which_file, verbose=False): - tiny_negOK = 1e-12 - this_ds = this_ds_in.copy() - for v in this_ds: - if not any(x in v for x in varList_no_negative): - continue - the_min = np.nanmin(this_ds[v].values) - if the_min < 0: - if np.abs(the_min) <= tiny_negOK: - if verbose: - print( - f"Tiny negative value(s) in {v} (abs <= {tiny_negOK}) being set to 0 ({which_file})" - ) - else: - print( - f"WARNING: Unexpected negative value(s) in {v}; minimum {the_min} ({which_file})" - ) - values = this_ds[v].copy().values - with np.errstate(invalid="ignore"): - do_setto_0 = (values < 0) & (values >= -tiny_negOK) - values[np.where(do_setto_0)] = 0 - this_ds[v] = xr.DataArray( - values, coords=this_ds[v].coords, dims=this_ds[v].dims, attrs=this_ds[v].attrs - ) - - elif verbose: - print(f"No negative value(s) in {v}; min {the_min} ({which_file})") - return this_ds - - def check_no_zeros(this_ds, varList_no_zero, which_file): - for v in this_ds: - if not any(x in v for x in varList_no_zero): - continue - if np.any(this_ds[v].values == 0): - print(f"WARNING: Unexpected zero(s) in {v} ({which_file})") - elif verbose: - print(f"No zero value(s) in {v} ({which_file})") + # When doing transient runs, it's somehow possible for crops in newly-active patches to be + # *already alive*. They even have a sowing date (idop)! This will of course not show up in + # SDATES, but it does show up in SDATES_PERHARV. + # I could put the SDATES_PERHARV dates into where they "should" be, but instead I'm just going + # to invalidate those "seasons." + this_ds = handle_zombie_crops(this_ds) # Check for no zero values where there shouldn't be - varList_no_zero = ["DATE", "YEAR"] - check_no_zeros(this_ds, varList_no_zero, "original file") + varlist_no_zero = ["DATE", "YEAR"] + check_no_zeros(this_ds, varlist_no_zero, "original file", verbose) # Convert time*mxharvests axes to growingseason axis this_ds_gs = convert_axis_time2gs(this_ds, verbose=verbose, incl_orig=False) @@ -1046,21 +396,21 @@ def check_no_zeros(this_ds, varList_no_zero, which_file): # Get HUI accumulation as fraction of required this_ds_gs["HUIFRAC"] = this_ds_gs["HUI"] / this_ds_gs["GDDHARV"] this_ds_gs["HUIFRAC_PERHARV"] = this_ds["HUI_PERHARV"] / this_ds["GDDHARV_PERHARV"] - for v in ["HUIFRAC", "HUIFRAC_PERHARV"]: - this_ds_gs[v].attrs["units"] = "Fraction of required" + for var in ["HUIFRAC", "HUIFRAC_PERHARV"]: + this_ds_gs[var].attrs["units"] = "Fraction of required" # Avoid tiny negative values - varList_no_negative = ["GRAIN", "REASON", "GDD", "HUI", "YEAR", "DATE", "GSLEN"] - this_ds_gs = check_no_negative(this_ds_gs, varList_no_negative, "new file", verbose=verbose) + varlist_no_negative = ["GRAIN", "REASON", "GDD", "HUI", "YEAR", "DATE", "GSLEN"] + this_ds_gs = check_no_negative(this_ds_gs, varlist_no_negative, "new file", verbose) # Check for no zero values where there shouldn't be - varList_no_zero = ["REASON", "DATE"] - check_no_zeros(this_ds_gs, varList_no_zero, "new file") + varlist_no_zero = ["REASON", "DATE"] + check_no_zeros(this_ds_gs, varlist_no_zero, "new file", verbose) # Check that e.g., GDDACCUM <= HUI - for vars in [["GDDACCUM", "HUI"], ["SYEARS", "HYEARS"]]: - if all(v in this_ds_gs for v in vars): - check_v0_le_v1(this_ds_gs, vars, both_nan_ok=True, throw_error=True) + for var_list in [["GDDACCUM", "HUI"], ["SYEARS", "HYEARS"]]: + if all(v in this_ds_gs for v in var_list): + check_v0_le_v1(this_ds_gs, var_list, both_nan_ok=True, throw_error=True) # Check that prescribed calendars were obeyed if sdates_rx_ds: @@ -1071,9 +421,8 @@ def check_no_zeros(this_ds, varList_no_zero, which_file): gdds_rx_ds, this_ds, "this_ds", - "SDATES", "GDDHARV", - gdd_min=default_gdd_min(), + gdd_min=DEFAULT_GDD_MIN, ) # Convert time axis to integer year, saving original as 'cftime' @@ -1092,175 +441,37 @@ def check_no_zeros(this_ds, varList_no_zero, which_file): return this_ds_gs -# Print information about a patch (for debugging) -def print_onepatch_wrongNgs( - p, - this_ds_orig, - sdates_ymp, - hdates_ymp, - sdates_pym, - hdates_pym, - sdates_pym2, - hdates_pym2, - sdates_pym3, - hdates_pym3, - sdates_pg, - hdates_pg, - sdates_pg2, - hdates_pg2, -): - try: - import pandas as pd - except: - print("Couldn't import pandas, so not displaying example bad patch ORIGINAL.") - - print( - f"patch {p}: {this_ds_orig.patches1d_itype_veg_str.values[p]}, lon" - f" {this_ds_orig.patches1d_lon.values[p]} lat {this_ds_orig.patches1d_lat.values[p]}" - ) - - print("Original SDATES (per sowing):") - print(this_ds_orig.SDATES.values[:, :, p]) - - print("Original HDATES (per harvest):") - print(this_ds_orig.HDATES.values[:, :, p]) - - if "pandas" in sys.modules: - - def print_pandas_ymp(msg, cols, arrs_tuple): - print(f"{msg} ({np.sum(~np.isnan(arrs_tuple[0]))})") - mxharvests = arrs_tuple[0].shape[1] - arrs_list2 = [] - cols2 = [] - for h in np.arange(mxharvests): - for i, a in enumerate(arrs_tuple): - arrs_list2.append(a[:, h]) - cols2.append(cols[i] + str(h)) - arrs_tuple2 = tuple(arrs_list2) - df = pd.DataFrame(np.stack(arrs_tuple2, axis=1)) - df.columns = cols2 - print(df) - - print_pandas_ymp( - "Original", - ["sdate", "hdate"], - (this_ds_orig.SDATES_PERHARV.values[:, :, p], this_ds_orig.HDATES.values[:, :, p]), - ) - - print_pandas_ymp("Masked", ["sdate", "hdate"], (sdates_ymp[:, :, p], hdates_ymp[:, :, p])) - - print_pandas_ymp( - 'After "Ignore harvests from before this output began"', - ["sdate", "hdate"], - ( - np.transpose(sdates_pym, (1, 2, 0))[:, :, p], - np.transpose(hdates_pym, (1, 2, 0))[:, :, p], - ), - ) - - print_pandas_ymp( - 'After "In years with no sowing, pretend the first no-harvest is meaningful"', - ["sdate", "hdate"], - ( - np.transpose(sdates_pym2, (1, 2, 0))[:, :, p], - np.transpose(hdates_pym2, (1, 2, 0))[:, :, p], - ), - ) - - print_pandas_ymp( - ( - 'After "In years with sowing that are followed by inactive years, check whether the' - " last sowing was harvested before the patch was deactivated. If not, pretend the" - ' LAST no-harvest is meaningful."' - ), - ["sdate", "hdate"], - ( - np.transpose(sdates_pym3, (1, 2, 0))[:, :, p], - np.transpose(hdates_pym3, (1, 2, 0))[:, :, p], - ), - ) - - def print_pandas_pg(msg, cols, arrs_tuple): - print(f"{msg} ({np.sum(~np.isnan(arrs_tuple[0]))})") - arrs_list = list(arrs_tuple) - for i, a in enumerate(arrs_tuple): - arrs_list[i] = np.reshape(a, (-1)) - arrs_tuple2 = tuple(arrs_list) - df = pd.DataFrame(np.stack(arrs_tuple2, axis=1)) - df.columns = cols - print(df) - - print_pandas_pg( - "Same, but converted to gs axis", ["sdate", "hdate"], (sdates_pg[p, :], hdates_pg[p, :]) - ) - - print_pandas_pg( - ( - 'After "Ignore any harvests that were planted in the final year, because some cells' - ' will have incomplete growing seasons for the final year"' - ), - ["sdate", "hdate"], - (sdates_pg2[p, :], hdates_pg2[p, :]), - ) - else: - - def print_nopandas(a1, a2, msg): - print(msg) - if a1.ndim == 1: - # I don't know why these aren't side-by-side! - print(np.stack((a1, a2), axis=1)) - else: - print(np.concatenate((a1, a2), axis=1)) - - print_nopandas(sdates_ymp[:, :, p], hdates_ymp[:, :, p], "Masked:") - - print_nopandas( - np.transpose(sdates_pym, (1, 2, 0))[:, :, p], - np.transpose(hdates_pym, (1, 2, 0))[:, :, p], - 'After "Ignore harvests from before this output began"', - ) - - print_nopandas( - np.transpose(sdates_pym2, (1, 2, 0))[:, :, p], - np.transpose(hdates_pym2, (1, 2, 0))[:, :, p], - 'After "In years with no sowing, pretend the first no-harvest is meaningful"', +def handle_zombie_crops(this_ds): + """ + When doing transient runs, it's somehow possible for crops in newly-active patches to be + *already alive*. They even have a sowing date (idop)! This will of course not show up in + SDATES, but it does show up in SDATES_PERHARV. + I could put the SDATES_PERHARV dates into where they "should" be, but instead I'm just going + to invalidate those "seasons." + """ + # In all but the last calendar year, which patches had no sowing? + no_sowing_yp = np.all(np.isnan(this_ds.SDATES.values[:-1, :, :]), axis=1) + # In all but the first calendar year, which harvests' jdays are < their sowings' jdays? + # (Indicates sowing the previous calendar year.) + with np.errstate(invalid="ignore"): + hsdate1_gt_hdate1_yp = ( + this_ds.SDATES_PERHARV.values[1:, 0, :] > this_ds.HDATES.values[1:, 0, :] ) - - print_nopandas( - np.transpose(sdates_pym3, (1, 2, 0))[:, :, p], - np.transpose(hdates_pym3, (1, 2, 0))[:, :, p], - ( - 'After "In years with sowing that are followed by inactive years, check whether the' - " last sowing was harvested before the patch was deactivated. If not, pretend the" - ' LAST [easier to implement!] no-harvest is meaningful."' - ), + # Where both, we have the problem. + falsely_alive_yp = no_sowing_yp & hsdate1_gt_hdate1_yp + if np.any(falsely_alive_yp): + print( + f"Warning: {np.sum(falsely_alive_yp)} patch-seasons being ignored: Seemingly sown the " + + "year before harvest, but no sowings occurred that year." ) - - print_nopandas(sdates_pg[p, :], hdates_pg[p, :], "Same, but converted to gs axis") - - print_nopandas( - sdates_pg2[p, :], - hdates_pg2[p, :], - ( - 'After "Ignore any harvests that were planted in the final year, because some cells' - ' will have incomplete growing seasons for the final year"' - ), + falsely_alive_yp = np.concatenate( + (np.full((1, this_ds.dims["patch"]), False), falsely_alive_yp), axis=0 ) - - print("\n\n") - - -# Set up empty Dataset with time axis as "gs" (growing season) instead of what CLM puts out. -# Includes all the same variables as the input dataset, minus any that had dimensions mxsowings or mxharvests. -def set_up_ds_with_gs_axis(ds_in): - # Get the data variables to include in the new dataset - data_vars = dict() - for v in ds_in.data_vars: - if not any([x in ["mxsowings", "mxharvests"] for x in ds_in[v].dims]): - data_vars[v] = ds_in[v] - # Set up the new dataset - gs_years = [t.year - 1 for t in ds_in.time.values[:-1]] - coords = ds_in.coords - coords["gs"] = gs_years - ds_out = xr.Dataset(data_vars=data_vars, coords=coords, attrs=ds_in.attrs) - return ds_out + falsely_alive_y1p = np.expand_dims(falsely_alive_yp, axis=1) + dummy_false_y1p = np.expand_dims(np.full_like(falsely_alive_yp, False), axis=1) + falsely_alive_yhp = np.concatenate((falsely_alive_y1p, dummy_false_y1p), axis=1) + for var in this_ds.data_vars: + if this_ds[var].dims != ("time", "mxharvests", "patch"): + continue + this_ds[var] = this_ds[var].where(~falsely_alive_yhp) + return this_ds diff --git a/python/ctsm/crop_calendars/cropcal_utils.py b/python/ctsm/crop_calendars/cropcal_utils.py index ba6c0b6e41..00ed2413d2 100644 --- a/python/ctsm/crop_calendars/cropcal_utils.py +++ b/python/ctsm/crop_calendars/cropcal_utils.py @@ -1,57 +1,15 @@ -"""utility functions""" -"""copied from klindsay, https://github.com/klindsay28/CESM2_coup_carb_cycle_JAMES/blob/master/utils.py""" - -import re -import warnings -import importlib - -with warnings.catch_warnings(): - warnings.filterwarnings(action="ignore", category=DeprecationWarning) - if importlib.find_loader("cf_units") is not None: - import cf_units as cf - if importlib.find_loader("cartopy") is not None: - from cartopy.util import add_cyclic_point -import cftime +""" +utility functions +copied from klindsay, https://github.com/klindsay28/CESM2_coup_carb_cycle_JAMES/blob/master/utils.py +""" import numpy as np import xarray as xr -# from xr_ds_ex import xr_ds_ex - - -# generate annual means, weighted by days / month -def weighted_annual_mean(array, time_in="time", time_out="time"): - if isinstance(array[time_in].values[0], cftime.datetime): - month_length = array[time_in].dt.days_in_month - - # After https://docs.xarray.dev/en/v0.5.1/examples/monthly-means.html - group = f"{time_in}.year" - weights = month_length.groupby(group) / month_length.groupby(group).sum() - np.testing.assert_allclose(weights.groupby(group).sum().values, 1) - array = (array * weights).groupby(group).sum(dim=time_in, skipna=True) - if time_out != "year": - array = array.rename({"year": time_out}) - - else: - mon_day = xr.DataArray( - np.array([31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]), dims=["month"] - ) - mon_wgt = mon_day / mon_day.sum() - array = ( - array.rolling({time_in: 12}, center=False) # rolling - .construct("month") # construct the array - .isel( - {time_in: slice(11, None, 12)} - ) # slice so that the first element is [1..12], second is [13..24] - .dot(mon_wgt, dims=["month"]) - ) - if time_in != time_out: - array = array.rename({time_in: time_out}) - return array - - -# List of PFTs used in CLM def define_pftlist(): + """ + Return list of PFTs used in CLM + """ pftlist = [ "not_vegetated", "needleleaf_evergreen_temperate_tree", @@ -136,12 +94,14 @@ def define_pftlist(): return pftlist -# Get CLM ivt number corresponding to a given name def ivt_str2int(ivt_str): + """ + Get CLM ivt number corresponding to a given name + """ pftlist = define_pftlist() if isinstance(ivt_str, str): ivt_int = pftlist.index(ivt_str) - elif isinstance(ivt_str, list) or isinstance(ivt_str, np.ndarray): + elif isinstance(ivt_str, (list, np.ndarray)): ivt_int = [ivt_str2int(x) for x in ivt_str] if isinstance(ivt_str, np.ndarray): ivt_int = np.array(ivt_int) @@ -153,12 +113,14 @@ def ivt_str2int(ivt_str): return ivt_int -# Get CLM ivt name corresponding to a given number def ivt_int2str(ivt_int): + """ + Get CLM ivt name corresponding to a given number + """ pftlist = define_pftlist() if np.issubdtype(type(ivt_int), np.integer) or int(ivt_int) == ivt_int: ivt_str = pftlist[int(ivt_int)] - elif isinstance(ivt_int, list) or isinstance(ivt_int, np.ndarray): + elif isinstance(ivt_int, (list, np.ndarray)): ivt_str = [ivt_int2str(x) for x in ivt_int] if isinstance(ivt_int, np.ndarray): ivt_str = np.array(ivt_str) @@ -172,23 +134,23 @@ def ivt_int2str(ivt_int): return ivt_str -# Does this vegetation type's name match (for a given comparison method) any member of a filtering list? -""" -Methods: +def is_this_vegtype(this_vegtype, this_filter, this_method): + """ + Does this vegetation type's name match (for a given comparison method) any member of a filtering + list? + + Methods: ok_contains: True if any member of this_filter is found in this_vegtype. notok_contains: True of no member of this_filter is found in this_vegtype. - ok_exact: True if this_vegtype matches any member of this_filter + ok_exact: True if this_vegtype matches any member of this_filter exactly. - notok_exact: True if this_vegtype does not match any member of + notok_exact: True if this_vegtype does not match any member of this_filter exactly. -""" - - -def is_this_vegtype(this_vegtype, this_filter, this_method): + """ # Make sure data type of this_vegtype is acceptable if isinstance(this_vegtype, float) and int(this_vegtype) == this_vegtype: this_vegtype = int(this_vegtype) - data_type_ok = lambda x: isinstance(x, str) or isinstance(x, int) or isinstance(x, np.int64) + data_type_ok = lambda x: isinstance(x, (int, np.int64, str)) ok_input = True if not data_type_ok(this_vegtype): if isinstance(this_vegtype, xr.core.dataarray.DataArray): @@ -221,43 +183,44 @@ def is_this_vegtype(this_vegtype, this_filter, this_method): # Perform the comparison if this_method == "ok_contains": return any(n in this_vegtype for n in this_filter) - elif this_method == "notok_contains": + if this_method == "notok_contains": return not any(n in this_vegtype for n in this_filter) - elif this_method == "ok_exact": + if this_method == "ok_exact": return any(n == this_vegtype for n in this_filter) - elif this_method == "notok_exact": + if this_method == "notok_exact": return not any(n == this_vegtype for n in this_filter) - else: - raise ValueError(f"Unknown comparison method: '{this_method}'") - - -# Get boolean list of whether each vegetation type in list is a managed crop -""" - this_vegtypelist: The list of vegetation types whose members you want to - test. - this_filter: The list of strings against which you want to compare - each member of this_vegtypelist. - this_method: How you want to do the comparison. See is_this_vegtype(). -""" + raise ValueError(f"Unknown comparison method: '{this_method}'") def is_each_vegtype(this_vegtypelist, this_filter, this_method): + """ + Get boolean list of whether each vegetation type in list is a managed crop + + this_vegtypelist: The list of vegetation types whose members you want to test. + this_filter: The list of strings against which you want to compare each member of + this_vegtypelist. + this_method: How you want to do the comparison. See is_this_vegtype(). + """ if isinstance(this_vegtypelist, xr.DataArray): this_vegtypelist = this_vegtypelist.values return [is_this_vegtype(x, this_filter, this_method) for x in this_vegtypelist] -# List (strings) of managed crops in CLM. def define_mgdcrop_list(): + """ + List (strings) of managed crops in CLM. + """ notcrop_list = ["tree", "grass", "shrub", "unmanaged", "not_vegetated"] defined_pftlist = define_pftlist() is_crop = is_each_vegtype(defined_pftlist, notcrop_list, "notok_contains") return [defined_pftlist[i] for i, x in enumerate(is_crop) if x] -# Convert list of vegtype strings to integer index equivalents. def vegtype_str2int(vegtype_str, vegtype_mainlist=None): + """ + Convert list of vegtype strings to integer index equivalents. + """ convert_to_ndarray = not isinstance(vegtype_str, np.ndarray) if convert_to_ndarray: vegtype_str = np.array(vegtype_str) @@ -266,222 +229,34 @@ def vegtype_str2int(vegtype_str, vegtype_mainlist=None): vegtype_mainlist = vegtype_mainlist.vegtype_str.values elif isinstance(vegtype_mainlist, xr.DataArray): vegtype_mainlist = vegtype_mainlist.values - elif vegtype_mainlist == None: + elif vegtype_mainlist is None: vegtype_mainlist = define_pftlist() if not isinstance(vegtype_mainlist, list) and isinstance(vegtype_mainlist[0], str): if isinstance(vegtype_mainlist, list): raise TypeError( f"Not sure how to handle vegtype_mainlist as list of {type(vegtype_mainlist[0])}" ) - else: - raise TypeError( - f"Not sure how to handle vegtype_mainlist as type {type(vegtype_mainlist[0])}" - ) + raise TypeError( + f"Not sure how to handle vegtype_mainlist as type {type(vegtype_mainlist[0])}" + ) if vegtype_str.shape == (): indices = np.array([-1]) else: indices = np.full(len(vegtype_str), -1) - for v in np.unique(vegtype_str): - indices[np.where(vegtype_str == v)] = vegtype_mainlist.index(v) + for vegtype_str_2 in np.unique(vegtype_str): + indices[np.where(vegtype_str == vegtype_str_2)] = vegtype_mainlist.index(vegtype_str_2) if convert_to_ndarray: indices = [int(x) for x in indices] return indices -# Flexibly subset time(s) and/or vegetation type(s) from an xarray Dataset or DataArray. Keyword arguments like dimension=selection. Selections can be individual values or slice()s. Optimize memory usage by beginning keyword argument list with the selections that will result in the largest reduction of object size. Use dimension "vegtype" to extract patches of designated vegetation type (can be string or integer). -# Can also do dimension=function---e.g., time=np.mean will take the mean over the time dimension. -def xr_flexsel(xr_object, patches1d_itype_veg=None, warn_about_seltype_interp=True, **kwargs): - # Setup - havewarned = False - delimiter = "__" - - for key, selection in kwargs.items(): - if callable(selection): - # It would have been really nice to do selection(xr_object, axis=key), but numpy methods and xarray methods disagree on "axis" vs. "dimension." So instead, just do this manually. - if selection == np.mean: - try: - xr_object = xr_object.mean(dim=key) - except: - raise ValueError( - f"Failed to take mean of dimension {key}. Try doing so outside of" - " xr_flexsel()." - ) - else: - raise ValueError(f"xr_flexsel() doesn't recognize function {selection}") - - elif key == "vegtype": - # Convert to list, if needed - if not isinstance(selection, list): - selection = [selection] - - # Convert to indices, if needed - if isinstance(selection[0], str): - selection = vegtype_str2int(selection) - - # Get list of boolean(s) - if isinstance(selection[0], int): - if isinstance(patches1d_itype_veg, type(None)): - patches1d_itype_veg = xr_object.patches1d_itype_veg.values - elif isinstance(patches1d_itype_veg, xr.core.dataarray.DataArray): - patches1d_itype_veg = patches1d_itype_veg.values - is_vegtype = is_each_vegtype(patches1d_itype_veg, selection, "ok_exact") - elif isinstance(selection[0], bool): - if len(selection) != len(xr_object.patch): - raise ValueError( - "If providing boolean 'vegtype' argument to xr_flexsel(), it must be the" - f" same length as xr_object.patch ({len(selection)} vs." - f" {len(xr_object.patch)})" - ) - is_vegtype = selection - else: - raise TypeError(f"Not sure how to handle 'vegtype' of type {type(selection[0])}") - xr_object = xr_object.isel(patch=[i for i, x in enumerate(is_vegtype) if x]) - if "ivt" in xr_object: - xr_object = xr_object.isel( - ivt=is_each_vegtype(xr_object.ivt.values, selection, "ok_exact") - ) - - else: - # Parse selection type, if provided - if delimiter in key: - key, selection_type = key.split(delimiter) - - # Check type of selection - else: - is_inefficient = False - if isinstance(selection, slice): - slice_members = [] - if selection == slice(0): - raise ValueError("slice(0) will be empty") - if selection.start != None: - slice_members = slice_members + [selection.start] - if selection.stop != None: - slice_members = slice_members + [selection.stop] - if selection.step != None: - slice_members = slice_members + [selection.step] - if slice_members == []: - raise TypeError("slice is all None?") - this_type = int - for x in slice_members: - if x < 0 or not isinstance(x, int): - this_type = "values" - break - elif isinstance(selection, np.ndarray): - if selection.dtype.kind in np.typecodes["AllInteger"]: - this_type = int - else: - is_inefficient = True - this_type = None - for x in selection: - if x < 0 or x % 1 > 0: - if isinstance(x, int): - this_type = "values" - else: - this_type = type(x) - break - if this_type == None: - this_type = int - selection = selection.astype(int) - else: - this_type = type(selection) - - warn_about_this_seltype_interp = warn_about_seltype_interp - if this_type == list and isinstance(selection[0], str): - selection_type = "values" - warn_about_this_seltype_interp = False - elif this_type == int: - selection_type = "indices" - else: - selection_type = "values" - - if warn_about_this_seltype_interp: - # Suggest suppressing selection type interpretation warnings - if not havewarned: - print( - "xr_flexsel(): Suppress all 'selection type interpretation' messages by" - " specifying warn_about_seltype_interp=False" - ) - havewarned = True - if is_inefficient: - extra = " This will also improve efficiency for large selections." - else: - extra = "" - print( - f"xr_flexsel(): Selecting {key} as {selection_type} because selection was" - f" interpreted as {this_type}. If not correct, specify selection type" - " ('indices' or 'values') in keyword like" - f" '{key}{delimiter}SELECTIONTYPE=...' instead of '{key}=...'.{extra}" - ) - - # Trim along relevant 1d axes - if isinstance(xr_object, xr.Dataset) and key in ["lat", "lon"]: - if selection_type == "indices": - inclCoords = xr_object[key].values[selection] - elif selection_type == "values": - if isinstance(selection, slice): - inclCoords = xr_object.sel({key: selection}, drop=False)[key].values - else: - inclCoords = selection - else: - raise TypeError(f"selection_type {selection_type} not recognized") - if key == "lat": - thisXY = "jxy" - elif key == "lon": - thisXY = "ixy" - else: - raise KeyError( - f"Key '{key}' not recognized: What 1d_ suffix should I use for variable" - " name?" - ) - pattern = re.compile(f"1d_{thisXY}") - matches = [x for x in list(xr_object.keys()) if pattern.search(x) != None] - for thisVar in matches: - if len(xr_object[thisVar].dims) != 1: - raise RuntimeError( - f"Expected {thisVar} to have 1 dimension, but it has" - f" {len(xr_object[thisVar].dims)}: {xr_object[thisVar].dims}" - ) - thisVar_dim = xr_object[thisVar].dims[0] - # print(f"Variable {thisVar} has dimension {thisVar_dim}") - thisVar_coords = xr_object[key].values[ - xr_object[thisVar].values.astype(int) - 1 - ] - # print(f"{thisVar_dim} size before: {xr_object.sizes[thisVar_dim]}") - ok_ind = [] - new_1d_thisXY = [] - for i, x in enumerate(thisVar_coords): - if x in inclCoords: - ok_ind = ok_ind + [i] - new_1d_thisXY = new_1d_thisXY + [(inclCoords == x).nonzero()[0] + 1] - xr_object = xr_object.isel({thisVar_dim: ok_ind}) - new_1d_thisXY = np.array(new_1d_thisXY).squeeze() - xr_object[thisVar].values = new_1d_thisXY - # print(f"{thisVar_dim} size after: {xr_object.sizes[thisVar_dim]}") - - # Perform selection - if selection_type == "indices": - # Have to select like this instead of with index directly because otherwise assign_coords() will throw an error. Not sure why. - if isinstance(selection, int): - # Single integer? Turn it into a slice. - selection = slice(selection, selection + 1) - elif ( - isinstance(selection, np.ndarray) - and not selection.dtype.kind in np.typecodes["AllInteger"] - ): - selection = selection.astype(int) - xr_object = xr_object.isel({key: selection}) - elif selection_type == "values": - xr_object = xr_object.sel({key: selection}) - else: - raise TypeError(f"selection_type {selection_type} not recognized") - - return xr_object - - -# Get PFT of each patch, in both integer and string forms. def get_patch_ivts(this_ds, this_pftlist): - # First, get all the integer values; should be time*pft or pft*time. We will eventually just take the first timestep. + """ + Get PFT of each patch, in both integer and string forms. + """ + # First, get all the integer values; should be time*pft or pft*time. We will eventually just + # take the first timestep. vegtype_int = this_ds.patches1d_itype_veg vegtype_int.values = vegtype_int.values.astype(int) @@ -492,379 +267,63 @@ def get_patch_ivts(this_ds, this_pftlist): return {"int": vegtype_int, "str": vegtype_str, "all_str": this_pftlist} -# Convert a list of strings with vegetation type names into a DataArray. Used to add vegetation type info in import_ds(). def get_vegtype_str_da(vegtype_str): + """ + Convert a list of strings with vegetation type names into a DataArray. + """ nvt = len(vegtype_str) - thisName = "vegtype_str" vegtype_str_da = xr.DataArray( - vegtype_str, coords={"ivt": np.arange(0, nvt)}, dims=["ivt"], name=thisName + vegtype_str, coords={"ivt": np.arange(0, nvt)}, dims=["ivt"], name="vegtype_str" ) return vegtype_str_da -# Function to drop unwanted variables in preprocessing of open_mfdataset(), making sure to NOT drop any unspecified variables that will be useful in gridding. Also adds vegetation type info in the form of a DataArray of strings. -# Also renames "pft" dimension (and all like-named variables, e.g., pft1d_itype_veg_str) to be named like "patch". This can later be reversed, for compatibility with other code, using patch2pft(). -def mfdataset_preproc(ds, vars_to_import, vegtypes_to_import, timeSlice): - # Rename "pft" dimension and variables to "patch", if needed - if "pft" in ds.dims: - pattern = re.compile("pft.*1d") - matches = [x for x in list(ds.keys()) if pattern.search(x) != None] - pft2patch_dict = {"pft": "patch"} - for m in matches: - pft2patch_dict[m] = m.replace("pft", "patch").replace("patchs", "patches") - ds = ds.rename(pft2patch_dict) - - derived_vars = [] - if vars_to_import != None: - # Split vars_to_import into variables that are vs. aren't already in ds - derived_vars = [v for v in vars_to_import if v not in ds] - present_vars = [v for v in vars_to_import if v in ds] - vars_to_import = present_vars - - # Get list of dimensions present in variables in vars_to_import. - dimList = [] - for thisVar in vars_to_import: - # list(set(x)) returns a list of the unique items in x - dimList = list(set(dimList + list(ds.variables[thisVar].dims))) - - # Get any _1d variables that are associated with those dimensions. These will be useful in gridding. Also, if any dimension is "pft", set up to rename it and all like-named variables to "patch" - onedVars = [] - for thisDim in dimList: - pattern = re.compile(f"{thisDim}.*1d") - matches = [x for x in list(ds.keys()) if pattern.search(x) != None] - onedVars = list(set(onedVars + matches)) - - # Add dimensions and _1d variables to vars_to_import - vars_to_import = list(set(vars_to_import + list(ds.dims) + onedVars)) - - # Add any _bounds variables - bounds_vars = [] - for v in vars_to_import: - bounds_var = v + "_bounds" - if bounds_var in ds: - bounds_vars = bounds_vars + [bounds_var] - vars_to_import = vars_to_import + bounds_vars - - # Get list of variables to drop - varlist = list(ds.variables) - vars_to_drop = list(np.setdiff1d(varlist, vars_to_import)) - - # Drop them - ds = ds.drop_vars(vars_to_drop) - - # Add vegetation type info - if "patches1d_itype_veg" in list(ds): - this_pftlist = define_pftlist() - get_patch_ivts( - ds, this_pftlist - ) # Includes check of whether vegtype changes over time anywhere - vegtype_da = get_vegtype_str_da(this_pftlist) - patches1d_itype_veg_str = vegtype_da.values[ - ds.isel(time=0).patches1d_itype_veg.values.astype(int) - ] - npatch = len(patches1d_itype_veg_str) - patches1d_itype_veg_str = xr.DataArray( - patches1d_itype_veg_str, - coords={"patch": np.arange(0, npatch)}, - dims=["patch"], - name="patches1d_itype_veg_str", - ) - ds = xr.merge([ds, vegtype_da, patches1d_itype_veg_str]) - - # Restrict to veg. types of interest, if any - if vegtypes_to_import != None: - ds = xr_flexsel(ds, vegtype=vegtypes_to_import) - - # Restrict to time slice, if any - if timeSlice: - ds = safer_timeslice(ds, timeSlice) - - # Finish import - ds = xr.decode_cf(ds, decode_times=True) - - # Compute derived variables - for v in derived_vars: - if v == "HYEARS" and "HDATES" in ds and ds.HDATES.dims == ("time", "mxharvests", "patch"): - yearList = np.array([np.float32(x.year - 1) for x in ds.time.values]) - hyears = ds["HDATES"].copy() - hyears.values = np.tile( - np.expand_dims(yearList, (1, 2)), (1, ds.dims["mxharvests"], ds.dims["patch"]) - ) - with np.errstate(invalid="ignore"): - is_le_zero = ~np.isnan(ds.HDATES.values) & (ds.HDATES.values <= 0) - hyears.values[is_le_zero] = ds.HDATES.values[is_le_zero] - hyears.values[np.isnan(ds.HDATES.values)] = np.nan - hyears.attrs["long_name"] = "DERIVED: actual crop harvest years" - hyears.attrs["units"] = "year" - ds["HYEARS"] = hyears - - return ds - - -# Import a dataset that can be spread over multiple files, only including specified variables and/or vegetation types and/or timesteps, concatenating by time. DOES actually read the dataset into memory, but only AFTER dropping unwanted variables and/or vegetation types. -def import_ds( - filelist, - myVars=None, - myVegtypes=None, - timeSlice=None, - myVars_missing_ok=[], - only_active_patches=False, - rename_lsmlatlon=False, - chunks=None, -): - # Convert myVegtypes here, if needed, to avoid repeating the process each time you read a file in xr.open_mfdataset(). - if myVegtypes is not None: - if not isinstance(myVegtypes, list): - myVegtypes = [myVegtypes] - if isinstance(myVegtypes[0], str): - myVegtypes = vegtype_str2int(myVegtypes) - - # Same for these variables. - if myVars != None: - if not isinstance(myVars, list): - myVars = [myVars] - if myVars_missing_ok: - if not isinstance(myVars_missing_ok, list): - myVars_missing_ok = [myVars_missing_ok] - - # Make sure lists are actually lists - if not isinstance(filelist, list): - filelist = [filelist] - if not isinstance(myVars_missing_ok, list): - myVars_missing_ok = [myVars_missing_ok] - - # Remove files from list if they don't contain requested timesteps. - # timeSlice should be in the format slice(start,end[,step]). start or end can be None to be unbounded on one side. Note that the standard slice() documentation suggests that only elements through end-1 will be selected, but that seems not to be the case in the xarray implementation. - if timeSlice: - new_filelist = [] - for file in sorted(filelist): - filetime = xr.open_dataset(file).time - filetime_sel = safer_timeslice(filetime, timeSlice) - include_this_file = filetime_sel.size - if include_this_file: - new_filelist.append(file) - - # If you found some matching files, but then you find one that doesn't, stop going through the list. - elif new_filelist: - break - if not new_filelist: - raise RuntimeError(f"No files found in timeSlice {timeSlice}") - filelist = new_filelist - - # The xarray open_mfdataset() "preprocess" argument requires a function that takes exactly one variable (an xarray.Dataset object). Wrapping mfdataset_preproc() in this lambda function allows this. Could also just allow mfdataset_preproc() to access myVars and myVegtypes directly, but that's bad practice as it could lead to scoping issues. - mfdataset_preproc_closure = lambda ds: mfdataset_preproc(ds, myVars, myVegtypes, timeSlice) - - # Import - if isinstance(filelist, list) and len(filelist) == 1: - filelist = filelist[0] - if isinstance(filelist, list): - with warnings.catch_warnings(): - warnings.filterwarnings(action="ignore", category=DeprecationWarning) - if importlib.find_loader("dask") is None: - raise ModuleNotFoundError( - "You have asked xarray to import a list of files as a single Dataset using" - " open_mfdataset(), but this requires dask, which is not available.\nFile" - f" list: {filelist}" - ) - this_ds = xr.open_mfdataset( - sorted(filelist), - data_vars="minimal", - preprocess=mfdataset_preproc_closure, - compat="override", - coords="all", - concat_dim="time", - combine="nested", - chunks=chunks, - ) - elif isinstance(filelist, str): - this_ds = xr.open_dataset(filelist, chunks=chunks) - this_ds = mfdataset_preproc(this_ds, myVars, myVegtypes, timeSlice) - this_ds = this_ds.compute() - - # Include only active patches (or whatever) - if only_active_patches: - is_active = this_ds.patches1d_active.values - p_active = np.where(is_active)[0] - this_ds_active = this_ds.isel(patch=p_active) - - # Warn and/or error about variables that couldn't be imported or derived - if myVars: - missing_vars = [v for v in myVars if v not in this_ds] - ok_missing_vars = [v for v in missing_vars if v in myVars_missing_ok] - bad_missing_vars = [v for v in missing_vars if v not in myVars_missing_ok] - if ok_missing_vars: - print( - "Could not import some variables; either not present or not deriveable:" - f" {ok_missing_vars}" - ) - if bad_missing_vars: - raise RuntimeError( - "Could not import some variables; either not present or not deriveable:" - f" {bad_missing_vars}" - ) - - if rename_lsmlatlon: - if "lsmlat" in this_ds.dims: - this_ds = this_ds.rename({"lsmlat": "lat"}) - if "lsmlon" in this_ds.dims: - this_ds = this_ds.rename({"lsmlon": "lon"}) - - return this_ds - - -# Return a DataArray, with defined coordinates, for a given variable in a dataset. -def get_thisVar_da(thisVar, this_ds): - # Make DataArray for this variable - thisvar_da = np.array(this_ds.variables[thisVar]) - theseDims = this_ds.variables[thisVar].dims - thisvar_da = xr.DataArray(thisvar_da, dims=theseDims) - - # Define coordinates of this variable's DataArray - dimsDict = dict() - for thisDim in theseDims: - dimsDict[thisDim] = this_ds[thisDim] - thisvar_da = thisvar_da.assign_coords(dimsDict) - thisvar_da.attrs = this_ds[thisVar].attrs - - return thisvar_da - - -# Make a geographically gridded DataArray (with dimensions time, vegetation type [as string], lat, lon) of one variable within a Dataset. Optional keyword arguments will be passed to xr_flexsel() to select single steps or slices along the specified ax(ie)s. -# -# fillValue: Default None means grid will be filled with NaN, unless the variable in question already has a fillValue, in which case that will be used. -def grid_one_variable(this_ds, thisVar, fillValue=None, **kwargs): - # Get this Dataset's values for selection(s), if provided - this_ds = xr_flexsel(this_ds, **kwargs) - - # Get DataArrays needed for gridding - thisvar_da = get_thisVar_da(thisVar, this_ds) - vt_da = None - if "patch" in thisvar_da.dims: - spatial_unit = "patch" - xy_1d_prefix = "patches" - if "patches1d_itype_veg" in this_ds: - vt_da = get_thisVar_da("patches1d_itype_veg", this_ds) - elif "gridcell" in thisvar_da.dims: - spatial_unit = "gridcell" - xy_1d_prefix = "grid" - else: - raise RuntimeError( - f"What variables to use for _ixy and _jxy of variable with dims {thisvar_da.dims}?" - ) - ixy_da = get_thisVar_da(xy_1d_prefix + "1d_ixy", this_ds) - jxy_da = get_thisVar_da(xy_1d_prefix + "1d_jxy", this_ds) - - if not fillValue and "_FillValue" in thisvar_da.attrs: - fillValue = thisvar_da.attrs["_FillValue"] - - # Renumber vt_da to work as indices on new ivt dimension, if needed. - ### Ensures that the unique set of vt_da values begins with 1 and - ### contains no missing steps. - if "ivt" in this_ds and vt_da is not None: - vt_da.values = np.array([np.where(this_ds.ivt.values == x)[0][0] for x in vt_da.values]) - - # Get new dimension list - new_dims = list(thisvar_da.dims) - ### Remove "[spatial_unit]". - if spatial_unit in new_dims: - new_dims.remove(spatial_unit) - # Add "ivt_str" (vegetation type, as string). This needs to go at the end, to avoid a possible situation where you wind up with multiple Ellipsis members of fill_indices. - if "ivt" in this_ds and spatial_unit == "patch": - new_dims.append("ivt_str") - ### Add lat and lon to end of list - new_dims = new_dims + ["lat", "lon"] - - # Set up empty array - n_list = [] - for dim in new_dims: - if dim == "ivt_str": - n = this_ds.sizes["ivt"] - elif dim in thisvar_da.coords: - n = thisvar_da.sizes[dim] - else: - n = this_ds.sizes[dim] - n_list = n_list + [n] - thisvar_gridded = np.empty(n_list) - if fillValue: - thisvar_gridded[:] = fillValue - else: - thisvar_gridded[:] = np.NaN - - # Fill with this variable - fill_indices = [] - for dim in new_dims: - if dim == "lat": - fill_indices.append(jxy_da.values.astype(int) - 1) - elif dim == "lon": - fill_indices.append(ixy_da.values.astype(int) - 1) - elif dim == "ivt_str": - fill_indices.append(vt_da) - elif not fill_indices: - # I.e., if fill_indices is empty. Could also do "elif len(fill_indices)==0". - fill_indices.append(Ellipsis) - try: - thisvar_gridded[tuple(fill_indices[: len(fill_indices)])] = thisvar_da.values - except: - thisvar_gridded[tuple(fill_indices[: len(fill_indices)])] = thisvar_da.values.transpose() - if not np.any(np.bitwise_not(np.isnan(thisvar_gridded))): - if np.all(np.isnan(thisvar_da.values)): - print("Warning: This DataArray (and thus map) is all NaN") - else: - raise RuntimeError("thisvar_gridded was not filled!") - - # Assign coordinates, attributes and name - thisvar_gridded = xr.DataArray(thisvar_gridded, dims=tuple(new_dims), attrs=thisvar_da.attrs) - for dim in new_dims: - if dim == "ivt_str": - values = this_ds.vegtype_str.values - elif dim in thisvar_da.coords: - values = thisvar_da[dim] - else: - values = this_ds[dim].values - thisvar_gridded = thisvar_gridded.assign_coords({dim: values}) - thisvar_gridded.name = thisVar - - # Add FillValue attribute - if fillValue: - thisvar_gridded.attrs["_FillValue"] = fillValue - - return thisvar_gridded - - -# ctsm_pylib can't handle time slicing like Dataset.sel(time=slice("1998-01-01", "2005-12-31")) for some reason. This function tries to fall back to slicing by integers. It should work with both Datasets and DataArrays. -def safer_timeslice(ds, timeSlice, timeVar="time"): +def safer_timeslice(ds_in, time_slice, time_var="time"): + """ + ctsm_pylib can't handle time slicing like Dataset.sel(time=slice("1998-01-01", "2005-12-31")) + for some reason. This function tries to fall back to slicing by integers. It should work with + both Datasets and DataArrays. + """ try: - ds = ds.sel({timeVar: timeSlice}) - except: + ds_in = ds_in.sel({time_var: time_slice}) + except: # pylint: disable=bare-except # If the issue might have been slicing using strings, try to fall back to integer slicing - if ( - isinstance(timeSlice.start, str) - and isinstance(timeSlice.stop, str) - and len(timeSlice.start.split("-")) == 3 - and timeSlice.start.split("-")[1:] == ["01", "01"] - and len(timeSlice.stop.split("-")) == 3 + can_try_integer_slicing = ( + isinstance(time_slice.start, str) + and isinstance(time_slice.stop, str) + and len(time_slice.start.split("-")) == 3 + and time_slice.start.split("-")[1:] == ["01", "01"] + and len(time_slice.stop.split("-")) == 3 and ( - timeSlice.stop.split("-")[1:] == ["12", "31"] - or timeSlice.stop.split("-")[1:] == ["01", "01"] + time_slice.stop.split("-")[1:] == ["12", "31"] + or time_slice.stop.split("-")[1:] == ["01", "01"] ) - ): - fileyears = np.array([x.year for x in ds.time.values]) + ) + if can_try_integer_slicing: + fileyears = np.array([x.year for x in ds_in.time.values]) if len(np.unique(fileyears)) != len(fileyears): print("Could not fall back to integer slicing of years: Time axis not annual") raise - yStart = int(timeSlice.start.split("-")[0]) - yStop = int(timeSlice.stop.split("-")[0]) - where_in_timeSlice = np.where((fileyears >= yStart) & (fileyears <= yStop))[0] - ds = ds.isel({timeVar: where_in_timeSlice}) + y_start = int(time_slice.start.split("-")[0]) + y_stop = int(time_slice.stop.split("-")[0]) + where_in_timeslice = np.where((fileyears >= y_start) & (fileyears <= y_stop))[0] + ds_in = ds_in.isel({time_var: where_in_timeslice}) else: - print(f"Could not fall back to integer slicing for timeSlice {timeSlice}") + print(f"Could not fall back to integer slicing for time_slice {time_slice}") raise - return ds + return ds_in -# Convert a longitude axis that's -180 to 180 around the international date line to one that's 0 to 360 around the prime meridian. If you pass in a Dataset or DataArray, the "lon" coordinates will be changed. Otherwise, it assumes you're passing in numeric data. def lon_idl2pm(lons_in, fail_silently=False): + """ + Convert a longitude axis that's -180 to 180 around the international date line to one that's 0 + to 360 around the prime meridian. + + - If you pass in a Dataset or DataArray, the "lon" coordinates will be changed. Otherwise, it + assumes you're passing in numeric data. + """ + def check_ok(tmp, fail_silently): msg = "" @@ -875,10 +334,9 @@ def check_ok(tmp, fail_silently): if msg == "": return True - elif fail_silently: + if fail_silently: return False - else: - raise ValueError(msg) + raise ValueError(msg) def do_it(tmp): tmp = tmp + 360 @@ -909,14 +367,19 @@ def do_it(tmp): return lons_out -# Helper function to check that a list is strictly increasing -def is_strictly_increasing(L): - # https://stackoverflow.com/a/4983359/2965321 - return all(x < y for x, y in zip(L, L[1:])) +def is_strictly_increasing(this_list): + """ + Helper function to check that a list is strictly increasing + + https://stackoverflow.com/a/4983359/2965321 + """ + return all(x < y for x, y in zip(this_list, this_list[1:])) -# Ensure that longitude axis coordinates are monotonically increasing def make_lon_increasing(xr_obj): + """ + Ensure that longitude axis coordinates are monotonically increasing + """ if not "lon" in xr_obj.dims: return xr_obj diff --git a/python/ctsm/crop_calendars/generate_gdds.py b/python/ctsm/crop_calendars/generate_gdds.py index 16e3e130da..156ebfb20e 100644 --- a/python/ctsm/crop_calendars/generate_gdds.py +++ b/python/ctsm/crop_calendars/generate_gdds.py @@ -1,32 +1,29 @@ -paramfile_dir = "/glade/campaign/cesm/cesmdata/cseg/inputdata/lnd/clm2/paramdata" - -# Import other shared functions +""" +Generate maturity requirements (GDD) from outputs of a GDD-generating run +""" import os -import inspect import sys +import pickle +import datetime as dt +import argparse +import logging +import numpy as np +import xarray as xr # Import the CTSM Python utilities. -# sys.path.insert() is necessary for RXCROPMATURITY to work. The fact that it's calling this script in the RUN phase seems to require the python/ directory to be manually added to path. +# sys.path.insert() is necessary for RXCROPMATURITY to work. The fact that it's calling this script +# in the RUN phase seems to require the python/ directory to be manually added to path. _CTSM_PYTHON = os.path.join( os.path.dirname(os.path.realpath(__file__)), os.pardir, os.pardir, os.pardir, "python" ) sys.path.insert(1, _CTSM_PYTHON) -import ctsm.crop_calendars.cropcal_module as cc -import ctsm.crop_calendars.generate_gdds_functions as gddfn - -# Import everything else -import os -import sys -import numpy as np -import xarray as xr -import pickle -import datetime as dt -import argparse -import logging +import ctsm.crop_calendars.cropcal_module as cc # pylint: disable=wrong-import-position +import ctsm.crop_calendars.generate_gdds_functions as gddfn # pylint: disable=wrong-import-position -# Info re: PFT parameter set -my_clm_ver = 51 -my_clm_subver = "c211112" +# Global constants +PARAMFILE_DIR = "/glade/campaign/cesm/cesmdata/cseg/inputdata/lnd/clm2/paramdata" +MY_CLM_VER = 51 +MY_CLM_SUBVER = "c211112" def main( @@ -47,6 +44,7 @@ def main( skip_crops=None, logger=None, ): + # pylint: disable=missing-function-docstring,too-many-statements # Directories to save output files and figures if not output_dir: if only_make_figs: @@ -73,11 +71,14 @@ def main( # Disable plotting if any plotting module is unavailable if save_figs: try: + # pylint: disable=import-outside-toplevel,unused-import,import-error import cartopy import matplotlib - except: + except ModuleNotFoundError as exc: if only_make_figs: - raise RuntimeError("only_make_figs True but not all plotting modules are available") + raise RuntimeError( + "only_make_figs True but not all plotting modules are available" + ) from exc gddfn.log(logger, "Not all plotting modules are available; disabling save_figs") save_figs = False @@ -95,19 +96,21 @@ def main( ########################## if not only_make_figs: - # Keep 1 extra year to avoid incomplete final growing season for crops harvested after Dec. 31. - y1_import_str = f"{first_season+1}-01-01" - yN_import_str = f"{last_season+2}-01-01" + # Keep 1 extra year to avoid incomplete final growing season for crops + # harvested after Dec. 31. + yr_1_import_str = f"{first_season+1}-01-01" + yr_n_import_str = f"{last_season+2}-01-01" gddfn.log( logger, - f"Importing netCDF time steps {y1_import_str} through {yN_import_str} (years are +1 because of CTSM output naming)", + f"Importing netCDF time steps {yr_1_import_str} through {yr_n_import_str} " + + "(years are +1 because of CTSM output naming)", ) pickle_file = os.path.join(output_dir, f"{first_season}-{last_season}.pickle") h2_ds_file = os.path.join(output_dir, f"{first_season}-{last_season}.h2_ds.nc") if os.path.exists(pickle_file): - with open(pickle_file, "rb") as f: + with open(pickle_file, "rb") as file: ( first_season, last_season, @@ -115,14 +118,14 @@ def main( gddaccum_yp_list, gddharv_yp_list, skip_patches_for_isel_nan_lastyear, - lastYear_active_patch_indices_list, + lastyear_active_patch_indices_list, incorrectly_daily, save_figs, incl_vegtypes_str, incl_patches1d_itype_veg, mxsowings, skip_crops, - ) = pickle.load(f) + ) = pickle.load(file) print(f"Will resume import at {pickle_year+1}") h2_ds = None else: @@ -132,17 +135,17 @@ def main( gddaccum_yp_list = [] gddharv_yp_list = [] incl_vegtypes_str = None - lastYear_active_patch_indices_list = None + lastyear_active_patch_indices_list = None sdates_rx = sdates_file hdates_rx = hdates_file if not unlimited_season_length: - mxmats = cc.import_max_gs_length(paramfile_dir, my_clm_ver, my_clm_subver) + mxmats = cc.import_max_gs_length(PARAMFILE_DIR, MY_CLM_VER, MY_CLM_SUBVER) else: mxmats = None - for y, thisYear in enumerate(np.arange(first_season + 1, last_season + 3)): - if thisYear <= pickle_year: + for yr_index, this_yr in enumerate(np.arange(first_season + 1, last_season + 3)): + if this_yr <= pickle_year: continue ( @@ -152,7 +155,7 @@ def main( gddaccum_yp_list, gddharv_yp_list, skip_patches_for_isel_nan_lastyear, - lastYear_active_patch_indices_list, + lastyear_active_patch_indices_list, incorrectly_daily, incl_vegtypes_str, incl_patches1d_itype_veg, @@ -160,14 +163,14 @@ def main( ) = gddfn.import_and_process_1yr( first_season, last_season, - y, - thisYear, + yr_index, + this_yr, sdates_rx, hdates_rx, gddaccum_yp_list, gddharv_yp_list, skip_patches_for_isel_nan_lastyear, - lastYear_active_patch_indices_list, + lastyear_active_patch_indices_list, incorrectly_daily, input_dir, incl_vegtypes_str, @@ -179,16 +182,16 @@ def main( ) gddfn.log(logger, f" Saving pickle file ({pickle_file})...") - with open(pickle_file, "wb") as f: + with open(pickle_file, "wb") as file: pickle.dump( [ first_season, last_season, - thisYear, + this_yr, gddaccum_yp_list, gddharv_yp_list, skip_patches_for_isel_nan_lastyear, - lastYear_active_patch_indices_list, + lastyear_active_patch_indices_list, incorrectly_daily, save_figs, incl_vegtypes_str, @@ -196,7 +199,7 @@ def main( mxsowings, skip_crops, ], - f, + file, protocol=-1, ) @@ -248,35 +251,35 @@ def main( ] dummy_vars = [] dummy_longnames = [] - for v, thisVar in enumerate(all_vars): - if thisVar not in gdd_maps_ds: - dummy_vars.append(thisVar) - dummy_longnames.append(all_longnames[v]) + for var_index, this_var in enumerate(all_vars): + if this_var not in gdd_maps_ds: + dummy_vars.append(this_var) + dummy_longnames.append(all_longnames[var_index]) - def make_dummy(thisCrop_gridded, addend): - dummy_gridded = thisCrop_gridded + def make_dummy(this_crop_gridded, addend): + dummy_gridded = this_crop_gridded dummy_gridded.values = dummy_gridded.values * 0 + addend return dummy_gridded - for v in gdd_maps_ds: - thisCrop_gridded = gdd_maps_ds[v].copy() + for var in gdd_maps_ds: + this_crop_gridded = gdd_maps_ds[var].copy() break - dummy_gridded = make_dummy(thisCrop_gridded, -1) + dummy_gridded = make_dummy(this_crop_gridded, -1) - for v, thisVar in enumerate(dummy_vars): - if thisVar in gdd_maps_ds: + for var_index, this_var in enumerate(dummy_vars): + if this_var in gdd_maps_ds: gddfn.error( - logger, f"{thisVar} is already in gdd_maps_ds. Why overwrite it with dummy?" + logger, f"{this_var} is already in gdd_maps_ds. Why overwrite it with dummy?" ) - dummy_gridded.name = thisVar - dummy_gridded.attrs["long_name"] = dummy_longnames[v] - gdd_maps_ds[thisVar] = dummy_gridded + dummy_gridded.name = this_var + dummy_gridded.attrs["long_name"] = dummy_longnames[var_index] + gdd_maps_ds[this_var] = dummy_gridded # Add lon/lat attributes - def add_lonlat_attrs(ds): - ds.lon.attrs = {"long_name": "coordinate_longitude", "units": "degrees_east"} - ds.lat.attrs = {"long_name": "coordinate_latitude", "units": "degrees_north"} - return ds + def add_lonlat_attrs(this_ds): + this_ds.lon.attrs = {"long_name": "coordinate_longitude", "units": "degrees_east"} + this_ds.lat.attrs = {"long_name": "coordinate_latitude", "units": "degrees_north"} + return this_ds gdd_maps_ds = add_lonlat_attrs(gdd_maps_ds) gddharv_maps_ds = add_lonlat_attrs(gddharv_maps_ds) @@ -297,14 +300,17 @@ def add_lonlat_attrs(ds): def save_gdds(sdates_file, hdates_file, outfile, gdd_maps_ds, sdates_rx): # Set up output file from template (i.e., prescribed sowing dates). template_ds = xr.open_dataset(sdates_file, decode_times=True) - for v in template_ds: - if "sdate" in v: - template_ds = template_ds.drop(v) + for var in template_ds: + if "sdate" in var: + template_ds = template_ds.drop(var) template_ds.to_netcdf(path=outfile, format="NETCDF3_CLASSIC") template_ds.close() # Add global attributes - comment = f"Derived from CLM run plus crop calendar input files {os.path.basename(sdates_file) and {os.path.basename(hdates_file)}}." + comment = ( + "Derived from CLM run plus crop calendar input files " + + f"{os.path.basename(sdates_file) and {os.path.basename(hdates_file)}}." + ) gdd_maps_ds.attrs = { "author": "Sam Rabin (sam.rabin@gmail.com)", "comment": comment, @@ -384,7 +390,11 @@ def add_attrs_to_map_ds( parser.add_argument( "-i", "--input-dir", - help="Directory where run outputs can be found (and where outputs will go). If --only-make-figs, this is the directory with the preprocessed files (e.g., *.pickle file).", + help=( + "Directory where run outputs can be found (and where outputs will go). If " + + "--only-make-figs, this is the directory with the preprocessed files (e.g., *.pickle " + + "file)." + ), required=True, ) parser.add_argument( @@ -464,7 +474,6 @@ def add_attrs_to_map_ds( args = parser.parse_args(sys.argv[1:]) for k, v in sorted(vars(args).items()): print(f"{k}: {v}") - save_figs = not args.dont_save_figs # Call main() main( @@ -474,7 +483,7 @@ def add_attrs_to_map_ds( sdates_file=args.sdates_file, hdates_file=args.hdates_file, output_dir=args.output_dir, - save_figs=save_figs, + save_figs=not args.dont_save_figs, only_make_figs=args.only_make_figs, run1_name=args.run1_name, run2_name=args.run2_name, @@ -484,9 +493,3 @@ def add_attrs_to_map_ds( unlimited_season_length=args.unlimited_season_length, skip_crops=args.skip_crops, ) - -# main(input_dir="/Users/Shared/CESM_runs/tests_10x15_20230329_gddgen/202303301820", -# sdates_file="/Users/Shared/CESM_work/crop_dates_mostrice/sdates_ggcmi_crop_calendar_phase3_v1.01_nninterp-f10_f10_mg37.2000-2000.20230330_165301.nc", -# hdates_file="/Users/Shared/CESM_work/crop_dates_mostrice/hdates_ggcmi_crop_calendar_phase3_v1.01_nninterp-f10_f10_mg37.2000-2000.20230330_165301.nc", -# first_season=1997, last_season=2003, -# save_figs=False) diff --git a/python/ctsm/crop_calendars/generate_gdds_functions.py b/python/ctsm/crop_calendars/generate_gdds_functions.py index cb05f1920d..8af2fdc049 100644 --- a/python/ctsm/crop_calendars/generate_gdds_functions.py +++ b/python/ctsm/crop_calendars/generate_gdds_functions.py @@ -1,85 +1,102 @@ -import numpy as np -import xarray as xr +""" +Functions to support generate_gdds.py +""" +# pylint: disable=too-many-lines,too-many-statements import warnings import os import glob import datetime as dt from importlib import util as importlib_util +import numpy as np +import xarray as xr -# Import the CTSM Python utilities. -# sys.path.insert() is necessary for RXCROPMATURITY to work. The fact that it's calling this script in the RUN phase seems to require the python/ directory to be manually added to path. -_CTSM_PYTHON = os.path.join( - os.path.dirname(os.path.realpath(__file__)), os.pardir, os.pardir, os.pardir, "python" -) -import sys - -sys.path.insert(1, _CTSM_PYTHON) import ctsm.crop_calendars.cropcal_utils as utils import ctsm.crop_calendars.cropcal_module as cc +from ctsm.crop_calendars.xr_flexsel import xr_flexsel +from ctsm.crop_calendars.grid_one_variable import grid_one_variable +from ctsm.crop_calendars.import_ds import import_ds -can_plot = True +CAN_PLOT = True try: + # pylint: disable=wildcard-import,unused-wildcard-import + # pylint: disable=import-error from ctsm.crop_calendars.cropcal_figs_module import * from matplotlib.transforms import Bbox warnings.filterwarnings( "ignore", - message="__len__ for multi-part geometries is deprecated and will be removed in Shapely 2.0. Check the length of the `geoms` property instead to get the number of parts of a multi-part geometry.", + message=( + "__len__ for multi-part geometries is deprecated and will be removed in Shapely " + + "2.0. Check the length of the `geoms` property instead to get the number of " + + "parts of a multi-part geometry." + ), ) warnings.filterwarnings( "ignore", - message="Iteration over multi-part geometries is deprecated and will be removed in Shapely 2.0. Use the `geoms` property to access the constituent parts of a multi-part geometry.", + message=( + "Iteration over multi-part geometries is deprecated and will be removed in Shapely " + + "2.0. Use the `geoms` property to access the constituent parts of a multi-part " + + "geometry." + ), ) print("Will (attempt to) produce harvest requirement map figure files.") -except: +except ModuleNotFoundError: print("Will NOT produce harvest requirement map figure files.") - can_plot = False + CAN_PLOT = False -# Functions to simultaneously print to console and to log file def log(logger, string): + """ + Simultaneously print INFO messages to console and to log file + """ print(string) logger.info(string) def error(logger, string): + """ + Simultaneously print ERROR messages to console and to log file + """ logger.error(string) raise RuntimeError(string) def check_sdates(dates_ds, sdates_rx, logger, verbose=False): + """ + Checking that input and output sdates match + """ log(logger, " Checking that input and output sdates match...") - sdates_grid = utils.grid_one_variable(dates_ds, "SDATES") + sdates_grid = grid_one_variable(dates_ds, "SDATES") all_ok = True any_found = False vegtypes_skipped = [] vegtypes_included = [] - for i, vt_str in enumerate(dates_ds.vegtype_str.values): + for i, vegtype_str in enumerate(dates_ds.vegtype_str.values): # Input - vt = dates_ds.ivt.values[i] - thisVar = f"gs1_{vt}" - if thisVar not in sdates_rx: - vegtypes_skipped = vegtypes_skipped + [vt_str] + vegtype_int = dates_ds.ivt.values[i] + this_var = f"gs1_{vegtype_int}" + if this_var not in sdates_rx: + vegtypes_skipped = vegtypes_skipped + [vegtype_str] # log(logger, f" {vt_str} ({vt}) SKIPPED...") continue - vegtypes_included = vegtypes_included + [vt_str] + vegtypes_included = vegtypes_included + [vegtype_str] any_found = True if verbose: - log(logger, f" {vt_str} ({vt})...") - in_map = sdates_rx[thisVar].squeeze(drop=True) + log(logger, f" {vegtype_str} ({vegtype_int})...") + in_map = sdates_rx[this_var].squeeze(drop=True) # Output - out_map = sdates_grid.sel(ivt_str=vt_str).squeeze(drop=True) + out_map = sdates_grid.sel(ivt_str=vegtype_str).squeeze(drop=True) # Check for differences diff_map = out_map - in_map diff_map_notnan = diff_map.values[np.invert(np.isnan(diff_map.values))] if np.any(diff_map_notnan): - log(logger, f"Difference(s) found in {vt_str}") + log(logger, f"Difference(s) found in {vegtype_str}") here = np.where(diff_map_notnan) log(logger, "in:") in_map_notnan = in_map.values[np.invert(np.isnan(diff_map.values))] @@ -91,7 +108,7 @@ def check_sdates(dates_ds, sdates_rx, logger, verbose=False): log(logger, diff_map_notnan[here][0:4]) all_ok = False - if not (any_found): + if not any_found: error(logger, "No matching variables found in sdates_rx!") # Sanity checks for included vegetation types @@ -102,7 +119,8 @@ def check_sdates(dates_ds, sdates_rx, logger, verbose=False): elif vegtypes_skipped_weird: log( logger, - f"\nWarning: Some crop types had output rainfed patches but no irrigated patches: {vegtypes_skipped_weird}", + "\nWarning: Some crop types had output rainfed patches but no irrigated patches: " + + f"{vegtypes_skipped_weird}", ) if all_ok: @@ -111,34 +129,42 @@ def check_sdates(dates_ds, sdates_rx, logger, verbose=False): error(logger, " ❌ Input and output sdates differ.") -def import_rx_dates(s_or_h, date_inFile, incl_patches1d_itype_veg, mxsowings, logger): - if isinstance(date_inFile, xr.Dataset): - return date_inFile - elif not isinstance(date_inFile, str): +def import_rx_dates(s_or_h, date_infile, incl_patches1d_itype_veg, mxsowings, logger): + """ + Import prescribed sowing or harvest dates + """ + if isinstance(date_infile, xr.Dataset): + return date_infile + if not isinstance(date_infile, str): error( logger, - f"Importing {s_or_h}dates_rx: Expected date_inFile to be str or DataArray, not {type(date_inFile)}", + f"Importing {s_or_h}dates_rx: Expected date_infile to be str or DataArray," + + f"not {type(date_infile)}", ) # Which vegetation types were simulated? - itype_veg_toImport = np.unique(incl_patches1d_itype_veg) + itype_veg_to_import = np.unique(incl_patches1d_itype_veg) - date_varList = [] - for i in itype_veg_toImport: - for g in np.arange(mxsowings): - thisVar = f"{s_or_h}date{g+1}_{i}" - date_varList = date_varList + [thisVar] + date_var_list = [] + for i in itype_veg_to_import: + for n_sowing in np.arange(mxsowings): + this_var = f"{s_or_h}date{n_sowing+1}_{i}" + date_var_list = date_var_list + [this_var] - ds = utils.import_ds(date_inFile, myVars=date_varList) + this_ds = import_ds(date_infile, my_vars=date_var_list) - for v in ds: - ds = ds.rename({v: v.replace(f"{s_or_h}date", "gs")}) + for var in this_ds: + this_ds = this_ds.rename({var: var.replace(f"{s_or_h}date", "gs")}) - return ds + return this_ds -def thisCrop_map_to_patches(lon_points, lat_points, map_ds, vegtype_int): - # xarray pointwise indexing; see https://xarray.pydata.org/en/stable/user-guide/indexing.html#more-advanced-indexing +def this_crop_map_to_patches(lon_points, lat_points, map_ds, vegtype_int): + """ + Given a map, get a vector of patches + """ + # xarray pointwise indexing; + # see https://xarray.pydata.org/en/stable/user-guide/indexing.html#more-advanced-indexing return ( map_ds[f"gs1_{vegtype_int}"] .sel(lon=xr.DataArray(lon_points, dims="patch"), lat=xr.DataArray(lat_points, dims="patch")) @@ -146,8 +172,10 @@ def thisCrop_map_to_patches(lon_points, lat_points, map_ds, vegtype_int): ) -# Get and grid mean GDDs in GGCMI growing season def yp_list_to_ds(yp_list, daily_ds, incl_vegtypes_str, dates_rx, longname_prefix, logger): + """ + Get and grid mean GDDs in GGCMI growing season + """ # Get means warnings.filterwarnings( "ignore", message="Mean of empty slice" @@ -160,44 +188,45 @@ def yp_list_to_ds(yp_list, daily_ds, incl_vegtypes_str, dates_rx, longname_prefi # Grid ds_out = xr.Dataset() - for c, ra in enumerate(p_list): - if isinstance(ra, type(None)): + for this_crop_int, data in enumerate(p_list): + if isinstance(data, type(None)): continue - thisCrop_str = incl_vegtypes_str[c] - log(logger, f" {thisCrop_str}...") - newVar = f"gdd1_{utils.ivt_str2int(thisCrop_str)}" - ds = daily_ds.isel( - patch=np.where(daily_ds.patches1d_itype_veg_str.values == thisCrop_str)[0] + this_crop_str = incl_vegtypes_str[this_crop_int] + log(logger, f" {this_crop_str}...") + new_var = f"gdd1_{utils.ivt_str2int(this_crop_str)}" + this_ds = daily_ds.isel( + patch=np.where(daily_ds.patches1d_itype_veg_str.values == this_crop_str)[0] ) - template_da = ds.patches1d_itype_veg_str - da = xr.DataArray( - data=ra, + template_da = this_ds.patches1d_itype_veg_str + this_da = xr.DataArray( + data=data, coords=template_da.coords, - attrs={"units": "GDD", "long_name": f"{longname_prefix}{thisCrop_str}"}, + attrs={"units": "GDD", "long_name": f"{longname_prefix}{this_crop_str}"}, ) # Grid this crop - ds["tmp"] = da - da_gridded = utils.grid_one_variable(ds, "tmp", vegtype=thisCrop_str).squeeze(drop=True) + this_ds["tmp"] = this_da + da_gridded = grid_one_variable(this_ds, "tmp", vegtype=this_crop_str) + da_gridded = da_gridded.squeeze(drop=True) # Add singleton time dimension and save to output Dataset da_gridded = da_gridded.expand_dims(time=dates_rx.time) - ds_out[newVar] = da_gridded + ds_out[new_var] = da_gridded return ds_out def import_and_process_1yr( - y1, - yN, - y, - thisYear, + year_1, + year_n, + year_index, + this_year, sdates_rx, hdates_rx, gddaccum_yp_list, gddharv_yp_list, - skip_patches_for_isel_nan_lastyear, - lastYear_active_patch_indices_list, + skip_patches_for_isel_nan_last_year, + last_year_active_patch_indices_list, incorrectly_daily, indir, incl_vegtypes_str_in, @@ -207,8 +236,11 @@ def import_and_process_1yr( skip_crops, logger, ): + """ + Import one year of CLM output data for GDD generation + """ save_figs = True - log(logger, f"netCDF year {thisYear}...") + log(logger, f"netCDF year {this_year}...") log(logger, dt.datetime.now().strftime("%Y-%m-%d %H:%M:%S")) # Without dask, this can take a LONG time at resolutions finer than 2-deg @@ -233,11 +265,11 @@ def import_and_process_1yr( crops_to_read = utils.define_mgdcrop_list() print(h1_filelist) - dates_ds = utils.import_ds( + dates_ds = import_ds( h1_filelist, - myVars=["SDATES", "HDATES"], - myVegtypes=crops_to_read, - timeSlice=slice(f"{thisYear}-01-01", f"{thisYear}-12-31"), + my_vars=["SDATES", "HDATES"], + my_vegtypes=crops_to_read, + time_slice=slice(f"{this_year}-01-01", f"{this_year}-12-31"), chunks=chunks, ) @@ -261,8 +293,8 @@ def import_and_process_1yr( np.sum(~np.isnan(dates_ds.HDATES.values), axis=dates_ds.HDATES.dims.index("mxharvests")) == 0 ) - N_unmatched_nans = np.sum(sdates_all_nan != hdates_all_nan) - if N_unmatched_nans > 0: + n_unmatched_nans = np.sum(sdates_all_nan != hdates_all_nan) + if n_unmatched_nans > 0: error(logger, "Output SDATE and HDATE NaN masks do not match.") if np.sum(~np.isnan(dates_ds.SDATES.values)) == 0: error(logger, "All SDATES are NaN!") @@ -270,15 +302,15 @@ def import_and_process_1yr( # Just work with non-NaN patches for now skip_patches_for_isel_nan = np.where(sdates_all_nan)[0] incl_patches_for_isel_nan = np.where(~sdates_all_nan)[0] - different_nan_mask = y > 0 and not np.array_equal( - skip_patches_for_isel_nan_lastyear, skip_patches_for_isel_nan + different_nan_mask = year_index > 0 and not np.array_equal( + skip_patches_for_isel_nan_last_year, skip_patches_for_isel_nan ) if different_nan_mask: log(logger, " Different NaN mask than last year") incl_thisyr_but_nan_lastyr = [ dates_ds.patch.values[p] for p in incl_patches_for_isel_nan - if p in skip_patches_for_isel_nan_lastyear + if p in skip_patches_for_isel_nan_last_year ] else: incl_thisyr_but_nan_lastyr = [] @@ -286,14 +318,15 @@ def import_and_process_1yr( if skipping_patches_for_isel_nan: log( logger, - f" Ignoring {len(skip_patches_for_isel_nan)} patches with all-NaN sowing and harvest dates.", + f" Ignoring {len(skip_patches_for_isel_nan)} patches with all-NaN sowing and " + + "harvest dates.", ) dates_incl_ds = dates_ds.isel(patch=incl_patches_for_isel_nan) else: dates_incl_ds = dates_ds incl_patches1d_itype_veg = dates_incl_ds.patches1d_itype_veg - if y == 0: + if year_index == 0: incl_vegtypes_str = [c for c in dates_incl_ds.vegtype_str.values if c not in skip_crops] else: incl_vegtypes_str = incl_vegtypes_str_in @@ -304,13 +337,15 @@ def import_and_process_1yr( if incl_vegtypes_str != list(dates_incl_ds.vegtype_str.values): error( logger, - f"Included veg types differ. Previously {incl_vegtypes_str}, now {dates_incl_ds.vegtype_str.values}", + f"Included veg types differ. Previously {incl_vegtypes_str}, " + + f"now {dates_incl_ds.vegtype_str.values}", ) if np.sum(~np.isnan(dates_incl_ds.SDATES.values)) == 0: error(logger, "All SDATES are NaN after ignoring those patches!") - # Some patches can have -1 sowing date?? Hopefully just an artifact of me incorrectly saving SDATES/HDATES daily. + # Some patches can have -1 sowing date?? Hopefully just an artifact of me incorrectly saving + # SDATES/HDATES daily. mxsowings = dates_ds.dims["mxsowings"] mxsowings_dim = dates_ds.SDATES.dims.index("mxsowings") skip_patches_for_isel_sdatelt1 = np.where(dates_incl_ds.SDATES.values < 1)[1] @@ -322,7 +357,8 @@ def import_and_process_1yr( if incorrectly_daily and list(unique_hdates) == [364]: log( logger, - f" ❗ {len(skip_patches_for_isel_sdatelt1)} patches have SDATE < 1, but this might have just been because of incorrectly daily outputs. Setting them to 365.", + f" ❗ {len(skip_patches_for_isel_sdatelt1)} patches have SDATE < 1, but this" + + "might have just been because of incorrectly daily outputs. Setting them to 365.", ) new_sdates_ar = dates_incl_ds.SDATES.values if mxsowings_dim != 0: @@ -336,13 +372,16 @@ def import_and_process_1yr( else: error( logger, - f"{len(skip_patches_for_isel_sdatelt1)} patches have SDATE < 1. Unique affected hdates: {unique_hdates}", + f"{len(skip_patches_for_isel_sdatelt1)} patches have SDATE < 1. " + + f"Unique affected hdates: {unique_hdates}", ) - # Some patches can have -1 harvest date?? Hopefully just an artifact of me incorrectly saving SDATES/HDATES daily. Can also happen if patch wasn't active last year + # Some patches can have -1 harvest date?? Hopefully just an artifact of me incorrectly saving + # SDATES/HDATES daily. Can also happen if patch wasn't active last year mxharvests = dates_ds.dims["mxharvests"] mxharvests_dim = dates_ds.HDATES.dims.index("mxharvests") - # If a patch was inactive last year but was either (a) harvested the last time it was active or (b) was never active, it will have -1 as its harvest date this year. Such instances are okay. + # If a patch was inactive last year but was either (a) harvested the last time it was active or + # (b) was never active, it will have -1 as its harvest date this year. Such instances are okay. hdates_thisyr = dates_incl_ds.HDATES.isel(mxharvests=0) skip_patches_for_isel_hdatelt1 = np.where(hdates_thisyr.values < 1)[0] skipping_patches_for_isel_hdatelt1 = len(skip_patches_for_isel_hdatelt1) > 0 @@ -352,7 +391,6 @@ def import_and_process_1yr( patch=incl_thisyr_but_nan_lastyr ) if np.any(hdates_thisyr_where_nan_lastyr < 1): - # patches_to_fix = hdates_thisyr_where_nan_lastyr.isel(patch=np.where(hdates_thisyr_where_nan_lastyr < 1)[0]).patch.values new_hdates = dates_incl_ds.HDATES.values if mxharvests_dim != 0: error(logger, "Code this up") @@ -360,7 +398,10 @@ def import_and_process_1yr( here = [patch_list.index(x) for x in incl_thisyr_but_nan_lastyr] log( logger, - f" ❗ {len(here)} patches have harvest date -1 because they weren't active last year (and were either never active or were harvested when last active). Ignoring, but you should have done a run with patches always active if they are ever active in the real LU timeseries.", + f" ❗ {len(here)} patches have harvest date -1 because they weren't active last" + + "year (and were either never active or were harvested when last active). " + + "Ignoring, but you should have done a run with patches always active if they are " + + "ever active in the real LU timeseries.", ) new_hdates[0, here] = sdates_thisyr_where_nan_lastyr.values - 1 dates_incl_ds["HDATES"] = xr.DataArray( @@ -382,7 +423,9 @@ def import_and_process_1yr( if incorrectly_daily and list(unique_sdates) == [1]: log( logger, - f" ❗ {len(skip_patches_for_isel_hdatelt1)} patches have HDATE < 1??? Seems like this might have just been because of incorrectly daily outputs; setting them to 365.", + f" ❗ {len(skip_patches_for_isel_hdatelt1)} patches have HDATE < 1??? Seems like " + + "this might have just been because of incorrectly daily outputs; setting them to " + + "365.", ) new_hdates_ar = dates_incl_ds.HDATES.values if mxharvests_dim != 0: @@ -396,18 +439,21 @@ def import_and_process_1yr( else: error( logger, - f"{len(skip_patches_for_isel_hdatelt1)} patches have HDATE < 1. Possible causes:\n * Not using constant crop areas (e.g., flanduse_timeseries from make_lu_for_gddgen.py)\n * Not skipping the first 2 years of output\nUnique affected sdates: {unique_sdates}", + f"{len(skip_patches_for_isel_hdatelt1)} patches have HDATE < 1. Possible causes:\n" + + "* Not using constant crop areas (e.g., flanduse_timeseries from " + + "make_lu_for_gddgen.py)\n * Not skipping the first 2 years of output\n" + + f"Unique affected sdates: {unique_sdates}", ) # Make sure there was only one harvest per year - N_extra_harv = np.sum( + n_extra_harv = np.sum( np.nanmax( dates_incl_ds.HDATES.isel(mxharvests=slice(1, mxharvests)).values, axis=mxharvests_dim ) >= 1 ) - if N_extra_harv > 0: - error(logger, f"{N_extra_harv} patches have >1 harvest.") + if n_extra_harv > 0: + error(logger, f"{n_extra_harv} patches have >1 harvest.") # Make sure harvest happened the day before sowing sdates_clm = dates_incl_ds.SDATES.values.squeeze() @@ -432,13 +478,13 @@ def import_and_process_1yr( if mxmats and (imported_sdates or imported_hdates): print(" Limiting growing season length...") hdates_rx = hdates_rx_orig.copy() - for v in hdates_rx_orig: - if v == "time_bounds": + for var in hdates_rx_orig: + if var == "time_bounds": continue # Get max growing season length vegtype_int = int( - v.split("_")[1] + var.split("_")[1] ) # netCDF variable name v should be something like gs1_17 vegtype_str = utils.ivt_int2str(vegtype_int) if vegtype_str == "soybean": @@ -452,42 +498,46 @@ def import_and_process_1yr( continue # Get "prescribed" growing season length - gs_len_rx_da = get_gs_len_da(hdates_rx_orig[v] - sdates_rx[v]) + gs_len_rx_da = get_gs_len_da(hdates_rx_orig[var] - sdates_rx[var]) not_ok = gs_len_rx_da.values > mxmat if not np.any(not_ok): print(f" Not limiting {vegtype_str}: No rx season > {mxmat} days") continue - hdates_limited = hdates_rx_orig[v].copy().values - hdates_limited[np.where(not_ok)] = sdates_rx[v].values[np.where(not_ok)] + mxmat + hdates_limited = hdates_rx_orig[var].copy().values + hdates_limited[np.where(not_ok)] = sdates_rx[var].values[np.where(not_ok)] + mxmat hdates_limited[np.where(hdates_limited > 365)] -= 365 if np.any(hdates_limited < 1): raise RuntimeError("Limited hdates < 1") - elif np.any(hdates_limited > 365): + if np.any(hdates_limited > 365): raise RuntimeError("Limited hdates > 365") - hdates_rx[v] = xr.DataArray( - data=hdates_limited, coords=hdates_rx_orig[v].coords, attrs=hdates_rx_orig[v].attrs + hdates_rx[var] = xr.DataArray( + data=hdates_limited, + coords=hdates_rx_orig[var].coords, + attrs=hdates_rx_orig[var].attrs, ) print( - f" Limited {vegtype_str} growing season length to {mxmat}. Longest was {int(np.max(gs_len_rx_da.values))}, now {int(np.max(get_gs_len_da(hdates_rx[v] - sdates_rx[v]).values))}." + f" Limited {vegtype_str} growing season length to {mxmat}. Longest was " + + f"{int(np.max(gs_len_rx_da.values))}, now " + + f"{int(np.max(get_gs_len_da(hdates_rx[var] - sdates_rx[var]).values))}." ) else: hdates_rx = hdates_rx_orig - log(logger, f" Importing accumulated GDDs...") + log(logger, " Importing accumulated GDDs...") clm_gdd_var = "GDDACCUM" - myVars = [clm_gdd_var, "GDDHARV"] - pattern = os.path.join(indir, f"*h2.{thisYear-1}-01-01*.nc") + my_vars = [clm_gdd_var, "GDDHARV"] + pattern = os.path.join(indir, f"*h2.{this_year-1}-01-01*.nc") h2_files = glob.glob(pattern) if not h2_files: - pattern = os.path.join(indir, f"*h2.{thisYear-1}-01-01*.nc.base") + pattern = os.path.join(indir, f"*h2.{this_year-1}-01-01*.nc.base") h2_files = glob.glob(pattern) if not h2_files: - error(logger, f"No files found matching pattern '*h2.{thisYear-1}-01-01*.nc(.base)'") - h2_ds = utils.import_ds( + error(logger, f"No files found matching pattern '*h2.{this_year-1}-01-01*.nc(.base)'") + h2_ds = import_ds( h2_files, - myVars=myVars, - myVegtypes=crops_to_read, + my_vars=my_vars, + my_vegtypes=crops_to_read, chunks=chunks, ) @@ -503,181 +553,209 @@ def import_and_process_1yr( error(logger, f"All {clm_gdd_var} values are zero!") # Get standard datetime axis for outputs - Nyears = yN - y1 + 1 + n_years = year_n - year_1 + 1 if len(gddaccum_yp_list) == 0: - lastYear_active_patch_indices_list = [None for vegtype_str in incl_vegtypes_str] + last_year_active_patch_indices_list = [None for vegtype_str in incl_vegtypes_str] gddaccum_yp_list = [None for vegtype_str in incl_vegtypes_str] if save_figs: gddharv_yp_list = [None for vegtype_str in incl_vegtypes_str] incl_vegtype_indices = [] - for v, vegtype_str in enumerate(incl_vegtypes_str): + for var, vegtype_str in enumerate(incl_vegtypes_str): if vegtype_str in skip_crops: log(logger, f" SKIPPING {vegtype_str}") continue vegtype_int = utils.vegtype_str2int(vegtype_str)[0] - thisCrop_full_patchlist = list(utils.xr_flexsel(h2_ds, vegtype=vegtype_str).patch.values) + this_crop_full_patchlist = list(xr_flexsel(h2_ds, vegtype=vegtype_str).patch.values) # Get time series for each patch of this type - thisCrop_ds = utils.xr_flexsel(h2_incl_ds, vegtype=vegtype_str) - thisCrop_gddaccum_da = thisCrop_ds[clm_gdd_var] + this_crop_ds = xr_flexsel(h2_incl_ds, vegtype=vegtype_str) + this_crop_gddaccum_da = this_crop_ds[clm_gdd_var] if save_figs: - thisCrop_gddharv_da = thisCrop_ds["GDDHARV"] - if not thisCrop_gddaccum_da.size: + this_crop_gddharv_da = this_crop_ds["GDDHARV"] + if not this_crop_gddaccum_da.size: continue log(logger, f" {vegtype_str}...") - incl_vegtype_indices = incl_vegtype_indices + [v] + incl_vegtype_indices = incl_vegtype_indices + [var] # Get prescribed harvest dates for these patches - lon_points = thisCrop_ds.patches1d_lon.values - lat_points = thisCrop_ds.patches1d_lat.values - thisCrop_hdates_rx = thisCrop_map_to_patches(lon_points, lat_points, hdates_rx, vegtype_int) + lon_points = this_crop_ds.patches1d_lon.values + lat_points = this_crop_ds.patches1d_lat.values + this_crop_hdates_rx = this_crop_map_to_patches( + lon_points, lat_points, hdates_rx, vegtype_int + ) - if isinstance(gddaccum_yp_list[v], type(None)): - gddaccum_yp_list[v] = np.full((Nyears + 1, len(thisCrop_full_patchlist)), np.nan) + if isinstance(gddaccum_yp_list[var], type(None)): + gddaccum_yp_list[var] = np.full((n_years + 1, len(this_crop_full_patchlist)), np.nan) if save_figs: - gddharv_yp_list[v] = np.full((Nyears + 1, len(thisCrop_full_patchlist)), np.nan) + gddharv_yp_list[var] = np.full((n_years + 1, len(this_crop_full_patchlist)), np.nan) # Get the accumulated GDDs at each prescribed harvest date - gddaccum_atharv_p = np.full(thisCrop_hdates_rx.shape, np.nan) + gddaccum_atharv_p = np.full(this_crop_hdates_rx.shape, np.nan) if save_figs: - gddharv_atharv_p = np.full(thisCrop_hdates_rx.shape, np.nan) - unique_rx_hdates = np.unique(thisCrop_hdates_rx.values) + gddharv_atharv_p = np.full(this_crop_hdates_rx.shape, np.nan) + unique_rx_hdates = np.unique(this_crop_hdates_rx.values) # Build an indexing tuple patches = [] i_patches = [] i_times = [] - for i, hdate in enumerate(unique_rx_hdates): - here = np.where(thisCrop_hdates_rx.values == hdate)[0] - patches += list(thisCrop_gddaccum_da.patch.values[here]) + for hdate in unique_rx_hdates: + here = np.where(this_crop_hdates_rx.values == hdate)[0] + patches += list(this_crop_gddaccum_da.patch.values[here]) i_patches += list(here) i_times += list(np.full((len(here),), int(hdate - 1))) # Sort back to correct order if not np.all( - thisCrop_gddaccum_da.patch.values[:-1] <= thisCrop_gddaccum_da.patch.values[1:] + this_crop_gddaccum_da.patch.values[:-1] <= this_crop_gddaccum_da.patch.values[1:] ): error(logger, "This code depends on DataArray patch list being sorted.") sortorder = np.argsort(patches) i_patches = list(np.array(i_patches)[np.array(sortorder)]) i_times = list(np.array(i_times)[np.array(sortorder)]) # Select using the indexing tuple - gddaccum_atharv_p = thisCrop_gddaccum_da.values[(i_times, i_patches)] + gddaccum_atharv_p = this_crop_gddaccum_da.values[(i_times, i_patches)] if save_figs: - gddharv_atharv_p = thisCrop_gddharv_da.values[(i_times, i_patches)] + gddharv_atharv_p = this_crop_gddharv_da.values[(i_times, i_patches)] if np.any(np.isnan(gddaccum_atharv_p)): log( logger, - f" ❗ {np.sum(np.isnan(gddaccum_atharv_p))}/{len(gddaccum_atharv_p)} NaN after extracting GDDs accumulated at harvest", + f" ❗ {np.sum(np.isnan(gddaccum_atharv_p))}/{len(gddaccum_atharv_p)} " + + "NaN after extracting GDDs accumulated at harvest", ) if save_figs and np.any(np.isnan(gddharv_atharv_p)): log( logger, - f" ❗ {np.sum(np.isnan(gddharv_atharv_p))}/{len(gddharv_atharv_p)} NaN after extracting GDDHARV", + f" ❗ {np.sum(np.isnan(gddharv_atharv_p))}/{len(gddharv_atharv_p)} " + + "NaN after extracting GDDHARV", ) # Assign these to growing seasons based on whether gs crossed new year - thisYear_active_patch_indices = [ - thisCrop_full_patchlist.index(x) for x in thisCrop_ds.patch.values + this_year_active_patch_indices = [ + this_crop_full_patchlist.index(x) for x in this_crop_ds.patch.values ] - thisCrop_sdates_rx = thisCrop_map_to_patches(lon_points, lat_points, sdates_rx, vegtype_int) - where_gs_thisyr = np.where(thisCrop_sdates_rx < thisCrop_hdates_rx)[0] - tmp_gddaccum = np.full(thisCrop_sdates_rx.shape, np.nan) + this_crop_sdates_rx = this_crop_map_to_patches( + lon_points, lat_points, sdates_rx, vegtype_int + ) + where_gs_thisyr = np.where(this_crop_sdates_rx < this_crop_hdates_rx)[0] + tmp_gddaccum = np.full(this_crop_sdates_rx.shape, np.nan) tmp_gddaccum[where_gs_thisyr] = gddaccum_atharv_p[where_gs_thisyr] if save_figs: tmp_gddharv = np.full(tmp_gddaccum.shape, np.nan) tmp_gddharv[where_gs_thisyr] = gddharv_atharv_p[where_gs_thisyr] - if y > 0: - lastYear_active_patch_indices = lastYear_active_patch_indices_list[v] - where_gs_lastyr = np.where(thisCrop_sdates_rx > thisCrop_hdates_rx)[0] - active_thisYear_where_gs_lastyr_indices = [ - thisYear_active_patch_indices[x] for x in where_gs_lastyr + if year_index > 0: + last_year_active_patch_indices = last_year_active_patch_indices_list[var] + where_gs_lastyr = np.where(this_crop_sdates_rx > this_crop_hdates_rx)[0] + active_this_year_where_gs_lastyr_indices = [ + this_year_active_patch_indices[x] for x in where_gs_lastyr ] - if not np.array_equal(lastYear_active_patch_indices, thisYear_active_patch_indices): + if not np.array_equal(last_year_active_patch_indices, this_year_active_patch_indices): if incorrectly_daily: log( logger, - " ❗ This year's active patch indices differ from last year's. Allowing because this might just be an artifact of incorrectly daily outputs, BUT RESULTS MUST NOT BE TRUSTED.", + " ❗ This year's active patch indices differ from last year's. " + + "Allowing because this might just be an artifact of incorrectly daily " + + "outputs, BUT RESULTS MUST NOT BE TRUSTED.", ) else: error(logger, "This year's active patch indices differ from last year's.") # Make sure we're not about to overwrite any existing values. if np.any( - ~np.isnan(gddaccum_yp_list[v][y - 1, active_thisYear_where_gs_lastyr_indices]) + ~np.isnan( + gddaccum_yp_list[var][year_index - 1, active_this_year_where_gs_lastyr_indices] + ) ): if incorrectly_daily: log( logger, - " ❗ Unexpected non-NaN for last season's GDD accumulation. Allowing because this might just be an artifact of incorrectly daily outputs, BUT RESULTS MUST NOT BE TRUSTED.", + " ❗ Unexpected non-NaN for last season's GDD accumulation. " + + "Allowing because this might just be an artifact of incorrectly daily " + + "outputs, BUT RESULTS MUST NOT BE TRUSTED.", ) else: error(logger, "Unexpected non-NaN for last season's GDD accumulation") if save_figs and np.any( - ~np.isnan(gddharv_yp_list[v][y - 1, active_thisYear_where_gs_lastyr_indices]) + ~np.isnan( + gddharv_yp_list[var][year_index - 1, active_this_year_where_gs_lastyr_indices] + ) ): if incorrectly_daily: log( logger, - " ❗ Unexpected non-NaN for last season's GDDHARV. Allowing because this might just be an artifact of incorrectly daily outputs, BUT RESULTS MUST NOT BE TRUSTED.", + " ❗ Unexpected non-NaN for last season's GDDHARV. Allowing " + + "because this might just be an artifact of incorrectly daily outputs, " + + "BUT RESULTS MUST NOT BE TRUSTED.", ) else: error(logger, "Unexpected non-NaN for last season's GDDHARV") # Fill. - gddaccum_yp_list[v][y - 1, active_thisYear_where_gs_lastyr_indices] = gddaccum_atharv_p[ - where_gs_lastyr - ] + gddaccum_yp_list[var][ + year_index - 1, active_this_year_where_gs_lastyr_indices + ] = gddaccum_atharv_p[where_gs_lastyr] if save_figs: - gddharv_yp_list[v][ - y - 1, active_thisYear_where_gs_lastyr_indices + gddharv_yp_list[var][ + year_index - 1, active_this_year_where_gs_lastyr_indices ] = gddharv_atharv_p[where_gs_lastyr] # Last year's season should be filled out now; make sure. if np.any( - np.isnan(gddaccum_yp_list[v][y - 1, active_thisYear_where_gs_lastyr_indices]) + np.isnan( + gddaccum_yp_list[var][year_index - 1, active_this_year_where_gs_lastyr_indices] + ) ): if incorrectly_daily: log( logger, - " ❗ Unexpected NaN for last season's GDD accumulation. Allowing because this might just be an artifact of incorrectly daily outputs, BUT RESULTS MUST NOT BE TRUSTED.", + " ❗ Unexpected NaN for last season's GDD accumulation. Allowing " + + "because this might just be an artifact of incorrectly daily outputs, " + + "BUT RESULTS MUST NOT BE TRUSTED.", ) else: error(logger, "Unexpected NaN for last season's GDD accumulation.") if save_figs and np.any( - np.isnan(gddharv_yp_list[v][y - 1, active_thisYear_where_gs_lastyr_indices]) + np.isnan( + gddharv_yp_list[var][year_index - 1, active_this_year_where_gs_lastyr_indices] + ) ): if incorrectly_daily: log( logger, - " ❗ Unexpected NaN for last season's GDDHARV. Allowing because this might just be an artifact of incorrectly daily outputs, BUT RESULTS MUST NOT BE TRUSTED.", + " ❗ Unexpected NaN for last season's GDDHARV. Allowing because " + + "this might just be an artifact of incorrectly daily outputs, BUT " + + "RESULTS MUST NOT BE TRUSTED.", ) else: error(logger, "Unexpected NaN for last season's GDDHARV.") - gddaccum_yp_list[v][y, thisYear_active_patch_indices] = tmp_gddaccum + gddaccum_yp_list[var][year_index, this_year_active_patch_indices] = tmp_gddaccum if save_figs: - gddharv_yp_list[v][y, thisYear_active_patch_indices] = tmp_gddharv + gddharv_yp_list[var][year_index, this_year_active_patch_indices] = tmp_gddharv - # Make sure that NaN masks are the same for this year's sdates and 'filled-out' GDDs from last year - if y > 0: + # Make sure that NaN masks are the same for this year's sdates and 'filled-out' GDDs from + # last year + if year_index > 0: nanmask_output_sdates = np.isnan( dates_ds.SDATES.isel( mxsowings=0, patch=np.where(dates_ds.patches1d_itype_veg_str == vegtype_str)[0] ).values ) - nanmask_output_gdds_lastyr = np.isnan(gddaccum_yp_list[v][y - 1, :]) + nanmask_output_gdds_lastyr = np.isnan(gddaccum_yp_list[var][year_index - 1, :]) if not np.array_equal(nanmask_output_gdds_lastyr, nanmask_output_sdates): if incorrectly_daily: log( logger, - " ❗ NaN masks differ between this year's sdates and 'filled-out' GDDs from last year. Allowing because this might just be an artifact of incorrectly daily outputs, BUT RESULTS MUST NOT BE TRUSTED.", + " ❗ NaN masks differ between this year's sdates and 'filled-out' " + + "GDDs from last year. Allowing because this might just be an artifact of " + + "incorrectly daily outputs, BUT RESULTS MUST NOT BE TRUSTED.", ) else: error( logger, - "NaN masks differ between this year's sdates and 'filled-out' GDDs from last year", + "NaN masks differ between this year's sdates and 'filled-out' GDDs from " + + "last year", ) - lastYear_active_patch_indices_list[v] = thisYear_active_patch_indices + last_year_active_patch_indices_list[var] = this_year_active_patch_indices - skip_patches_for_isel_nan_lastyear = skip_patches_for_isel_nan + skip_patches_for_isel_nan_last_year = skip_patches_for_isel_nan # Could save space by only saving variables needed for gridding log(logger, " Saving h2_ds...") @@ -689,8 +767,8 @@ def import_and_process_1yr( hdates_rx, gddaccum_yp_list, gddharv_yp_list, - skip_patches_for_isel_nan_lastyear, - lastYear_active_patch_indices_list, + skip_patches_for_isel_nan_last_year, + last_year_active_patch_indices_list, incorrectly_daily, incl_vegtypes_str, incl_patches1d_itype_veg, @@ -698,35 +776,37 @@ def import_and_process_1yr( ) -def get_multicrop_maps(ds, theseVars, crop_fracs_yx, dummy_fill, gdd_units): +def get_multicrop_maps(this_ds, these_vars, crop_fracs_yx, dummy_fill, gdd_units): + # pylint: disable=missing-function-docstring # Get GDDs for these crops - da_eachCFT = xr.concat((ds[x] for i, x in enumerate(theseVars)), dim="cft") - if "time" in ds.dims: - da_eachCFT = da_eachCFT.isel(time=0, drop=True) - da_eachCFT = da_eachCFT.where(da_eachCFT != dummy_fill) - da_eachCFT.attrs["units"] = gdd_units + da_each_cft = xr.concat((this_ds[x] for i, x in enumerate(these_vars)), dim="cft") + if "time" in this_ds.dims: + da_each_cft = da_each_cft.isel(time=0, drop=True) + da_each_cft = da_each_cft.where(da_each_cft != dummy_fill) + da_each_cft.attrs["units"] = gdd_units # What are the maximum differences seen between different crop types? - if len(theseVars) > 1: - maxDiff = np.nanmax(da_eachCFT.max(dim="cft") - da_eachCFT.min(dim="cft")) - if maxDiff > 0: - print(f" Max difference among crop types: {np.round(maxDiff)}") + if len(these_vars) > 1: + max_diff = np.nanmax(da_each_cft.max(dim="cft") - da_each_cft.min(dim="cft")) + if max_diff > 0: + print(f" Max difference among crop types: {np.round(max_diff)}") if crop_fracs_yx is None: - return da_eachCFT.isel(cft=0, drop=True) + return da_each_cft.isel(cft=0, drop=True) # Warn if GDD is NaN anywhere that there is area - da_eachCFT["cft"] = crop_fracs_yx["cft"] - gddNaN_areaPos = np.isnan(da_eachCFT) & (crop_fracs_yx > 0) - if np.any(gddNaN_areaPos): - total_bad_croparea = np.nansum(crop_fracs_yx.where(gddNaN_areaPos).values) + da_each_cft["cft"] = crop_fracs_yx["cft"] + gdd_nan_area_pos = np.isnan(da_each_cft) & (crop_fracs_yx > 0) + if np.any(gdd_nan_area_pos): + total_bad_croparea = np.nansum(crop_fracs_yx.where(gdd_nan_area_pos).values) total_croparea = np.nansum(crop_fracs_yx.values) print( - f" GDD reqt NaN but area positive ({np.round(total_bad_croparea/total_croparea*100, 1)}% of this crop's area)" + " GDD reqt NaN but area positive " + + f"({np.round(total_bad_croparea/total_croparea*100, 1)}% of this crop's area)" ) # Get areas and weights, masking cell-crops with NaN GDDs - crop_fracs_yx = crop_fracs_yx.where(~np.isnan(da_eachCFT)) + crop_fracs_yx = crop_fracs_yx.where(~np.isnan(da_each_cft)) crop_area_yx = crop_fracs_yx.sum(dim="cft") weights_yx = crop_fracs_yx / crop_area_yx weights_sum_gt0 = weights_yx.sum(dim="cft").where(weights_yx > 0) @@ -734,45 +814,48 @@ def get_multicrop_maps(ds, theseVars, crop_fracs_yx, dummy_fill, gdd_units): assert np.isclose(np.nanmax(weights_sum_gt0.values), 1.0) # Mask GDDs and weights where there is no area - da_eachCFT = da_eachCFT.where(crop_fracs_yx > 0) - if len(theseVars) == 1: - return da_eachCFT.isel(cft=0, drop=True) + da_each_cft = da_each_cft.where(crop_fracs_yx > 0) + if len(these_vars) == 1: + return da_each_cft.isel(cft=0, drop=True) weights_yx = weights_yx.where(crop_fracs_yx > 0) weights_sum = weights_yx.sum(dim="cft").where(crop_area_yx > 0) assert np.isclose(np.nanmin(weights_sum.values), 1.0) assert np.isclose(np.nanmax(weights_sum.values), 1.0) # Ensure grid match between GDDs and weights - if not np.array_equal(da_eachCFT["lon"].values, weights_yx["lon"].values): + if not np.array_equal(da_each_cft["lon"].values, weights_yx["lon"].values): raise RuntimeError("lon mismatch") - if not np.array_equal(da_eachCFT["lat"].values, weights_yx["lat"].values): + if not np.array_equal(da_each_cft["lat"].values, weights_yx["lat"].values): raise RuntimeError("lat mismatch") # Get area-weighted mean GDD requirements for all crops - da = (da_eachCFT * weights_yx).sum(dim="cft") - da.attrs["units"] = gdd_units - da = da.where(crop_area_yx > 0) + this_da = (da_each_cft * weights_yx).sum(dim="cft") + this_da.attrs["units"] = gdd_units + this_da = this_da.where(crop_area_yx > 0) # Ensure that weighted mean is between each cell's min and max - whereBad = (da < da_eachCFT.min(dim="cft")) | (da > da_eachCFT.max(dim="cft")) - if np.any(whereBad): - where_belowMin = da.where(da < da_eachCFT.min(dim="cft")) - worst_belowMin = np.min((da_eachCFT.min(dim="cft") - where_belowMin).values) - where_aboveMax = da.where(da > da_eachCFT.max(dim="cft")) - worst_aboveMax = np.max((where_aboveMax - da_eachCFT.max(dim="cft")).values) - worst = max(worst_belowMin, worst_aboveMax) + where_bad = (this_da < da_each_cft.min(dim="cft")) | (this_da > da_each_cft.max(dim="cft")) + if np.any(where_bad): + where_below_min = this_da.where(this_da < da_each_cft.min(dim="cft")) + worst_below_min = np.min((da_each_cft.min(dim="cft") - where_below_min).values) + where_above_max = this_da.where(this_da > da_each_cft.max(dim="cft")) + worst_above_max = np.max((where_above_max - da_each_cft.max(dim="cft")).values) + worst = max(worst_below_min, worst_above_max) tol = 1e-12 if worst > 1e-12: raise RuntimeError( f"Some value is outside expected range by {worst} (exceeds tolerance {tol})" ) - return da + return this_da -if can_plot: +if CAN_PLOT: def get_bounds_ncolors(gdd_spacing, diff_map_yx): + """ + Get information about color bar + """ vmax = np.floor(np.nanmax(diff_map_yx.values) / gdd_spacing) * gdd_spacing vmin = -vmax epsilon = np.nextafter(0, 1) @@ -781,11 +864,11 @@ def get_bounds_ncolors(gdd_spacing, diff_map_yx): bounds.remove(0) bounds[bounds.index(-gdd_spacing)] /= 2 bounds[bounds.index(gdd_spacing)] /= 2 - Ncolors = len(bounds) + 1 - return vmax, bounds, Ncolors + n_colors = len(bounds) + 1 + return vmax, bounds, n_colors - def make_map( - ax, + def make_gengdd_map( + this_axis, this_map, this_title, vmax, @@ -798,11 +881,14 @@ def make_map( cbar_ticks=None, vmin=None, ): + """ + Make maps + """ if bounds: if not cmap: raise RuntimeError("Calling make_map() with bounds requires cmap to be specified") norm = mcolors.BoundaryNorm(bounds, cmap.N, extend=extend) - im1 = ax.pcolormesh( + im1 = this_axis.pcolormesh( this_map.lon.values, this_map.lat.values, this_map, @@ -817,11 +903,11 @@ def make_map( if vmin is not None: raise RuntimeError("Do not specify vmin in this call of make_map()") vmin = -vmax - Ncolors = vmax / gdd_spacing - if Ncolors % 2 == 0: - Ncolors += 1 + n_colors = vmax / gdd_spacing + if n_colors % 2 == 0: + n_colors += 1 if not cmap: - cmap = cm.get_cmap(cropcal_colors["div_other_nonnorm"], Ncolors) + cmap = cm.get_cmap(cropcal_colors["div_other_nonnorm"], n_colors) if np.any(this_map.values > vmax) and np.any(this_map.values < vmin): extend = "both" @@ -838,15 +924,15 @@ def make_map( else: vmin = np.floor(vmin / 500) * 500 vmax = np.floor(vmax / 500) * 500 - Ncolors = int(vmax / 500) + n_colors = int(vmax / 500) if not cmap: - cmap = cm.get_cmap(cropcal_colors["seq_other"], Ncolors + 1) + cmap = cm.get_cmap(cropcal_colors["seq_other"], n_colors + 1) extend = "max" extend_color = cmap.colors[-1] - cmap = mcolors.ListedColormap(cmap.colors[:Ncolors]) + cmap = mcolors.ListedColormap(cmap.colors[:n_colors]) cmap.set_over(extend_color) - im1 = ax.pcolormesh( + im1 = this_axis.pcolormesh( this_map.lon.values, this_map.lat.values, this_map, @@ -856,9 +942,9 @@ def make_map( cmap=cmap, ) - ax.set_extent([-180, 180, -63, 90], crs=ccrs.PlateCarree()) - ax.coastlines(linewidth=0.3) - ax.set_title(this_title, fontsize=fontsize_titles, fontweight="bold", y=0.96) + this_axis.set_extent([-180, 180, -63, 90], crs=ccrs.PlateCarree()) + this_axis.coastlines(linewidth=0.3) + this_axis.set_title(this_title, fontsize=fontsize_titles, fontweight="bold", y=0.96) cbar = plt.colorbar( im1, orientation="horizontal", @@ -876,24 +962,30 @@ def make_map( ticks = np.arange(-60, 91, bin_width) ticklabels = [str(x) for x in ticks] - for i, x in enumerate(ticks): - if x % 2: + for i, tick in enumerate(ticks): + if tick % 2: ticklabels[i] = "" plt.yticks(np.arange(-60, 91, 15), labels=ticklabels, fontsize=fontsize_ticklabels) plt.axis("off") - def get_non_nans(in_da, fillValue): - in_da = in_da.where(in_da != fillValue) + def get_non_nans(in_da, fill_value): + """ + Get non-NaN, non-fill values of a DataArray + """ + in_da = in_da.where(in_da != fill_value) return in_da.values[~np.isnan(in_da.values)] - def set_boxplot_props(bp, color, linewidth): + def set_boxplot_props(bpl, color, linewidth): + """ + Set boxplot properties + """ linewidth = 1.5 - plt.setp(bp["boxes"], color=color, linewidth=linewidth) - plt.setp(bp["whiskers"], color=color, linewidth=linewidth) - plt.setp(bp["caps"], color=color, linewidth=linewidth) - plt.setp(bp["medians"], color=color, linewidth=linewidth) + plt.setp(bpl["boxes"], color=color, linewidth=linewidth) + plt.setp(bpl["whiskers"], color=color, linewidth=linewidth) + plt.setp(bpl["caps"], color=color, linewidth=linewidth) + plt.setp(bpl["medians"], color=color, linewidth=linewidth) plt.setp( - bp["fliers"], + bpl["fliers"], markeredgecolor=color, markersize=6, linewidth=linewidth, @@ -901,16 +993,19 @@ def set_boxplot_props(bp, color, linewidth): ) def make_plot(data, offset, linewidth): + """ + Make boxplot + """ offset = 0.4 * offset bpl = plt.boxplot( data, positions=np.array(range(len(data))) * 2.0 + offset, widths=0.6, - boxprops=dict(linewidth=linewidth), - whiskerprops=dict(linewidth=linewidth), - capprops=dict(linewidth=linewidth), - medianprops=dict(linewidth=linewidth), - flierprops=dict(markeredgewidth=0.5), + boxprops={"linewidth": linewidth}, + whiskerprops={"linewidth": linewidth}, + capprops={"linewidth": linewidth}, + medianprops={"linewidth": linewidth}, + flierprops={"markeredgewidth": 0.5}, ) return bpl @@ -921,26 +1016,31 @@ def make_figures( run1_name, run2_name, logger, - thisDir=None, + this_dir=None, gdd_maps_ds=None, gddharv_maps_ds=None, outdir_figs=None, linewidth=1.5, ): + """ + Make map-and-boxplot figures + """ if not gdd_maps_ds: - if not thisDir: + if not this_dir: error( logger, - "If not providing gdd_maps_ds, you must provide thisDir (location of gdd_maps.nc)", + "If not providing gdd_maps_ds, you must provide thisDir (location of " + + "gdd_maps.nc)", ) - gdd_maps_ds = xr.open_dataset(thisDir + "gdd_maps.nc") + gdd_maps_ds = xr.open_dataset(this_dir + "gdd_maps.nc") if not gddharv_maps_ds: - if not thisDir: + if not this_dir: error( logger, - "If not providing gddharv_maps_ds, you must provide thisDir (location of gddharv_maps.nc)", + "If not providing gddharv_maps_ds, you must provide thisDir (location of " + + "gddharv_maps.nc)", ) - gddharv_maps_ds = xr.open_dataset(thisDir + "gdd_maps.nc") + gddharv_maps_ds = xr.open_dataset(this_dir + "gdd_maps.nc") # Get info incl_vegtypes_str = gdd_maps_ds.attrs["incl_vegtypes_str"] @@ -952,19 +1052,19 @@ def make_figures( if not outdir_figs: outdir_figs = gdd_maps_ds.attrs["outdir_figs"] try: - y1 = gdd_maps_ds.attrs["y1"] - yN = gdd_maps_ds.attrs["yN"] + year_1 = gdd_maps_ds.attrs["y1"] + year_n = gdd_maps_ds.attrs["yN"] # Backwards compatibility with a bug (fixed 2023-01-03) - except: - y1 = gdd_maps_ds.attrs["first_season"] - yN = gdd_maps_ds.attrs["last_season"] + except KeyError: + year_1 = gdd_maps_ds.attrs["first_season"] + year_n = gdd_maps_ds.attrs["last_season"] # Import LU data, if doing so if land_use_file: - y1_lu = y1 if first_land_use_year == None else first_land_use_year - yN_lu = yN if last_land_use_year == None else last_land_use_year - lu_ds = cc.open_lu_ds(land_use_file, y1_lu, yN_lu, gdd_maps_ds, ungrid=False) - lu_years_text = f" (masked by {y1_lu}-{yN_lu} area)" - lu_years_file = f"_mask{y1_lu}-{yN_lu}" + year_1_lu = year_1 if first_land_use_year is None else first_land_use_year + year_n_lu = year_n if last_land_use_year is None else last_land_use_year + lu_ds = cc.open_lu_ds(land_use_file, year_1_lu, year_n_lu, gdd_maps_ds, ungrid=False) + lu_years_text = f" (masked by {year_1_lu}-{year_n_lu} area)" + lu_years_file = f"_mask{year_1_lu}-{year_n_lu}" else: lu_ds = None lu_years_text = "" @@ -980,11 +1080,11 @@ def make_figures( fontsize_axislabels = 12 fontsize_ticklabels = 12 - Nbins = len(lat_bin_edges) - 1 + n_bins = len(lat_bin_edges) - 1 bin_names = ["All"] - for b in np.arange(Nbins): - lower = lat_bin_edges[b] - upper = lat_bin_edges[b + 1] + for this_bin in np.arange(n_bins): + lower = lat_bin_edges[this_bin] + upper = lat_bin_edges[this_bin + 1] bin_names.append(f"{lower}–{upper}") color_old = cropcal_colors_cases(run1_name) @@ -996,13 +1096,13 @@ def make_figures( gdd_units = "GDD (°C • day)" # Maps - ny = 3 - nx = 1 + nplot_y = 3 + nplot_x = 1 log(logger, "Making before/after maps...") vegtype_list = incl_vegtypes_str if land_use_file: vegtype_list += ["Corn", "Cotton", "Rice", "Soybean", "Sugarcane", "Wheat"] - for v, vegtype_str in enumerate(vegtype_list): + for vegtype_str in vegtype_list: print(f"{vegtype_str}...") # Get component types @@ -1025,12 +1125,12 @@ def make_figures( else: crop_fracs_yx = None - theseVars = [f"gdd1_{x}" for x in vegtypes_int] + these_vars = [f"gdd1_{x}" for x in vegtypes_int] gddharv_map_yx = get_multicrop_maps( - gddharv_maps_ds, theseVars, crop_fracs_yx, dummy_fill, gdd_units + gddharv_maps_ds, these_vars, crop_fracs_yx, dummy_fill, gdd_units ) gdd_map_yx = get_multicrop_maps( - gdd_maps_ds, theseVars, crop_fracs_yx, dummy_fill, gdd_units + gdd_maps_ds, these_vars, crop_fracs_yx, dummy_fill, gdd_units ) # Get figure title @@ -1048,25 +1148,25 @@ def make_figures( # Set up figure and first subplot if layout == "3x1": fig = plt.figure(figsize=(7.5, 14)) - ax = fig.add_subplot(ny, nx, 1, projection=ccrs.PlateCarree()) + this_axis = fig.add_subplot(nplot_y, nplot_x, 1, projection=ccrs.PlateCarree()) elif layout == "2x2": fig = plt.figure(figsize=(12, 6)) spec = fig.add_gridspec(nrows=2, ncols=2, width_ratios=[0.4, 0.6]) - ax = fig.add_subplot(spec[0, 0], projection=ccrs.PlateCarree()) + this_axis = fig.add_subplot(spec[0, 0], projection=ccrs.PlateCarree()) elif layout == "3x2": fig = plt.figure(figsize=(14, 9)) spec = fig.add_gridspec(nrows=3, ncols=2, width_ratios=[0.5, 0.5], wspace=0.2) - ax = fig.add_subplot(spec[0, 0], projection=ccrs.PlateCarree()) + this_axis = fig.add_subplot(spec[0, 0], projection=ccrs.PlateCarree()) else: error(logger, f"layout {layout} not recognized") - thisMin = int(np.round(np.nanmin(gddharv_map_yx))) - thisMax = int(np.round(np.nanmax(gddharv_map_yx))) - thisTitle = f"{run1_name} (range {thisMin}–{thisMax})" - make_map( - ax, + this_min = int(np.round(np.nanmin(gddharv_map_yx))) + this_max = int(np.round(np.nanmax(gddharv_map_yx))) + this_title = f"{run1_name} (range {this_min}–{this_max})" + make_gengdd_map( + this_axis, gddharv_map_yx, - thisTitle, + this_title, vmax, bin_width, fontsize_ticklabels, @@ -1075,18 +1175,18 @@ def make_figures( ) if layout == "3x1": - ax = fig.add_subplot(ny, nx, 2, projection=ccrs.PlateCarree()) + this_axis = fig.add_subplot(nplot_y, nplot_x, 2, projection=ccrs.PlateCarree()) elif layout in ["2x2", "3x2"]: - ax = fig.add_subplot(spec[1, 0], projection=ccrs.PlateCarree()) + this_axis = fig.add_subplot(spec[1, 0], projection=ccrs.PlateCarree()) else: error(logger, f"layout {layout} not recognized") - thisMin = int(np.round(np.nanmin(gdd_map_yx))) - thisMax = int(np.round(np.nanmax(gdd_map_yx))) - thisTitle = f"{run2_name} (range {thisMin}–{thisMax})" - make_map( - ax, + this_min = int(np.round(np.nanmin(gdd_map_yx))) + this_max = int(np.round(np.nanmax(gdd_map_yx))) + this_title = f"{run2_name} (range {this_min}–{this_max})" + make_gengdd_map( + this_axis, gdd_map_yx, - thisTitle, + this_title, vmax, bin_width, fontsize_ticklabels, @@ -1096,22 +1196,22 @@ def make_figures( # Difference if layout == "3x2": - ax = fig.add_subplot(spec[2, 0], projection=ccrs.PlateCarree()) - thisMin = int(np.round(np.nanmin(gdd_map_yx))) - thisMax = int(np.round(np.nanmax(gdd_map_yx))) - thisTitle = f"{run2_name} minus {run1_name}" + this_axis = fig.add_subplot(spec[2, 0], projection=ccrs.PlateCarree()) + this_min = int(np.round(np.nanmin(gdd_map_yx))) + this_max = int(np.round(np.nanmax(gdd_map_yx))) + this_title = f"{run2_name} minus {run1_name}" diff_map_yx = gdd_map_yx - gddharv_map_yx diff_map_yx.attrs["units"] = gdd_units gdd_spacing = 500 - vmax, bounds, Ncolors = get_bounds_ncolors(gdd_spacing, diff_map_yx) - if Ncolors < 9: + vmax, bounds, n_colors = get_bounds_ncolors(gdd_spacing, diff_map_yx) + if n_colors < 9: gdd_spacing = 250 - vmax, bounds, Ncolors = get_bounds_ncolors(gdd_spacing, diff_map_yx) + vmax, bounds, n_colors = get_bounds_ncolors(gdd_spacing, diff_map_yx) - cmap = cm.get_cmap(cropcal_colors["div_other_nonnorm"], Ncolors) + cmap = cm.get_cmap(cropcal_colors["div_other_nonnorm"], n_colors) cbar_ticks = [] - include_0bin_ticks = Ncolors <= 13 + include_0bin_ticks = n_colors <= 13 if vmax <= 3000: tick_spacing = gdd_spacing * 2 elif vmax <= 5000: @@ -1119,17 +1219,19 @@ def make_figures( else: tick_spacing = 2000 previous = -np.inf - for x in bounds: - if (not include_0bin_ticks) and (x > 0) and (previous < 0): + for bound in bounds: + if (not include_0bin_ticks) and (previous < 0 < bound): cbar_ticks.append(0) - if x % tick_spacing == 0 or (include_0bin_ticks and abs(x) == gdd_spacing / 2): - cbar_ticks.append(x) - previous = x - - make_map( - ax, + if bound % tick_spacing == 0 or ( + include_0bin_ticks and abs(bound) == gdd_spacing / 2 + ): + cbar_ticks.append(bound) + previous = bound + + make_gengdd_map( + this_axis, diff_map_yx, - thisTitle, + this_title, vmax, bin_width, fontsize_ticklabels, @@ -1148,25 +1250,25 @@ def make_figures( lat_abs = np.abs(gdd_map_yx.lat.values) gdd_bybin_old = [gddharv_vector] gdd_bybin_new = [gdd_vector] - for b in np.arange(Nbins): - lower = lat_bin_edges[b] - upper = lat_bin_edges[b + 1] + for this_bin in np.arange(n_bins): + lower = lat_bin_edges[this_bin] + upper = lat_bin_edges[this_bin + 1] lat_inds = np.where((lat_abs >= lower) & (lat_abs < upper))[0] - gdd_vector_thisBin = get_non_nans(gdd_map_yx[lat_inds, :], dummy_fill) - gddharv_vector_thisBin = get_non_nans(gddharv_map_yx[lat_inds, :], dummy_fill) - gdd_bybin_old.append(gddharv_vector_thisBin) - gdd_bybin_new.append(gdd_vector_thisBin) + this_bin_gdd_vector = get_non_nans(gdd_map_yx[lat_inds, :], dummy_fill) + this_bin_gddharv_vector = get_non_nans(gddharv_map_yx[lat_inds, :], dummy_fill) + gdd_bybin_old.append(this_bin_gddharv_vector) + gdd_bybin_new.append(this_bin_gdd_vector) if layout == "3x1": - ax = fig.add_subplot(ny, nx, 3) + this_axis = fig.add_subplot(nplot_y, nplot_x, 3) elif layout in ["2x2", "3x2"]: - ax = fig.add_subplot(spec[:, 1]) + this_axis = fig.add_subplot(spec[:, 1]) else: error(logger, f"layout {layout} not recognized") # Shift bottom of plot up to make room for legend - ax_pos = ax.get_position() - ax.set_position(Bbox.from_extents(ax_pos.x0, 0.19, ax_pos.x1, ax_pos.y1)) + ax_pos = this_axis.get_position() + this_axis.set_position(Bbox.from_extents(ax_pos.x0, 0.19, ax_pos.x1, ax_pos.y1)) # Define legend position legend_bbox_to_anchor = (0, -0.15, 1, 0.2) @@ -1188,13 +1290,13 @@ def make_figures( plt.xticks(range(0, len(bin_names) * 2, 2), bin_names, fontsize=fontsize_ticklabels) plt.yticks(fontsize=fontsize_ticklabels) - ax.spines["right"].set_visible(False) - ax.spines["top"].set_visible(False) + this_axis.spines["right"].set_visible(False) + this_axis.spines["top"].set_visible(False) plt.xlabel("Latitude zone (absolute value)", fontsize=fontsize_axislabels) plt.ylabel(gdd_units, fontsize=fontsize_axislabels) - ax.yaxis.set_label_coords(-0.11, 0.5) - plt.title(f"Zonal changes", fontsize=fontsize_titles, fontweight="bold") + this_axis.yaxis.set_label_coords(-0.11, 0.5) + plt.title("Zonal changes", fontsize=fontsize_titles, fontweight="bold") plt.suptitle( f"Maturity requirements: {vegtype_str_title}" + lu_years_text, @@ -1205,10 +1307,13 @@ def make_figures( if vegtype_str in incl_vegtypes_str: outfile = os.path.join( - outdir_figs, f"{theseVars[0]}_{vegtype_str}_gs{y1}-{yN}{lu_years_file}.png" + outdir_figs, + f"{these_vars[0]}_{vegtype_str}_gs{year_1}-{year_n}{lu_years_file}.png", ) else: - outfile = os.path.join(outdir_figs, f"{vegtype_str}_gs{y1}-{yN}{lu_years_file}.png") + outfile = os.path.join( + outdir_figs, f"{vegtype_str}_gs{year_1}-{year_n}{lu_years_file}.png" + ) plt.savefig(outfile, dpi=300, transparent=False, facecolor="white", bbox_inches="tight") plt.close() diff --git a/python/ctsm/crop_calendars/grid_one_variable.py b/python/ctsm/crop_calendars/grid_one_variable.py new file mode 100644 index 0000000000..cb5d330032 --- /dev/null +++ b/python/ctsm/crop_calendars/grid_one_variable.py @@ -0,0 +1,179 @@ +""" +Make a geographically gridded DataArray (with dimensions time, vegetation type [as string], lat, +lon) of one variable within a Dataset. + +- Optional keyword arguments will be passed to xr_flexsel() to select single steps or slices + along the specified ax(ie)s. +- fill_value: Default None means grid will be filled with NaN, unless the variable in question + already has a _FillValue, in which case that will be used. +""" +import numpy as np +import xarray as xr +from ctsm.crop_calendars.xr_flexsel import xr_flexsel + + +def get_thisvar_da(var, this_ds): + """ + Return a DataArray, with defined coordinates, for a given variable in a dataset. + """ + # Make DataArray for this variable + thisvar_da = np.array(this_ds.variables[var]) + these_dims = this_ds.variables[var].dims + thisvar_da = xr.DataArray(thisvar_da, dims=these_dims) + + # Define coordinates of this variable's DataArray + dims_dict = dict() + for dim in these_dims: + dims_dict[dim] = this_ds[dim] + thisvar_da = thisvar_da.assign_coords(dims_dict) + thisvar_da.attrs = this_ds[var].attrs + + return thisvar_da + + +def convert_to_da(this_ds, var, fill_value, thisvar_da, new_dims, thisvar_gridded): + """ + Convert Numpy array to DataArray with coordinates, attributes and name + """ + thisvar_gridded = xr.DataArray(thisvar_gridded, dims=tuple(new_dims), attrs=thisvar_da.attrs) + for dim in new_dims: + if dim == "ivt_str": + values = this_ds.vegtype_str.values + elif dim in thisvar_da.coords: + values = thisvar_da[dim] + else: + values = this_ds[dim].values + thisvar_gridded = thisvar_gridded.assign_coords({dim: values}) + thisvar_gridded.name = var + + # Add FillValue attribute + if fill_value: + thisvar_gridded.attrs["_FillValue"] = fill_value + return thisvar_gridded + + +def grid_the_data(thisvar_da, vt_da, ixy_da, jxy_da, new_dims, thisvar_gridded): + """ + Fill lat-lon array with previously-ungridded data + """ + fill_indices = [] + for dim in new_dims: + if dim == "lat": + fill_indices.append(jxy_da.values.astype(int) - 1) + elif dim == "lon": + fill_indices.append(ixy_da.values.astype(int) - 1) + elif dim == "ivt_str": + fill_indices.append(vt_da) + elif not fill_indices: + # I.e., if fill_indices is empty. Could also do "elif len(fill_indices)==0". + fill_indices.append(Ellipsis) + try: + thisvar_gridded[tuple(fill_indices[: len(fill_indices)])] = thisvar_da.values + except: # pylint: disable=bare-except + thisvar_gridded[tuple(fill_indices[: len(fill_indices)])] = thisvar_da.values.transpose() + if not np.any(np.bitwise_not(np.isnan(thisvar_gridded))): + if np.all(np.isnan(thisvar_da.values)): + print("Warning: This DataArray (and thus map) is all NaN") + else: + raise RuntimeError("thisvar_gridded was not filled!") + + +def create_filled_array(this_ds, fill_value, thisvar_da, new_dims): + """ + Create a Numpy array to be filled with gridded data + """ + dim_size_list = [] + for dim in new_dims: + if dim == "ivt_str": + dim_size = this_ds.sizes["ivt"] + elif dim in thisvar_da.coords: + dim_size = thisvar_da.sizes[dim] + else: + dim_size = this_ds.sizes[dim] + dim_size_list = dim_size_list + [dim_size] + thisvar_gridded = np.empty(dim_size_list) + if fill_value: + thisvar_gridded[:] = fill_value + else: + thisvar_gridded[:] = np.NaN + return thisvar_gridded + + +def get_ixy_jxy_das(this_ds, var): + """ + Get DataArrays needed for gridding + """ + thisvar_da = get_thisvar_da(var, this_ds) + vt_da = None + if "patch" in thisvar_da.dims: + spatial_unit = "patch" + xy_1d_prefix = "patches" + if "patches1d_itype_veg" in this_ds: + vt_da = get_thisvar_da("patches1d_itype_veg", this_ds) + elif "gridcell" in thisvar_da.dims: + spatial_unit = "gridcell" + xy_1d_prefix = "grid" + else: + raise RuntimeError( + f"What variables to use for _ixy and _jxy of variable with dims {thisvar_da.dims}?" + ) + ixy_da = get_thisvar_da(xy_1d_prefix + "1d_ixy", this_ds) + jxy_da = get_thisvar_da(xy_1d_prefix + "1d_jxy", this_ds) + return thisvar_da, vt_da, spatial_unit, ixy_da, jxy_da + + +def get_new_dim_list(this_ds, thisvar_da, spatial_unit): + """ + Get new dimension list + """ + new_dims = list(thisvar_da.dims) + ### Remove "[spatial_unit]". + if spatial_unit in new_dims: + new_dims.remove(spatial_unit) + # Add "ivt_str" (vegetation type, as string). This needs to go at the end, to avoid a possible + # situation where you wind up with multiple Ellipsis members of fill_indices. + if "ivt" in this_ds and spatial_unit == "patch": + new_dims.append("ivt_str") + ### Add lat and lon to end of list + new_dims = new_dims + ["lat", "lon"] + return new_dims + + +def grid_one_variable(this_ds, var, fill_value=None, **kwargs): + """ + Make a geographically gridded DataArray (with dimensions time, vegetation type [as string], lat, + lon) of one variable within a Dataset. + + - Optional keyword arguments will be passed to xr_flexsel() to select single steps or slices + along the specified ax(ie)s. + - fill_value: Default None means grid will be filled with NaN, unless the variable in question + already has a _FillValue, in which case that will be used. + """ + # Get this Dataset's values for selection(s), if provided + this_ds = xr_flexsel(this_ds, **kwargs) + + # Get DataArrays needed for gridding + thisvar_da, vt_da, spatial_unit, ixy_da, jxy_da = get_ixy_jxy_das(this_ds, var) + + if not fill_value and "_FillValue" in thisvar_da.attrs: + fill_value = thisvar_da.attrs["_FillValue"] + + # Renumber vt_da to work as indices on new ivt dimension, if needed. + ### Ensures that the unique set of vt_da values begins with 1 and + ### contains no missing steps. + if "ivt" in this_ds and vt_da is not None: + vt_da.values = np.array([np.where(this_ds.ivt.values == x)[0][0] for x in vt_da.values]) + + # Get new dimension list + new_dims = get_new_dim_list(this_ds, thisvar_da, spatial_unit) + + # Create a Numpy array to be filled with gridded data + thisvar_gridded = create_filled_array(this_ds, fill_value, thisvar_da, new_dims) + + # Fill lat-lon array with previously-ungridded data + grid_the_data(thisvar_da, vt_da, ixy_da, jxy_da, new_dims, thisvar_gridded) + + # Convert Numpy array to DataArray with coordinates, attributes and name + thisvar_gridded = convert_to_da(this_ds, var, fill_value, thisvar_da, new_dims, thisvar_gridded) + + return thisvar_gridded diff --git a/python/ctsm/crop_calendars/import_ds.py b/python/ctsm/crop_calendars/import_ds.py new file mode 100644 index 0000000000..77a22b626b --- /dev/null +++ b/python/ctsm/crop_calendars/import_ds.py @@ -0,0 +1,267 @@ +""" +Import a dataset that can be spread over multiple files, only including specified variables +and/or vegetation types and/or timesteps, concatenating by time. + +- DOES actually read the dataset into memory, but only AFTER dropping unwanted variables and/or + vegetation types. +""" +import re +import warnings +from importlib.util import find_spec +import numpy as np +import xarray as xr +import ctsm.crop_calendars.cropcal_utils as utils +from ctsm.crop_calendars.xr_flexsel import xr_flexsel + + +def compute_derived_vars(ds_in, var): + """ + Compute derived variables + """ + if ( + var == "HYEARS" + and "HDATES" in ds_in + and ds_in.HDATES.dims == ("time", "mxharvests", "patch") + ): + year_list = np.array([np.float32(x.year - 1) for x in ds_in.time.values]) + hyears = ds_in["HDATES"].copy() + hyears.values = np.tile( + np.expand_dims(year_list, (1, 2)), + (1, ds_in.dims["mxharvests"], ds_in.dims["patch"]), + ) + with np.errstate(invalid="ignore"): + is_le_zero = ~np.isnan(ds_in.HDATES.values) & (ds_in.HDATES.values <= 0) + hyears.values[is_le_zero] = ds_in.HDATES.values[is_le_zero] + hyears.values[np.isnan(ds_in.HDATES.values)] = np.nan + hyears.attrs["long_name"] = "DERIVED: actual crop harvest years" + hyears.attrs["units"] = "year" + ds_in["HYEARS"] = hyears + else: + raise RuntimeError(f"Unable to compute derived variable {var}") + return ds_in + + +def mfdataset_preproc(ds_in, vars_to_import, vegtypes_to_import, time_slice): + """ + Function to drop unwanted variables in preprocessing of open_mfdataset(). + + - Makes sure to NOT drop any unspecified variables that will be useful in gridding. + - Also adds vegetation type info in the form of a DataArray of strings. + - Also renames "pft" dimension (and all like-named variables, e.g., pft1d_itype_veg_str) to be + named like "patch". This can later be reversed, for compatibility with other code, using + patch2pft(). + """ + # Rename "pft" dimension and variables to "patch", if needed + if "pft" in ds_in.dims: + pattern = re.compile("pft.*1d") + matches = [x for x in list(ds_in.keys()) if pattern.search(x) is not None] + pft2patch_dict = {"pft": "patch"} + for match in matches: + pft2patch_dict[match] = match.replace("pft", "patch").replace("patchs", "patches") + ds_in = ds_in.rename(pft2patch_dict) + + derived_vars = [] + if vars_to_import is not None: + # Split vars_to_import into variables that are vs. aren't already in ds + derived_vars = [v for v in vars_to_import if v not in ds_in] + present_vars = [v for v in vars_to_import if v in ds_in] + vars_to_import = present_vars + + # Get list of dimensions present in variables in vars_to_import. + dim_list = [] + for var in vars_to_import: + # list(set(x)) returns a list of the unique items in x + dim_list = list(set(dim_list + list(ds_in.variables[var].dims))) + + # Get any _1d variables that are associated with those dimensions. These will be useful in + # gridding. Also, if any dimension is "pft", set up to rename it and all like-named + # variables to "patch" + oned_vars = [] + for dim in dim_list: + pattern = re.compile(f"{dim}.*1d") + matches = [x for x in list(ds_in.keys()) if pattern.search(x) is not None] + oned_vars = list(set(oned_vars + matches)) + + # Add dimensions and _1d variables to vars_to_import + vars_to_import = list(set(vars_to_import + list(ds_in.dims) + oned_vars)) + + # Add any _bounds variables + bounds_vars = [] + for var in vars_to_import: + bounds_var = var + "_bounds" + if bounds_var in ds_in: + bounds_vars = bounds_vars + [bounds_var] + vars_to_import = vars_to_import + bounds_vars + + # Get list of variables to drop + varlist = list(ds_in.variables) + vars_to_drop = list(np.setdiff1d(varlist, vars_to_import)) + + # Drop them + ds_in = ds_in.drop_vars(vars_to_drop) + + # Add vegetation type info + if "patches1d_itype_veg" in list(ds_in): + this_pftlist = utils.define_pftlist() + utils.get_patch_ivts( + ds_in, this_pftlist + ) # Includes check of whether vegtype changes over time anywhere + vegtype_da = utils.get_vegtype_str_da(this_pftlist) + patches1d_itype_veg_str = vegtype_da.values[ + ds_in.isel(time=0).patches1d_itype_veg.values.astype(int) + ] + npatch = len(patches1d_itype_veg_str) + patches1d_itype_veg_str = xr.DataArray( + patches1d_itype_veg_str, + coords={"patch": np.arange(0, npatch)}, + dims=["patch"], + name="patches1d_itype_veg_str", + ) + ds_in = xr.merge([ds_in, vegtype_da, patches1d_itype_veg_str]) + + # Restrict to veg. types of interest, if any + if vegtypes_to_import is not None: + ds_in = xr_flexsel(ds_in, vegtype=vegtypes_to_import) + + # Restrict to time slice, if any + if time_slice: + ds_in = utils.safer_timeslice(ds_in, time_slice) + + # Finish import + ds_in = xr.decode_cf(ds_in, decode_times=True) + + # Compute derived variables + for var in derived_vars: + ds_in = compute_derived_vars(ds_in, var) + + return ds_in + + +def process_inputs(filelist, my_vars, my_vegtypes, my_vars_missing_ok): + """ + Process inputs to import_ds() + """ + if my_vars_missing_ok is None: + my_vars_missing_ok = [] + # Convert my_vegtypes here, if needed, to avoid repeating the process each time you read a file + # in xr.open_mfdataset(). + if my_vegtypes is not None: + if not isinstance(my_vegtypes, list): + my_vegtypes = [my_vegtypes] + if isinstance(my_vegtypes[0], str): + my_vegtypes = utils.vegtype_str2int(my_vegtypes) + + # Same for these variables. + if my_vars is not None: + if not isinstance(my_vars, list): + my_vars = [my_vars] + if my_vars_missing_ok: + if not isinstance(my_vars_missing_ok, list): + my_vars_missing_ok = [my_vars_missing_ok] + + # Make sure lists are actually lists + if not isinstance(filelist, list): + filelist = [filelist] + if not isinstance(my_vars_missing_ok, list): + my_vars_missing_ok = [my_vars_missing_ok] + return filelist, my_vars, my_vegtypes, my_vars_missing_ok + + +def import_ds( + filelist, + my_vars=None, + my_vegtypes=None, + time_slice=None, + my_vars_missing_ok=None, + rename_lsmlatlon=False, + chunks=None, +): + """ + Import a dataset that can be spread over multiple files, only including specified variables + and/or vegetation types and/or timesteps, concatenating by time. + + - DOES actually read the dataset into memory, but only AFTER dropping unwanted variables and/or + vegetation types. + """ + filelist, my_vars, my_vegtypes, my_vars_missing_ok = process_inputs( + filelist, my_vars, my_vegtypes, my_vars_missing_ok + ) + + # Remove files from list if they don't contain requested timesteps. + # time_slice should be in the format slice(start,end[,step]). start or end can be None to be + # unbounded on one side. Note that the standard slice() documentation suggests that only + # elements through end-1 will be selected, but that seems not to be the case in the xarray + # implementation. + if time_slice: + new_filelist = [] + for file in sorted(filelist): + filetime = xr.open_dataset(file).time + filetime_sel = utils.safer_timeslice(filetime, time_slice) + include_this_file = filetime_sel.size + if include_this_file: + new_filelist.append(file) + + # If you found some matching files, but then you find one that doesn't, stop going + # through the list. + elif new_filelist: + break + if not new_filelist: + raise RuntimeError(f"No files found in time_slice {time_slice}") + filelist = new_filelist + + # The xarray open_mfdataset() "preprocess" argument requires a function that takes exactly one + # variable (an xarray.Dataset object). Wrapping mfdataset_preproc() in this lambda function + # allows this. Could also just allow mfdataset_preproc() to access my_vars and my_vegtypes + # directly, but that's bad practice as it could lead to scoping issues. + mfdataset_preproc_closure = lambda ds: mfdataset_preproc(ds, my_vars, my_vegtypes, time_slice) + + # Import + if isinstance(filelist, list) and len(filelist) == 1: + filelist = filelist[0] + if isinstance(filelist, list): + with warnings.catch_warnings(): + warnings.filterwarnings(action="ignore", category=DeprecationWarning) + if find_spec("dask") is None: + raise ModuleNotFoundError( + "You have asked xarray to import a list of files as a single Dataset using" + " open_mfdataset(), but this requires dask, which is not available.\nFile" + f" list: {filelist}" + ) + this_ds = xr.open_mfdataset( + sorted(filelist), + data_vars="minimal", + preprocess=mfdataset_preproc_closure, + compat="override", + coords="all", + concat_dim="time", + combine="nested", + chunks=chunks, + ) + elif isinstance(filelist, str): + this_ds = xr.open_dataset(filelist, chunks=chunks) + this_ds = mfdataset_preproc(this_ds, my_vars, my_vegtypes, time_slice) + this_ds = this_ds.compute() + + # Warn and/or error about variables that couldn't be imported or derived + if my_vars: + missing_vars = [v for v in my_vars if v not in this_ds] + ok_missing_vars = [v for v in missing_vars if v in my_vars_missing_ok] + bad_missing_vars = [v for v in missing_vars if v not in my_vars_missing_ok] + if ok_missing_vars: + print( + "Could not import some variables; either not present or not deriveable:" + f" {ok_missing_vars}" + ) + if bad_missing_vars: + raise RuntimeError( + "Could not import some variables; either not present or not deriveable:" + f" {bad_missing_vars}" + ) + + if rename_lsmlatlon: + if "lsmlat" in this_ds.dims: + this_ds = this_ds.rename({"lsmlat": "lat"}) + if "lsmlon" in this_ds.dims: + this_ds = this_ds.rename({"lsmlon": "lon"}) + + return this_ds diff --git a/python/ctsm/crop_calendars/process_ggcmi_shdates.py b/python/ctsm/crop_calendars/process_ggcmi_shdates.py index 835f91cb22..cada2b421b 100644 --- a/python/ctsm/crop_calendars/process_ggcmi_shdates.py +++ b/python/ctsm/crop_calendars/process_ggcmi_shdates.py @@ -1,16 +1,21 @@ -import numpy as np -import xarray as xr -import os -import datetime as dt -import cftime +""" +Convert GGCMI crop calendar files for use in CTSM +""" + import sys import argparse import logging +import os +import datetime as dt +import numpy as np +import xarray as xr +import cftime # -- add python/ctsm to path (needed if we want to run process_ggcmi_shdates stand-alone) _CTSM_PYTHON = os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir, os.pardir) sys.path.insert(1, _CTSM_PYTHON) +# pylint: disable=wrong-import-position from ctsm import ctsm_logging import ctsm.crop_calendars.cropcal_utils as utils import ctsm.crop_calendars.regrid_ggcmi_shdates as regrid @@ -18,19 +23,28 @@ logger = logging.getLogger(__name__) -def get_cft(y): - return cftime.DatetimeNoLeap(y, 1, 1, 0, 0, 0, 0, has_year_zero=True) +def get_cft(year): + """ + Given a year, return the cftime.DatetimeNoLeap of Jan. 1 at 00:00. + """ + return cftime.DatetimeNoLeap(year, 1, 1, 0, 0, 0, 0, has_year_zero=True) -def get_dayssince_jan1y1(y1, y): - cft_y1 = get_cft(y1) - cft_y = get_cft(y) +def get_dayssince_jan1y1(year1, year): + """ + Get the number of days since Jan. 1 of year1 + """ + cft_y1 = get_cft(year1) + cft_y = get_cft(year) time_delta = cft_y - cft_y1 time_delta_secs = time_delta.total_seconds() return time_delta_secs / (60 * 60 * 24) def main(): + """ + main() function for calling process_ggcmi_shdates.py from command line. + """ ctsm_logging.setup_logging_pre_config() args = process_ggcmi_shdates_args() process_ggcmi_shdates( @@ -40,7 +54,6 @@ def main(): args.file_specifier, args.first_year, args.last_year, - args.verbose, args.ggcmi_author, args.regrid_resolution, args.regrid_template_file, @@ -50,8 +63,14 @@ def main(): def process_ggcmi_shdates_args(): + """ + Set up and parse input arguments for working with GGCMI crop calendar files + """ parser = argparse.ArgumentParser( - description="Converts raw sowing and harvest date files provided by GGCMI into a format that CLM can read, optionally at a target resolution." + description=( + "Converts raw sowing and harvest date files provided by GGCMI into " + + "a format that CLM can read, optionally at a target resolution." + ) ) # Required @@ -72,7 +91,10 @@ def process_ggcmi_shdates_args(): parser.add_argument( "-a", "--author", - help="String to be saved in author_thisfile attribute of output files. E.g., 'Author Name (authorname@ucar.edu)'", + help=( + "String to be saved in author_thisfile attribute of output files. " + + "E.g., 'Author Name (authorname@ucar.edu)'" + ), type=str, required=True, ) @@ -80,21 +102,30 @@ def process_ggcmi_shdates_args(): # Optional parser.add_argument( "--file-specifier", - help="String following CROP_IRR_ in input filenames. E.g., mai_ir_FILESPECIFIER.nc4. Will also be saved to output filenames.", + help=( + "String following CROP_IRR_ in input filenames. E.g., mai_ir_FILESPECIFIER.nc4. " + + "Will also be saved to output filenames." + ), type=str, default="ggcmi_crop_calendar_phase3_v1.01", ) parser.add_argument( "-y1", "--first-year", - help="First year in output files. Must be present in template file, unless it's the same as the last year.", + help=( + "First year in output files. Must be present in template file, " + + "unless it's the same as the last year." + ), type=int, default=2000, ) parser.add_argument( "-yN", "--last-year", - help="Last year in output files. Must be present in template file, unless it's the same as the first year.", + help=( + "Last year in output files. Must be present in template file, " + + "unless it's the same as the first year." + ), type=int, default=2000, ) @@ -117,53 +148,19 @@ def process_ggcmi_shdates_args(): return args -def process_ggcmi_shdates( - input_directory, - output_directory, - author, - file_specifier, - first_year, - last_year, - verbose, - ggcmi_author, - regrid_resolution, - regrid_template_file, - regrid_extension, - crop_list, -): - - input_directory = os.path.realpath(input_directory) - output_directory = os.path.realpath(output_directory) - - ############################################################ - ### Regrid original GGCMI files to target CLM resolution ### - ############################################################ - - regridded_ggcmi_files_dir = os.path.join( - output_directory, f"regridded_ggcmi_files-{regrid_resolution}" - ) +def setup_crop_dict(): + """ + Associate CLM crop names with (1) their integer counterpart and (2) their GGCMI counterpart. - regrid.regrid_ggcmi_shdates( - regrid_resolution, - regrid_template_file, - input_directory, - regridded_ggcmi_files_dir, - regrid_extension, - crop_list, - ) + Some notes: + - As "CLMname: {clm_num, thiscrop_ggcmi}" + - CLM names and numbers taken from commit 3dcbc7499a57904750a994672fc36b4221b9def5 + - Using one global GGCMI value for both temperate and tropical versions of corn and soybean. + - There is no GGCMI equivalent of CLM's winter barley and rye. Using winter wheat instead. + - Using GGCMI "pea" for CLM pulses, as suggested by GGCMI phase 3 protocol. + - Only using GGCMI "ri1" for rice; ignoring "ri2". + """ - ########################### - ### Define dictionaries ### - ########################### - - # First, we associate CLM crop names with (1) their integer counterpart and (2) their GGCMI counterpart. - # Some notes: - # - As "CLMname: {clm_num, thiscrop_ggcmi}" - # - CLM names and numbers taken from commit `3dcbc7499a57904750a994672fc36b4221b9def5` - # - Using one global GGCMI value for both temperate and tropical versions of corn and soybean. - # - There is no GGCMI equivalent of CLM's winter barley and rye. Using winter wheat instead. - # - Using GGCMI `pea` for CLM pulses, as suggested by GGCMI phase 3 protocol. - # - Only using GGCMI `ri1` for rice; ignoring `ri2`. def set_crop_dict(thisnum, thisname): return {"clm_num": thisnum, "thiscrop_ggcmi": thisname} @@ -234,8 +231,16 @@ def set_crop_dict(thisnum, thisname): "c3_irrigated": set_crop_dict(16, None), } - # Next, we associate CLM variable names with their GGCMI counterparts. We also save a placeholder for output file paths associated with each variable. - # As CLMname: {GGCMIname, output_file} + return crop_dict + + +def setup_var_dict(): + """ + Associate CLM variable names with their GGCMI counterparts. + - We also save a placeholder for output file paths associated with each variable. + - As CLMname: {GGCMIname, output_file} + """ + def set_var_dict(name_ggcmi, outfile): return {"name_ggcmi": name_ggcmi, "outfile": outfile} @@ -243,23 +248,178 @@ def set_var_dict(name_ggcmi, outfile): "sdate": set_var_dict("planting_day", ""), "hdate": set_var_dict("maturity_day", ""), } + return variable_dict + + +def set_var_attrs(thisvar_da, thiscrop_clm, thiscrop_ggcmi, varname_ggcmi, new_fillvalue): + """ + Set output variable attributes + """ + + longname = thisvar_da.attrs["long_name"] + longname = longname.replace("rainfed", thiscrop_clm).replace("irrigated", thiscrop_clm) + thisvar_da.attrs["long_name"] = longname + + if thiscrop_ggcmi is None: + thisvar_da.attrs["crop_name_clm"] = "none" + thisvar_da.attrs["crop_name_ggcmi"] = "none" + else: + thisvar_da.attrs["crop_name_clm"] = thiscrop_clm + thisvar_da.attrs["crop_name_ggcmi"] = thiscrop_ggcmi + + thisvar_da.attrs["short_name_ggcmi"] = varname_ggcmi + thisvar_da.attrs["units"] = "day of year" + thisvar_da.encoding["_FillValue"] = new_fillvalue + + # scale_factor and add_offset are required by I/O library for short data + # From https://www.unidata.ucar.edu/software/netcdf/workshops/2010/bestpractices/Packing.html: + # unpacked_value = packed_value * scale_factor + add_offset + thisvar_da.attrs["scale_factor"] = np.int16(1) + thisvar_da.attrs["add_offset"] = np.int16(0) + return thisvar_da + + +def fill_convert_int(thisvar_ds, thiscrop_ggcmi, varname_ggcmi, new_fillvalue): + """ + Ensure fill value and real data are correct format + """ + dummyvalue = -1 + thisvar_ds.variables[varname_ggcmi].encoding["_FillValue"] = new_fillvalue + if thiscrop_ggcmi is None: + thisvar_ds.variables[varname_ggcmi].values.fill(dummyvalue) + else: + thisvar_ds.variables[varname_ggcmi].values[ + np.isnan(thisvar_ds.variables[varname_ggcmi].values) + ] = new_fillvalue + thisvar_ds.variables[varname_ggcmi].values = thisvar_ds.variables[ + varname_ggcmi + ].values.astype("int16") + + return thisvar_ds + + +def add_time_dim(thisvar_ds, template_ds, varname_ggcmi, varname_clm): + """ + Add time dimension (https://stackoverflow.com/a/62862440) + - Repeats original map for every timestep + - Probably not necessary to use this method, since I only end up extracting thisvar_ds.values + anyway---I could probably use some numpy method instead. + """ + + thisvar_ds = thisvar_ds.expand_dims(time=template_ds.time) + thisvar_da_tmp = thisvar_ds[varname_ggcmi] + thisvar_da = xr.DataArray( + data=thisvar_da_tmp.values.astype("int16"), + attrs=thisvar_da_tmp.attrs, + coords=thisvar_da_tmp.coords, + name=varname_clm, + ) + + return thisvar_da + + +def create_output_files( + regrid_resolution, + variable_dict, + output_directory, + file_specifier, + first_year, + last_year, + template_ds, +): + """ + Create output files, one for each variable + """ + datetime_string = dt.datetime.now().strftime("%year%m%d_%H%M%S") + nninterp_suffix = "nninterp-" + regrid_resolution + for var in variable_dict: + basename = ( + f"{var}s_{file_specifier}_{nninterp_suffix}." + + f"{first_year}-{last_year}.{datetime_string}.nc" + ) + outfile = os.path.join(output_directory, basename) + variable_dict[var]["outfile"] = outfile + template_ds.to_netcdf( + path=variable_dict[var]["outfile"], + format="NETCDF3_CLASSIC", + ) + + return nninterp_suffix + + +def strip_dataset(cropcal_ds, varname_ggcmi): + """ + Remove all variables except one from Dataset + """ + droplist = [] + for i in list(cropcal_ds.keys()): + if i != varname_ggcmi: + droplist.append(i) + thisvar_ds = cropcal_ds.drop(droplist) + return thisvar_ds + + +def process_ggcmi_shdates( + input_directory, + output_directory, + author, + file_specifier, + first_year, + last_year, + ggcmi_author, + regrid_resolution, + regrid_template_file, + regrid_extension, + crop_list, +): + """ + Convert GGCMI crop calendar files for use in CTSM + """ + + input_directory = os.path.realpath(input_directory) + output_directory = os.path.realpath(output_directory) + + ############################################################ + ### Regrid original GGCMI files to target CLM resolution ### + ############################################################ + + regridded_ggcmi_files_dir = os.path.join( + output_directory, f"regridded_ggcmi_files-{regrid_resolution}" + ) + + regrid.regrid_ggcmi_shdates( + regrid_resolution, + regrid_template_file, + input_directory, + regridded_ggcmi_files_dir, + regrid_extension, + crop_list, + ) + + # Set up dictionaries used in remapping crops and variables between GGCMI and CLM + crop_dict = setup_crop_dict() + variable_dict = setup_var_dict() ################################ ### Instantiate output files ### ################################ # Global attributes for output files + comment = ( + "Day of year is 1-indexed (i.e., Jan. 1 = 1). " + + "Filled using cdo -remapnn,$original -setmisstonn" + ) out_attrs = { "title": "GGCMI crop calendar for Phase 3, v1.01", "author_thisfile": author, "author_original": ggcmi_author, - "comment": "Day of year is 1-indexed (i.e., Jan. 1 = 1). Filled using cdo -remapnn,$original -setmisstonn", + "comment": comment, "created": dt.datetime.now().replace(microsecond=0).astimezone().isoformat(), } # Create template dataset time_array = np.array( - [get_dayssince_jan1y1(first_year, y) for y in np.arange(first_year, last_year + 1)] + [get_dayssince_jan1y1(first_year, year) for year in np.arange(first_year, last_year + 1)] ) time_coord = xr.IndexVariable( "time", @@ -273,18 +433,15 @@ def set_var_dict(name_ggcmi, outfile): template_ds = xr.Dataset(coords={"time": time_coord}, attrs=out_attrs) # Create output files - datetime_string = dt.datetime.now().strftime("%Y%m%d_%H%M%S") - nninterp_suffix = "nninterp-" + regrid_resolution - for v in variable_dict: - outfile = os.path.join( - output_directory, - f"{v}s_{file_specifier}_{nninterp_suffix}.{first_year}-{last_year}.{datetime_string}.nc", - ) - variable_dict[v]["outfile"] = outfile - template_ds.to_netcdf( - path=variable_dict[v]["outfile"], - format="NETCDF3_CLASSIC", - ) + nninterp_suffix = create_output_files( + regrid_resolution, + variable_dict, + output_directory, + file_specifier, + first_year, + last_year, + template_ds, + ) ######################### ### Process all crops ### @@ -293,7 +450,7 @@ def set_var_dict(name_ggcmi, outfile): for thiscrop_clm in crop_dict: # Which crop are we on? - c = list(crop_dict.keys()).index(thiscrop_clm) + 1 + crop_int = list(crop_dict.keys()).index(thiscrop_clm) + 1 # Get information about this crop this_dict = crop_dict[thiscrop_clm] @@ -306,18 +463,24 @@ def set_var_dict(name_ggcmi, outfile): # If no corresponding GGCMI crop, skip opening dataset. # Will use previous cropcal_ds as a template. - if thiscrop_ggcmi == None: - if c == 1: + if thiscrop_ggcmi is None: + if crop_int == 1: raise ValueError(f"First crop ({thiscrop_clm}) must have a GGCMI type") logger.info( - "Filling %s with dummy data (%d of %d)..." % (str(thiscrop_clm), c, len(crop_dict)) + "Filling %s with dummy data (%d of %d)...", + str(thiscrop_clm), + crop_int, + len(crop_dict), ) # Otherwise, import crop calendar file else: logger.info( - "Importing %s -> %s (%d of %d)..." - % (str(thiscrop_ggcmi), str(thiscrop_clm), c, len(crop_dict)) + "Importing %s -> %s (%d of %d)...", + str(thiscrop_ggcmi), + str(thiscrop_clm), + crop_int, + len(crop_dict), ) file_ggcmi = os.path.join( @@ -326,7 +489,7 @@ def set_var_dict(name_ggcmi, outfile): ) if not os.path.exists(file_ggcmi): logger.warning( - f"Skipping {thiscrop_ggcmi} because input file not found: {file_ggcmi}" + "Skipping %s because input file not found: %s", thiscrop_ggcmi, file_ggcmi ) continue cropcal_ds = xr.open_dataset(file_ggcmi) @@ -338,7 +501,7 @@ def set_var_dict(name_ggcmi, outfile): for thisvar_clm in variable_dict: # Get GGCMI netCDF info varname_ggcmi = variable_dict[thisvar_clm]["name_ggcmi"] - logger.info(" Processing %s..." % varname_ggcmi) + logger.info(" Processing %s...", varname_ggcmi) # Get CLM netCDF info varname_clm = thisvar_clm + "1_" + str(thiscrop_int) @@ -347,69 +510,21 @@ def set_var_dict(name_ggcmi, outfile): raise Exception("Output file not found: " + file_clm) # Strip dataset to just this variable - droplist = [] - for i in list(cropcal_ds.keys()): - if i != varname_ggcmi: - droplist.append(i) - thisvar_ds = cropcal_ds.drop(droplist) - thisvar_ds = thisvar_ds.load() + strip_dataset(cropcal_ds, varname_ggcmi) # Convert to integer new_fillvalue = -1 - dummyvalue = -1 - thisvar_ds.variables[varname_ggcmi].encoding["_FillValue"] = new_fillvalue - if thiscrop_ggcmi == None: - thisvar_ds.variables[varname_ggcmi].values.fill(dummyvalue) - else: - thisvar_ds.variables[varname_ggcmi].values[ - np.isnan(thisvar_ds.variables[varname_ggcmi].values) - ] = new_fillvalue - thisvar_ds.variables[varname_ggcmi].values = thisvar_ds.variables[ - varname_ggcmi - ].values.astype("int16") + thisvar_ds = fill_convert_int(thisvar_ds, thiscrop_ggcmi, varname_ggcmi, new_fillvalue) # Add time dimension (https://stackoverflow.com/a/62862440) - # (Repeats original map for every timestep) - # Probably not necessary to use this method, since I only end up extracting thisvar_ds.values anyway---I could probably use some numpy method instead. - thisvar_ds = thisvar_ds.expand_dims(time=template_ds.time) - thisvar_da_tmp = thisvar_ds[varname_ggcmi] - thisvar_da = xr.DataArray( - data=thisvar_da_tmp.values.astype("int16"), - attrs=thisvar_da_tmp.attrs, - coords=thisvar_da_tmp.coords, - name=varname_clm, - ) - - # Edit/add variable attributes etc. - longname = thisvar_da.attrs["long_name"] - longname = longname.replace("rainfed", thiscrop_clm).replace("irrigated", thiscrop_clm) - - def set_var_attrs( - thisvar_da, longname, thiscrop_clm, thiscrop_ggcmi, varname_ggcmi, new_fillvalue - ): - thisvar_da.attrs["long_name"] = longname - if thiscrop_ggcmi == None: - thisvar_da.attrs["crop_name_clm"] = "none" - thisvar_da.attrs["crop_name_ggcmi"] = "none" - else: - thisvar_da.attrs["crop_name_clm"] = thiscrop_clm - thisvar_da.attrs["crop_name_ggcmi"] = thiscrop_ggcmi - thisvar_da.attrs["short_name_ggcmi"] = varname_ggcmi - thisvar_da.attrs["units"] = "day of year" - thisvar_da.encoding["_FillValue"] = new_fillvalue - # scale_factor and add_offset are required by I/O library for short data - # From https://www.unidata.ucar.edu/software/netcdf/workshops/2010/bestpractices/Packing.html: - # unpacked_value = packed_value * scale_factor + add_offset - thisvar_da.attrs["scale_factor"] = np.int16(1) - thisvar_da.attrs["add_offset"] = np.int16(0) - return thisvar_da + thisvar_da = add_time_dim(thisvar_ds, template_ds, varname_ggcmi, varname_clm) thisvar_da = set_var_attrs( - thisvar_da, longname, thiscrop_clm, thiscrop_ggcmi, varname_ggcmi, new_fillvalue + thisvar_da, thiscrop_clm, thiscrop_ggcmi, varname_ggcmi, new_fillvalue ) # Save - logger.info(" Saving %s..." % varname_ggcmi) + logger.info(" Saving %s...", varname_ggcmi) thisvar_da.to_netcdf(file_clm, mode="a", format="NETCDF3_CLASSIC") cropcal_ds.close() diff --git a/python/ctsm/crop_calendars/regrid_ggcmi_shdates.py b/python/ctsm/crop_calendars/regrid_ggcmi_shdates.py index 911b2f93a1..b1988aa8b5 100644 --- a/python/ctsm/crop_calendars/regrid_ggcmi_shdates.py +++ b/python/ctsm/crop_calendars/regrid_ggcmi_shdates.py @@ -1,19 +1,25 @@ +""" +Regrid GGCMI sowing and harvest date files +""" from subprocess import run import os import glob import argparse import sys +import logging import xarray as xr import numpy as np -import logging # -- add python/ctsm to path (needed if we want to run regrid_ggcmi_shdates stand-alone) _CTSM_PYTHON = os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir, os.pardir) sys.path.insert(1, _CTSM_PYTHON) -from ctsm.utils import abort -from ctsm.ctsm_pylib_dependent_utils import import_coord_1d, import_coord_2d -from ctsm import ctsm_logging +from ctsm.utils import abort # pylint: disable=wrong-import-position +from ctsm.ctsm_pylib_dependent_utils import ( # pylint: disable=wrong-import-position + import_coord_1d, + import_coord_2d, +) +from ctsm import ctsm_logging # pylint: disable=wrong-import-position logger = logging.getLogger(__name__) @@ -37,18 +43,26 @@ def main(): def run_and_check(cmd): + """ + Run a given shell command and check its result + """ result = run( cmd, shell=True, capture_output=True, text=True, + check=False, ) if result.returncode != 0: abort(f"Trouble running `{result.args}` in shell:\n{result.stdout}\n{result.stderr}") -# Functionized because these are shared by process_ggcmi_shdates def define_arguments(parser): + """ + Set up arguments shared between regrid_ggcmi_shdates and process_ggcmi_shdates + + Functionized because these are shared by process_ggcmi_shdates + """ # Required parser.add_argument( "-rr", @@ -60,7 +74,11 @@ def define_arguments(parser): parser.add_argument( "-rt", "--regrid-template-file", - help="Template netCDF file to be used in regridding of inputs. This can be a CLM output file (i.e., something with 1-d lat and lon variables) or a CLM surface dataset (i.e., something with 2-d LATIXY and LONGXY variables).", + help=( + "Template netCDF file to be used in regridding of inputs. This can be a CLM output " + + "file (i.e., something with 1-d lat and lon variables) or a CLM surface dataset " + + "(i.e., something with 2-d LATIXY and LONGXY variables)." + ), type=str, required=True, ) @@ -75,7 +93,10 @@ def define_arguments(parser): parser.add_argument( "-c", "--crop-list", - help="List of GGCMI crops to process; e.g., '--crop-list mai_rf,mai_ir'. If not provided, will process all GGCMI crops.", + help=( + "List of GGCMI crops to process; e.g., '--crop-list mai_rf,mai_ir'. If not provided, " + + "will process all GGCMI crops." + ), default=None, ) return parser @@ -89,7 +110,10 @@ def regrid_ggcmi_shdates( regrid_extension, crop_list, ): - logger.info(f"Regridding GGCMI crop calendars to {regrid_resolution}:") + """ + Regrid GGCMI sowing and harvest date files + """ + logger.info("Regridding GGCMI crop calendars to %s:", regrid_resolution) # Ensure we can call necessary shell script(s) for cmd in ["module load cdo; cdo"]: @@ -113,31 +137,7 @@ def regrid_ggcmi_shdates( regrid_extension = "." + regrid_extension # Import and format latitude - if "lat" in template_ds_in: - lat, Nlat = import_coord_1d(template_ds_in, "lat") - elif "LATIXY" in template_ds_in: - lat, Nlat = import_coord_2d(template_ds_in, "lat", "LATIXY") - lat.attrs["axis"] = "Y" - else: - abort("No latitude variable found in regrid template file") - - # Flip latitude, if needed - if lat.values[0] < lat.values[1]: - lat = lat.reindex(lat=list(reversed(lat["lat"]))) - - # Import and format longitude - if "lon" in template_ds_in: - lon, Nlon = import_coord_1d(template_ds_in, "lon") - elif "LONGXY" in template_ds_in: - lon, Nlon = import_coord_2d(template_ds_in, "lon", "LONGXY") - lon.attrs["axis"] = "Y" - else: - abort("No longitude variable found in regrid template file") - template_da_out = xr.DataArray( - data=np.full((Nlat, Nlon), 0.0), - dims={"lat": lat, "lon": lon}, - name="area", - ) + lat, lon, template_da_out = get_template_da_out(template_ds_in) # Save template Dataset for use by cdo template_ds_out = xr.Dataset( @@ -156,43 +156,75 @@ def regrid_ggcmi_shdates( if len(input_files) == 0: abort(f"No files found matching {os.path.join(os.getcwd(), pattern)}") input_files.sort() - for f in input_files: - this_crop = f[0:6] + for file in input_files: + this_crop = file[0:6] if crop_list is not None and this_crop not in crop_list: continue - logger.info(" " + this_crop) - f2 = os.path.join(regrid_output_directory, f) - f3 = f2.replace(regrid_extension, f"_nninterp-{regrid_resolution}{regrid_extension}") - - if os.path.exists(f3): - os.remove(f3) - - # Sometimes cdo fails for no apparent reason. In testing this never happened more than 3x in a row. + logger.info(" %s", this_crop) + file_2 = os.path.join(regrid_output_directory, file) + file_3 = file_2.replace( + regrid_extension, f"_nninterp-{regrid_resolution}{regrid_extension}" + ) + + if os.path.exists(file_3): + os.remove(file_3) + + # Sometimes cdo fails for no apparent reason. In testing this never happened more than 3x + # in a row. + cdo_cmd = ( + f"module load cdo; cdo -L -remapnn,'{templatefile}' " + + f"-setmisstonn '{file}' '{file_3}'" + ) try: - run_and_check( - f"module load cdo; cdo -L -remapnn,'{templatefile}' -setmisstonn '{f}' '{f3}'" - ) - except: + run_and_check(cdo_cmd) + except: # pylint: disable=bare-except try: - run_and_check( - f"module load cdo; cdo -L -remapnn,'{templatefile}' -setmisstonn '{f}' '{f3}'" - ) - except: + run_and_check(cdo_cmd) + except: # pylint: disable=bare-except try: - run_and_check( - f"module load cdo; cdo -L -remapnn,'{templatefile}' -setmisstonn '{f}' '{f3}'" - ) - except: - run_and_check( - f"module load cdo; cdo -L -remapnn,'{templatefile}' -setmisstonn '{f}' '{f3}'" - ) + run_and_check(cdo_cmd) + except: # pylint: disable=bare-except + run_and_check(cdo_cmd) # Delete template file, which is no longer needed os.remove(templatefile) os.chdir(previous_dir) +def get_template_da_out(template_ds_in): + """ + Get template output DataArray from input Dataset + """ + if "lat" in template_ds_in: + lat, n_lat = import_coord_1d(template_ds_in, "lat") + elif "LATIXY" in template_ds_in: + lat, n_lat = import_coord_2d(template_ds_in, "lat", "LATIXY") + lat.attrs["axis"] = "Y" + else: + abort("No latitude variable found in regrid template file") + + # Flip latitude, if needed + if lat.values[0] < lat.values[1]: + lat = lat.reindex(lat=list(reversed(lat["lat"]))) + + # Import and format longitude + if "lon" in template_ds_in: + lon, n_lon = import_coord_1d(template_ds_in, "lon") + elif "LONGXY" in template_ds_in: + lon, n_lon = import_coord_2d(template_ds_in, "lon", "LONGXY") + lon.attrs["axis"] = "Y" + else: + abort("No longitude variable found in regrid template file") + template_da_out = xr.DataArray( + data=np.full((n_lat, n_lon), 0.0), + dims={"lat": lat, "lon": lon}, + name="area", + ) + + return lat, lon, template_da_out + + def regrid_ggcmi_shdates_arg_process(): """Process input arguments @@ -204,7 +236,7 @@ def regrid_ggcmi_shdates_arg_process(): ctsm_logging.setup_logging_pre_config() parser = argparse.ArgumentParser( - description="Regrids raw sowing and harvest date files provided by GGCMI to a target CLM resolution." + description=("Regrid raw sowing/harvest date files from GGCMI to a target CLM resolution."), ) # Define arguments diff --git a/python/ctsm/crop_calendars/xr_flexsel.py b/python/ctsm/crop_calendars/xr_flexsel.py new file mode 100644 index 0000000000..d51d925985 --- /dev/null +++ b/python/ctsm/crop_calendars/xr_flexsel.py @@ -0,0 +1,263 @@ +""" +Flexibly subset time(s) and/or vegetation type(s) from an xarray Dataset or DataArray. +""" +import re +import numpy as np +import xarray as xr + +from ctsm.crop_calendars.cropcal_utils import vegtype_str2int, is_each_vegtype + + +def xr_flexsel(xr_object, patches1d_itype_veg=None, warn_about_seltype_interp=True, **kwargs): + """ + Flexibly subset time(s) and/or vegetation type(s) from an xarray Dataset or DataArray. + + - Keyword arguments like dimension=selection. + - Selections can be individual values or slice()s. + - Optimize memory usage by beginning keyword argument list with the selections that will result + in the largest reduction of object size. + - Use dimension "vegtype" to extract patches of designated vegetation type (can be string or + integer). + - Can also do dimension=function---e.g., time=np.mean will take the mean over the time + dimension. + """ + # Setup + havewarned = False + delimiter = "__" + + for key, selection in kwargs.items(): + if callable(selection): + xr_object = handle_callable(xr_object, key, selection) + + elif key == "vegtype": + xr_object = handle_vegtype(xr_object, patches1d_itype_veg, selection) + + else: + # Parse selection type, if provided + if delimiter in key: + key, selection_type = key.split(delimiter) + + # Check type of selection + else: + is_inefficient = False + if isinstance(selection, slice): + this_type = set_type_from_slice(selection) + elif isinstance(selection, np.ndarray): + selection, is_inefficient, this_type = set_type_from_ndarray(selection) + else: + this_type = type(selection) + + warn_about_this_seltype_interp = warn_about_seltype_interp + if this_type == list and isinstance(selection[0], str): + selection_type = "values" + warn_about_this_seltype_interp = False + elif this_type == int: + selection_type = "indices" + else: + selection_type = "values" + + if warn_about_this_seltype_interp: + do_warn_about_seltype_interp( + havewarned, delimiter, key, selection_type, is_inefficient, this_type + ) + + # Trim along relevant 1d axes + if isinstance(xr_object, xr.Dataset) and key in ["lat", "lon"]: + xr_object = trim_along_relevant_1d_axes(xr_object, selection, selection_type, key) + + # Perform selection + xr_object = perform_selection(xr_object, key, selection, selection_type) + + return xr_object + + +def perform_selection(xr_object, key, selection, selection_type): + """ + Perform selection + """ + if selection_type == "indices": + # Have to select like this instead of with index directly because otherwise assign_coords() + # will throw an error. Not sure why. + if isinstance(selection, int): + # Single integer? Turn it into a slice. + selection = slice(selection, selection + 1) + elif ( + isinstance(selection, np.ndarray) + and not selection.dtype.kind in np.typecodes["AllInteger"] + ): + selection = selection.astype(int) + xr_object = xr_object.isel({key: selection}) + elif selection_type == "values": + xr_object = xr_object.sel({key: selection}) + else: + raise TypeError(f"selection_type {selection_type} not recognized") + return xr_object + + +def trim_along_relevant_1d_axes(xr_object, selection, selection_type, key): + """ + Trim along relevant 1d axes + """ + if selection_type == "indices": + incl_coords = xr_object[key].values[selection] + elif selection_type == "values": + if isinstance(selection, slice): + incl_coords = xr_object.sel({key: selection}, drop=False)[key].values + else: + incl_coords = selection + else: + raise TypeError(f"selection_type {selection_type} not recognized") + if key == "lat": + this_xy = "jxy" + elif key == "lon": + this_xy = "ixy" + else: + raise KeyError( + f"Key '{key}' not recognized: What 1d_ suffix should I use for variable name?" + ) + pattern = re.compile(f"1d_{this_xy}") + matches = [x for x in list(xr_object.keys()) if pattern.search(x) is not None] + for var in matches: + if len(xr_object[var].dims) != 1: + raise RuntimeError( + f"Expected {var} to have 1 dimension, but it has" + f" {len(xr_object[var].dims)}: {xr_object[var].dims}" + ) + dim = xr_object[var].dims[0] + # print(f"Variable {var} has dimension {dim}") + coords = xr_object[key].values[xr_object[var].values.astype(int) - 1] + # print(f"{dim} size before: {xr_object.sizes[dim]}") + ok_ind = [] + new_1d_this_xy = [] + for i, member in enumerate(coords): + if member in incl_coords: + ok_ind = ok_ind + [i] + new_1d_this_xy = new_1d_this_xy + [(incl_coords == member).nonzero()[0] + 1] + xr_object = xr_object.isel({dim: ok_ind}) + new_1d_this_xy = np.array(new_1d_this_xy).squeeze() + xr_object[var].values = new_1d_this_xy + # print(f"{dim} size after: {xr_object.sizes[dim]}") + return xr_object + + +def do_warn_about_seltype_interp( + havewarned, delimiter, key, selection_type, is_inefficient, this_type +): + """ + Suggest suppressing selection type interpretation warnings + """ + if not havewarned: + print( + "xr_flexsel(): Suppress all 'selection type interpretation' messages by specifying" + + "warn_about_seltype_interp=False" + ) + havewarned = True + if is_inefficient: + extra = " This will also improve efficiency for large selections." + else: + extra = "" + print( + f"xr_flexsel(): Selecting {key} as {selection_type} because selection was" + f" interpreted as {this_type}. If not correct, specify selection type" + " ('indices' or 'values') in keyword like" + f" '{key}{delimiter}SELECTIONTYPE=...' instead of '{key}=...'.{extra}" + ) + + +def set_type_from_ndarray(selection): + """ + Sets selection type if given a Numpy array + """ + if selection.dtype.kind in np.typecodes["AllInteger"]: + this_type = int + else: + is_inefficient = True + this_type = None + for member in selection: + if member < 0 or member % 1 > 0: + if isinstance(member, int): + this_type = "values" + else: + this_type = type(member) + break + if this_type is None: + this_type = int + selection = selection.astype(int) + return selection, is_inefficient, this_type + + +def set_type_from_slice(selection): + """ + Sets selection type if given a slice + """ + slice_members = [] + if selection == slice(0): + raise ValueError("slice(0) will be empty") + if selection.start is not None: + slice_members = slice_members + [selection.start] + if selection.stop is not None: + slice_members = slice_members + [selection.stop] + if selection.step is not None: + slice_members = slice_members + [selection.step] + if not slice_members: + raise TypeError("slice is all None?") + this_type = int + for member in slice_members: + if member < 0 or not isinstance(member, int): + this_type = "values" + break + return this_type + + +def handle_vegtype(xr_object, patches1d_itype_veg, selection): + """ + Handle selection "vegtype + """ + # Convert to list, if needed + if not isinstance(selection, list): + selection = [selection] + + # Convert to indices, if needed + if isinstance(selection[0], str): + selection = vegtype_str2int(selection) + + # Get list of boolean(s) + if isinstance(selection[0], int): + if isinstance(patches1d_itype_veg, type(None)): + patches1d_itype_veg = xr_object.patches1d_itype_veg.values + elif isinstance(patches1d_itype_veg, xr.core.dataarray.DataArray): + patches1d_itype_veg = patches1d_itype_veg.values + is_vegtype = is_each_vegtype(patches1d_itype_veg, selection, "ok_exact") + elif isinstance(selection[0], bool): + if len(selection) != len(xr_object.patch): + raise ValueError( + "If providing boolean 'vegtype' argument to xr_flexsel(), it must be the" + f" same length as xr_object.patch ({len(selection)} vs." + f" {len(xr_object.patch)})" + ) + is_vegtype = selection + else: + raise TypeError(f"Not sure how to handle 'vegtype' of type {type(selection[0])}") + xr_object = xr_object.isel(patch=[i for i, x in enumerate(is_vegtype) if x]) + if "ivt" in xr_object: + xr_object = xr_object.isel(ivt=is_each_vegtype(xr_object.ivt.values, selection, "ok_exact")) + + return xr_object + + +def handle_callable(xr_object, key, selection): + """ + Handle selection that's a callable + """ + # It would have been really nice to do selection(xr_object, axis=key), but numpy methods and + # xarray methods disagree on "axis" vs. "dimension." So instead, just do this manually. + if selection == np.mean: # pylint: disable=comparison-with-callable + try: + xr_object = xr_object.mean(dim=key) + except: # pylint: disable=raise-missing-from + raise ValueError( + f"Failed to take mean of dimension {key}. Try doing so outside of xr_flexsel()." + ) + else: + raise ValueError(f"xr_flexsel() doesn't recognize function {selection}") + return xr_object diff --git a/python/ctsm/ctsm_pylib_dependent_utils.py b/python/ctsm/ctsm_pylib_dependent_utils.py index 13ccf7a969..59ca15155b 100644 --- a/python/ctsm/ctsm_pylib_dependent_utils.py +++ b/python/ctsm/ctsm_pylib_dependent_utils.py @@ -1,49 +1,64 @@ -from ctsm.utils import abort +""" +Utilities that are dependent on non-standard modules (i.e., require ctsm_pylib). +""" + import numpy as np +from ctsm.utils import abort -def import_coord_1d(ds, coordName): +def import_coord_1d(data_set, coord_name): """Import 1-d coordinate variable Args: - ds (xarray Dataset): Dataset whose coordinate you want to import. - coordName (str): Name of coordinate to import + data_set (xarray Dataset): Dataset whose coordinate you want to import. + coord_name (str): Name of coordinate to import Returns: xarray DataArray: DataArray corresponding to the requested coordinate. """ - da = ds[coordName] - if len(da.dims) != 1: - abort(f"Expected 1 dimension for {coordName}; found {len(da.dims)}: {da.dims}") - return da, len(da) + data_array = data_set[coord_name] + if len(data_array.dims) != 1: + abort( + f"Expected 1 dimension for {coord_name}; " + + f"found {len(data_array.dims)}: {data_array.dims}" + ) + return data_array, len(data_array) -def import_coord_2d(ds, coordName, varName): - """Import 2-d latitude or longitude variable from a CESM history file (e.g., name LATIXY or LONGXY) and return it as a 1-d DataArray that can be used as a coordinate for writing CESM input files +def import_coord_2d(data_set, coord_name, var_name): + """ + Import 2-d latitude or longitude variable from a CESM history file (e.g., name LATIXY + or LONGXY and return it as a 1-d DataArray that can be used as a coordinate for writing + CESM input files Args: - ds (xarray Dataset): Dataset whose coordinate you want to import. - coordName (str): Name of coordinate to import - varName (str): Name of variable with dimension coordName + data_set (xarray Dataset): Dataset whose coordinate you want to import. + coord_name (str): Name of coordinate to import + var_name (str): Name of variable with dimension coord_name Returns: xarray DataArray: 1-d variable that can be used as a coordinate for writing CESM input files int: Length of that variable """ - da = ds[varName] - thisDim = [x for x in da.dims if coordName in x] - if len(thisDim) != 1: - abort(f"Expected 1 dimension name containing {coordName}; found {len(thisDim)}: {thisDim}") - thisDim = thisDim[0] - otherDim = [x for x in da.dims if coordName not in x] - if len(otherDim) != 1: + data_array = data_set[var_name] + this_dim = [x for x in data_array.dims if coord_name in x] + if len(this_dim) != 1: + abort( + f"Expected 1 dimension name containing {coord_name}; " + + f"found {len(this_dim)}: {this_dim}" + ) + this_dim = this_dim[0] + other_dim = [x for x in data_array.dims if coord_name not in x] + if len(other_dim) != 1: abort( - f"Expected 1 dimension name not containing {coordName}; found {len(otherDim)}: {otherDim}" + f"Expected 1 dimension name not containing {coord_name}; " + + f"found {len(other_dim)}: {other_dim}" ) - otherDim = otherDim[0] - da = da.astype(np.float32) - da = da.isel({otherDim: [0]}).squeeze().rename({thisDim: coordName}).rename(coordName) - da = da.assign_coords({coordName: da.values}) - da.attrs["long_name"] = "coordinate " + da.attrs["long_name"] - da.attrs["units"] = da.attrs["units"].replace(" ", "_") - return da, len(da) + other_dim = other_dim[0] + data_array = data_array.astype(np.float32) + data_array = data_array.isel({other_dim: [0]}).squeeze() + data_array = data_array.rename({this_dim: coord_name}).rename(coord_name) + data_array = data_array.assign_coords({coord_name: data_array.values}) + data_array.attrs["long_name"] = "coordinate " + data_array.attrs["long_name"] + data_array.attrs["units"] = data_array.attrs["units"].replace(" ", "_") + return data_array, len(data_array) diff --git a/python/ctsm/site_and_regional/modify_singlept_site_neon.py b/python/ctsm/site_and_regional/modify_singlept_site_neon.py index ae1318e2f8..e69a8ab834 100755 --- a/python/ctsm/site_and_regional/modify_singlept_site_neon.py +++ b/python/ctsm/site_and_regional/modify_singlept_site_neon.py @@ -54,6 +54,9 @@ myname = getuser() +# Seconds to wait before requests.get() times out +TIMEOUT = 60 + # -- valid neon sites valid_neon_sites = glob.glob( @@ -176,7 +179,7 @@ def get_neon(neon_dir, site_name): + site_name + "_surfaceData.csv" ) - response = requests.get(url) + response = requests.get(url, timeout=TIMEOUT) with open(neon_file, "wb") as a_file: a_file.write(response.content) @@ -430,7 +433,7 @@ def download_file(url, fname): file name to save the downloaded file. """ try: - response = requests.get(url) + response = requests.get(url, timeout=TIMEOUT) with open(fname, "wb") as a_file: a_file.write(response.content) @@ -443,7 +446,7 @@ def download_file(url, fname): except Exception as err: print("The server could not fulfill the request.") print("Something went wrong in downloading", fname) - print("Error code:", err.code) + raise err def fill_interpolate(f_2, var, method): @@ -472,6 +475,129 @@ def fill_interpolate(f_2, var, method): print("=====================================") +def print_neon_data_soil_structure(obs_bot, soil_bot, bin_index): + """ + Print info about NEON data soil structure + """ + print("================================") + print(" Neon data soil structure: ") + print("================================") + + print("------------", "ground", "------------") + for i, this_obs_bot in enumerate(obs_bot): + print("layer", i) + print("-------------", "{0:.2f}".format(this_obs_bot), "-------------") + + print("================================") + print("Surface data soil structure: ") + print("================================") + + print("------------", "ground", "------------") + for this_bin in range(len(bin_index)): + print("layer", this_bin) + print("-------------", "{0:.2f}".format(soil_bot[this_bin]), "-------------") + + +def print_soil_quality( + inorganic, bin_index, soil_lev, layer_depth, carbon_tot, estimated_oc, bulk_den, f_2 +): + """ + Prints information about soil quality + """ + print("~~~~~~~~~~~~~~~~~~~~~~~~") + print("inorganic:") + print("~~~~~~~~~~~~~~~~~~~~~~~~") + print(inorganic) + print("~~~~~~~~~~~~~~~~~~~~~~~~") + + print("bin_index : ", bin_index[soil_lev]) + print("layer_depth : ", layer_depth) + print("carbon_tot : ", carbon_tot) + print("estimated_oc : ", estimated_oc) + print("bulk_den : ", bulk_den) + print("organic :", f_2["ORGANIC"][soil_lev].values) + print("--------------------------") + + +def update_agri_site_info(site_name, f_2): + """ + Updates agricultural sites + """ + ag_sites = ["KONA", "STER"] + if site_name not in ag_sites: + return f_2 + + print("Updating PCT_NATVEG") + print("Original : ", f_2.PCT_NATVEG.values) + f_2.PCT_NATVEG.values = [[0.0]] + print("Updated : ", f_2.PCT_NATVEG.values) + + print("Updating PCT_CROP") + print("Original : ", f_2.PCT_CROP.values) + f_2.PCT_CROP.values = [[100.0]] + print("Updated : ", f_2.PCT_CROP.values) + + print("Updating PCT_NAT_PFT") + print(f_2.PCT_NAT_PFT.values[0]) + print(f_2.PCT_NAT_PFT[0].values) + + return f_2 + + +def update_fields_with_neon(f_1, d_f, bin_index): + """ + update fields with neon + """ + f_2 = f_1 + soil_levels = f_2["PCT_CLAY"].size + for soil_lev in range(soil_levels): + print("--------------------------") + print("soil_lev:", soil_lev) + print(d_f["clayTotal"][bin_index[soil_lev]]) + f_2["PCT_CLAY"][soil_lev] = d_f["clayTotal"][bin_index[soil_lev]] + f_2["PCT_SAND"][soil_lev] = d_f["sandTotal"][bin_index[soil_lev]] + + bulk_den = d_f["bulkDensExclCoarseFrag"][bin_index[soil_lev]] + carbon_tot = d_f["carbonTot"][bin_index[soil_lev]] + estimated_oc = d_f["estimatedOC"][bin_index[soil_lev]] + + # -- estimated_oc in neon data is rounded to the nearest integer. + # -- Check to make sure the rounded oc is not higher than carbon_tot. + # -- Use carbon_tot if estimated_oc is bigger than carbon_tot. + + estimated_oc = min(estimated_oc, carbon_tot) + + layer_depth = ( + d_f["biogeoBottomDepth"][bin_index[soil_lev]] + - d_f["biogeoTopDepth"][bin_index[soil_lev]] + ) + + # f_2["ORGANIC"][soil_lev] = estimated_oc * bulk_den / 0.58 + + # -- after adding caco3 by NEON: + # -- if caco3 exists: + # -- inorganic = caco3/100.0869*12.0107 + # -- organic = carbon_tot - inorganic + # -- else: + # -- organic = estimated_oc * bulk_den /0.58 + + caco3 = d_f["caco3Conc"][bin_index[soil_lev]] + inorganic = caco3 / 100.0869 * 12.0107 + print("inorganic:", inorganic) + + if not np.isnan(inorganic): + actual_oc = carbon_tot - inorganic + else: + actual_oc = estimated_oc + + f_2["ORGANIC"][soil_lev] = actual_oc * bulk_den / 0.58 + + print_soil_quality( + inorganic, bin_index, soil_lev, layer_depth, carbon_tot, estimated_oc, bulk_den, f_2 + ) + return f_2 + + def main(): """modify_singlept_site_neon main function""" args = get_parser().parse_args() @@ -532,88 +658,10 @@ def main(): bins = d_f["biogeoTopDepth"] / 100 bin_index = np.digitize(soil_mid, bins) - 1 - """ - print ("================================") - print (" Neon data soil structure: ") - print ("================================") - - print ("------------","ground","------------") - for i in range(len(obs_bot)): - print ("layer",i) - print ("-------------", - "{0:.2f}".format(obs_bot[i]), - "-------------") - - print ("================================") - print ("Surface data soil structure: ") - print ("================================") - - print ("------------","ground","------------") - for b in range(len(bin_index)): - print ("layer",b) - print ("-------------", - "{0:.2f}".format(soil_bot[b]), - "-------------") - """ + print_neon_data_soil_structure(obs_bot, soil_bot, bin_index) # -- update fields with neon - f_2 = f_1 - soil_levels = f_2["PCT_CLAY"].size - for soil_lev in range(soil_levels): - print("--------------------------") - print("soil_lev:", soil_lev) - print(d_f["clayTotal"][bin_index[soil_lev]]) - f_2["PCT_CLAY"][soil_lev] = d_f["clayTotal"][bin_index[soil_lev]] - f_2["PCT_SAND"][soil_lev] = d_f["sandTotal"][bin_index[soil_lev]] - - bulk_den = d_f["bulkDensExclCoarseFrag"][bin_index[soil_lev]] - carbon_tot = d_f["carbonTot"][bin_index[soil_lev]] - estimated_oc = d_f["estimatedOC"][bin_index[soil_lev]] - - # -- estimated_oc in neon data is rounded to the nearest integer. - # -- Check to make sure the rounded oc is not higher than carbon_tot. - # -- Use carbon_tot if estimated_oc is bigger than carbon_tot. - - estimated_oc = min(estimated_oc, carbon_tot) - - layer_depth = ( - d_f["biogeoBottomDepth"][bin_index[soil_lev]] - - d_f["biogeoTopDepth"][bin_index[soil_lev]] - ) - - # f_2["ORGANIC"][soil_lev] = estimated_oc * bulk_den / 0.58 - - # -- after adding caco3 by NEON: - # -- if caco3 exists: - # -- inorganic = caco3/100.0869*12.0107 - # -- organic = carbon_tot - inorganic - # -- else: - # -- organic = estimated_oc * bulk_den /0.58 - - caco3 = d_f["caco3Conc"][bin_index[soil_lev]] - inorganic = caco3 / 100.0869 * 12.0107 - print("inorganic:", inorganic) - - if not np.isnan(inorganic): - actual_oc = carbon_tot - inorganic - else: - actual_oc = estimated_oc - - f_2["ORGANIC"][soil_lev] = actual_oc * bulk_den / 0.58 - - print("~~~~~~~~~~~~~~~~~~~~~~~~") - print("inorganic:") - print("~~~~~~~~~~~~~~~~~~~~~~~~") - print(inorganic) - print("~~~~~~~~~~~~~~~~~~~~~~~~") - - print("bin_index : ", bin_index[soil_lev]) - print("layer_depth : ", layer_depth) - print("carbon_tot : ", carbon_tot) - print("estimated_oc : ", estimated_oc) - print("bulk_den : ", bulk_den) - print("organic :", f_2["ORGANIC"][soil_lev].values) - print("--------------------------") + f_2 = update_fields_with_neon(f_1, d_f, bin_index) # -- Interpolate missing values method = "linear" @@ -633,22 +681,8 @@ def main(): sort_print_soil_layers(obs_bot, soil_bot) - # -- updates for ag sites : KONA and STER - ag_sites = ["KONA", "STER"] - if site_name in ag_sites: - print("Updating PCT_NATVEG") - print("Original : ", f_2.PCT_NATVEG.values) - f_2.PCT_NATVEG.values = [[0.0]] - print("Updated : ", f_2.PCT_NATVEG.values) - - print("Updating PCT_CROP") - print("Original : ", f_2.PCT_CROP.values) - f_2.PCT_CROP.values = [[100.0]] - print("Updated : ", f_2.PCT_CROP.values) - - print("Updating PCT_NAT_PFT") - print(f_2.PCT_NAT_PFT.values[0]) - print(f_2.PCT_NAT_PFT[0].values) + # -- updates for ag sites + update_agri_site_info(site_name, f_2) out_dir = args.out_dir diff --git a/python/ctsm/test/test_sys_regrid_ggcmi_shdates.py b/python/ctsm/test/test_sys_regrid_ggcmi_shdates.py index 7521ef09a5..6c2e230481 100755 --- a/python/ctsm/test/test_sys_regrid_ggcmi_shdates.py +++ b/python/ctsm/test/test_sys_regrid_ggcmi_shdates.py @@ -5,7 +5,6 @@ """ import os -import re import unittest import tempfile @@ -18,8 +17,7 @@ # -- add python/ctsm to path (needed if we want to run test stand-alone) _CTSM_PYTHON = os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir, os.pardir) sys.path.insert(1, _CTSM_PYTHON) - - +# pylint: disable=wrong-import-position from ctsm.path_utils import path_to_ctsm_root from ctsm import unit_testing from ctsm.crop_calendars.regrid_ggcmi_shdates import regrid_ggcmi_shdates @@ -78,6 +76,9 @@ def tearDown(self): shutil.rmtree(self._tempdir, ignore_errors=True) def test_regrid_ggcmi_shdates(self): + """ + Tests regrid_ggcmi_shdates + """ # Call script sys.argv = self._function_call_list diff --git a/python/ctsm/test/test_unit_modify_singlept_site_neon.py b/python/ctsm/test/test_unit_modify_singlept_site_neon.py index ecd96357b3..3a9d7d424c 100755 --- a/python/ctsm/test/test_unit_modify_singlept_site_neon.py +++ b/python/ctsm/test/test_unit_modify_singlept_site_neon.py @@ -17,7 +17,7 @@ # -- add python/ctsm to path (needed if we want to run the test stand-alone) _CTSM_PYTHON = os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir, os.pardir) sys.path.insert(1, _CTSM_PYTHON) - +# pylint: disable=wrong-import-position from ctsm.path_utils import path_to_ctsm_root # pylint: disable=wrong-import-position diff --git a/python/ctsm/test/test_unit_run_sys_tests.py b/python/ctsm/test/test_unit_run_sys_tests.py index 65ec1df5a5..98a9d54674 100755 --- a/python/ctsm/test/test_unit_run_sys_tests.py +++ b/python/ctsm/test/test_unit_run_sys_tests.py @@ -271,7 +271,7 @@ def test_withDryRun_nothingDone(self): def test_getTestmodList_suite(self): """Ensure that _get_testmod_list() works correctly with suite-style input""" - input = [ + testmod_list_input = [ "clm/default", "clm/default", "clm/crop", @@ -283,12 +283,12 @@ def test_getTestmodList_suite(self): "clm-crop", "clm-cropMonthlyOutput", ] - output = _get_testmod_list(input, unique=False) + output = _get_testmod_list(testmod_list_input, unique=False) self.assertEqual(output, target) def test_getTestmodList_suite_unique(self): """Ensure that _get_testmod_list() works correctly with unique=True""" - input = [ + testmod_list_input = [ "clm/default", "clm/default", "clm/crop", @@ -300,24 +300,29 @@ def test_getTestmodList_suite_unique(self): "clm-cropMonthlyOutput", ] - output = _get_testmod_list(input, unique=True) + output = _get_testmod_list(testmod_list_input, unique=True) self.assertEqual(output, target) def test_getTestmodList_testname(self): """Ensure that _get_testmod_list() works correctly with full test name(s) specified""" - input = [ + testmod_list_input = [ "ERS_D_Ld15.f45_f45_mg37.I2000Clm50FatesRs.izumi_nag.clm-crop", "ERS_D_Ld15.f45_f45_mg37.I2000Clm50FatesRs.izumi_nag.clm-default", ] target = ["clm-crop", "clm-default"] - output = _get_testmod_list(input) + output = _get_testmod_list(testmod_list_input) self.assertEqual(output, target) def test_getTestmodList_twomods(self): - """Ensure that _get_testmod_list() works correctly with full test name(s) specified and two mods in one test""" - input = ["ERS_D_Ld15.f45_f45_mg37.I2000Clm50FatesRs.izumi_nag.clm-default--clm-crop"] + """ + Ensure that _get_testmod_list() works correctly with full test name(s) specified and two + mods in one test + """ + testmod_list_input = [ + "ERS_D_Ld15.f45_f45_mg37.I2000Clm50FatesRs.izumi_nag.clm-default--clm-crop" + ] target = ["clm-default", "clm-crop"] - output = _get_testmod_list(input) + output = _get_testmod_list(testmod_list_input) self.assertEqual(output, target) diff --git a/python/ctsm/test/test_unit_utils_import_coord.py b/python/ctsm/test/test_unit_utils_import_coord.py index b7ec8f90ec..6e339a913f 100755 --- a/python/ctsm/test/test_unit_utils_import_coord.py +++ b/python/ctsm/test/test_unit_utils_import_coord.py @@ -16,7 +16,7 @@ # -- add python/ctsm to path (needed if we want to run test stand-alone) _CTSM_PYTHON = os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir, os.pardir) sys.path.insert(1, _CTSM_PYTHON) - +# pylint: disable=wrong-import-position from ctsm import unit_testing from ctsm.path_utils import path_to_ctsm_root from ctsm.ctsm_pylib_dependent_utils import import_coord_1d, import_coord_2d @@ -33,7 +33,9 @@ # Allow all the instance attributes that we need # pylint: disable=too-many-instance-attributes class TestUtilsImportCoord(unittest.TestCase): - # Tests the importcoord* subroutines from utils.py + """ + Tests the importcoord* subroutines from utils.py + """ def setUp(self): """Setup for trying out the methods""" @@ -56,13 +58,19 @@ def tearDown(self): shutil.rmtree(self._tempdir, ignore_errors=True) def test_importcoord1d(self): + """ + Tests importing a 1-d lat/lon variable + """ ds = xr.open_dataset(self._1d_lonlat_file) - lat, Nlat = import_coord_1d(ds, "lat") - np.testing.assert_equal(Nlat, 360) + lat, n_lat = import_coord_1d(ds, "lat") + np.testing.assert_equal(n_lat, 360) np.testing.assert_array_equal(lat.values[:4], [89.75, 89.25, 88.75, 88.25]) np.testing.assert_array_equal(lat.values[-4:], [-88.25, -88.75, -89.25, -89.75]) def test_importcoord1d_attrs(self): + """ + Tests attributes of an imported 1-d lat/lon variable + """ ds = xr.open_dataset(self._1d_lonlat_file) lat, _ = import_coord_1d(ds, "lat") # Unlike import_coord_2d, import_coord_1d doesn't rename the long name. @@ -73,20 +81,29 @@ def test_importcoord1d_attrs(self): self.assertDictEqual(lat.attrs, expected_attributes) def test_importcoord1d_too_many_dims(self): + """ + Tests that 1d-importing function errors when given a 2d variable to import + """ ds = xr.open_dataset(self._2d_lonlat_file) - with self.assertRaisesRegex( + with self.assertRaises( SystemExit, - "Expected 1 dimension for LATIXY; found 2: \('lsmlat', 'lsmlon'\)", + msg="Expected 1 dimension for LATIXY; found 2: ('lsmlat', 'lsmlon')", ): import_coord_1d(ds, "LATIXY") def test_importcoord2d(self): + """ + Tests importing a 2-d lat/lon variable + """ ds = xr.open_dataset(self._2d_lonlat_file) lat, _ = import_coord_2d(ds, "lat", "LATIXY") expected_values = np.array([-13.9, -11.7, -9.5, -7.3, -5.1]).astype(np.float32) np.testing.assert_array_equal(lat.values, expected_values) def test_importcoord2d_attrs(self): + """ + Tests attributes of an imported 2-d lat/lon variable + """ ds = xr.open_dataset(self._2d_lonlat_file) lat, _ = import_coord_2d(ds, "lat", "LATIXY") expected_attributes = { @@ -96,25 +113,34 @@ def test_importcoord2d_attrs(self): self.assertDictEqual(lat.attrs, expected_attributes) def test_importcoord2d_rename_dim(self): + """ + Tests renaming of an imported 2-d lat/lon variable + """ ds = xr.open_dataset(self._2d_lonlat_file) lat, _ = import_coord_2d(ds, "lat", "LATIXY") self.assertTupleEqual(lat.dims, ("lat",)) def test_importcoord2d_no_dim_contains_coordName(self): + """ + Tests that 2d-importing function errors when given a nonexistent dim name + """ ds = xr.open_dataset(self._2d_lonlat_file) ds = ds.rename({"lsmlat": "abc"}) - with self.assertRaisesRegex( + with self.assertRaises( SystemExit, - "ERROR: Expected 1 dimension name containing lat; found 0: \[\]", + msg="ERROR: Expected 1 dimension name containing lat; found 0: []", ): import_coord_2d(ds, "lat", "LATIXY") def test_importcoord2d_1_dim_containing(self): + """ + Tests that 2d-importing function errors when given an ambiguous dim name + """ ds = xr.open_dataset(self._2d_lonlat_file) ds = ds.rename({"lsmlon": "lsmlat2"}) - with self.assertRaisesRegex( + with self.assertRaises( SystemExit, - "Expected 1 dimension name containing lat; found 2: \['lsmlat', 'lsmlat2'\]", + msg="Expected 1 dimension name containing lat; found 2: ['lsmlat', 'lsmlat2']", ): import_coord_2d(ds, "lat", "LATIXY")