Merge pull request #93 from IMMM-SFA/feature/multifile-support-and-error-messages

multi-file input support, non-standard calendar support, and better error messages for missing data
IMMM-SFA · Aug 18, 2022 · e1d1ac6 · e1d1ac6
2 parents aa06e07 + 4fe47a1
commit e1d1ac6
Show file tree

Hide file tree

Showing 4 changed files with 48 additions and 10 deletions.
diff --git a/README.md b/README.md
@@ -87,10 +87,19 @@ Currently, the gridded data is expected to be provided at the same spatial resol
 Runoff input can be provided at any time resolution; each timestep will select the runoff at the closest time in the past.
 Currently, demand input is read monthly but will also pad to the closest time in the past.
 Efforts are under way for more robust demand handling.
+
 Dams/reservoirs require four different input files: the physical characteristics, the average monthly flow expected during the simulation period, the average monthly demand expected during the simulation period, and a database mapping each GRanD ID to grid cell IDs allowed to extract water from it.
 These dam/reservoir input files can be generated from raw GRanD data, raw elevation data, and raw ISTARF data using the [provided utility](mosartwmpy/utilities/CREATE_GRAND_PARAMETERS.md).
 The best way to understand the expected format of the input files is to examine the sample inputs provided by the download utility: `python -m mosartwmpy.download`.
 
+#### multi-file input
+
+To use multi-file demand or runoff input, use year/month/day placeholders in the file path options like so:
+* If your files look like `runoff-1999.nc`, use `runoff-{Y}.nc` as the path
+* If your files look like `runoff-1999-02.nc`, use `runoff-{Y}-{M}.nc` as the path
+* If your files look like `runoff-1999-02-03.nc`, use `runoff-{Y}-{M}-{D}.nc` as the path, but be sure to provide files for leap days as well!
+
+
 ## model output
 
 By default, key model variables are output on a monthly basis at a daily averaged resolution to `./output/<simulation name>/<simulation name>_<year>_<month>.nc`. See the configuration file for examples of how to modify the outputs, and the `./mosartwmpy/state/state.py` file for state variable names.

diff --git a/mosartwmpy/_version.py b/mosartwmpy/_version.py
@@ -1 +1 @@
-__version__ = "0.3.1"
+__version__ = "0.3.2"
diff --git a/mosartwmpy/input/demand.py b/mosartwmpy/input/demand.py
@@ -1,10 +1,10 @@
+from benedict.dicts import benedict as Benedict
+from datetime import datetime
 import numpy as np
+import pandas as pd
 import regex as re
-
-from datetime import datetime
 from xarray import open_dataset
 
-from benedict.dicts import benedict as Benedict
 from mosartwmpy.state.state import State
 from mosartwmpy.utilities.timing import timing
 
@@ -22,14 +22,24 @@ def load_demand(state: State, config: Benedict, current_time: datetime, mask: np
 
     # demand path can have placeholders for year and month and day, so check for those and replace if needed
     path = config.get('water_management.demand.path')
-    path = re.sub('\{y[^}]*}', current_time.strftime('%Y'), path)
-    path = re.sub('\{m[^}]*}', current_time.strftime('%m'), path)
-    path = re.sub('\{d[^}]*}', current_time.strftime('%d'), path)
+    path = re.sub('\{(?:Y|y)[^}]*}', current_time.strftime('%Y'), path)
+    path = re.sub('\{(?:M|m)[^}]*}', current_time.strftime('%m'), path)
+    path = re.sub('\{(?:D|d)[^}]*}', current_time.strftime('%d'), path)
 
     demand = open_dataset(path)
 
     # if the demand file has a time axis, use it; otherwise assume data is just 2d
     if config.get('water_management.demand.time', None) in demand:
+        # check for non-standard calendar and convert if needed
+        if not isinstance(demand.indexes[config.get('water_management.demand.time')], pd.DatetimeIndex):
+            demand[config.get('water_management.demand.time')] = demand.indexes[config.get('water_management.demand.time')].to_datetimeindex()
+        # check if time index includes current time (with some slack on the end)
+        if not (
+            demand[config.get('water_management.demand.time')].values.min() <= np.datetime64(current_time) <= (demand[config.get('water_management.demand.time')].values.max() + np.timedelta64(31, 'D'))
+        ):
+            raise ValueError(
+                f"Current simulation date {current_time.strftime('%Y-%m-%d')} not within time bounds of demand input file {path}. Aborting..."
+            )
         state.grid_cell_demand_rate = np.array(demand[config.get('water_management.demand.demand')].sel({config.get('water_management.demand.time'): current_time}, method='pad')).flatten()[mask]
     else:
         state.grid_cell_demand_rate = np.array(demand[config.get('water_management.demand.demand')]).flatten()[mask]

diff --git a/mosartwmpy/input/runoff.py b/mosartwmpy/input/runoff.py
@@ -1,5 +1,7 @@
 import numpy as np
-from datetime import datetime, time, timedelta
+from datetime import datetime
+import pandas as pd
+import regex as re
 from xarray import open_dataset
 
 from benedict.dicts import benedict as Benedict
@@ -23,8 +25,25 @@ def load_runoff(state: State, grid: Grid, config: Benedict, current_time: dateti
     # note that the forcing is provided in mm/s
     # the flood section needs m3/s, but the routing needs m/s, so be aware of the conversions
     # method="pad" means the closest time in the past is selected from the file
-
-    runoff = open_dataset(config.get('runoff.path'))
+
+    path = config.get('runoff.path')
+    path = re.sub('\{(?:Y|y)[^}]*}', current_time.strftime('%Y'), path)
+    path = re.sub('\{(?:M|m)[^}]*}', current_time.strftime('%m'), path)
+    path = re.sub('\{(?:D|d)[^}]*}', current_time.strftime('%d'), path)
+
+    runoff = open_dataset(path)
+
+    # check for non-standard calendar and convert if needed
+    if not isinstance(runoff.indexes[config.get('runoff.time')], pd.DatetimeIndex):
+        runoff[config.get('runoff.time')] = runoff.indexes[config.get('runoff.time')].to_datetimeindex()
+
+    # check if time index includes current time (with some slack)
+    if not (
+        runoff[config.get('runoff.time')].values.min() <= np.datetime64(current_time) <= (runoff[config.get('runoff.time')].values.max() + np.timedelta64(2, 'D'))
+    ):
+        raise ValueError(
+            f"Current simulation date {current_time.strftime('%Y-%m-%d')} not within time bounds of runoff input file {path}. Aborting..."
+        )
 
     sel = {
         config.get('runoff.time'): current_time