Merge pull request #3 from IMMM-SFA/paper

Paper setup, code cleanup, and walkthrough for JOSS submission
IMMM-SFA · Mar 31, 2021 · 67cb13b · 67cb13b
2 parents 88d9b6a + f783b7f
commit 67cb13b
Show file tree

Hide file tree

Showing 43 changed files with 952 additions and 406 deletions.
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
@@ -12,7 +12,7 @@ jobs:
 
       env:
         OS: ${{ matrix.os }}
-        PYTHON: '3.8'
+        PYTHON: '3.9'
 
       steps:
 
@@ -21,7 +21,7 @@ jobs:
         - name: Set up Python
           uses: actions/setup-python@master
           with:
-            python-version: 3.8
+            python-version: 3.9
 
         - name: Install dependencies
           run: |

diff --git a/MANIFEST.in b/MANIFEST.in
@@ -0,0 +1,4 @@
+graft mosartwmpy/tests
+include mosartwmpy/*.yaml
+global-exclude *.py[cod]
+prune input output validation paper docs dask-worker-space **/.mypy_cache .github
diff --git a/README.md b/README.md
@@ -1,68 +1,252 @@
 ![build](https://github.com/IMMM-SFA/mosartwmpy/workflows/build/badge.svg) [![codecov](https://codecov.io/gh/IMMM-SFA/mosartwmpy/branch/main/graph/badge.svg?token=IPOY8984MB)](https://codecov.io/gh/IMMM-SFA/mosartwmpy)
 
-
-### mosartwmpy
+## mosartwmpy
 
 `mosartwmpy` is a python translation of Mosart-WM, a model for water routing and reservoir management written in Fortran. The original code can be found at [IWMM](https://github.com/IMMM-SFA/iwmm) and [E3SM](https://github.com/E3SM-Project/E3SM), in which Mosart is the hydrological component of a larger suite of earth-science models. The motivation for rewriting is largely for developer convenience -- running, debugging, and adding new capabilities were becoming increasingly difficult due to the complexity of the codebase and lack of familiarity with Fortran. This version aims to be intuitive, lightweight, and well documented, while still being highly interoperable.
 
-Install requirements with `pip install -r requirements.txt`.
+## getting started
+
+Install `mosartwmpy` with:
+```shell
+pip install mosartwmpy
+```
+
+Download a sample input dataset spanning 1980-1985 by running the following and selecting option `1`. This will download and unpack the inputs to your current directory. Note that this data is about 1.5GB in size.
+
+```shell
+python -m mosartwmpy.download
+```
 
-`mosartwmpy` implements the [Basic Model Interface](https://csdms.colorado.edu/wiki/BMI) defined by the CSDMS, so driving it should be familiar to those accustomed to the BMI:
+Settings are defined by merging `mosartwmpy/config_defaults.yaml` with a user-specified file, which can override any of the default settings. Create a `config.yaml` file that defines your simulation:
+
+> `config.yaml`
+> ```yaml
+> simulation:
+>   name: tutorial
+>   start_date: 1981-05-24
+>   end_date: 1981-05-26
+> 
+> grid:
+>   path: ./input/domains/MOSART_NLDAS_8th_20160426.nc
+>   land:
+>     path: ./input/domains/domain.lnd.nldas2_0224x0464_c110415.nc
+> 
+> runoff:
+>   read_from_file: true
+>   path: ./input/runoff/Livneh_NLDAS_1980_1985.nc
+> 
+> water_management:
+>   enabled: true
+>   demand:
+>     read_from_file: true
+>     path: ./input/demand/RCP8.5_GCAM_water_demand_1980_1985.nc
+>   reservoirs:
+>     path: ./input/reservoirs/US_reservoir_8th_NLDAS3_updated_20200421.nc
+> ```
+
+`mosartwmpy` implements the [Basic Model Interface](https://csdms.colorado.edu/wiki/BMI) defined by the CSDMS, so driving it should be familiar to those accustomed to the BMI. To launch the simulation, open a python shell and run the following:
 
 ```python
-from datetime import datetime, time
-from mosartwmpy.mosartwmpy import Model
+from mosartwmpy import Model
+
+# path to the configuration yaml file
+config_file = "config.yaml"
 
 # initialize the model
 mosart_wm = Model()
-mosart_wm.initialize()
+mosart_wm.initialize(config_file)
 
 # advance the model one timestep
 mosart_wm.update()
 
-# advance until a specificed timestamp
-mosart_wm.update_until(datetime.combine(datetime(2030, 12, 31), time.max).timestamp())
+# advance until the `simulation.end_date` specified in config.yaml
+mosart_wm.update_until(mosart_wm.get_end_time())
 ```
 
-Settings are defined by the merger of the `config_defaults.yaml` and an optional user specified file which can override any of the default settings:
+Alternatively, one can update the settings via code in the driving script using dot notation:
 
 ```python
-mosart_wm = Model('path/to/config/file.yaml')
+from mosartwmpy import Model
+from datetime import datetime
+
+mosart_wm = Model()
+mosart_wm.initialize()
+
+mosart_wm.config['simulation.name'] = 'Tutorial'
+mosart_wm.config['simulation.start_date'] = datetime(1981, 5, 24)
+mosart_wm.config['simulation.end_date'] = datetime(1985, 5, 26)
+# etc...
 ```
 
-Alternatively, one can update the settings via code in the driving script:
+One can use the usual python plotting libraries to visualize data. Model state and output are stored as one-dimensional numpy ndarrays, so they must be reshaped to visualize two-dimensionally:
 
 ```python
- mosart_wm = Model()
- mosart_wm.initialize()
-
- mosart_wm.config['simulation.name'] = 'Water Management'
- mosart_wm.config['simulation.start_date'] = datetime(1981, 1, 1)
- mosart_wm.config['simulation.end_date'] = datetime(1985, 12, 31)
+import xarray as xr
+import matplotlib.pyplot  as plt
+from mosartwmpy import Model
+
+mosart_wm = Model()
+mosart_wm.initialize('./config.yaml')
+
+mosart_wm.update_until(mosart_wm.get_end_time())
+
+surface_water = mosart_wm.get_value_ptr('surface_water_amount')
+
+# create an xarray from the data, which has some convenience wrappers for matplotlib methods
+data_array = xr.DataArray(
+    surface_water.reshape(mosart_wm.get_grid_shape()),
+    dims=['latitude', 'longitude'],
+    coords={'latitude': mosart_wm.get_grid_x(), 'longitude': mosart_wm.get_grid_y()},
+    name='Surface Water Amount',
+    attrs={'units': mosart_wm.get_var_units('surface_water_amount')}
+)
+
+# plot as a pcolormesh
+data_array.plot(robust=True, levels=32, cmap='winter_r')
+
+plt.show()
+
+```
+
+## model input
+
+Several input files in NetCDF format are required to successfully run a simulation, which are not shipped with this repository due to their large size. The grid files, reservoir files, and a small range of runoff and demand input files can be obtained using the download utility by running `python -m mosartwmpy.download` and choosing option 1 for "sample_input". Currently, all input files are assumed to be at the same resolution (for the sample files this is 1/8 degree over the CONUS). Below is a summary of the various input files:
+
+<table>
+<thead>
+<tr>
+<th>
+    Name
+</th>
+<th>
+    Description
+</th>
+<th>
+    Configuration Path
+</th>
+<th>
+    Notes
+</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td>
+    Grid
+</td>
+<td>
+    Spatial constants dimensioned by latitude and longitude relating to the physical properties of the river channels 
+</td>
+<td>
+    <code>grid.path</code>
+</td>
+<td/>
+</tr>
+<tr>
+<td>
+    Land Fraction
+</td>
+<td>
+    Fraction of grid cell that is land (as opposed to, e.g., ocean water), dimensioned by latitude and longitude
+</td>
+<td>
+    <code>grid.land.path</code>
+</td>
+<td>
+    As a TODO item, this variable should be merged into the grid file (historically it was separate for the coupled land model)
+</td>
+</tr>
+<tr>
+<td>
+    Reservoirs
+</td>
+<td>
+    Locations of reservoirs (possibly aggregated) and their physical and political properties
+</td>
+<td>
+    <code>water_management.reservoirs.path</code>
+</td>
+<td/>
+</tr>
+<tr>
+<td>
+    Runoff
+</td>
+<td>
+    Surface runoff, subsurface runoff, and wetland runoff per grid cell averaged per unit of time; used to drive the river routing
+</td>
+<td>
+    <code>runoff.path</code>
+</td>
+<td/>
+</tr>
+<tr>
+<td>
+    Demand
+</td>
+<td>
+    Water demand of grid cells averaged per unit of time; currently assumed to be monthly
+</td>
+<td>
+    <code>water_management.demand.path</code>
+</td>
+<td>
+    There are plans to support other time scales, such as epiweeks
+</td>
+</tr>
+</tbody>
+</table>
+
+Alternatively, certain model inputs can be set using the BMI interface. This can be useful for coupling `mosartwmpy` with other models. If setting an input that would typically be read from a file, be sure to disable the `read_from_file` configuration value for that input. For example:
+```python
+import numpy as np
+from mosartwmpy import Model
+
+mosart_wm = Model()
+mosart_wm.initialize()
+
+# get a list of model input variables
+mosart_wm.get_input_var_names()
+
+# disable the runoff read_from_file
+mosart_wm.config['runoff.read_from_file'] = False
+
+# set the runoff values manually (e.g. from another model's output)
+surface_runoff = np.empty(mosart_wm.get_grid_size())
+surface_runoff[:] = # <values from coupled model>
+mosart_wm.set_value('surface_runoff_flux', surface_runoff)
+
+# advance one timestep
+mosart_wm.update()
+
+# continue coupling...
 ```
 
-By default, key model variables are output on a monthly basis at a daily averaged resolution to `./output/<simulation name>/<simulation name>_<year>_<month>.nc`. Support for the [CSDMS standard names](https://github.com/csdms/standard_names) will be added shortly, but for now see configuration file for examples of how to modify the outputs, and the `./src/_initialize_state.py` file for state variable names.
+## model output
 
+By default, key model variables are output on a monthly basis at a daily averaged resolution to `./output/<simulation name>/<simulation name>_<year>_<month>.nc`. See the configuration file for examples of how to modify the outputs, and the `./mosartwmpy/state/state.py` file for state variable names.
 
-### input files
+Alternatively, certain model outputs deemed most important can be accessed using the BMI interface methods. For example:
+```python
+import numpy as np
+from mosartwmpy import Model
 
-Several input files in NetCDF format are required to successfuly run a simulation, which are not shipped with this repository due to their large size. The grid files, reservoir files, and a small range of runoff and demand input files are available for public download as a zip archive [here](https://zenodo.org/record/4537907/files/mosartwmpy_sample_input_data_1980_1985.zip?download=1). This data can also be obtained using the download utility by running `python download.py` in the repository root and choosing option 1 for "sample_input". Currently, all input files are assumed to be at the same resolution (for the sample files this is 1/8 degree over the CONUS). Below is a summary of the various input files:
+mosart_wm = Model()
+mosart_wm.initialize()
 
-Name | Description | Configuration Path | Notes
---- | --- | --- | ---
-Grid | Spatial constants dimensioned by latitude and longitude relating to the physical properties of the river channels | `grid.path` |
-Land Fraction | Fraction of grid cell that is land (as opposed to i.e. ocean water) dimensioned by latitude and longitude | `grid.land.path` | as a TODO item, this variable should be merged into the grid file (historically it was separate for the coupled land model)
-Reservoirs | Locations of reservoirs (possibly aggregated) and their physical and political properties | `water_management.reservoirs.path` |
-Runoff | Surface runoff, subsurface runoff, and wetland runoff per grid cell averaged per unit of time; used to drive the river routing | `runoff.path` |
-Demand | Water demand of grid cells averaged per unit of time; currently assumed to be monthly | `water_management.reservoirs.demand` | there are plans to support other time scales, such as epiweeks
+# get a list of model output variables
+mosart_wm.get_output_var_names()
 
+# get the flattened numpy.ndarray of values for an output variable
+supply = mosart_wm.get_value_ptr('supply_water_amount')
+```
 
-### testing and validation
+## testing and validation
 
-Before running the tests or validation, make sure to download the "sample_input" and "validation" datasets using the download utility `python download.py`.
+Before running the tests or validation, make sure to download the "sample_input" and "validation" datasets using the download utility `python -m mosartwmpy.download`.
 
 To execute the tests, run `./test.sh` or `python -m unittest discover mosartwmpy/tests` from the repository root.
 
-To execute the validation, run a model simulation that includes the years 1981 - 1982, note your output directory, and then run `./validation.sh` or `python validation/validate.py` from the repository root. This will ask you for the simulation output directory, think for a moment, and then open a figure with several plots representing the NMAE (Normalized Mean Absolute Error) as a percentage and the spatial sums of several key variables compared between your simulation and the validation scenario. Use these plots to assist you in determining if the changes you have made to the code have caused unintended deviation from the validation scenario. The NMAE should be 0% across time if you have caused no deviations. A non-zero NMAE indicates numerical difference between your simulation and the validation scenario. This might be caused by changes you have made to the code, or alternatively by running a simulation with different configuration or parameters (i.e. larger timestep, fewer iterations, etc). The plots of the spatial sums can assist you in determining what changed and the overall magnitude of the changes.
+To execute the validation, run a model simulation that includes the years 1981 - 1982, note your output directory, and then run `python -m mosartwmpy.validate` from the repository root. This will ask you for the simulation output directory, think for a moment, and then open a figure with several plots representing the NMAE (Normalized Mean Absolute Error) as a percentage and the spatial sums of several key variables compared between your simulation and the validation scenario. Use these plots to assist you in determining if the changes you have made to the code have caused unintended deviation from the validation scenario. The NMAE should be 0% across time if you have caused no deviations. A non-zero NMAE indicates a numerical difference between your simulation and the validation scenario. This might be caused by changes you have made to the code, or alternatively by running a simulation with a different configuration or parameters (e.g. larger timestep, fewer iterations, etc.). The plots of the spatial sums can assist you in determining what changed and the overall magnitude of the changes.
 
 If you wish to merge code changes that intentionally cause significant deviation from the validation scenario, please work with the maintainers to create a new validation dataset.
diff --git a/config.yaml b/config.yaml
@@ -4,9 +4,9 @@ simulation:
     # name this simulation
     name: mosartwmpy demo
     # date to begin the simulation
-    start_date: 1980-01-01
+    start_date: 1981-01-01
     # date to end the simulation
-    end_date: 1980-01-31
+    end_date: 1981-01-31
 
 # For example -- to change the input files, update the paths below:
 
@@ -17,8 +17,8 @@ simulation:
 #       # path to the land grid domain file; can be absolute or relative to the source code root
 #       path: /pic/projects/im3/iwmm/share/domains/domain.clm/domain.lnd.nldas2_0224x0464_c110415.nc
 # runoff:
-#     # flag to turn this feature on or off
-#     enabled: true
+#     # whether or not to read runoff input from file
+#     read_from_file: true
 #     # path to the runoff file; can be absolute or relative to the source code root
 #     path: /pic/projects/im3/iwmm/lnd/dlnd7/NLDAS/Livneh_NLDAS_1980_2011.nc
 # water_management:
@@ -30,5 +30,3 @@ simulation:
 #     reservoirs:
 #       # path to the reservoir parameter file; can be absolute or relative to the source code root
 #       path: /pic/projects/im3/iwmm/input_data/runoff/US_reservoir_8th_NLDAS3_updated_20200421.nc
-# multiprocessing:
-#     enabled: false
diff --git a/docs/index.rst b/docs/index.rst
@@ -1,5 +1,5 @@
-Contents
-========
+mosartwmpy
+==========
 
 Introduction
 ------------

diff --git a/launch.py b/launch.py
@@ -9,4 +9,4 @@
     # launch simulation
     mosart_wm = Model()
     mosart_wm.initialize('./config.yaml')
-    mosart_wm.update_until(mosart_wm.get_end_time())
+    mosart_wm.update_until(mosart_wm.get_end_time())
diff --git a/mosartwmpy/__init__.py b/mosartwmpy/__init__.py
@@ -1 +1 @@
-from .model import Model
+from .model import Model
diff --git a/mosartwmpy/config/__init__.py b/mosartwmpy/config/__init__.py
diff --git a/mosartwmpy/config/config.py b/mosartwmpy/config/config.py
@@ -1,6 +1,8 @@
+import pkg_resources
 from benedict import benedict
 from benedict.dicts import benedict as Benedict
 
+
 def get_config(config_file_path: str) -> Benedict:
     """Configuration object for the model, using the Benedict type.
     
@@ -10,9 +12,8 @@ def get_config(config_file_path: str) -> Benedict:
     Returns:
         Benedict: A Benedict instance containing the merged configuration
     """
+    config = benedict(pkg_resources.resource_filename('mosartwmpy', 'config_defaults.yaml'), format='yaml')
+    if config_file_path is not None and config_file_path != '':
+        config.merge(benedict(str(config_file_path), format='yaml'), overwrite=True)
 
-    config = benedict('./config_defaults.yaml', format='yaml')
-    if config_file_path and config_file_path != '':
-        config.merge(benedict(config_file_path, format='yaml'), overwrite=True)
-
-    return config
+    return config