diff --git a/README.md b/README.md index 62cee1c..2bf114a 100644 --- a/README.md +++ b/README.md @@ -41,10 +41,28 @@ Alternatively, one can update the settings via code in the driving script: mosart_wm.config['simulation.end_date'] = datetime(1985, 12, 31) ``` -By default, key model variables are output on a monthly basis at a daily averaged resolution to `./output//__.nc`. Support for the [CSDMS standard names](https://github.com/csdms/standard_names) will be added shortly, but for now see configuration file and the `./src/_initialize_state.py` file for examples of how to modify the outputs. +By default, key model variables are output on a monthly basis at a daily averaged resolution to `./output//__.nc`. Support for the [CSDMS standard names](https://github.com/csdms/standard_names) will be added shortly, but for now see configuration file for examples of how to modify the outputs, and the `./src/_initialize_state.py` file for state variable names. + ### input files -More details will be added to this section soon. +Several input files in NetCDF format are required to successfully run a simulation, which are not shipped with this repository due to their large size. The grid files, reservoir files, and a small range of runoff and demand input files are available for public download as a zip archive [here](https://zenodo.org/record/4537907/files/mosartwmpy_sample_input_data_1980_1985.zip?download=1). This data can also be obtained using the download utility by running `python download.py` in the repository root and choosing option 1 for "sample_input". Currently, all input files are assumed to be at the same resolution (for the sample files this is 1/8 degree over the CONUS). 
Below is a summary of the various input files: + +Name | Description | Configuration Path | Notes +--- | --- | --- | --- +Grid | Spatial constants dimensioned by latitude and longitude relating to the physical properties of the river channels | `grid.path` | +Land Fraction | Fraction of grid cell that is land (as opposed to i.e. ocean water) dimensioned by latitude and longitude | `grid.land.path` | as a TODO item, this variable should be merged into the grid file (historically it was separate for the coupled land model) +Reservoirs | Locations of reservoirs (possibly aggregated) and their physical and political properties | `water_management.reservoirs.path` | +Runoff | Surface runoff, subsurface runoff, and wetland runoff per grid cell averaged per unit of time; used to drive the river routing | `runoff.path` | +Demand | Water demand of grid cells averaged per unit of time; currently assumed to be monthly | `water_management.demand.path` | there are plans to support other time scales, such as epiweeks + + +### testing and validation + +Before running the tests or validation, make sure to download the "sample_input" and "validation" datasets using the download utility `python download.py`. + +To execute the tests, run `./test.sh` or `python -m unittest discover mosartwmpy/tests` from the repository root. + +To execute the validation, run a model simulation that includes the years 1981 - 1982, note your output directory, and then run `./validate.sh` or `python validation/validate.py` from the repository root. This will ask you for the simulation output directory, think for a moment, and then open a figure with several plots representing the NMAE (Normalized Mean Absolute Error) as a percentage and the spatial sums of several key variables compared between your simulation and the validation scenario. Use these plots to assist you in determining if the changes you have made to the code have caused unintended deviation from the validation scenario. 
The NMAE should be 0% across time if you have caused no deviations. A non-zero NMAE indicates numerical difference between your simulation and the validation scenario. This might be caused by changes you have made to the code, or alternatively by running a simulation with different configuration or parameters (i.e. larger timestep, fewer iterations, etc). The plots of the spatial sums can assist you in determining what changed and the overall magnitude of the changes. -Several input files are required to successfuly run a simulation, which are not shipped with this repository due to their large size. For now, please reach out to the developers for sample input files. A script will soon be added to download a small subset of input files and validated output files from a public data repository. If you have access to PNNL's Constance, you can download a small subset of inputs at: `/people/thur961/mosartwmpy_input.zip`. +If you wish to merge code changes that intentionally cause significant deviation from the validation scenario, please work with the maintainers to create a new validation dataset. 
\ No newline at end of file diff --git a/mosartwmpy/grid/grid.py b/mosartwmpy/grid/grid.py index da908f8..bf815ca 100644 --- a/mosartwmpy/grid/grid.py +++ b/mosartwmpy/grid/grid.py @@ -1,10 +1,13 @@ import logging import numpy as np import pandas as pd +import pickle +import tempfile import xarray as xr -from xarray import open_dataset from benedict.dicts import benedict as Benedict +from xarray import open_dataset +from zipfile import ZIP_DEFLATED, ZipFile from mosartwmpy.config.parameters import Parameters from mosartwmpy.reservoirs.grid import load_reservoirs @@ -12,6 +15,69 @@ class Grid(): """Class to store grid related values that are constant throughout a simulation.""" + # initialize all properties + id: np.ndarray = np.empty(0) + downstream_id: np.ndarray = np.empty(0) + longitude: np.ndarray = np.empty(0) + latitude: np.ndarray = np.empty(0) + unique_longitudes: np.ndarray = np.empty(0) + unique_latitudes: np.ndarray = np.empty(0) + cell_count: int = 0 + longitude_spacing: float = 0.0 + latitude_spacing: float = 0.0 + land_mask: np.ndarray = np.empty(0) + mosart_mask: np.ndarray = np.empty(0) + area: np.ndarray = np.empty(0) + outlet_id: np.ndarray = np.empty(0) + upstream_id: np.ndarray = np.empty(0) + upstream_cell_count: np.ndarray = np.empty(0) + land_fraction: np.ndarray = np.empty(0) + iterations_main_channel: np.ndarray = np.empty(0) + iterations_subnetwork: np.ndarray = np.empty(0) + drainage_fraction: np.ndarray = np.empty(0) + local_drainage_area: np.ndarray = np.empty(0) + total_drainage_area_multi: np.ndarray = np.empty(0) + total_drainage_area_single: np.ndarray = np.empty(0) + flow_direction: np.ndarray = np.empty(0) + hillslope_manning: np.ndarray = np.empty(0) + subnetwork_manning: np.ndarray = np.empty(0) + channel_manning: np.ndarray = np.empty(0) + hillslope: np.ndarray = np.empty(0) + hillslope_length: np.ndarray = np.empty(0) + drainage_density: np.ndarray = np.empty(0) + subnetwork_slope: np.ndarray = np.empty(0) + subnetwork_width: 
np.ndarray = np.empty(0) + subnetwork_length: np.ndarray = np.empty(0) + channel_length: np.ndarray = np.empty(0) + channel_slope: np.ndarray = np.empty(0) + channel_width: np.ndarray = np.empty(0) + channel_floodplain_width: np.ndarray = np.empty(0) + total_channel_length: np.ndarray = np.empty(0) + grid_channel_depth: np.ndarray = np.empty(0) + + # Reservoir related properties + reservoir_id: np.ndarray = np.empty(0) + reservoir_runoff_capacity: np.ndarray = np.empty(0) + reservoir_height: np.ndarray = np.empty(0) + reservoir_length: np.ndarray = np.empty(0) + reservoir_surface_area: np.ndarray = np.empty(0) + reservoir_storage_capacity: np.ndarray = np.empty(0) + reservoir_depth: np.ndarray = np.empty(0) + reservoir_use_irrigation: np.ndarray = np.empty(0) + reservoir_use_electricity: np.ndarray = np.empty(0) + reservoir_use_supply: np.ndarray = np.empty(0) + reservoir_use_flood_control: np.ndarray = np.empty(0) + reservoir_use_recreation: np.ndarray = np.empty(0) + reservoir_use_navigation: np.ndarray = np.empty(0) + reservoir_use_fish_protection: np.ndarray = np.empty(0) + reservoir_withdrawal: np.ndarray = np.empty(0) + reservoir_conveyance: np.ndarray = np.empty(0) + reservoir_count: np.ndarray = np.empty(0) + reservoir_to_grid_mapping: pd.DataFrame = pd.DataFrame() + reservoir_streamflow_schedule: xr.DataArray = xr.DataArray() + reservoir_demand_schedule: xr.DataArray = xr.DataArray() + reservoir_prerelease_schedule: xr.DataArray = xr.DataArray() + def __init__(self, config: Benedict = None, parameters: Parameters = None, empty: bool = False): """Initialize the Grid class. 
@@ -25,49 +91,6 @@ def __init__(self, config: Benedict = None, parameters: Parameters = None, empty if empty: return - # initialize all properties - self.drainage_fraction = np.empty(0) - self.local_drainage_area = np.empty(0) - self.total_drainage_area_multi = np.empty(0) - self.total_drainage_area_single = np.empty(0) - self.id = np.empty(0) - self.downstream_id = np.empty(0) - self.flow_direction = np.empty(0) - self.hillslope_manning = np.empty(0) - self.subnetwork_manning = np.empty(0) - self.channel_manning = np.empty(0) - self.hillslope = np.empty(0) - self.drainage_density = np.empty(0) - self.subnetwork_slope = np.empty(0) - self.subnetwork_width = np.empty(0) - self.channel_length = np.empty(0) - self.channel_slope = np.empty(0) - self.channel_width = np.empty(0) - self.channel_floodplain_width = np.empty(0) - self.grid_channel_depth = np.empty(0) - if config.get('water_management.enabled', False): - self.reservoir_id = np.empty(0) - self.reservoir_runoff_capacity = np.empty(0) - self.reservoir_height = np.empty(0) - self.reservoir_length = np.empty(0) - self.reservoir_surface_area = np.empty(0) - self.reservoir_storage_capacity = np.empty(0) - self.reservoir_depth = np.empty(0) - self.reservoir_use_irrigation = np.empty(0) - self.reservoir_use_electricity = np.empty(0) - self.reservoir_use_supply = np.empty(0) - self.reservoir_use_flood_control = np.empty(0) - self.reservoir_use_recreation = np.empty(0) - self.reservoir_use_navigation = np.empty(0) - self.reservoir_use_fish_protection = np.empty(0) - self.reservoir_withdrawal = np.empty(0) - self.reservoir_conveyance = np.empty(0) - self.reservoir_count = np.empty(0) - self.reservoir_to_grid_mapping = pd.DataFrame() - self.reservoir_streamflow_schedule = xr.DataArray() - self.reservoir_demand_schedule = xr.DataArray() - self.reservoir_prerelease_schedule = xr.DataArray() - logging.info('Loading grid file.') # open dataset @@ -316,4 +339,111 @@ def __init__(self, config: Benedict = None, parameters: 
Parameters = None, empty # note that reservoir grid is assumed to be the same as the domain grid if config.get('water_management.enabled', False): logging.debug(' - reservoirs') - load_reservoirs(self, config, parameters) \ No newline at end of file + load_reservoirs(self, config, parameters) + + def to_files(self, path: str) -> None: + """Builds a dataframe from all the grid values. + + Args: + path (str): the file path to save the grid zip file to + """ + + if not path.endswith('.zip'): + path += '.zip' + + keys = dir(self) + + paths = [] + names = [] + + # handle special cases + special = ['unique_longitudes', 'unique_latitudes', 'cell_count', 'longitude_spacing', 'latitude_spacing'] + to_pickle = {} + for key in special: + keys.remove(key) + to_pickle[key] = getattr(self, key) + + # handle numpy arrrays + npdf = pd.DataFrame() + for key in [key for key in keys if isinstance(getattr(self, key), np.ndarray)]: + if getattr(self, key).size > 0: + npdf[key] = getattr(self, key) + + # handle dataframes + dfs = [] + for key in [key for key in keys if isinstance(getattr(self, key), pd.DataFrame)]: + if getattr(self, key).size > 0: + dfs.append({ + 'key': key, + 'frame': getattr(self, key) + }) + + # handle xarrays + xrs = [] + for key in [key for key in keys if isinstance(getattr(self, key), xr.DataArray)]: + if getattr(self, key).size > 0: + xrs.append({ + 'key': key, + 'data_array': getattr(self, key) + }) + + # write them all to files and zip + with tempfile.TemporaryDirectory() as tmpdir: + names.append('special.pickle') + paths.append(f'{tmpdir}/{names[-1]}') + with open(paths[-1], 'wb') as file: + pickle.dump(to_pickle, file) + names.append('np.feather') + paths.append(f'{tmpdir}/{names[-1]}') + npdf.to_feather(paths[-1]) + for df in dfs: + names.append(f'{df["key"]}.df.nc') + paths.append(f'{tmpdir}/{names[-1]}') + df['frame'].to_xarray().to_netcdf(paths[-1], engine='h5netcdf') + for ds in xrs: + names.append(f'{ds["key"]}.xr.nc') + 
paths.append(f'{tmpdir}/{names[-1]}') + ds['data_array'].to_netcdf(paths[-1], engine='h5netcdf') + with ZipFile(path, 'w', compression=ZIP_DEFLATED, compresslevel=9) as zip: + for i, filename in enumerate(paths): + zip.write(filename, names[i]) + + @staticmethod + def from_files(path: str) -> 'Grid': + """Creates a Grid instance from columns in a dataframe. + + Args: + path (str): the file path to the zip file to load the grid from + + Returns: + Grid: a Grid instance populated with the columns from the dataframe + """ + if not path.endswith('.zip'): + path += '.zip' + + grid = Grid(empty=True) + + with ZipFile(path, 'r') as zip: + for filename in zip.namelist(): + with zip.open(filename) as file: + if filename.endswith('.pickle'): + from_pickle = pickle.load(file) + for key in from_pickle.keys(): + setattr(grid, key, from_pickle[key]) + if filename.endswith('np.feather'): + npdf = pd.read_feather(file) + for key in npdf.columns: + setattr(grid, key, npdf[key].values) + if filename.endswith('df.nc'): + key = filename.split('.')[0] + ds = xr.open_dataset(file, engine='h5netcdf') + df = ds.to_dataframe() + setattr(grid, key, df) + ds.close() + if filename.endswith('xr.nc'): + key = filename.split('.')[0] + ds = xr.open_dataarray(file, engine='h5netcdf').load() + setattr(grid, key, ds) + ds.close() + + return grid \ No newline at end of file diff --git a/mosartwmpy/model.py b/mosartwmpy/model.py index b19214c..3a14d64 100644 --- a/mosartwmpy/model.py +++ b/mosartwmpy/model.py @@ -59,7 +59,7 @@ def __init__(self): self.git_hash = None self.git_untracked = None - def initialize(self, config_file_path: str) -> None: + def initialize(self, config_file_path: str, grid: Grid = None, state: State = None) -> None: t = timer() @@ -98,38 +98,44 @@ def initialize(self, config_file_path: str) -> None: raise e # load grid - try: - self.grid = Grid(config=self.config, parameters=self.parameters) - except Exception as e: - logging.exception('Failed to load grid file; see below for 
stacktrace.') - raise e + if grid is not None: + self.grid = grid + else: + try: + self.grid = Grid(config=self.config, parameters=self.parameters) + except Exception as e: + logging.exception('Failed to load grid file; see below for stacktrace.') + raise e # load restart file or initialize state - try: - # restart file - if self.config.get('simulation.restart_file') is not None and self.config.get('simulation.restart_file') != '': - path = self.config.get('simulation.restart_file') - logging.info(f'Loading restart file from: `{path}`.') - # set simulation start time based on file name - date = re.search(r'\d{4}_\d{2}_\d{2}', path) - if date: - date = date[len(date) - 1].split('_') - self.current_time = datetime(int(date[0]), int(date[1]), int(date[2])) + if state is not None: + self.state = state + else: + try: + # restart file + if self.config.get('simulation.restart_file') is not None and self.config.get('simulation.restart_file') != '': + path = self.config.get('simulation.restart_file') + logging.info(f'Loading restart file from: `{path}`.') + # set simulation start time based on file name + date = re.search(r'\d{4}_\d{2}_\d{2}', path) + if date: + date = date[len(date) - 1].split('_') + self.current_time = datetime(int(date[0]), int(date[1]), int(date[2])) + else: + logging.warn('Unable to parse date from restart file name, falling back to configured start date.') + self.current_time = datetime.combine(self.config.get('simulation.start_date'), time.min) + x = open_dataset(path) + self.state = State.from_dataframe(x.to_dataframe()) + x.close() + # TODO ensure output file writes still workr else: - logging.warn('Unable to parse date from restart file name, falling back to configured start date.') + # simulation start time self.current_time = datetime.combine(self.config.get('simulation.start_date'), time.min) - x = open_dataset(path) - self.state = State.from_dataframe(x.to_dataframe()) - x.close() - # TODO ensure output file writes still workr - else: - # 
simulation start time - self.current_time = datetime.combine(self.config.get('simulation.start_date'), time.min) - # initialize state - self.state = State(grid=self.grid, config=self.config, parameters=self.parameters, grid_size=self.get_grid_size()) - except Exception as e: - logging.exception('Failed to initialize model; see below for stacktrace.') - raise e + # initialize state + self.state = State(grid=self.grid, config=self.config, parameters=self.parameters, grid_size=self.get_grid_size()) + except Exception as e: + logging.exception('Failed to initialize model; see below for stacktrace.') + raise e # setup output file averaging try: diff --git a/mosartwmpy/reservoirs/grid.py b/mosartwmpy/reservoirs/grid.py index e7c7dc3..b6ae2e3 100644 --- a/mosartwmpy/reservoirs/grid.py +++ b/mosartwmpy/reservoirs/grid.py @@ -3,7 +3,6 @@ import pandas as pd from xarray import concat, open_dataset, Dataset -from xarray.ufuncs import logical_not from benedict.dicts import benedict as Benedict from mosartwmpy.config.parameters import Parameters @@ -124,13 +123,13 @@ def prepare_reservoir_schedule(self, config: Benedict, parameters: Parameters, r # note that xarray `where` modifies the false values condition = (demand_avg >= (0.5 * flow_avg)) & (flow_avg > 0) prerelease = prerelease.where( - logical_not(condition), + ~condition, demand_avg/ 10 + 9 / 10 * flow_avg * self.reservoir_demand_schedule / demand_avg ) prerelease = prerelease.where( condition, prerelease.where( - logical_not((flow_avg + self.reservoir_demand_schedule - demand_avg) > 0), + ~((flow_avg + self.reservoir_demand_schedule - demand_avg) > 0), flow_avg + self.reservoir_demand_schedule - demand_avg ) ) diff --git a/mosartwmpy/tests/__init__.py b/mosartwmpy/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/mosartwmpy/tests/demand_1981_01_01.nc b/mosartwmpy/tests/demand_1981_01_01.nc new file mode 100644 index 0000000..60aa622 Binary files /dev/null and 
b/mosartwmpy/tests/demand_1981_01_01.nc differ diff --git a/mosartwmpy/tests/grid.zip b/mosartwmpy/tests/grid.zip new file mode 100644 index 0000000..7e1a50a Binary files /dev/null and b/mosartwmpy/tests/grid.zip differ diff --git a/mosartwmpy/tests/reservoirs.nc b/mosartwmpy/tests/reservoirs.nc new file mode 100644 index 0000000..142630d Binary files /dev/null and b/mosartwmpy/tests/reservoirs.nc differ diff --git a/mosartwmpy/tests/runoff_1981_01_01.nc b/mosartwmpy/tests/runoff_1981_01_01.nc new file mode 100644 index 0000000..a14a4fe Binary files /dev/null and b/mosartwmpy/tests/runoff_1981_01_01.nc differ diff --git a/mosartwmpy/tests/test_config.yaml b/mosartwmpy/tests/test_config.yaml new file mode 100644 index 0000000..6370e00 --- /dev/null +++ b/mosartwmpy/tests/test_config.yaml @@ -0,0 +1,10 @@ +simulation: + name: unit_tests + start_date: 1981-01-01 +runoff: + path: ./mosartwmpy/tests/runoff_1981_01_01.nc +water_management: + demand: + path: ./mosartwmpy/tests/demand_1981_01_01.nc + reservoirs: + path: ./mosartwmpy/tests/reservoirs.nc \ No newline at end of file diff --git a/mosartwmpy/tests/test_example.py b/mosartwmpy/tests/test_example.py deleted file mode 100644 index eca6d89..0000000 --- a/mosartwmpy/tests/test_example.py +++ /dev/null @@ -1,14 +0,0 @@ -import unittest - - -class TestExample(unittest.TestCase): - """Sample test case to be deleted.""" - - def test_something(self): - - # make test pass - self.assertEqual(True, True) - - -if __name__ == '__main__': - unittest.main() diff --git a/mosartwmpy/tests/test_model.py b/mosartwmpy/tests/test_model.py new file mode 100644 index 0000000..2b8f82e --- /dev/null +++ b/mosartwmpy/tests/test_model.py @@ -0,0 +1,22 @@ +import numpy as np +import unittest + +from mosartwmpy import Model +from mosartwmpy.grid.grid import Grid +from mosartwmpy.state.state import State + + +class ModelTest(unittest.TestCase): + """Test that the model initializes and runs with the default settings.""" + + def setUp(self): + 
self.model = Model() + self.grid = Grid.from_files('./mosartwmpy/tests/grid.zip') + + def test_can_initialize_and_run(self): + self.model.initialize('./mosartwmpy/tests/test_config.yaml', grid = self.grid) + self.model.update() + self.assertTrue(True, "model initializes and updates") + +if __name__ == '__main__': + unittest.main() \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 9315e33..e90245d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,6 +2,7 @@ bmipy==2.0 dask[complete]==2021.01.1 epiweeks==2.1.2 fastparquet==0.4.1 +h5netcdf==0.10.0 matplotlib==3.3.3 nc-time-axis==1.2.0 netCDF4==1.5.4 @@ -11,5 +12,6 @@ numpy==1.19.4 pandas==1.1.4 pathvalidate==2.3.0 psutil==5.7.3 +pyarrow==3.0.0 python-benedict==0.22.0 xarray==0.16.1 \ No newline at end of file diff --git a/test.sh b/test.sh new file mode 100755 index 0000000..88d96b8 --- /dev/null +++ b/test.sh @@ -0,0 +1 @@ +python -m unittest discover mosartwmpy \ No newline at end of file diff --git a/validate.sh b/validate.sh new file mode 100755 index 0000000..7b6d274 --- /dev/null +++ b/validate.sh @@ -0,0 +1 @@ +python validation/validate.py \ No newline at end of file diff --git a/validation/month_validate.py b/validation/month_validate.py deleted file mode 100644 index 39db414..0000000 --- a/validation/month_validate.py +++ /dev/null @@ -1,14 +0,0 @@ -import numpy as np -import matplotlib.pyplot as plt -from xarray import open_dataset -vars = ['RIVER_DISCHARGE_OVER_LAND_LIQ', 'STORAGE_LIQ', 'QSUR_LIQ', 'QSUB_LIQ', 'WRM_DEMAND', 'WRM_DEMAND0', 'WRM_STORAGE', 'WRM_SUPPLY'] -b = open_dataset('validation/wm-validation-1981-1982.nc') -p = open_dataset('output/Fiddling/Fiddling_1981_01.nc') -for var in vars: - nmae = np.zeros(31) - for t in np.arange(31): - nmae[t] = 100 * np.fabs(b[var][t,:,:] - p[var][t,:,:]).sum() / b[var][t,:,:].sum() - f = plt.figure() - plt.plot(nmae) - plt.title(var) - f.show() diff --git a/validation/validate.py b/validation/validate.py index 
5e64dd6..cb18235 100644 --- a/validation/validate.py +++ b/validation/validate.py @@ -1,27 +1,25 @@ import matplotlib.pyplot as plt import numpy as np import os -import sys from xarray import open_mfdataset -from xarray.ufuncs import fabs # TODO accept command line path input as alternative # TODO accept command line year input # TODO allow easily toggling between scenarios for variables of interest (no-wm, wm, heat, etc) years = [1981, 1982] -baseline_data_path = 'validation/wm-validation-1981-1982.nc' +baseline_data_path = 'validation/mosartwmpy_validation_wm_1981_1982.nc' variables_of_interest = ['STORAGE_LIQ', 'RIVER_DISCHARGE_OVER_LAND_LIQ', 'WRM_STORAGE', 'WRM_SUPPLY'] physical_dimensions = ['lat', 'lon'] temporal_dimension = 'time' print() -print("🎶 Wolfgang Mosart Validation 🎶") +print("🎶 MOSARTWMPY VALIDATION 🎶") print() print("Thanks for validating your code changes!") -print(f"This tool expects that you have generated data for the years {years}.") +print(f"This tool expects that you have generated data within the years {years}.") print("Please open an issue on GitHub if you have any trouble or suggestions.") -print("https://github.com/IMMM-SFA/wolfgang") +print("https://github.com/IMMM-SFA/mosartwmpy") print() data_path = input("Where are your output files located? 
Enter the absolute path or path relative to the wolfgang root directory: ") @@ -31,9 +29,10 @@ data = open_mfdataset(f"{data_path}/*.nc" if data_path[-3:] != '.nc' else data_path) try: - data = data.sel({ temporal_dimension: slice(f"{years[0]}-01-02", f"{years[-1]}") }) + data = data.sel({ temporal_dimension: slice(f"{years[0]}", f"{years[-1]}") }) + timeslice = slice(data[temporal_dimension].values[0], data[temporal_dimension].values[len(data[temporal_dimension].values) - 1]) baseline_data = open_mfdataset(baseline_data_path) - baseline_data = baseline_data.sel({ temporal_dimension: slice(f"{years[0]}-01-02", f"{years[-1]}") }) + baseline_data = baseline_data.sel({ temporal_dimension: timeslice }) except: print(f"Either your data or the baseline data does not appear to include years {years}.") print("Please double check and try again or update this code to look for a different year.") @@ -51,19 +50,29 @@ data[temporal_dimension] = data.indexes[temporal_dimension].normalize() baseline_data[temporal_dimension] = baseline_data.indexes[temporal_dimension].normalize() -nmae = 100 * fabs(baseline_data - data).sum(dim=physical_dimensions) / baseline_data.sum(dim=physical_dimensions) +nmae = 100.0 * np.abs(baseline_data - data).sum(dim=physical_dimensions) / baseline_data.sum(dim=physical_dimensions) +data_sums = data.sum(dim=physical_dimensions) +baseline_sums = baseline_data.sum(dim=physical_dimensions) -figure, axes = plt.subplots(int(np.ceil(len(variables_of_interest) / 2)), int(np.floor(len(variables_of_interest) / 2))) +figure, axes = plt.subplots(len(variables_of_interest), 2) -for i, axis in enumerate(axes.flatten()): +for i in range(len(variables_of_interest)): v = variables_of_interest[i] + axis = axes[i, 0] nmae[v].plot(ax=axis) axis.set_ylabel(None) axis.set_xlabel(None) axis.set_ylim([0, 10]) - axis.set_title(f'{v}') + axis.set_title(v) + axis = axes[i, 1] + baseline_sums[v].plot(ax=axis) + data_sums[v].plot(ax=axis) + axis.set_ylabel(None) + 
axis.set_xlabel(None) + axis.set_title(f'Sum of {v}') + axis.legend(['baseline', 'validation']) figure.text(0.5, 0.04, 'time', ha='center') figure.text(0.04, 0.5, 'NMAE (%)', va='center', rotation='vertical') figure.tight_layout() -plt.show() +plt.show() \ No newline at end of file