Allow for flexibility to plot multiple datasets together (#102)

* start of this data driver change
* Trying to see what fails
* Debugging changes
* More changes
* Small typo
* One more issue
* Norms
* missing norm
* Attempt at new test to plot two datatypes together
* Make plot better
* Fix title typo
* YAML lint trap
* Notebook test fix 1
* change bokeh
* Update bokeh version to fix CI
* Increment to minor version
* Changes after develop merge
JCSDA-internal · May 19, 2023 · 487b51d · 487b51d
1 parent 579f787
commit 487b51d
Show file tree

Hide file tree

Showing 22 changed files with 609 additions and 486 deletions.
diff --git a/.github/workflows/eva_tests_notebook.yml b/.github/workflows/eva_tests_notebook.yml
@@ -13,10 +13,10 @@ jobs:
     steps:
 
     # Setup Python
-    - name: Set up Python 3.9
+    - name: Set up Python 3.10
       uses: actions/setup-python@v2
       with:
-        python-version: 3.9
+        python-version: 3.10.10
 
     # Update conda
     - name: Update conda

diff --git a/requirements-github.txt b/requirements-github.txt
@@ -8,4 +8,4 @@ xarray==2022.6.0
 seaborn==0.12.2
 hvplot==0.8.2
 nbconvert==6.5.4
-bokeh==2.4.3
+bokeh==3.1.1
diff --git a/setup.py b/setup.py
@@ -16,7 +16,7 @@
 
 setuptools.setup(
     name='eva',
-    version='1.3.5',
+    version='1.4.0',
     author='Community owned code',
     description='Evaluation and Verification of an Analysis',
     url='https://github.com/JCSDA-internal/eva',

diff --git a/src/eva/data/cubed_sphere_restart.py b/src/eva/data/cubed_sphere_restart.py
@@ -54,63 +54,62 @@ class CubedSphereRestart(EvaBase):
 
     # ----------------------------------------------------------------------------------------------
 
-    def execute(self, data_collections, timing):
+    def execute(self, dataset_config, data_collections, timing):
 
-        for dataset in self.config.get('datasets'):
-            # Filenames to be read into this collection
-            # -----------------------------------------
-            fv3_filenames = get(dataset, self.logger, 'fv3_filenames')
-            orog_filenames = get(dataset, self.logger, 'orog_filenames')
+        # Filenames to be read into this collection
+        # -----------------------------------------
+        fv3_filenames = get(dataset_config, self.logger, 'fv3_filenames')
+        orog_filenames = get(dataset_config, self.logger, 'orog_filenames')
 
-            # File variable type
-            variable = get(dataset, self.logger, 'variable')
+        # File variable type
+        variable = get(dataset_config, self.logger, 'variable')
 
-            # File resolution
-            resolution = get(dataset, self.logger, 'resolution')
-            resolution = int(resolution.replace('C', ''))
+        # File resolution
+        resolution = get(dataset_config, self.logger, 'resolution')
+        resolution = int(resolution.replace('C', ''))
 
-            # Get missing value threshold
-            # ---------------------------
-            threshold = float(get(dataset, self.logger, 'missing_value_threshold', 1.0e30))
+        # Get missing value threshold
+        # ---------------------------
+        threshold = float(get(dataset_config, self.logger, 'missing_value_threshold', 1.0e30))
 
-            # Get the groups to be read
-            # -------------------------
-            groups = get(dataset, self.logger, 'groups')
+        # Get the groups to be read
+        # -------------------------
+        groups = get(dataset_config, self.logger, 'groups')
 
-            for group in groups:
+        for group in groups:
 
-                # Group name and variables
-                group_name = get(group, self.logger, 'name')
-                group_vars = get(group, self.logger, 'variables', 'all')
+            # Group name and variables
+            group_name = get(group, self.logger, 'name')
+            group_vars = get(group, self.logger, 'variables', 'all')
 
-                # Set the collection name
-                collection_name = dataset['name']
+            # Set the collection name
+            collection_name = dataset_config['name']
 
-                var_dict = {}
+            var_dict = {}
 
-                # Loop through group vars to create data dictionary
-                for var in group_vars:
-                    if var in ['geolon', 'geolat']:
-                        var_dict[group_name + '::' + var] = (["lon", "lat", "tile"],
-                                                             read_nc(orog_filenames, var,
-                                                                     resolution, self.logger))
+            # Loop through group vars to create data dictionary
+            for var in group_vars:
+                if var in ['geolon', 'geolat']:
+                    var_dict[group_name + '::' + var] = (["lon", "lat", "tile"],
+                                                         read_nc(orog_filenames, var,
+                                                                 resolution, self.logger))
 
-                    else:
-                        var_dict[group_name + '::' + var] = (["lon", "lat", "tile"],
-                                                             read_nc(fv3_filenames, var,
-                                                                     resolution, self.logger))
+                else:
+                    var_dict[group_name + '::' + var] = (["lon", "lat", "tile"],
+                                                         read_nc(fv3_filenames, var,
+                                                                 resolution, self.logger))
 
-                # Create dataset from data dictionary
-                ds = xr.Dataset(var_dict)
+            # Create dataset from data dictionary
+            ds = xr.Dataset(var_dict)
 
-                # Assert that the collection contains at least one variable
-                if not ds.keys():
-                    self.logger.abort('Collection \'' + dataset['name'] + '\', group \'' +
-                                      group_name + '\' in file ' + filename +
-                                      ' does not have any variables.')
+            # Assert that the collection contains at least one variable
+            if not ds.keys():
+                self.logger.abort('Collection \'' + dataset_config['name'] + '\', group \'' +
+                                  group_name + '\' in file ' + filename +
+                                  ' does not have any variables.')
 
-            # Add the dataset to the collections
-            data_collections.create_or_add_to_collection(collection_name, ds)
+        # Add the dataset to the collections
+        data_collections.create_or_add_to_collection(collection_name, ds)
 
         # Nan out unphysical values
         data_collections.nan_float_values_outside_threshold(threshold)

diff --git a/src/eva/data/data_driver.py b/src/eva/data/data_driver.py
@@ -0,0 +1,69 @@
+# (C) Copyright 2021-2023 NOAA/NWS/EMC
+#
+# (C) Copyright 2021-2023 United States Government as represented by the Administrator of the
+# National Aeronautics and Space Administration. All Rights Reserved.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+
+
+# --------------------------------------------------------------------------------------------------
+
+
+from eva.utilities.config import get
+from eva.eva_base import EvaBase, EvaFactory
+
+import importlib
+import os
+
+
+# --------------------------------------------------------------------------------------------------
+
+
+class DataDriver(EvaBase):
+    """Dispatch each configured dataset to the data class named by its 'type' key."""
+
+    def execute(self, data_collections, timing):
+        """Create and execute one data object per entry in the 'datasets' list.
+
+        Args:
+            data_collections: Passed through unchanged to each data object's execute call.
+            timing: Object whose start/stop calls bracket construction and execution; it is
+                also handed to the factory when each data object is created.
+
+        Raises:
+            KeyError: If the 'type' entry cannot be read from a dataset.
+        """
+
+        # Get list of dataset dictionaries
+        datasets = get(self.config['data'], self.logger, 'datasets')
+
+        # Loop over datasets
+        for dataset in datasets:
+
+            # The 'type' entry names the eva data class used for this dataset
+            try:
+                eva_data_class_name = dataset['type']
+            except Exception as e:
+                # Report the offending dataset entry itself; chain the original error as cause
+                msg = '\'type\' key not found. \'dataset\': ' \
+                    f'{dataset}, error: {e}'
+                raise KeyError(msg) from e
+
+            # Create the data object
+            creator = EvaFactory()
+            timing.start('DataObjectConstructor')
+            eva_data_object = creator.create_eva_object(eva_data_class_name,
+                                                        'data',
+                                                        dataset,
+                                                        self.logger,
+                                                        timing)
+            timing.stop('DataObjectConstructor')
+
+            # Prepare diagnostic data
+            self.logger.info(f'Running execute for {eva_data_object.name}')
+            timing.start('DataObjectExecute')
+            eva_data_object.execute(dataset, data_collections, timing)
+            timing.stop('DataObjectExecute')
+
+# --------------------------------------------------------------------------------------------------
diff --git a/src/eva/data/eva_interactive.py b/src/eva/data/eva_interactive.py
@@ -33,17 +33,17 @@ def __init__(self):
 
     def load_ioda(self, filename):
         self.filename = filename
-        eva_dict = {'datasets': [{'filenames': [filename],
-                                  'groups': [],
-                                  'missing_value_threshold': 1.0e06,
-                                  'name': self.collection}]}
+        eva_dict = {'filenames': [filename],
+                    'groups': [],
+                    'missing_value_threshold': 1.0e06,
+                    'name': self.collection}
         creator = EvaFactory()
         eva_data_object = creator.create_eva_object('IodaObsSpace',
                                                     'data',
                                                     eva_dict,
                                                     self.logger,
                                                     self.timer)
-        eva_data_object.execute(self.data_collection, self.timer)
+        eva_data_object.execute(eva_dict, self.data_collection, self.timer)
 
     def scatter(self, x, y):
         x_group, x_var = x.split('::')