analysis: Improve log messages, warning and errors (#313)

* analysis: Improve errors Improve the errors in the ema_workbench/analysis, by: - Making them more explicit - Including what the variable value or type not expected currently is - Including what the variable value or type not expected should be - Stating the explicit type of warning or error (DeprecationWarning, ValueError, etc.) - Converting them to F-strings (for better in-code readability) - Formatting warnings and errors with a Capital letter, while leaving log-statements lowercase. This will make debugging easier, allowing more time for model development and experimentation. It also documents explicitly that an empty list [] is allowed for outcomes_to_show in plotting_util.py.
quaquel · Nov 17, 2023 · 0ecf4a8 · 0ecf4a8
1 parent 2cfca39
commit 0ecf4a8
Show file tree

Hide file tree

Showing 7 changed files with 50 additions and 31 deletions.
diff --git a/ema_workbench/analysis/b_and_w_plotting.py b/ema_workbench/analysis/b_and_w_plotting.py
@@ -148,7 +148,7 @@ def set_ax_collections_to_bw(ax, style, colormap):
         try:
             converter_func = _collection_converter[collection_type]
         except KeyError:
-            raise EMAError(f"converter for {collection_type} not implemented")
+            raise EMAError(f"Converter for collection type {collection_type} not implemented")
         else:
             converter_func(collection, ax, style, colormap)
 
@@ -333,7 +333,9 @@ def set_fig_to_bw(fig, style=HATCHING, line_style="continuous"):
 
     if len(all_colors) > len(bw_mapping):
         mapping_cycle = itertools.cycle(bw_mapping)
-        _logger.warning("more colors used than provided in B&W mapping, cycling over mapping")
+        _logger.warning(
+            f"more colors ({len(all_colors)}) used than provided in B&W mapping ({len(bw_mapping)}), cycling over mapping"
+        )
     else:
         mapping_cycle = bw_mapping
     colormap = dict(zip(all_colors, mapping_cycle))

diff --git a/ema_workbench/analysis/cart.py b/ema_workbench/analysis/cart.py
@@ -60,7 +60,7 @@ def setup_cart(results, classify, incl_unc=None, mass_min=0.05):
         y = classify(outcomes)
         mode = sdutil.RuleInductionType.BINARY
     else:
-        raise TypeError("unknown type for classify")
+        raise TypeError(f"Unknown type for classify: {type(classify)}")
 
     return CART(x, y, mass_min, mode=mode)
 
@@ -329,7 +329,9 @@ def show_tree(self, mplfig=True, format="png"):
         # but just in case, we raise an error if assumption of len==1 does
         # not hold
         if len(graphs) > 1:
-            raise EMAError("trying to visualize more than one tree")
+            raise EMAError(
+                f"Expected a single tree for visualization, but found {len(graphs)} trees."
+            )
 
         graph = graphs[0]
 
@@ -343,6 +345,6 @@ def show_tree(self, mplfig=True, format="png"):
         elif format == "svg":
             img = graph.create_svg()
         else:
-            raise TypeError("""format must be in {'png', 'svg'}""")
+            raise TypeError(f"format must be 'png' or 'svg' (instead of {format}).")
 
         return img
diff --git a/ema_workbench/analysis/feature_scoring.py b/ema_workbench/analysis/feature_scoring.py
@@ -112,7 +112,7 @@ def _prepare_outcomes(outcomes, classify):
         y = classify(outcomes)
         categorical = True
     else:
-        raise TypeError("unknown type for classify")
+        raise TypeError(f"Unknown type for classify: {type(classify)}")
 
     return y, categorical
 

diff --git a/ema_workbench/analysis/pairs_plotting.py b/ema_workbench/analysis/pairs_plotting.py
@@ -548,7 +548,7 @@ def do_text_ticks_labels(ax, i, j, field1, field2, ylabels, outcomes_to_show):
             try:
                 ax.set_xlabel(ylabels.get(field2))
             except KeyError:
-                _logger.info("no label specified for " + field2)
+                _logger.info(f"no label specified for {field2}")
         else:
             ax.set_xlabel(field2)
 
@@ -561,6 +561,6 @@ def do_text_ticks_labels(ax, i, j, field1, field2, ylabels, outcomes_to_show):
             try:
                 ax.set_ylabel(ylabels.get(field1))
             except KeyError:
-                _logger.info("no label specified for " + field1)
+                _logger.info(f"no label specified for {field1}")
         else:
             ax.set_ylabel(field1)
diff --git a/ema_workbench/analysis/plotting.py b/ema_workbench/analysis/plotting.py
@@ -203,7 +203,7 @@ def group_by_envelopes(outcomes, outcome_to_plot, time, density, ax, ax_d, fill,
         try:
             plot_envelope(ax, j, time, value, fill)
         except ValueError:
-            _logger.exception(f"value error when plotting for {key}")
+            _logger.exception(f"ValueError when plotting for {key}")
             raise
 
     if density:
@@ -747,7 +747,7 @@ def multiple_densities(
 
         # making of grid
         if not points_in_time:
-            raise EMAError("no points in time specified")
+            raise EMAError("No points in time specified, should be a list of length 1-6")
         if len(points_in_time) == 1:
             ax_env = plt.subplot2grid((2, 3), (0, 0), colspan=3)
             ax1 = plt.subplot2grid((2, 3), (1, 1), sharey=ax_env)
@@ -788,7 +788,7 @@ def multiple_densities(
             ax6 = plt.subplot2grid((2, 6), (1, 5), sharex=ax1, sharey=ax_env)
             kde_axes = [ax1, ax2, ax3, ax4, ax5, ax6]
         else:
-            raise EMAError("too many points in time provided")
+            raise EMAError(f"Too many points in time specified: {len(points_in_time)}, max is 6")
 
         axes_dict["main plot"] = ax_env
         for n, entry in enumerate(kde_axes):

diff --git a/ema_workbench/analysis/plotting_util.py b/ema_workbench/analysis/plotting_util.py
@@ -284,7 +284,7 @@ def group_density(ax_d, density, outcomes, outcome_to_plot, group_labels, log=Fa
     elif density == Density.BOXENPLOT:
         plot_boxenplot(ax_d, values, log, group_labels=group_labels)
     else:
-        raise EMAError(f"unknown density type: {density}")
+        raise EMAError(f"Unknown density plot type: {density}")
 
     ax_d.set_xlabel("")
     ax_d.set_ylabel("")
@@ -316,7 +316,7 @@ def simple_density(density, value, ax_d, ax, log):
     elif density == Density.BOXENPLOT:
         plot_boxenplot(ax_d, [value[:, -1]], log)
     else:
-        raise EMAError("unknown density plot type")
+        raise EMAError(f"Unknown density plot type: {density}")
 
     ax_d.get_yaxis().set_view_interval(
         ax.get_yaxis().get_view_interval()[0], ax.get_yaxis().get_view_interval()[1]
@@ -483,7 +483,7 @@ def determine_kde(data, size_kde=1000, ymin=None, ymax=None):
     #         best_kde = grid.best_estimator_
     #         kde_x = np.exp(best_kde.score_samples(kde_y[:, np.newaxis]))
     except Exception as e:
-        _logger.warning(e)
+        _logger.warning(f"error in determine_kde: {e}")
         kde_x = np.zeros(kde_y.shape)
 
     return kde_x, kde_y
@@ -508,7 +508,7 @@ def filter_scalar_outcomes(outcomes):
     temp = {}
     for key, value in outcomes.items():
         if value.ndim < 2:
-            _logger.info(("{} not shown because it is " "not time series data").format(key))
+            _logger.info(f"outcome {key} not shown because it is not time series data")
         else:
             temp[key] = value
     return temp
@@ -664,15 +664,23 @@ def prepare_pairs_data(
     Parameters
     ----------
     results : tuple
-    outcomes_to_show : list of str, optional
+    outcomes_to_show : list of str, optional. Both None and an empty list indicate that all outcomes should be shown.
     group_by : str, optional
     grouping_specifiers : iterable, optional
     point_in_time : int, optional
     filter_scalar : bool, optional
 
     """
-    if isinstance(outcomes_to_show, str):
-        raise EMAError("for pair wise plotting, more than one outcome needs to be provided")
+    if outcomes_to_show is not None:
+        if not isinstance(outcomes_to_show, list):
+            raise TypeError(
+                f"For pair-wise plotting multiple outcomes need to be provided.\n"
+                f"outcomes_to_show must be a list of strings or None, instead of a {type(outcomes_to_show)}"
+            )
+        elif len(outcomes_to_show) == 1:
+            raise ValueError(
+                f"Only {len(outcomes_to_show)} outcome provided, at least two are needed for pair-wise plotting."
+            )
 
     experiments, outcomes, outcomes_to_show, time, grouping_labels = prepare_data(
         experiments, None, outcomes, outcomes_to_show, group_by, grouping_specifiers, filter_scalar
@@ -755,7 +763,7 @@ def prepare_data(
         if not grouping_specifiers:
             # no grouping specifier, so infer from the data
             if group_by == "index":
-                raise EMAError("no grouping specifiers provided while " "trying to group on index")
+                raise EMAError("No grouping specifiers provided while trying to group on index")
             else:
                 column_to_group_by = experiments[group_by]
                 if column_to_group_by.dtype in (object, "category"):
@@ -810,7 +818,7 @@ def do_titles(ax, titles, outcome):
             try:
                 ax.set_title(titles[outcome])
             except KeyError:
-                _logger.warning(f"key error in do_titles, no title provided for `{outcome}`")
+                _logger.warning(f"KeyError in do_titles, no title provided for outcome `{outcome}`")
                 ax.set_title(outcome)
 
 
@@ -835,7 +843,9 @@ def do_ylabels(ax, ylabels, outcome):
             try:
                 ax.set_ylabel(ylabels[outcome])
             except KeyError:
-                _logger.warning(f"key error in do_ylabels, no ylabel provided for `{outcome}`")
+                _logger.warning(
+                    f"KeyError in do_ylabels, no ylabel provided for outcome `{outcome}`"
+                )
                 ax.set_ylabel(outcome)
 
 

diff --git a/ema_workbench/analysis/prim.py b/ema_workbench/analysis/prim.py
@@ -27,7 +27,7 @@
     import altair as alt
 except ImportError:
     alt = None
-    warnings.warn(("altair based interactive " "inspection not available"), ImportWarning)
+    warnings.warn("altair based interactive inspection not available", ImportWarning)
 
 from ..util import EMAError, temporary_filter, INFO, get_module_logger
 from . import scenario_discovery_util as sdutil
@@ -108,7 +108,9 @@ def pca_preprocess(experiments, y, subsets=None, exclude=set()):
     if not x.select_dtypes(exclude=np.number).empty:
         raise RuntimeError("X includes non numeric columns")
     if not set(np.unique(y)) == {0, 1}:
-        raise RuntimeError("y should only contain 0s and 1s")
+        raise RuntimeError(
+            f"y should only contain 0s and 1s, currently y contains {set(np.unique(y))}."
+        )
 
     # if no subsets are provided all uncertainties with non dtype object
     # are in the same subset, the name of this is r, for rotation
@@ -416,7 +418,7 @@ def inspect(self, i=None, style="table", **kwargs):
             i = [i]
 
         if not all(isinstance(x, int) for x in i):
-            raise TypeError("i must be an integer or list of integers")
+            raise TypeError(f"i must be an integer or list of integers, not {type(i)}")
 
         return [self._inspect(entry, style=style, **kwargs) for entry in i]
 
@@ -450,7 +452,7 @@ def _inspect(self, i=None, style="table", **kwargs):
         elif style == "data":
             return self._inspect_data(i, uncs, qp_values)
         else:
-            raise ValueError("style must be one of graph, table or data")
+            raise ValueError(f"style must be one of 'graph', 'table' or 'data', not {style}.")
 
     def _inspect_data(self, i, uncs, qp_values):
         """Helper method for inspecting boxes,
@@ -1026,7 +1028,9 @@ def __init__(
         for column in x_nominal.columns.values:
             if np.unique(x[column]).shape == (1,):
                 x = x.drop(column, axis=1)
-                _logger.info(f"{column} dropped from analysis " "because only a single category")
+                _logger.info(
+                    f"column {column} dropped from analysis because it has only one category"
+                )
 
         x_nominal = x.select_dtypes(exclude=np.number)
         self.x_nominal = x_nominal.values
@@ -1044,9 +1048,9 @@ def __init__(
         self._update_yi_remaining = self._update_functions[update_function]
 
         if len(self.y.shape) > 1:
-            raise PrimException("y is not a 1-d array")
+            raise PrimException(f"y is not a 1-d array, but has shape {self.y.shape}")
         if self.y.shape[0] != len(self.x):
-            raise PrimException("len(y) != len(x)")
+            raise PrimException(f"len(y) != len(x): {self.y.shape[0]} != {len(self.x)}")
 
         # store the remainder of the parameters
         self.paste_alpha = paste_alpha
@@ -1100,7 +1104,7 @@ def find_box(self):
             box._frozen = True
 
         if self.yi_remaining.shape[0] == 0:
-            _logger.info("no data remaining")
+            _logger.info("no data remaining, exiting")
             return
 
         # log how much data and how many coi are remaining
@@ -1119,7 +1123,7 @@ def find_box(self):
         box = self._paste(box)
         _logger.debug("pasting completed")
 
-        message = "mean: {0}, mass: {1}, coverage: {2}, " "density: {3} restricted_dimensions: {4}"
+        message = "mean: {0}, mass: {1}, coverage: {2}, density: {3} restricted_dimensions: {4}"
         message = message.format(box.mean, box.mass, box.coverage, box.density, box.res_dim)
 
         if (self.threshold_type == ABOVE) & (box.mean >= self.threshold):
@@ -1133,7 +1137,8 @@ def find_box(self):
         else:
             # make a dump box
             _logger.info(
-                f"box does not meet threshold criteria, value is {box.mean}, returning dump box"
+                f"box mean ({box.mean}) does not meet threshold criteria ({self.threshold_type} {self.threshold}),"
+                f"returning dump box"
             )
             box = PrimBox(self, self.box_init, self.yi_remaining[:])
             self._boxes.append(box)