Skip to content

Commit

Permalink
analysis: Improve log messages, warning and errors (#313)
Browse files Browse the repository at this point in the history
* analysis: Improve errors

Improve the errors in the ema_workbench/analysis, by:
- Making them more explicit
- Including what the variable value or type not expected currently is
- Including what the variable value or type not expected should be
- Stating the explicit type of warning or error (DeprecationWarning, ValueError, etc.)
- Converting them to F-strings (for better in-code readability)
- Formatting warnings and errors with a Capital letter, while leaving log-statements lowercase.

This will make debugging easier, allowing more time for model development and experimentation.

It also documents explicitly that an empty list [] is allowed for outcomes_to_show in plotting_util.py.
  • Loading branch information
EwoutH authored Nov 17, 2023
1 parent 2cfca39 commit 0ecf4a8
Show file tree
Hide file tree
Showing 7 changed files with 50 additions and 31 deletions.
6 changes: 4 additions & 2 deletions ema_workbench/analysis/b_and_w_plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ def set_ax_collections_to_bw(ax, style, colormap):
try:
converter_func = _collection_converter[collection_type]
except KeyError:
raise EMAError(f"converter for {collection_type} not implemented")
raise EMAError(f"Converter for collection type {collection_type} not implemented")
else:
converter_func(collection, ax, style, colormap)

Expand Down Expand Up @@ -333,7 +333,9 @@ def set_fig_to_bw(fig, style=HATCHING, line_style="continuous"):

if len(all_colors) > len(bw_mapping):
mapping_cycle = itertools.cycle(bw_mapping)
_logger.warning("more colors used than provided in B&W mapping, cycling over mapping")
_logger.warning(
f"more colors ({len(all_colors)}) used than provided in B&W mapping ({len(bw_mapping)}), cycling over mapping"
)
else:
mapping_cycle = bw_mapping
colormap = dict(zip(all_colors, mapping_cycle))
Expand Down
8 changes: 5 additions & 3 deletions ema_workbench/analysis/cart.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def setup_cart(results, classify, incl_unc=None, mass_min=0.05):
y = classify(outcomes)
mode = sdutil.RuleInductionType.BINARY
else:
raise TypeError("unknown type for classify")
raise TypeError(f"Unknown type for classify: {type(classify)}")

return CART(x, y, mass_min, mode=mode)

Expand Down Expand Up @@ -329,7 +329,9 @@ def show_tree(self, mplfig=True, format="png"):
# but just in case, we raise an error if assumption of len==1 does
# not hold
if len(graphs) > 1:
raise EMAError("trying to visualize more than one tree")
raise EMAError(
f"Expected a single tree for visualization, but found {len(graphs)} trees."
)

graph = graphs[0]

Expand All @@ -343,6 +345,6 @@ def show_tree(self, mplfig=True, format="png"):
elif format == "svg":
img = graph.create_svg()
else:
raise TypeError("""format must be in {'png', 'svg'}""")
raise TypeError(f"format must be 'png' or 'svg' (instead of {format}).")

return img
2 changes: 1 addition & 1 deletion ema_workbench/analysis/feature_scoring.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ def _prepare_outcomes(outcomes, classify):
y = classify(outcomes)
categorical = True
else:
raise TypeError("unknown type for classify")
raise TypeError(f"Unknown type for classify: {type(classify)}")

return y, categorical

Expand Down
4 changes: 2 additions & 2 deletions ema_workbench/analysis/pairs_plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -548,7 +548,7 @@ def do_text_ticks_labels(ax, i, j, field1, field2, ylabels, outcomes_to_show):
try:
ax.set_xlabel(ylabels.get(field2))
except KeyError:
_logger.info("no label specified for " + field2)
_logger.info(f"no label specified for {field2}")
else:
ax.set_xlabel(field2)

Expand All @@ -561,6 +561,6 @@ def do_text_ticks_labels(ax, i, j, field1, field2, ylabels, outcomes_to_show):
try:
ax.set_ylabel(ylabels.get(field1))
except KeyError:
_logger.info("no label specified for " + field1)
_logger.info(f"no label specified for {field1}")
else:
ax.set_ylabel(field1)
6 changes: 3 additions & 3 deletions ema_workbench/analysis/plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ def group_by_envelopes(outcomes, outcome_to_plot, time, density, ax, ax_d, fill,
try:
plot_envelope(ax, j, time, value, fill)
except ValueError:
_logger.exception(f"value error when plotting for {key}")
_logger.exception(f"ValueError when plotting for {key}")
raise

if density:
Expand Down Expand Up @@ -747,7 +747,7 @@ def multiple_densities(

# making of grid
if not points_in_time:
raise EMAError("no points in time specified")
raise EMAError("No points in time specified, should be a list of length 1-6")
if len(points_in_time) == 1:
ax_env = plt.subplot2grid((2, 3), (0, 0), colspan=3)
ax1 = plt.subplot2grid((2, 3), (1, 1), sharey=ax_env)
Expand Down Expand Up @@ -788,7 +788,7 @@ def multiple_densities(
ax6 = plt.subplot2grid((2, 6), (1, 5), sharex=ax1, sharey=ax_env)
kde_axes = [ax1, ax2, ax3, ax4, ax5, ax6]
else:
raise EMAError("too many points in time provided")
raise EMAError(f"Too many points in time specified: {len(points_in_time)}, max is 6")

axes_dict["main plot"] = ax_env
for n, entry in enumerate(kde_axes):
Expand Down
30 changes: 20 additions & 10 deletions ema_workbench/analysis/plotting_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,7 @@ def group_density(ax_d, density, outcomes, outcome_to_plot, group_labels, log=Fa
elif density == Density.BOXENPLOT:
plot_boxenplot(ax_d, values, log, group_labels=group_labels)
else:
raise EMAError(f"unknown density type: {density}")
raise EMAError(f"Unknown density plot type: {density}")

ax_d.set_xlabel("")
ax_d.set_ylabel("")
Expand Down Expand Up @@ -316,7 +316,7 @@ def simple_density(density, value, ax_d, ax, log):
elif density == Density.BOXENPLOT:
plot_boxenplot(ax_d, [value[:, -1]], log)
else:
raise EMAError("unknown density plot type")
raise EMAError(f"Unknown density plot type: {density}")

ax_d.get_yaxis().set_view_interval(
ax.get_yaxis().get_view_interval()[0], ax.get_yaxis().get_view_interval()[1]
Expand Down Expand Up @@ -483,7 +483,7 @@ def determine_kde(data, size_kde=1000, ymin=None, ymax=None):
# best_kde = grid.best_estimator_
# kde_x = np.exp(best_kde.score_samples(kde_y[:, np.newaxis]))
except Exception as e:
_logger.warning(e)
_logger.warning(f"error in determine_kde: {e}")
kde_x = np.zeros(kde_y.shape)

return kde_x, kde_y
Expand All @@ -508,7 +508,7 @@ def filter_scalar_outcomes(outcomes):
temp = {}
for key, value in outcomes.items():
if value.ndim < 2:
_logger.info(("{} not shown because it is " "not time series data").format(key))
_logger.info(f"outcome {key} not shown because it is not time series data")
else:
temp[key] = value
return temp
Expand Down Expand Up @@ -664,15 +664,23 @@ def prepare_pairs_data(
Parameters
----------
results : tuple
outcomes_to_show : list of str, optional
outcomes_to_show : list of str, optional. Both None and an empty list indicate that all outcomes should be shown.
group_by : str, optional
grouping_specifiers : iterable, optional
point_in_time : int, optional
filter_scalar : bool, optional
"""
if isinstance(outcomes_to_show, str):
raise EMAError("for pair wise plotting, more than one outcome needs to be provided")
if outcomes_to_show is not None:
if not isinstance(outcomes_to_show, list):
raise TypeError(
f"For pair-wise plotting multiple outcomes need to be provided.\n"
f"outcomes_to_show must be a list of strings or None, instead of a {type(outcomes_to_show)}"
)
elif len(outcomes_to_show) == 1:
raise ValueError(
f"Only {len(outcomes_to_show)} outcome provided, at least two are needed for pair-wise plotting."
)

experiments, outcomes, outcomes_to_show, time, grouping_labels = prepare_data(
experiments, None, outcomes, outcomes_to_show, group_by, grouping_specifiers, filter_scalar
Expand Down Expand Up @@ -755,7 +763,7 @@ def prepare_data(
if not grouping_specifiers:
# no grouping specifier, so infer from the data
if group_by == "index":
raise EMAError("no grouping specifiers provided while " "trying to group on index")
raise EMAError("No grouping specifiers provided while trying to group on index")
else:
column_to_group_by = experiments[group_by]
if column_to_group_by.dtype in (object, "category"):
Expand Down Expand Up @@ -810,7 +818,7 @@ def do_titles(ax, titles, outcome):
try:
ax.set_title(titles[outcome])
except KeyError:
_logger.warning(f"key error in do_titles, no title provided for `{outcome}`")
_logger.warning(f"KeyError in do_titles, no title provided for outcome `{outcome}`")
ax.set_title(outcome)


Expand All @@ -835,7 +843,9 @@ def do_ylabels(ax, ylabels, outcome):
try:
ax.set_ylabel(ylabels[outcome])
except KeyError:
_logger.warning(f"key error in do_ylabels, no ylabel provided for `{outcome}`")
_logger.warning(
f"KeyError in do_ylabels, no ylabel provided for outcome `{outcome}`"
)
ax.set_ylabel(outcome)


Expand Down
25 changes: 15 additions & 10 deletions ema_workbench/analysis/prim.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
import altair as alt
except ImportError:
alt = None
warnings.warn(("altair based interactive " "inspection not available"), ImportWarning)
warnings.warn("altair based interactive inspection not available", ImportWarning)

from ..util import EMAError, temporary_filter, INFO, get_module_logger
from . import scenario_discovery_util as sdutil
Expand Down Expand Up @@ -108,7 +108,9 @@ def pca_preprocess(experiments, y, subsets=None, exclude=set()):
if not x.select_dtypes(exclude=np.number).empty:
raise RuntimeError("X includes non numeric columns")
if not set(np.unique(y)) == {0, 1}:
raise RuntimeError("y should only contain 0s and 1s")
raise RuntimeError(
f"y should only contain 0s and 1s, currently y contains {set(np.unique(y))}."
)

# if no subsets are provided all uncertainties with non dtype object
# are in the same subset, the name of this is r, for rotation
Expand Down Expand Up @@ -416,7 +418,7 @@ def inspect(self, i=None, style="table", **kwargs):
i = [i]

if not all(isinstance(x, int) for x in i):
raise TypeError("i must be an integer or list of integers")
raise TypeError(f"i must be an integer or list of integers, not {type(i)}")

return [self._inspect(entry, style=style, **kwargs) for entry in i]

Expand Down Expand Up @@ -450,7 +452,7 @@ def _inspect(self, i=None, style="table", **kwargs):
elif style == "data":
return self._inspect_data(i, uncs, qp_values)
else:
raise ValueError("style must be one of graph, table or data")
raise ValueError(f"style must be one of 'graph', 'table' or 'data', not {style}.")

def _inspect_data(self, i, uncs, qp_values):
"""Helper method for inspecting boxes,
Expand Down Expand Up @@ -1026,7 +1028,9 @@ def __init__(
for column in x_nominal.columns.values:
if np.unique(x[column]).shape == (1,):
x = x.drop(column, axis=1)
_logger.info(f"{column} dropped from analysis " "because only a single category")
_logger.info(
f"column {column} dropped from analysis because it has only one category"
)

x_nominal = x.select_dtypes(exclude=np.number)
self.x_nominal = x_nominal.values
Expand All @@ -1044,9 +1048,9 @@ def __init__(
self._update_yi_remaining = self._update_functions[update_function]

if len(self.y.shape) > 1:
raise PrimException("y is not a 1-d array")
raise PrimException(f"y is not a 1-d array, but has shape {self.y.shape}")
if self.y.shape[0] != len(self.x):
raise PrimException("len(y) != len(x)")
raise PrimException(f"len(y) != len(x): {self.y.shape[0]} != {len(self.x)}")

# store the remainder of the parameters
self.paste_alpha = paste_alpha
Expand Down Expand Up @@ -1100,7 +1104,7 @@ def find_box(self):
box._frozen = True

if self.yi_remaining.shape[0] == 0:
_logger.info("no data remaining")
_logger.info("no data remaining, exiting")
return

# log how much data and how many coi are remaining
Expand All @@ -1119,7 +1123,7 @@ def find_box(self):
box = self._paste(box)
_logger.debug("pasting completed")

message = "mean: {0}, mass: {1}, coverage: {2}, " "density: {3} restricted_dimensions: {4}"
message = "mean: {0}, mass: {1}, coverage: {2}, density: {3} restricted_dimensions: {4}"
message = message.format(box.mean, box.mass, box.coverage, box.density, box.res_dim)

if (self.threshold_type == ABOVE) & (box.mean >= self.threshold):
Expand All @@ -1133,7 +1137,8 @@ def find_box(self):
else:
# make a dump box
_logger.info(
f"box does not meet threshold criteria, value is {box.mean}, returning dump box"
f"box mean ({box.mean}) does not meet threshold criteria ({self.threshold_type} {self.threshold}),"
f"returning dump box"
)
box = PrimBox(self, self.box_init, self.yi_remaining[:])
self._boxes.append(box)
Expand Down

0 comments on commit 0ecf4a8

Please sign in to comment.