Differentiated the "error_metric" from the "aggregate_error_metric". Improved metrics support. Fixes.
sarusso committed Jun 5, 2024
1 parent 6bc477f commit 6cfc941
Showing 4 changed files with 103 additions and 102 deletions.
82 changes: 32 additions & 50 deletions timeseria/models/anomaly_detectors.py
@@ -13,17 +13,13 @@
import fitter as fitter_library
from ..datastructures import TimeSeries, DataTimePoint



# Setup logging
import logging
logger = logging.getLogger(__name__)

fitter_library.fitter.logger = logging.getLogger('fitter')
fitter_library.fitter.logger.setLevel(level=logging.CRITICAL)



# Suppress TensorFlow warnings as default behavior
try:
import tensorflow as tf
@@ -57,11 +53,8 @@ def mark_events(timeseries, index_treshold=1.0, min_persistence=2, max_gap=2, re
Args:
index_treshold(float): the anomaly index above which to consider a data point anomalous.
min_persistence(int): the minimum persistence of an event, in terms of consecutive data points.
max_gap(int): the maximum gap allowed within a single event, in terms of consecutive data points below the index_treshold.
replace_index(bool): if to replace the existent ``anomaly`` index instead of adding a new ``anomaly_event`` one.
"""

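The persistence and gap logic described in the docstring above can be pictured with a small stand-alone sketch. This is not the library implementation: the helper name, the plain-list input and the returned (start, end) tuples are assumptions made only for illustration.

# Illustrative sketch only (not the timeseria implementation): group consecutive
# above-threshold anomaly index values into events, tolerating short gaps.
def sketch_mark_events(anomaly_indexes, index_treshold=1.0, min_persistence=2, max_gap=2):
    """Return a list of (start, end) positions of anomalous events."""
    events = []
    start = None       # start position of the candidate event
    last_above = None  # last above-threshold position seen
    gap = 0            # current run of below-threshold points inside the candidate
    for i, value in enumerate(anomaly_indexes):
        if value >= index_treshold:
            if start is None:
                start = i
            last_above = i
            gap = 0
        elif start is not None:
            gap += 1
            if gap > max_gap:
                # Close the candidate event and keep it only if persistent enough
                if last_above - start + 1 >= min_persistence:
                    events.append((start, last_above))
                start, last_above, gap = None, None, 0
    if start is not None and last_above - start + 1 >= min_persistence:
        events.append((start, last_above))
    return events

# Two events separated by a gap longer than max_gap: prints [(1, 4), (8, 10)]
print(sketch_mark_events([0.2, 1.1, 1.3, 0.5, 1.2, 0.1, 0.1, 0.1, 1.4, 1.5, 1.1]))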
@@ -240,23 +233,19 @@ def _get_predicted_value(self, series, i, data_label, with_context):
return predicted

@AnomalyDetector.fit_function
def fit(self, series, with_context=False, error='PE', error_distribution='gennorm', store_errors=True, verbose=False, summary=False, **kwargs):
def fit(self, series, with_context=False, error_metric='PE', error_distribution='gennorm', store_errors=True, verbose=False, summary=False, **kwargs):
"""Fit the anomaly detection model on a series.
Args:
with_context(bool): if to use context for multivariate time series.
error(str): the error metric to use for the model. Supported values are: ``E``, ``AE``, ``PE`` and ``APE``. Defaults to ``PE``.
error_distribution(str): if to use a specific error distribution or find it automatically (``error_distribution='auto'``).
with_context(bool): if to use context for multivariate time series or not. Defaults to ``False``.
error_metric(str): the error metric to use for evaluating the model errors and thus build the anomaly detector's anomaly estimates.
Supported values are: ``E``, ``AE``, ``PE`` and ``APE``. Defaults to ``PE``.
error_distribution(str): if to use a specific error distribution or find it automatically (``error_distribution='auto'``).
Defaults to ``gennorm``, a generalized normal distribution.
store_errors(float): if to store the prediction errors (together with actual and predicted values) internally for further analysis.
Access them with ``model.data['prediction_errors']``, ``model.data['actual_values']`` or ``model.data['predicted_values']``.
store_errors(bool): if to store the prediction errors (together with actual and predicted values) internally for further analysis. Access
them with ``model.data['prediction_errors']``, ``model.data['actual_values']`` and ``model.data['predicted_values']``.
verbose(bool): if to print the fit progress (one dot = 10% done).
summary(bool): if to display a summary on the error distribution fitting or selection.
summary(bool): if to display a summary on the error distribution fitting or selection.
"""

# Handle the error distribution(s)
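For reference, the four supported error metrics reduce to one-line formulas on each actual/predicted pair, mirroring the branches computed later in fit(). A minimal plain-Python illustration (not library code):

# Plain-Python illustration of the supported error metrics on a single
# actual/predicted pair; these mirror the branches used in fit() below.
actual, predicted = 10.0, 12.0

E = actual - predicted                     # signed error: -2.0
AE = abs(actual - predicted)               # absolute error: 2.0
PE = (actual - predicted) / actual         # percentage error: -0.2
APE = abs((actual - predicted) / actual)   # absolute percentage error: 0.2

print(E, AE, PE, APE)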
@@ -311,11 +300,11 @@ def fit(self, series, with_context=False, error='PE', error_distribution='gennor
# Store if to use context or not
self.data['with_context'] = with_context

# Check and store error type
if error not in ['E', 'AE', 'PE', 'APE']:
raise ValueError('Unknown error metric "{}"'.format(error))
# Check and store the error metric
if error_metric not in ['E', 'AE', 'PE', 'APE']:
raise ValueError('Unknown error metric "{}"'.format(error_metric))

self.data['error'] = error
self.data['error_metric'] = error_metric

# Initialize internal dictionaries
if store_errors:
@@ -358,37 +347,37 @@ def fit(self, series, with_context=False, error='PE', error_distribution='gennor
if i > len(series)-self.data['model_window']-1:
break

# Predict & append the error
# Predict & append the prediction error
#logger.debug('Predicting and computing the difference (i=%s)', i)
actual = self._get_actual_value(series, i, data_label)
predicted = self._get_predicted_value(series, i, data_label, with_context)
if store_errors:
actual_values[data_label].append(actual)
predicted_values[data_label].append(predicted)

if error == 'E':
if error_metric == 'E':
prediction_error = actual-predicted
elif error == 'AE':
elif error_metric == 'AE':
prediction_error = abs(actual-predicted)
elif error == 'PE':
elif error_metric == 'PE':
prediction_error = (actual-predicted)/actual
elif error == 'APE':
elif error_metric == 'APE':
prediction_error = abs((actual-predicted)/actual)
else:
raise ValueError('Unknown error type "{}"'.format(self.data['error']))
prediction_errors[data_label].append(actual-predicted)
raise ValueError('Unknown error metric "{}"'.format(self.data['error_metric']))
prediction_errors[data_label].append(prediction_error)

if verbose:
print('')

if verbose:
print('Model(s) evaluated, now computing the error distribution(s)')
logger.info('Model(s) evaluated, now computing the error distribution(s)...')
print('Model(s) evaluated, now computing the error_metric distribution(s)')
logger.info('Model(s) evaluated, now computing the error_metric distribution(s)...')

for data_label in series.data_labels:
#if verbose:
# print('Selecting error distribution for "{}"'.format(data_label))
#logger.debug('Selecting error distribution for "%s"', data_label))
# print('Selecting error_metric distribution for "{}"'.format(data_label))
#logger.debug('Selecting error_metric distribution for "%s"', data_label))

# Fit the distributions and select the best one
fitter = fitter_library.fitter.Fitter(prediction_errors[data_label], distributions=error_distributions)
@@ -403,7 +392,7 @@ def fit(self, series, with_context=False, error='PE', error_distribution='gennor
error_distribution_params = fitter.get_best()[best_error_distribution]

if best_error_distribution_stats['ks_pvalue'] < 0.05:
logger.warning('The error distribution for "{}" ({}) p-value is low ({}). Expect issues.'.format(data_label, best_error_distribution, best_error_distribution_stats['ks_pvalue']))
logger.warning('The error_metric distribution for "{}" ({}) p-value is low ({}). Expect issues.'.format(data_label, best_error_distribution, best_error_distribution_stats['ks_pvalue']))

self.data['error_distributions'][data_label] = best_error_distribution
self.data['error_distributions_params'][data_label] = error_distribution_params
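The distribution-fitting step above can be reproduced in isolation with SciPy, which the fitter library builds on. A stand-alone sketch on synthetic residuals (variable names and values are illustrative only, not taken from the library):

# Stand-alone sketch: fit a generalized normal ('gennorm') distribution to
# synthetic prediction errors with SciPy, then check the fit with a KS test
# (a low p-value means the distribution describes the errors poorly).
import numpy as np
from scipy import stats

rng = np.random.default_rng(42)
prediction_errors = rng.normal(loc=0.0, scale=0.05, size=1000)  # synthetic residuals

beta, loc, scale = stats.gennorm.fit(prediction_errors)
print('gennorm params:', beta, loc, scale)

ks_result = stats.kstest(prediction_errors, 'gennorm', args=(beta, loc, scale))
print('KS p-value:', ks_result.pvalue)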
@@ -519,27 +508,22 @@ def apply(self, series, index_range=['avg_err','max_err'], index_type='log', thr
Other supported values are ``x_sigma`` where x is a standard deviation multiplier,
``adherence/x`` where x is a divider for the model adherence probability,
or any numerical value in terms of prediction error value.
index_type(str, callable): if to use a logarithmic anomaly index ("log", the default value) which compresses
the index range so that bigger anomalies stand out more than smaller ones, or if to
use a linear one ("lin"). Can also support a custom anomaly index as a callable,
in which case the form must be ``f(x, y, x_start, x_end, y_start, y_end)`` where x
is the model error, y its value on the distribution curve, and x_start/x_end together
with y_start/y_end the respective x and y range start and end values, based on the
range set by the ``index_range`` argument.
threshold(float): a threshold to make the anomaly index categorical (0-1) instead of continuous.
multivariate_index_strategy(str, callable): the strategy to use when computing the overall anomaly index for multivariate
time series items. Possible choices are "max" to use the maximum one, "avg"
for the mean and "min" for the minimum; or a callable taking as input the
list of the anomaly indexes for each data label. Defaults to "max".
details(bool, list): if to add details to the time series, such as the predicted value, the error and the
model adherence probability. If set to ``True``, it adds all of them; if instead using
a list only selected details can be added: ``pred`` for the predicted values, ``err`` for
the errors, and ``adh`` for the model adherence probability.
verbose(bool): if to print the apply progress (one dot = 10% done).
"""
# Initialize the result time series
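The callable form of ``index_type`` documented above can be clarified with a short example: a plain linear rescaling of the error over the configured range, clipped to the 0-1 interval. This follows the ``f(x, y, x_start, x_end, y_start, y_end)`` signature from the docstring but is only an illustrative sketch, not library code.

# Illustrative custom anomaly index following the callable signature described
# in the docstring above: a linear rescaling of the error x over the
# [x_start, x_end] range, clipped to [0, 1]. The distribution-curve arguments
# (y, y_start, y_end) are accepted but intentionally unused here.
def linear_index(x, y, x_start, x_end, y_start, y_end):
    if x <= x_start:
        return 0.0
    if x >= x_end:
        return 1.0
    return (x - x_start) / (x_end - x_start)

# Assumed usage, based on the docstring (not tested against the library):
#   result_series = anomaly_detector.apply(series, index_type=linear_index)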
@@ -552,11 +536,9 @@ def apply(self, series, index_range=['avg_err','max_err'], index_type='log', thr
self.data['error_distributions_params'][data_label])

# Check error metric
if self.data['error'] == 'E':
pass
elif self.data['error'] == 'PE':
if self.data['error_metric'] in ['E', 'AE', 'PE', 'APE']:
pass
elif callable(self.data['error']):
elif callable(self.data['error_metric']):
# If the model is loaded, it will never get here
pass
else:
@@ -666,16 +648,16 @@ def apply(self, series, index_range=['avg_err','max_err'], index_type='log', thr
actual = self._get_actual_value(series, i, data_label)
predicted = self._get_predicted_value(series, i, data_label, self.data['with_context'])

if self.data['error'] == 'E':
if self.data['error_metric'] == 'E':
prediction_error = actual-predicted
elif self.data['error'] == 'AE':
elif self.data['error_metric'] == 'AE':
prediction_error = abs(actual-predicted)
elif self.data['error'] == 'PE':
elif self.data['error_metric'] == 'PE':
prediction_error = (actual-predicted)/actual
elif self.data['error'] == 'APE':
elif self.data['error_metric'] == 'APE':
prediction_error = abs((actual-predicted)/actual)
else:
raise ValueError('Unknown error type "{}"'.format(self.data['error']))
raise ValueError('Unknown error type "{}"'.format(self.data['error_metric']))

# Reverse values on the left side of the error distribution
if prediction_error < self.data['error_distributions_params'][data_label]['loc']:
@@ -755,6 +737,7 @@ def error_distribution_function(self):
self.data['error_distribution_params'])
return distribution_function


#===================================
# Periodic Average Forecaster
# Predictive Anomaly Detector
@@ -766,7 +749,6 @@ class PeriodicAverageAnomalyDetector(ModelBasedAnomalyDetector):
model_class = PeriodicAverageForecaster
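Putting the renamed ``error_metric`` argument together with the class above, a fit/apply round-trip could look like the sketch below. The DataTimePoint/TimeSeries construction and the synthetic data are assumptions made for illustration; only the fit() and apply() arguments come from the code in this commit.

# Hedged usage sketch. The DataTimePoint/TimeSeries constructor calls below are
# assumptions based on the imports at the top of this file; the synthetic
# sine-plus-spike data is for illustration only.
import math
from timeseria.datastructures import TimeSeries, DataTimePoint
from timeseria.models.anomaly_detectors import PeriodicAverageAnomalyDetector

points = []
for i in range(24 * 30):  # one month of hourly points with a daily cycle
    value = 10 + math.sin(2 * math.pi * (i % 24) / 24)
    if i == 500:
        value += 5  # inject an anomaly
    points.append(DataTimePoint(t=i * 3600, data={'value': value}))
series = TimeSeries(*points)

anomaly_detector = PeriodicAverageAnomalyDetector()
anomaly_detector.fit(series, error_metric='PE', error_distribution='gennorm')

# Each data point of the result carries an 'anomaly' index in the 0-1 range
result_series = anomaly_detector.apply(series, index_type='log')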



#===================================
# Periodic Average Reconstructor
# Predictive Anomaly Detector
(Diffs for the remaining 3 changed files are not shown.)
