diff --git a/pkg/algorithms/algorithm.py b/pkg/algorithms/algorithm.py index 41b25b6..b84cbf1 100644 --- a/pkg/algorithms/algorithm.py +++ b/pkg/algorithms/algorithm.py @@ -113,10 +113,10 @@ def group_change_points_by_time( changes: List[ChangePoint] = [] for metric in change_points.keys(): changes += change_points[metric] + changes.sort(key=lambda c: c.index) points = [] for k, g in groupby(changes, key=lambda c: c.index): - cp = ChangePointGroup( index=k, time=series.time[k], @@ -126,6 +126,7 @@ def group_change_points_by_time( changes=list(g), ) points.append(cp) + return points def setup_series(self) -> Series: @@ -173,5 +174,4 @@ def output(self, output_format) -> Union[Any,None]: return self.output_text() if output_format == cnsts.JUNIT: return self.output_junit() - raise ValueError("Unsupported output format {output_format} selected") diff --git a/pkg/algorithms/cmr/cmr.py b/pkg/algorithms/cmr/cmr.py index 6208255..dd03a49 100644 --- a/pkg/algorithms/cmr/cmr.py +++ b/pkg/algorithms/cmr/cmr.py @@ -1,4 +1,4 @@ -"""EDivisive Algorithm from hunter""" +"""CMR Algorithm""" # pylint: disable = line-too-long from typing import List @@ -27,24 +27,22 @@ def _analyze(self): change_points_by_metric: list of ChangePoints """ logger_instance = SingletonLogger.getLogger("Orion") - logger_instance.info("Starting analysis using Isolation Forest") + logger_instance.info("Starting analysis using CMR") self.dataframe["timestamp"] = pd.to_datetime(self.dataframe["timestamp"]) self.dataframe["timestamp"] = self.dataframe["timestamp"].astype(int) // 10**9 - logger_instance.info('data frame ' + str(self.dataframe)) - # if larger than 2 rows, need to get the mean of 0 through -2 - self.dataframe = self.combine_data_frames( self.dataframe) + self.dataframe = self.combine_and_average_runs( self.dataframe) series= self.setup_series() tolerancy = 20 - metric_columns = self.metrics_config.keys() - df, change_points_by_metric = self.run_cmr(tolerancy, metric_columns, self.dataframe) + + df, change_points_by_metric = self.run_cmr(tolerancy, self.dataframe) series.data= df return series, change_points_by_metric - def run_cmr(self, tolerancy: int,metric_columns: List[str], dataframe_list: pd.DataFrame): + def run_cmr(self, tolerancy: int, dataframe_list: pd.DataFrame): """ Generate the percent difference in a 2 row dataframe @@ -56,6 +54,7 @@ def run_cmr(self, tolerancy: int,metric_columns: List[str], dataframe_list: pd. Returns: pd.Dataframe, dict[metric_name, ChangePoint]: Returned data frame and change points """ + metric_columns = self.metrics_config.keys() change_points_by_metric={ k:[] for k in metric_columns } max_date_time = pd.Timestamp.max.to_pydatetime() max_time = max_date_time.timestamp() @@ -90,7 +89,7 @@ def run_cmr(self, tolerancy: int,metric_columns: List[str], dataframe_list: pd. # based on change point generate pass/fail return dataframe_list, change_points_by_metric - def combine_data_frames(self, dataFrame: pd.DataFrame): + def combine_and_average_runs(self, dataFrame: pd.DataFrame): """ If more than 1 previous run, mean data together into 1 single row Combine with current run into 1 data frame (current run being -1 index)