diff --git a/README.md b/README.md index 1e23769..cfd5bbc 100644 --- a/README.md +++ b/README.md @@ -124,17 +124,9 @@ Additionally, users can specify a custom path for the output CSV file using the Orion now supports anomaly detection for your data. Use the ```--anomaly-detection``` command to start the anomaly detection process. -To be able to find significant percent differences in workload runs, use the ```--cmr``` command. This will compare the most recent run with any previous matching runs or baseline UUIDs. If more than 1 other run is found from the most recent, the values will be meaned together and then compared with the previous run. Use with *direction: 0* (set in the config) when using ```-o json``` format to see percent differences -``` -time uuid buildUrl timestamp podReadyLatency_P99 apiserverCPU_avg ovnCPU_avg etcdCPU_avg kubelet_avg -------------------------- ----------------------------- ----------- --------------------- ------------------ ------------ ------------- ------------- -2024-05-20 00:47:53 +0000 0ed676a0-6e23-498e-b33e-fe520636e459,e752c921-6b93-42d8-b262-0bcc219bfc2b https://prow....... 1.71617e+09 132000 15.5236 6.18368 14.711 24.4395 - ····················· ·················· ············ ············· ············· - -8.3% +1.3% -6.7% -0.6% -19.0% - ····················· ·················· ············ ············· ············· -2024-08-14 17:07:33 +0000 e9e1f71c-9457-4a82-b561-e2158c8eae7c https://prow....... 1.72366e+09 121000 15.7236 5.77077 14.627 19.7842 +To be able to find significant percent differences in workload runs, use the ```--cmr``` command. This will compare the most recent run with any previous matching runs or baseline UUIDs. If more than 1 other run is found from the most recent, the values will be meaned together and then compared with the previous run. 
Use with *direction: 0* (set in the config) when using ```-o json``` format to see percent differences -``` +![cmr percent difference](https://private-user-images.githubusercontent.com/64206430/359942919-fcf0ba90-5571-4afd-bc64-a7f4accffe6a.jpg?jwt=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJnaXRodWIuY29tIiwiYXVkIjoicmF3LmdpdGh1YnVzZXJjb250ZW50LmNvbSIsImtleSI6ImtleTUiLCJleHAiOjE3MjQyNTAxMDIsIm5iZiI6MTcyNDI0OTgwMiwicGF0aCI6Ii82NDIwNjQzMC8zNTk5NDI5MTktZmNmMGJhOTAtNTU3MS00YWZkLWJjNjQtYTdmNGFjY2ZmZTZhLmpwZz9YLUFtei1BbGdvcml0aG09QVdTNC1ITUFDLVNIQTI1NiZYLUFtei1DcmVkZW50aWFsPUFLSUFWQ09EWUxTQTUzUFFLNFpBJTJGMjAyNDA4MjElMkZ1cy1lYXN0LTElMkZzMyUyRmF3czRfcmVxdWVzdCZYLUFtei1EYXRlPTIwMjQwODIxVDE0MTY0MlomWC1BbXotRXhwaXJlcz0zMDAmWC1BbXotU2lnbmF0dXJlPTI2YTk0ZmU0OWVlODJmNDhlNTU0ZGI0YWFlNTdhYTZjNzE4ZjRjMGNjNzIzMjdkZmM1ODdlMTU3NjQ3MTk4MGQmWC1BbXotU2lnbmVkSGVhZGVycz1ob3N0JmFjdG9yX2lkPTAma2V5X2lkPTAmcmVwb19pZD0wIn0.-3p6Muzv0EmGfcxiYMym1vprqSAkklYGmJP54nQNF5g) You can now constrain your look-back period using the ```--lookback``` option. The format for look-back is ```XdYh```, where X represents the number of days and Y represents the number of hours. 
diff --git a/orion.py b/orion.py index 5cdf1e6..0278cb5 100644 --- a/orion.py +++ b/orion.py @@ -71,7 +71,7 @@ def cli(max_content_width=120): # pylint: disable=unused-argument @cli.command(name="cmd") @click.option( "--cmr", - is_flag=True, + is_flag=True, help="Generate percent difference in comparison", cls=MutuallyExclusiveOption, mutually_exclusive=["anomaly_detection","hunter_analyze"], diff --git a/pkg/algorithms/algorithm.py b/pkg/algorithms/algorithm.py index 33106ce..41b25b6 100644 --- a/pkg/algorithms/algorithm.py +++ b/pkg/algorithms/algorithm.py @@ -173,5 +173,5 @@ def output(self, output_format) -> Union[Any,None]: return self.output_text() if output_format == cnsts.JUNIT: return self.output_junit() - + raise ValueError(f"Unsupported output format {output_format} selected") diff --git a/pkg/algorithms/cmr/cmr.py b/pkg/algorithms/cmr/cmr.py index f4df837..6208255 100644 --- a/pkg/algorithms/cmr/cmr.py +++ b/pkg/algorithms/cmr/cmr.py @@ -1,16 +1,18 @@ """EDivisive Algorithm from hunter""" # pylint: disable = line-too-long +from typing import List import pandas as pd import numpy -from pkg.algorithms.algorithm import Algorithm -from hunter.series import ChangePoint, ComparativeStats from fmatch.logrus import SingletonLogger +from hunter.series import ChangePoint, ComparativeStats +from pkg.algorithms.algorithm import Algorithm + class CMR(Algorithm): """Implementation of the CMR algorithm - Will Combine metrics into 2 lines and compare with a tolerancy to logger_instance.info pass fail + Will Combine metrics into 2 lines and compare with a tolerancy to set pass fail Args: Algorithm (Algorithm): Inherits @@ -18,14 +20,19 @@ class CMR(Algorithm): def _analyze(self): + """Analyze the dataframe by averaging any previous data and generate percent change against the current uuid + Returns: + series: data series that contains attributes and full dataframe + change_points_by_metric: list of ChangePoints + """ logger_instance = SingletonLogger.getLogger("Orion") 
logger_instance.info("Starting analysis using Isolation Forest") self.dataframe["timestamp"] = pd.to_datetime(self.dataframe["timestamp"]) self.dataframe["timestamp"] = self.dataframe["timestamp"].astype(int) // 10**9 logger_instance.info('data frame ' + str(self.dataframe)) - + # if larger than 2 rows, need to get the mean of 0 through -2 self.dataframe = self.combine_data_frames( self.dataframe) @@ -37,13 +44,24 @@ def _analyze(self): return series, change_points_by_metric - def run_cmr(self, tolerancy,metric_columns, dataframe_list): + def run_cmr(self, tolerancy: int,metric_columns: List[str], dataframe_list: pd.DataFrame): + """ + Generate the percent difference in a 2 row dataframe + + Args: + tolerancy (int): tolerancy to compare on + metric_columns (List[str]): string list of metric column names + dataframe_list (pd.DataFrame): data frame of all data to compare on + + Returns: + pd.DataFrame, dict[metric_name, ChangePoint]: Returned data frame and change points + """ change_points_by_metric={ k:[] for k in metric_columns } max_date_time = pd.Timestamp.max.to_pydatetime() max_time = max_date_time.timestamp() difference = ["difference", max_time] pass_fail_list = ["Pass/Fail", max_time] - for column in metric_columns: + for column in metric_columns: pct_change_result = dataframe_list[column].pct_change() single_pct_diff = round(pct_change_result.iloc[[-1]].values[0] * 100) pass_fail = "Pass" @@ -71,9 +89,18 @@ def run_cmr(self, tolerancy,metric_columns, dataframe_list): # based on change point generate pass/fail return dataframe_list, change_points_by_metric - - def combine_data_frames(self, dataFrame): - # https://stackoverflow.com/questions/63037612/how-to-combine-two-dataframes-and-average-like-values + + def combine_data_frames(self, dataFrame: pd.DataFrame): + """ + If more than 1 previous run, mean data together into 1 single row + Combine with current run into 1 data frame (current run being -1 index) + + Args: + dataFrame (pd.DataFrame): data to 
combine into 2 rows + + Returns: + pd.DataFrame: data frame of most recent run and averaged previous runs + """ i = 0 last_row = dataFrame.tail(1) @@ -83,9 +110,9 @@ def combine_data_frames(self, dataFrame): metric_columns = list(dataFrame.columns) for column in metric_columns: - if type(dF.loc[0, column]) is numpy.float64 or type(dF.loc[0, column]) is numpy.int64: + if isinstance(dF.loc[0, column], (numpy.float64, numpy.int64)): mean = dF[column].mean() - else: + else: column_list = dF[column].tolist() mean = ','.join(column_list) data2[column] = [mean] @@ -93,4 +120,4 @@ def combine_data_frames(self, dataFrame): df2 = pd.DataFrame(data2) result = pd.concat([df2, last_row], ignore_index=True) - return result \ No newline at end of file + return result diff --git a/pkg/constants.py b/pkg/constants.py index 5ba718e..ea9a770 100644 --- a/pkg/constants.py +++ b/pkg/constants.py @@ -6,4 +6,4 @@ JSON="json" TEXT="text" JUNIT="junit" -CMR="cmr" \ No newline at end of file +CMR="cmr" diff --git a/pkg/utils.py b/pkg/utils.py index 50fa1c2..7449e51 100644 --- a/pkg/utils.py +++ b/pkg/utils.py @@ -262,7 +262,7 @@ def process_test( if options["convert_tinyurl"] else buildUrls[uuid] ) - + # pylint: disable = cell-var-from-loop ) #save the dataframe @@ -271,6 +271,15 @@ def process_test( return merged_df, metrics_config def shorten_url(shortener: any, uuids: List[str]) -> str: + """Shorten url if there is a list of buildUrls + + Args: + shortener (any): shortener object to use tinyurl.short on + uuids (List[str]): List of uuids to shorten + + Returns: + str: a combined string of shortened urls + """ short_url_list = [] for buildUrl in uuids.split(","): short_url_list.append(shortener.tinyurl.short(buildUrl))