Skip to content

Commit

Permalink
spacing and doc changes
Browse files Browse the repository at this point in the history
rh-pre-commit.version: 2.2.0
rh-pre-commit.check-secrets: ENABLED
  • Loading branch information
Auto User committed Aug 21, 2024
1 parent 49595d2 commit 3352126
Show file tree
Hide file tree
Showing 6 changed files with 54 additions and 26 deletions.
12 changes: 2 additions & 10 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -124,17 +124,9 @@ Additionally, users can specify a custom path for the output CSV file using the
Orion now supports anomaly detection for your data. Use the ```--anomaly-detection``` command to start the anomaly detection process.


To find significant percent differences between workload runs, use the ```--cmr``` option. This compares the most recent run with any previous matching runs or baseline UUIDs. If more than one previous run is found, their values are averaged together and then compared with the most recent run. Use with *direction: 0* (set in the config) when using the ```-o json``` format to see percent differences.
```
time uuid buildUrl timestamp podReadyLatency_P99 apiserverCPU_avg ovnCPU_avg etcdCPU_avg kubelet_avg
------------------------- ----------------------------- ----------- --------------------- ------------------ ------------ ------------- -------------
2024-05-20 00:47:53 +0000 0ed676a0-6e23-498e-b33e-fe520636e459,e752c921-6b93-42d8-b262-0bcc219bfc2b https://prow....... 1.71617e+09 132000 15.5236 6.18368 14.711 24.4395
····················· ·················· ············ ············· ·············
-8.3% +1.3% -6.7% -0.6% -19.0%
····················· ·················· ············ ············· ·············
2024-08-14 17:07:33 +0000 e9e1f71c-9457-4a82-b561-e2158c8eae7c https://prow....... 1.72366e+09 121000 15.7236 5.77077 14.627 19.7842
To find significant percent differences between workload runs, use the ```--cmr``` option. This compares the most recent run with any previous matching runs or baseline UUIDs. If more than one previous run is found, their values are averaged together and then compared with the most recent run. Use with *direction: 0* (set in the config) when using the ```-o json``` format to see percent differences.

```
![cmr percent difference](https://private-user-images.githubusercontent.com/64206430/359942919-fcf0ba90-5571-4afd-bc64-a7f4accffe6a.jpg?jwt=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJnaXRodWIuY29tIiwiYXVkIjoicmF3LmdpdGh1YnVzZXJjb250ZW50LmNvbSIsImtleSI6ImtleTUiLCJleHAiOjE3MjQyNTAxMDIsIm5iZiI6MTcyNDI0OTgwMiwicGF0aCI6Ii82NDIwNjQzMC8zNTk5NDI5MTktZmNmMGJhOTAtNTU3MS00YWZkLWJjNjQtYTdmNGFjY2ZmZTZhLmpwZz9YLUFtei1BbGdvcml0aG09QVdTNC1ITUFDLVNIQTI1NiZYLUFtei1DcmVkZW50aWFsPUFLSUFWQ09EWUxTQTUzUFFLNFpBJTJGMjAyNDA4MjElMkZ1cy1lYXN0LTElMkZzMyUyRmF3czRfcmVxdWVzdCZYLUFtei1EYXRlPTIwMjQwODIxVDE0MTY0MlomWC1BbXotRXhwaXJlcz0zMDAmWC1BbXotU2lnbmF0dXJlPTI2YTk0ZmU0OWVlODJmNDhlNTU0ZGI0YWFlNTdhYTZjNzE4ZjRjMGNjNzIzMjdkZmM1ODdlMTU3NjQ3MTk4MGQmWC1BbXotU2lnbmVkSGVhZGVycz1ob3N0JmFjdG9yX2lkPTAma2V5X2lkPTAmcmVwb19pZD0wIn0.-3p6Muzv0EmGfcxiYMym1vprqSAkklYGmJP54nQNF5g)

You can now constrain your look-back period using the ```--lookback``` option. The format for look-back is ```XdYh```, where X represents the number of days and Y represents the number of hours.

Expand Down
2 changes: 1 addition & 1 deletion orion.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def cli(max_content_width=120): # pylint: disable=unused-argument
@cli.command(name="cmd")
@click.option(
"--cmr",
is_flag=True,
is_flag=True,
help="Generate percent difference in comparison",
cls=MutuallyExclusiveOption,
mutually_exclusive=["anomaly_detection","hunter_analyze"],
Expand Down
2 changes: 1 addition & 1 deletion pkg/algorithms/algorithm.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,5 +173,5 @@ def output(self, output_format) -> Union[Any,None]:
return self.output_text()
if output_format == cnsts.JUNIT:
return self.output_junit()

raise ValueError("Unsupported output format {output_format} selected")
51 changes: 39 additions & 12 deletions pkg/algorithms/cmr/cmr.py
Original file line number Diff line number Diff line change
@@ -1,31 +1,38 @@
"""EDivisive Algorithm from hunter"""

# pylint: disable = line-too-long
from typing import List
import pandas as pd
import numpy
from pkg.algorithms.algorithm import Algorithm
from hunter.series import ChangePoint, ComparativeStats

from fmatch.logrus import SingletonLogger
from hunter.series import ChangePoint, ComparativeStats
from pkg.algorithms.algorithm import Algorithm


class CMR(Algorithm):
"""Implementation of the CMR algorithm
Will Combine metrics into 2 lines and compare with a tolerancy to logger_instance.info pass fail
Will Combine metrics into 2 lines and compare with a tolerancy to set pass fail
Args:
Algorithm (Algorithm): Inherits
"""


def _analyze(self):
"""Analyze the dataframe with meaning any previous data and generate percent change with a current uuid
Returns:
series: data series that contains attributes and full dataframe
change_points_by_metric: list of ChangePoints
"""
logger_instance = SingletonLogger.getLogger("Orion")
logger_instance.info("Starting analysis using Isolation Forest")
self.dataframe["timestamp"] = pd.to_datetime(self.dataframe["timestamp"])
self.dataframe["timestamp"] = self.dataframe["timestamp"].astype(int) // 10**9

logger_instance.info('data frame ' + str(self.dataframe))

# if larger than 2 rows, need to get the mean of 0 through -2
self.dataframe = self.combine_data_frames( self.dataframe)

Expand All @@ -37,13 +44,24 @@ def _analyze(self):
return series, change_points_by_metric


def run_cmr(self, tolerancy,metric_columns, dataframe_list):
def run_cmr(self, tolerancy: int,metric_columns: List[str], dataframe_list: pd.DataFrame):
"""
Generate the percent difference in a 2 row dataframe
Args:
tolerancy (int): tolerancy to compare on
metric_columns (List[str]): string list of metric column names
dataframe_list (pd.DataFrame): data frame of all data to compare on
Returns:
pd.Dataframe, dict[metric_name, ChangePoint]: Returned data frame and change points
"""
change_points_by_metric={ k:[] for k in metric_columns }
max_date_time = pd.Timestamp.max.to_pydatetime()
max_time = max_date_time.timestamp()
difference = ["difference", max_time]
pass_fail_list = ["Pass/Fail", max_time]
for column in metric_columns:
for column in metric_columns:
pct_change_result = dataframe_list[column].pct_change()
single_pct_diff = round(pct_change_result.iloc[[-1]].values[0] * 100)
pass_fail = "Pass"
Expand Down Expand Up @@ -71,9 +89,18 @@ def run_cmr(self, tolerancy,metric_columns, dataframe_list):

# based on change point generate pass/fail
return dataframe_list, change_points_by_metric

def combine_data_frames(self, dataFrame):
# https://stackoverflow.com/questions/63037612/how-to-combine-two-dataframes-and-average-like-values

def combine_data_frames(self, dataFrame: pd.DataFrame):
"""
If more than 1 previous run, mean data together into 1 single row
Combine with current run into 1 data frame (current run being -1 index)
Args:
dataFrame (pd.DataFrame): data to combine into 2 rows
Returns:
pd.Dataframe: data frame of most recent run and averaged previous runs
"""
i = 0

last_row = dataFrame.tail(1)
Expand All @@ -83,14 +110,14 @@ def combine_data_frames(self, dataFrame):
metric_columns = list(dataFrame.columns)
for column in metric_columns:

if type(dF.loc[0, column]) is numpy.float64 or type(dF.loc[0, column]) is numpy.int64:
if isinstance(dF.loc[0, column], (numpy.float64, numpy.int64)):
mean = dF[column].mean()
else:
else:
column_list = dF[column].tolist()
mean = ','.join(column_list)
data2[column] = [mean]
i += 1
df2 = pd.DataFrame(data2)

result = pd.concat([df2, last_row], ignore_index=True)
return result
return result
2 changes: 1 addition & 1 deletion pkg/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,4 @@
JSON="json"
TEXT="text"
JUNIT="junit"
CMR="cmr"
CMR="cmr"
11 changes: 10 additions & 1 deletion pkg/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,7 @@ def process_test(
if options["convert_tinyurl"]
else buildUrls[uuid]
)

# pylint: disable = cell-var-from-loop
)
#save the dataframe
Expand All @@ -271,6 +271,15 @@ def process_test(
return merged_df, metrics_config

def shorten_url(shortener: any, uuids: List[str]) -> str:
"""Shorten url if there is a list of buildUrls
Args:
shortener (any): shortener object to use tinyurl.short on
uuids (List[str]): List of uuids to shorten
Returns:
str: a combined string of shortened urls
"""
short_url_list = []
for buildUrl in uuids.split(","):
short_url_list.append(shortener.tinyurl.short(buildUrl))
Expand Down

0 comments on commit 3352126

Please sign in to comment.