spacing and doc changes

rh-pre-commit.version: 2.2.0 rh-pre-commit.check-secrets: ENABLED
cloud-bulldozer · Aug 21, 2024 · 3352126 · 3352126
1 parent 49595d2
commit 3352126
Show file tree

Hide file tree

Showing 6 changed files with 54 additions and 26 deletions.
diff --git a/README.md b/README.md
@@ -124,17 +124,9 @@ Additionally, users can specify a custom path for the output CSV file using the
 Orion now supports anomaly detection for your data. Use the ```--anomaly-detection``` command to start the anomaly detection process.
 
 
-To be able to find significant percent differences in workload runs, use the ```--cmr``` command. This will compare the most recent run with any previous matching runs or baseline UUIDs. If more than 1 other run is found from the most recent, the values will be meaned together and then compared with the previous run. Use with *direction: 0* (set in the config) when using ```-o json``` format to see percent differences 
-```
-time uuid  buildUrl  timestamp    podReadyLatency_P99    apiserverCPU_avg    ovnCPU_avg    etcdCPU_avg    kubelet_avg
--------------------------  -----------------------------  -----------  ---------------------  ------------------  ------------  -------------  -------------
-2024-05-20 00:47:53 +0000  0ed676a0-6e23-498e-b33e-fe520636e459,e752c921-6b93-42d8-b262-0bcc219bfc2b  https://prow....... 1.71617e+09  132000  15.5236 6.18368 14.711        24.4395
-                                                                                                                ·····················  ··················  ············  ·············  ·············  
-                                                                                                                                                                                              -8.3%               +1.3%         -6.7%          -0.6%         -19.0%  
-                                                                                                                ·····················  ··················  ············  ·············  ·············  
-2024-08-14 17:07:33 +0000  e9e1f71c-9457-4a82-b561-e2158c8eae7c https://prow.......     1.72366e+09    121000  15.7236 5.77077 14.627  19.7842
+To find significant percent differences between workload runs, use the ```--cmr``` option. This compares the most recent run with any previous matching runs or baseline UUIDs. If more than one previous run is found, their values are averaged together and then compared with the most recent run. Use with *direction: 0* (set in the config) when using the ```-o json``` format to see percent differences.
 
-```
+![cmr percent difference](https://private-user-images.githubusercontent.com/64206430/359942919-fcf0ba90-5571-4afd-bc64-a7f4accffe6a.jpg?jwt=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJnaXRodWIuY29tIiwiYXVkIjoicmF3LmdpdGh1YnVzZXJjb250ZW50LmNvbSIsImtleSI6ImtleTUiLCJleHAiOjE3MjQyNTAxMDIsIm5iZiI6MTcyNDI0OTgwMiwicGF0aCI6Ii82NDIwNjQzMC8zNTk5NDI5MTktZmNmMGJhOTAtNTU3MS00YWZkLWJjNjQtYTdmNGFjY2ZmZTZhLmpwZz9YLUFtei1BbGdvcml0aG09QVdTNC1ITUFDLVNIQTI1NiZYLUFtei1DcmVkZW50aWFsPUFLSUFWQ09EWUxTQTUzUFFLNFpBJTJGMjAyNDA4MjElMkZ1cy1lYXN0LTElMkZzMyUyRmF3czRfcmVxdWVzdCZYLUFtei1EYXRlPTIwMjQwODIxVDE0MTY0MlomWC1BbXotRXhwaXJlcz0zMDAmWC1BbXotU2lnbmF0dXJlPTI2YTk0ZmU0OWVlODJmNDhlNTU0ZGI0YWFlNTdhYTZjNzE4ZjRjMGNjNzIzMjdkZmM1ODdlMTU3NjQ3MTk4MGQmWC1BbXotU2lnbmVkSGVhZGVycz1ob3N0JmFjdG9yX2lkPTAma2V5X2lkPTAmcmVwb19pZD0wIn0.-3p6Muzv0EmGfcxiYMym1vprqSAkklYGmJP54nQNF5g)
 
 You can now constrain your look-back period using the ```--lookback``` option. The format for look-back is ```XdYh```, where X represents the number of days and Y represents the number of hours.
 

diff --git a/orion.py b/orion.py
@@ -71,7 +71,7 @@ def cli(max_content_width=120):  # pylint: disable=unused-argument
 @cli.command(name="cmd")
 @click.option(
     "--cmr", 
-    is_flag=True, 
+    is_flag=True,
     help="Generate percent difference in comparison",
     cls=MutuallyExclusiveOption,
     mutually_exclusive=["anomaly_detection","hunter_analyze"],

diff --git a/pkg/algorithms/algorithm.py b/pkg/algorithms/algorithm.py
@@ -173,5 +173,5 @@ def output(self, output_format) -> Union[Any,None]:
             return self.output_text()
         if output_format == cnsts.JUNIT:
             return self.output_junit()
-        
+
         raise ValueError("Unsupported output format {output_format} selected")
diff --git a/pkg/algorithms/cmr/cmr.py b/pkg/algorithms/cmr/cmr.py
@@ -1,31 +1,38 @@
 """EDivisive Algorithm from hunter"""
 
 # pylint: disable = line-too-long
+from typing import List
 import pandas as pd
 import numpy
-from pkg.algorithms.algorithm import Algorithm
-from hunter.series import  ChangePoint, ComparativeStats
 
 from fmatch.logrus import SingletonLogger
+from hunter.series import  ChangePoint, ComparativeStats
+from pkg.algorithms.algorithm import Algorithm
+
 
 class CMR(Algorithm):
     """Implementation of the CMR algorithm
-    Will Combine metrics into 2 lines and compare with a tolerancy to logger_instance.info pass fail
+    Will Combine metrics into 2 lines and compare with a tolerancy to set pass fail
 
     Args:
         Algorithm (Algorithm): Inherits
     """
 
 
     def _analyze(self):
+        """Analyze the dataframe with meaning any previous data and generate percent change with a current uuid
 
+        Returns:
+            series: data series that contains attributes and full dataframe
+            change_points_by_metric: list of ChangePoints
+        """
         logger_instance = SingletonLogger.getLogger("Orion")
         logger_instance.info("Starting analysis using Isolation Forest")
         self.dataframe["timestamp"] = pd.to_datetime(self.dataframe["timestamp"])
         self.dataframe["timestamp"] = self.dataframe["timestamp"].astype(int) // 10**9
 
         logger_instance.info('data frame ' + str(self.dataframe))
-        
+
         # if larger than 2 rows, need to get the mean of 0 through -2
         self.dataframe = self.combine_data_frames( self.dataframe)
 
@@ -37,13 +44,24 @@ def _analyze(self):
         return series, change_points_by_metric
 
 
-    def run_cmr(self, tolerancy,metric_columns,  dataframe_list):
+    def run_cmr(self, tolerancy: int,metric_columns: List[str],  dataframe_list: pd.DataFrame):
+        """
+        Generate the percent difference in a 2 row dataframe
+
+        Args:
+            tolerancy (int): tolerancy to compare on 
+            metric_columns (List[str]): string list of metric column names
+            dataframe_list (pd.DataFrame): data frame of all data to compare on
+
+        Returns:
+            pd.Dataframe, dict[metric_name, ChangePoint]: Returned data frame and change points
+        """
         change_points_by_metric={ k:[] for k in metric_columns }
         max_date_time = pd.Timestamp.max.to_pydatetime()
         max_time = max_date_time.timestamp()
         difference = ["difference", max_time]
         pass_fail_list = ["Pass/Fail", max_time]
-        for column in metric_columns: 
+        for column in metric_columns:
             pct_change_result = dataframe_list[column].pct_change()
             single_pct_diff = round(pct_change_result.iloc[[-1]].values[0] * 100)
             pass_fail = "Pass"
@@ -71,9 +89,18 @@ def run_cmr(self, tolerancy,metric_columns,  dataframe_list):
 
         # based on change point generate pass/fail
         return dataframe_list, change_points_by_metric
-
-    def combine_data_frames(self, dataFrame):
-        # https://stackoverflow.com/questions/63037612/how-to-combine-two-dataframes-and-average-like-values
+
+    def combine_data_frames(self, dataFrame: pd.DataFrame):
+        """
+        If more than 1 previous run, mean data together into 1 single row
+        Combine with current run into 1 data frame (current run being -1 index)
+
+        Args:
+            dataFrame (pd.DataFrame): data to combine into 2 rows
+
+        Returns:
+            pd.Dataframe: data frame of most recent run and averaged previous runs
+        """
         i = 0
 
         last_row = dataFrame.tail(1)
@@ -83,14 +110,14 @@ def combine_data_frames(self, dataFrame):
         metric_columns = list(dataFrame.columns)
         for column in metric_columns:
 
-            if type(dF.loc[0, column]) is numpy.float64 or type(dF.loc[0, column]) is numpy.int64: 
+            if isinstance(dF.loc[0, column], (numpy.float64, numpy.int64)):
                 mean = dF[column].mean()
-            else: 
+            else:
                 column_list = dF[column].tolist()
                 mean = ','.join(column_list)
             data2[column] = [mean]
             i += 1
         df2 = pd.DataFrame(data2)
 
         result = pd.concat([df2, last_row], ignore_index=True)
-        return result
+        return result
diff --git a/pkg/constants.py b/pkg/constants.py
@@ -6,4 +6,4 @@
 JSON="json"
 TEXT="text"
 JUNIT="junit"
-CMR="cmr"
+CMR="cmr"
diff --git a/pkg/utils.py b/pkg/utils.py
@@ -262,7 +262,7 @@ def process_test(
             if options["convert_tinyurl"]
             else buildUrls[uuid]
         )
-        
+
         # pylint: disable = cell-var-from-loop
     )
     #save the dataframe
@@ -271,6 +271,15 @@ def process_test(
     return merged_df, metrics_config
 
 def shorten_url(shortener: any, uuids: List[str]) -> str:
+    """Shorten url if there is a list of buildUrls
+
+    Args:
+        shortener (any): shortener object to use tinyrl.short on
+        uuids (List[str]): List of uuids to shorten
+
+    Returns:
+        str: a combined string of shortened urls
+    """
     short_url_list = []
     for buildUrl in uuids.split(","):
         short_url_list.append(shortener.tinyurl.short(buildUrl))