From c810e06f714be313481498e137a0c9df6ba60de3 Mon Sep 17 00:00:00 2001
From: Shashank Reddy Boyapally <sboyapal@redhat.com>
Date: Fri, 2 Feb 2024 12:29:31 -0500
Subject: [PATCH 1/5] hunter integration and refactor

Signed-off-by: Shashank Reddy Boyapally <sboyapal@redhat.com>
---
 README.md            |   4 ++
 orion.py             | 131 ++++++----------------------------
 requirements.txt     |  35 ++++++++-
 setup.py             |   4 +-
 utils/__init__.py    |   0
 utils/orion_funcs.py | 164 +++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 225 insertions(+), 113 deletions(-)
 create mode 100644 utils/__init__.py
 create mode 100644 utils/orion_funcs.py

diff --git a/README.md b/README.md
index 24d81fa..19e3407 100644
--- a/README.md
+++ b/README.md
@@ -84,8 +84,12 @@ Orion provides flexibility in configuring its behavior by allowing users to set
 
 For enhanced troubleshooting and debugging, Orion supports the ```--debug``` flag, enabling the generation of detailed debug logs. 
 
+Run Orion's regression detection for performance-scale CPT runs by passing the ```--hunter-analyze``` flag. Orion then hands the metrics collected for each test to hunter and prints the change points hunter detects.
+
 Additionally, users can specify a custom path for the output CSV file using the ```--output``` flag, providing control over the location where the generated CSV will be stored.
 
+
+
 Orion's seamless integration with metadata and hunter ensures a robust regression detection tool for perf-scale CPT runs.
 
 
diff --git a/orion.py b/orion.py
index 891d0bd..b0e24c3 100644
--- a/orion.py
+++ b/orion.py
@@ -8,9 +8,11 @@
 import os
 
 import click
-import yaml
 import pandas as pd
+
 from fmatch.matcher import Matcher
+from utils.orion_funcs import run_hunter_analyze, get_metadata, \
+                                set_logging, load_config, get_metric_data
 
 
 @click.group()
@@ -24,7 +26,8 @@ def cli():
 @click.option("--config", default="config.yaml", help="Path to the configuration file")
 @click.option("--output", default="output.csv", help="Path to save the output csv file")
 @click.option("--debug", is_flag=True, help="log level ")
-def orion(config, debug, output):
+@click.option("--hunter-analyze",is_flag=True, help="run hunter analyze")
+def orion(config, debug, output,hunter_analyze):
     """Orion is the cli tool to detect regressions over the runs
 
     Args:
@@ -35,25 +38,22 @@ def orion(config, debug, output):
     level = logging.DEBUG if debug else logging.INFO
     logger = logging.getLogger("Orion")
     logger = set_logging(level, logger)
+    data = load_config(config,logger)
+    ES_URL=None
+
+    if "ES_SERVER" in data.keys():
+        ES_URL = data['ES_SERVER']
+    else:
+        if 'ES_SERVER' in os.environ:
+            ES_URL=os.environ.get("ES_SERVER")
+        else:
+            logger.error("ES_SERVER environment variable/config variable not set")
+            sys.exit(1)
 
-    if "ES_SERVER" not in os.environ:
-        logger.error("ES_SERVER environment variable not set")
-        sys.exit(1)
-
-    try:
-        with open(config, "r", encoding="utf-8") as file:
-            data = yaml.safe_load(file)
-            logger.debug("The %s file has successfully loaded", config)
-    except FileNotFoundError as e:
-        logger.error("Config file not found: %s", e)
-        sys.exit(1)
-    except Exception as e:  # pylint: disable=broad-exception-caught
-        logger.error("An error occurred: %s", e)
-        sys.exit(1)
     for test in data["tests"]:
         metadata = get_metadata(test, logger)
         logger.info("The test %s has started", test["name"])
-        match = Matcher(index="perf_scale_ci", level=level)
+        match = Matcher(index="perf_scale_ci", level=level, ES_URL=ES_URL)
         uuids = match.get_uuid_by_metadata(metadata)
         if len(uuids) == 0:
             print("No UUID present for given metadata")
@@ -77,103 +77,12 @@ def orion(config, debug, output):
             lambda left, right: pd.merge(left, right, on="uuid", how="inner"),
             dataframe_list,
         )
-        match.save_results(merged_df, csv_file_path=output)
-
-
-def get_metric_data(ids, index, metrics, match, logger):
-    """Gets details metrics basked on metric yaml list
+        match.save_results(merged_df, csv_file_path=output.split(".")[0]+"-"+test['name']+".csv")
 
-    Args:
-        ids (list): list of all uuids
-        index (dict): index in es of where to find data
-        metrics (dict): metrics to gather data on
-        match (Matcher): current matcher instance
-        logger (logger): log data to one output
-
-    Returns:
-        dataframe_list: dataframe of the all metrics
-    """
-    dataframe_list = []
-    for metric in metrics:
-        metric_name = metric['name']
-        logger.info("Collecting %s", metric_name)
-        metric_of_interest = metric['metric_of_interest']
-
-        if "agg" in metric.keys():
-            try:
-                cpu = match.get_agg_metric_query(
-                    ids, index, metric
-                )
-                agg_value = metric['agg']['value']
-                agg_type = metric['agg']['agg_type']
-                agg_name = agg_value + "_" + agg_type
-                cpu_df = match.convert_to_df(cpu, columns=["uuid", agg_name])
-                cpu_df = cpu_df.rename(
-                    columns={agg_name: metric_name+ "_" +  agg_name}
-                )
-                dataframe_list.append(cpu_df)
-                logger.debug(cpu_df)
-
-            except Exception as e:  # pylint: disable=broad-exception-caught
-                logger.error(
-                    "Couldn't get agg metrics %s, exception %s",
-                    metric_name,
-                    e,
-                )
-        else:
-            try:
-                podl = match.getResults("", ids, index, metric)
-                podl_df = match.convert_to_df(
-                    podl, columns=["uuid", "timestamp", metric_of_interest]
-                )
-                dataframe_list.append(podl_df)
-                logger.debug(podl_df)
-            except Exception as e:  # pylint: disable=broad-exception-caught
-                logger.error(
-                    "Couldn't get metrics %s, exception %s",
-                    metric_name,
-                    e,
-                )
-    return dataframe_list
-
-def get_metadata(test,logger):
-    """Gets metadata of the run from each test
+        if hunter_analyze:
+            run_hunter_analyze(merged_df,test)
 
-    Args:
-        test (dict): test dictionary
 
-    Returns:
-        dict: dictionary of the metadata
-    """
-    metadata = {}
-    for k,v in test.items():
-        if k in ["metrics","name"]:
-            continue
-        metadata[k] = v
-    metadata["ocpVersion"] = str(metadata["ocpVersion"])
-    logger.debug('metadata' + str(metadata))
-    return metadata
-
-
-def set_logging(level, logger):
-    """sets log level and format
-
-    Args:
-        level (_type_): level of the log
-        logger (_type_): logger object
-
-    Returns:
-        logging.Logger: a formatted and level set logger
-    """
-    logger.setLevel(level)
-    handler = logging.StreamHandler(sys.stdout)
-    handler.setLevel(level)
-    formatter = logging.Formatter(
-        "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
-    )
-    handler.setFormatter(formatter)
-    logger.addHandler(handler)
-    return logger
 
 
 if __name__ == "__main__":
diff --git a/requirements.txt b/requirements.txt
index 218fe88..39850b0 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,14 +1,47 @@
+astroid==3.0.2
 certifi==2023.11.17
+charset-normalizer==3.3.2
 click==8.1.7
+dateparser==1.2.0
+DateTime==5.4
+decorator==5.1.1
+dill==0.3.7
 elastic-transport==8.11.0
 elasticsearch==8.11.1
 elasticsearch7==7.13.0
+expandvars==0.6.5
+gevent==23.9.1
+greenlet==3.0.3
+hunter @ git+https://github.com/datastax-labs/hunter.git@8ff166979d000780ad548e49f006ef2a15d54123
+idna==3.6
+isort==5.13.2
+mccabe==0.7.0
+more-itertools==8.14.0
+numpy==1.24.0
 fmatch==0.0.4
-numpy==1.26.3
 pandas==2.1.4
+platformdirs==4.1.0
+pylint==3.0.3
+pystache==0.6.5
 python-dateutil==2.8.2
 pytz==2023.3.post1
 PyYAML==6.0.1
+regex==2023.12.25
+requests==2.31.0
+ruamel.yaml==0.17.21
+ruamel.yaml.clib==0.2.8
+scipy==1.12.0
+signal-processing-algorithms==1.3.5
 six==1.16.0
+slack_sdk==3.26.2
+structlog==19.2.0
+tabulate==0.8.10
+tomlkit==0.12.3
+typed-ast==1.5.5
+typing-extensions==3.10.0.2
 tzdata==2023.4
+tzlocal==5.2
 urllib3==1.26.18
+validators==0.18.2
+zope.event==5.0
+zope.interface==6.1
diff --git a/setup.py b/setup.py
index be07410..52fdafc 100644
--- a/setup.py
+++ b/setup.py
@@ -2,7 +2,7 @@
 """
 setup.py for orion cli tool
 """
-from setuptools import setup
+from setuptools import setup, find_packages
 
 setup(
     name='orion',
@@ -17,6 +17,8 @@
             'orion = orion:orion',
         ],
     },
+    packages=find_packages(),
+    package_data={'utils': ['utils.py'],'hunter': ['*.py']},
     classifiers=[
         'Programming Language :: Python :: 3',
         'License :: OSI Approved :: MIT License',
diff --git a/utils/__init__.py b/utils/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/utils/orion_funcs.py b/utils/orion_funcs.py
new file mode 100644
index 0000000..f0fdd4b
--- /dev/null
+++ b/utils/orion_funcs.py
@@ -0,0 +1,164 @@
+# pylint: disable=cyclic-import
+"""
+module for all utility functions orion uses
+"""
+# pylint: disable = import-error
+
+import logging
+import sys
+
+import yaml
+import pandas as pd
+
+from hunter.report import Report, ReportType
+from hunter.series import Metric, Series
+
+
+def run_hunter_analyze(merged_df,test):
+    """Run hunter change point analysis on a test's metrics and print the report
+
+    Args:
+        merged_df (Dataframe): merged dataframe of all the metrics; the
+            "timestamp" column gives the time axis, "uuid" becomes an attribute
+            and every other column is analysed as a metric
+        test (dict): test dictionary with the each test information
+    """
+    # Epoch seconds are derived on a local series so the caller's dataframe is
+    # not modified; "int64" avoids the platform-dependent width of plain int.
+    timestamps = pd.to_datetime(merged_df["timestamp"]).astype("int64") // 10**9
+    metric_columns = [column for column in merged_df.columns
+                      if column not in ["uuid","timestamp"]]
+    series=Series(
+        test_name=test["name"],
+        branch=None,
+        time=list(timestamps),
+        metrics={column: Metric(1, 1.0) for column in metric_columns},
+        data={column: merged_df[column] for column in metric_columns},
+        attributes={column: merged_df[column]
+                    for column in merged_df.columns if column == "uuid"}
+    )
+    change_points=series.analyze().change_points_by_time
+    report=Report(series,change_points)
+    # Label the report with the analysed test rather than the literal "test".
+    output = report.produce_report(test_name=test["name"],report_type=ReportType.LOG)
+    print(output)
+
+# pylint: disable=too-many-locals
+def get_metric_data(ids, index, metrics, match, logger):
+    """Collects one dataframe per metric of the metric yaml list
+
+    Args:
+        ids (list): list of all uuids
+        index (dict): index in es of where to find data
+        metrics (dict): metrics to gather data on
+        match (Matcher): current matcher instance
+        logger (logger): log data to one output
+
+    Returns:
+        list: dataframes of the metrics that were collected without error,
+            in the order of the metrics list
+    """
+    collected = []
+    for metric in metrics:
+        name = metric['name']
+        logger.info("Collecting %s", name)
+        metric_of_interest = metric['metric_of_interest']
+
+        if "agg" not in metric:
+            # plain metric: keep uuid, timestamp and the metric of interest
+            try:
+                raw = match.getResults("", ids, index, metric)
+                frame = match.convert_to_df(
+                    raw, columns=["uuid", "timestamp", metric_of_interest]
+                )
+                collected.append(frame)
+                logger.debug(frame)
+            except Exception as e:  # pylint: disable=broad-exception-caught
+                # a failing metric is logged and skipped, the loop goes on
+                logger.error(
+                    "Couldn't get metrics %s, exception %s",
+                    name,
+                    e,
+                )
+            continue
+
+        # aggregated metric: its column is "<value>_<agg_type>" and gets the
+        # metric name as prefix in the resulting dataframe
+        try:
+            raw = match.get_agg_metric_query(ids, index, metric)
+            agg_value, agg_type = metric['agg']['value'], metric['agg']['agg_type']
+            column = agg_value + "_" + agg_type
+            frame = match.convert_to_df(raw, columns=["uuid", column])
+            frame = frame.rename(columns={column: name + "_" + column})
+            collected.append(frame)
+            logger.debug(frame)
+        except Exception as e:  # pylint: disable=broad-exception-caught
+            logger.error(
+                "Couldn't get agg metrics %s, exception %s",
+                name,
+                e,
+            )
+    return collected
+
+
+def get_metadata(test,logger):
+    """Builds the metadata dictionary of a test
+
+    Args:
+        test (dict): test dictionary
+        logger (Logger): logger that receives the metadata at debug level
+
+    Returns:
+        dict: every test entry except "metrics" and "name", ocpVersion as str
+    """
+    skipped_keys = ("metrics", "name")
+    metadata = {key: value for key, value in test.items()
+                if key not in skipped_keys}
+    # ocpVersion is always handed on as a string, whatever type the test holds
+    metadata["ocpVersion"] = str(metadata["ocpVersion"])
+    logger.debug('metadata' + str(metadata))
+    return metadata
+
+
+
+def set_logging(level, logger):
+    """sets log level and format
+
+    Args:
+        level (int): level of the log, e.g. logging.DEBUG
+        logger (logging.Logger): logger object
+
+    Returns:
+        logging.Logger: the same logger with its level and stdout handler set
+    """
+    logger.setLevel(level)
+    # Replace a stdout handler left by an earlier call so records print once.
+    for old in [h for h in logger.handlers if getattr(h, "stream", None) is sys.stdout]:
+        logger.removeHandler(old)
+    handler = logging.StreamHandler(sys.stdout)
+    handler.setLevel(level)
+    handler.setFormatter(logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s"))
+    logger.addHandler(handler)
+    return logger
+
+def load_config(config,logger):
+    """Loads config file; logs the problem and exits with status 1 on failure
+
+    Args:
+        config (str): path to config file
+        logger (Logger): logger
+    Returns:
+        dict: dictionary of the config file
+    """
+    try:
+        with open(config, "r", encoding="utf-8") as file:
+            data = yaml.safe_load(file)
+        if not isinstance(data, dict):  # e.g. an empty file loads as None
+            raise ValueError(f"{config} does not contain a YAML mapping")
+        logger.debug("The %s file has successfully loaded", config)
+        return data
+    except FileNotFoundError as e:
+        logger.error("Config file not found: %s", e)
+    except Exception as e:  # pylint: disable=broad-exception-caught
+        logger.error("An error occurred: %s", e)
+    sys.exit(1)

From 8e9c9e658b8ba1ba4937395e357360351658d33d Mon Sep 17 00:00:00 2001
From: Shashank Reddy Boyapally <sboyapal@redhat.com>
Date: Mon, 5 Feb 2024 15:20:55 -0500
Subject: [PATCH 2/5] updated version windows for requirements and updated
 pylint workflow

Signed-off-by: Shashank Reddy Boyapally <sboyapal@redhat.com>
---
 .github/workflows/pylint.yml |  2 ++
 requirements.txt             | 19 ++++++++++---------
 2 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml
index 9ded563..383186f 100644
--- a/.github/workflows/pylint.yml
+++ b/.github/workflows/pylint.yml
@@ -18,6 +18,8 @@ jobs:
       run: |
         python -m pip install --upgrade pip
         pip install pylint
+        pip install -r requirements.txt
+        pip install .
     - name: Analysing the code with pylint
       run: |
         pylint -d C0103 $(git ls-files '*.py')
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index 39850b0..16505ee 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,4 @@
-astroid==3.0.2
+astroid==3.0.3
 certifi==2023.11.17
 charset-normalizer==3.3.2
 click==8.1.7
@@ -10,6 +10,7 @@ elastic-transport==8.11.0
 elasticsearch==8.11.1
 elasticsearch7==7.13.0
 expandvars==0.6.5
+fmatch==0.0.4
 gevent==23.9.1
 greenlet==3.0.3
 hunter @ git+https://github.com/datastax-labs/hunter.git@8ff166979d000780ad548e49f006ef2a15d54123
@@ -17,9 +18,8 @@ idna==3.6
 isort==5.13.2
 mccabe==0.7.0
 more-itertools==8.14.0
-numpy==1.24.0
-fmatch==0.0.4
-pandas==2.1.4
+numpy
+pandas
 platformdirs==4.1.0
 pylint==3.0.3
 pystache==0.6.5
@@ -28,17 +28,18 @@ pytz==2023.3.post1
 PyYAML==6.0.1
 regex==2023.12.25
 requests==2.31.0
-ruamel.yaml==0.17.21
-ruamel.yaml.clib==0.2.8
-scipy==1.12.0
-signal-processing-algorithms==1.3.5
+ruamel.yaml
+ruamel.yaml.clib
+scipy
+signal-processing-algorithms==1.3.4
 six==1.16.0
 slack_sdk==3.26.2
 structlog==19.2.0
 tabulate==0.8.10
+tomli==2.0.1
 tomlkit==0.12.3
 typed-ast==1.5.5
-typing-extensions==3.10.0.2
+typing_extensions==4.9.0
 tzdata==2023.4
 tzlocal==5.2
 urllib3==1.26.18

From e865a890d0260f3bb8d0957cafe13050f52202b0 Mon Sep 17 00:00:00 2001
From: Shashank Reddy Boyapally <sboyapal@redhat.com>
Date: Mon, 5 Feb 2024 15:44:51 -0500
Subject: [PATCH 3/5] added compatibility notes to Readme

Signed-off-by: Shashank Reddy Boyapally <sboyapal@redhat.com>
---
 README.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/README.md b/README.md
index 19e3407..986e8d4 100644
--- a/README.md
+++ b/README.md
@@ -63,6 +63,10 @@ tests :
 ## Build Orion
 Building Orion is a straightforward process. Follow these commands:
 
+**Note: Orion Compatibility**
+
+Orion currently supports Python versions `3.8.x`, `3.9.x`, `3.10.x`, and `3.11.x`. Other Python versions might lead to dependency conflicts caused by hunter (so-called "dependency hell"). In particular, Python `3.12.x` may result in errors because it removed distutils, a dependency used by numpy. Use one of the supported versions to avoid these compatibility issues.
+
 Clone the current repository using git clone.
 
 ```

From ecd3ec2ad7b85ab6e5c5828f8cf7aac337f658db Mon Sep 17 00:00:00 2001
From: Shashank Reddy Boyapally <sboyapal@redhat.com>
Date: Wed, 7 Feb 2024 16:11:51 -0500
Subject: [PATCH 4/5] reduced requirements.txt

Signed-off-by: Shashank Reddy Boyapally <sboyapal@redhat.com>
---
 requirements.txt | 39 ++-------------------------------------
 1 file changed, 2 insertions(+), 37 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index 16505ee..0c7d487 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,48 +1,13 @@
-astroid==3.0.3
+hunter @ git+https://github.com/datastax-labs/hunter.git@8ff166979d000780ad548e49f006ef2a15d54123
 certifi==2023.11.17
-charset-normalizer==3.3.2
 click==8.1.7
-dateparser==1.2.0
-DateTime==5.4
-decorator==5.1.1
-dill==0.3.7
 elastic-transport==8.11.0
 elasticsearch==8.11.1
 elasticsearch7==7.13.0
-expandvars==0.6.5
 fmatch==0.0.4
-gevent==23.9.1
-greenlet==3.0.3
-hunter @ git+https://github.com/datastax-labs/hunter.git@8ff166979d000780ad548e49f006ef2a15d54123
-idna==3.6
-isort==5.13.2
-mccabe==0.7.0
-more-itertools==8.14.0
-numpy
-pandas
-platformdirs==4.1.0
-pylint==3.0.3
-pystache==0.6.5
 python-dateutil==2.8.2
 pytz==2023.3.post1
 PyYAML==6.0.1
-regex==2023.12.25
-requests==2.31.0
-ruamel.yaml
-ruamel.yaml.clib
-scipy
-signal-processing-algorithms==1.3.4
 six==1.16.0
-slack_sdk==3.26.2
-structlog==19.2.0
-tabulate==0.8.10
-tomli==2.0.1
-tomlkit==0.12.3
-typed-ast==1.5.5
-typing_extensions==4.9.0
 tzdata==2023.4
-tzlocal==5.2
-urllib3==1.26.18
-validators==0.18.2
-zope.event==5.0
-zope.interface==6.1
+urllib3==1.26.18
\ No newline at end of file

From 3ede92cffc65270f7e6ab6c7b3e5a618a0f6049b Mon Sep 17 00:00:00 2001
From: Shashank Reddy Boyapally <sboyapal@redhat.com>
Date: Fri, 9 Feb 2024 14:19:21 -0500
Subject: [PATCH 5/5] pylinting only 3.11

Signed-off-by: Shashank Reddy Boyapally <sboyapal@redhat.com>
---
 .github/workflows/pylint.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml
index 383186f..d8c23ef 100644
--- a/.github/workflows/pylint.yml
+++ b/.github/workflows/pylint.yml
@@ -7,7 +7,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ["3.8", "3.9", "3.10", "3.11"]
+        python-version: ["3.11"]
     steps:
     - uses: actions/checkout@v3
     - name: Set up Python ${{ matrix.python-version }}