From 765ec5345060a01ace255df19d51e9af9728e448 Mon Sep 17 00:00:00 2001
From: Shashank Reddy Boyapally
Date: Wed, 24 Jan 2024 11:14:20 -0500
Subject: [PATCH 1/4] orion cli, pylint workflow and README (#2)

---
 .github/workflows/pylint.yml |  23 +++++
 .gitignore                   |   4 +
 README.md                    |  74 ++++++++++++++++
 orion.py                     | 161 +++++++++++++++++++++++++++++++++++
 requirements.txt             |  14 +++
 setup.py                     |  25 ++++++
 6 files changed, 301 insertions(+)
 create mode 100644 .github/workflows/pylint.yml
 create mode 100644 README.md
 create mode 100644 orion.py
 create mode 100644 requirements.txt
 create mode 100644 setup.py

diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml
new file mode 100644
index 0000000..9ded563
--- /dev/null
+++ b/.github/workflows/pylint.yml
@@ -0,0 +1,23 @@
+name: Pylint
+
+on: [push,pull_request]
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: ["3.8", "3.9", "3.10", "3.11"]
+    steps:
+    - uses: actions/checkout@v3
+    - name: Set up Python ${{ matrix.python-version }}
+      uses: actions/setup-python@v3
+      with:
+        python-version: ${{ matrix.python-version }}
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        pip install pylint
+    - name: Analysing the code with pylint
+      run: |
+        pylint -d C0103 $(git ls-files '*.py')
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index 68bc17f..850de9f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -158,3 +158,7 @@ cython_debug/
 # and can be added to the global gitignore or merged into this file. For a more nuclear
 # option (not recommended) you can uncomment the following to ignore the entire idea folder.
 #.idea/
+
+*.yaml
+*.csv
+.vscode/
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..b85a259
--- /dev/null
+++ b/README.md
@@ -0,0 +1,74 @@
+# Orion - CLI tool to find regressions
+Orion is a command-line tool for identifying regressions in perf-scale CPT runs, using the metadata provided with each run. The detection mechanism relies on [hunter](https://github.com/datastax-labs/hunter).
+
+Below is an illustrative example of the config and metadata that Orion can handle:
+
+```
+tests :
+  - name : aws-small-scale-cluster-density-v2
+    platform: AWS
+    masterNodesType: m6a.xlarge
+    masterNodesCount: 3
+    workerNodesType: m6a.xlarge
+    workerNodesCount: 24
+    benchmark: cluster-density-v2
+    ocpVersion: 4.15
+    networkType: OVNKubernetes
+    # encrypted: true
+    # fips: false
+    # ipsec: false
+
+    metrics :
+    - metric : podReadyLatency
+      metricType : latency
+
+    - metric : apiserverCPU
+      metricType : cpu
+      namespace: openshift-kube-apiserver
+
+    - metric: ovnCPU
+      metricType: cpu
+      namespace: openshift-ovn-kubernetes
+
+    - metric: etcdCPU
+      metricType: cpu
+      namespace: openshift-etcd
+
+
+```
+
+## Build Orion
+Building Orion is a straightforward process. Follow these commands:
+
+Clone the current repository using git clone.
+
+```
+>> git clone
+>> python -m venv venv
+>> source venv/bin/activate
+>> pip install -r requirements.txt
+>> export ES_SERVER=
+>> pip install .
+```
+## Run Orion
+Running Orion is as simple as building it. After following the build steps, run:
+```
+>> orion
+```
+At the moment, Orion supports the following options.
+
+Users can set the path to their config file with the ```--config``` flag.
+
+For enhanced troubleshooting and debugging, Orion supports the ```--debug``` flag, which enables detailed debug logs.
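+
+For example, a typical invocation that combines these flags might look like this (the config path here is just a placeholder, not a file shipped with this patch):
+
+```
+>> orion --config /path/to/config.yaml --debug
+```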
+
+Additionally, users can specify a custom path for the output CSV file using the ```--output``` flag, controlling where the generated CSV is stored.
+
+By combining run metadata with hunter's change point detection, Orion provides a robust regression detection tool for perf-scale CPT runs.
+
+
diff --git a/orion.py b/orion.py
new file mode 100644
index 0000000..f40b19a
--- /dev/null
+++ b/orion.py
@@ -0,0 +1,161 @@
+"""
+This is the cli file for orion, a tool to detect regressions using hunter
+"""
+# pylint: disable = import-error
+import sys
+from functools import reduce
+import logging
+import os
+
+import click
+import yaml
+import pandas as pd
+from fmatch.matcher import Matcher
+
+
+@click.group()
+def cli():
+    """
+    cli function to group commands
+    """
+
+
+# pylint: disable=too-many-locals
+@click.command()
+@click.option("--config", default="config.yaml", help="Path to the configuration file")
+@click.option("--output", default="output.csv", help="Path to save the output csv file")
+@click.option("--debug", is_flag=True, help="Enable debug logging")
+def orion(config, debug, output):
+    """Orion is the CLI tool to detect regressions across runs
+
+    Args:
+        config (str): path to the config file
+        debug (bool): enables debug logging
+        output (str): path to the output csv file
+    """
+    level = logging.DEBUG if debug else logging.INFO
+    logger = logging.getLogger("Orion")
+    logger = set_logging(level, logger)
+
+    if "ES_SERVER" not in os.environ:
+        logger.error("ES_SERVER environment variable not set")
+        sys.exit(1)
+
+    try:
+        with open(config, "r", encoding="utf-8") as file:
+            data = yaml.safe_load(file)
+            logger.debug("The %s file has successfully loaded", config)
+    except FileNotFoundError as e:
+        logger.error("Config file not found: %s", e)
+        sys.exit(1)
+    except Exception as e:  # pylint: disable=broad-exception-caught
+        logger.error("An error occurred: %s", e)
+        sys.exit(1)
+    for test in data["tests"]:
+        metadata = get_metadata(test)
+        logger.info("The test %s has started", test["name"])
+        match = Matcher(index="perf_scale_ci", level=level)
+        uuids = match.get_uuid_by_metadata(metadata)
+        if len(uuids) == 0:
+            print("No UUID present for given metadata")
+            sys.exit()
+
+        runs = match.match_kube_burner(uuids)
+        ids = match.filter_runs(runs, runs)
+        metrics = test["metrics"]
+        dataframe_list = []
+
+        for metric in metrics:
+            logger.info("Collecting %s", metric["metric"])
+            if metric["metricType"] == "latency":
+                if metric["metric"] == "podReadyLatency":
+                    try:
+                        podl = match.burner_results("", ids, "ripsaw-kube-burner*")
+                        podl_df = match.convert_to_df(
+                            podl, columns=["uuid", "timestamp", "P99"]
+                        )
+                        dataframe_list.append(podl_df)
+                        logger.debug(podl_df)
+                    except Exception as e:  # pylint: disable=broad-exception-caught
+                        logger.error(
+                            "The namespace %s does not exist, exception %s",
+                            metric["namespace"],
+                            e,
+                        )
+
+            elif metric["metricType"] == "cpu":
+                try:
+                    cpu = match.burner_cpu_results(
+                        ids, metric["namespace"], "ripsaw-kube-burner*"
+                    )
+                    cpu_df = match.convert_to_df(cpu, columns=["uuid", "cpu_avg"])
+                    cpu_df = cpu_df.rename(
+                        columns={"cpu_avg": metric["metric"] + "_cpu_avg"}
+                    )
+                    dataframe_list.append(cpu_df)
+                    logger.debug(cpu_df)
+                except Exception as e:  # pylint: disable=broad-exception-caught
+                    logger.error(
+                        "The namespace %s does not exist, exception %s",
+                        metric["namespace"],
+                        e,
+                    )
+
+        merged_df = reduce(
+            lambda left, right: pd.merge(left, right, on="uuid", how="inner"),
+            dataframe_list,
+        )
+        match.save_results(merged_df, csv_file_path=output)
+
+
+def get_metadata(test):
+    """Gets metadata of the run from each test
+
+    Args:
+        test (dict): test dictionary
+
+    Returns:
+        dict: dictionary of the metadata
+    """
+    metadata_columns = [
+        "platform",
+        "masterNodesType",
+        "masterNodesCount",
+        "workerNodesType",
+        "workerNodesCount",
+        "benchmark",
+        "ocpVersion",
+        "networkType",
+        "encrypted",
+        "fips",
+        "ipsec",
+    ]
+    metadata = {key: test[key] for key in metadata_columns if key in test}
+    metadata["ocpVersion"] = str(metadata["ocpVersion"])
+    return metadata
+
+
+def set_logging(level, logger):
+    """sets log level and format
+
+    Args:
+        level (int): level of the log
+        logger (logging.Logger): logger object
+
+    Returns:
+        logging.Logger: a formatted and level set logger
+    """
+    logger.setLevel(level)
+    handler = logging.StreamHandler(sys.stdout)
+    handler.setLevel(level)
+    formatter = logging.Formatter(
+        "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+    )
+    handler.setFormatter(formatter)
+    logger.addHandler(handler)
+    return logger
+
+
+if __name__ == "__main__":
+    cli.add_command(orion)
+    cli()
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..b98bd22
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,14 @@
+certifi==2023.11.17
+click==8.1.7
+elastic-transport==8.11.0
+elasticsearch==8.11.1
+elasticsearch7==7.13.0
+fmatch==0.0.2
+numpy==1.26.3
+pandas==2.1.4
+python-dateutil==2.8.2
+pytz==2023.3.post1
+PyYAML==6.0.1
+six==1.16.0
+tzdata==2023.4
+urllib3==1.26.18
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..be07410
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,25 @@
+# orion/setup.py
+"""
+setup.py for orion cli tool
+"""
+from setuptools import setup
+
+setup(
+    name='orion',
+    version='1.0',
+    py_modules=['orion'],
+    install_requires=[
+        'click',
+        'fmatch'
+    ],
+    entry_points={
+        'console_scripts': [
+            'orion = orion:orion',
+        ],
+    },
+    classifiers=[
+        'Programming Language :: Python :: 3',
+        'License :: OSI Approved :: MIT License',
+        'Operating System :: OS Independent',
+    ],
+)

From 7d90ee97b51428b28e9c20ae3953f03eb8458bde Mon Sep 17 00:00:00 2001
From: "Joe Talerico (rook)"
Date: Mon, 29 Jan 2024 06:48:56 -0500
Subject: [PATCH 2/4] Adding Dockerfile (#5)

This Dockerfile builds Hunter and Orion into a single container image.
Signed-off-by: Joe Talerico
Co-authored-by: Joe Talerico
---
 .github/workflows/build-push.yaml | 30 +++++++++++++++++++
 Dockerfile                        | 49 +++++++++++++++++++++++++++++++
 2 files changed, 79 insertions(+)
 create mode 100644 .github/workflows/build-push.yaml
 create mode 100644 Dockerfile

diff --git a/.github/workflows/build-push.yaml b/.github/workflows/build-push.yaml
new file mode 100644
index 0000000..1e50eca
--- /dev/null
+++ b/.github/workflows/build-push.yaml
@@ -0,0 +1,30 @@
+name: Build and Push Image
+on: [ push ]
+
+jobs:
+  build:
+    name: Build and push image
+    runs-on: ubuntu-20.04
+
+    steps:
+      - uses: actions/checkout@v2
+
+      - name: Build Orion Image
+        id: build-orion
+        uses: redhat-actions/buildah-build@v2
+        with:
+          image: orion
+          context: orion
+          tags: latest ${{ github.sha }}
+          containerfiles: |
+            ./Dockerfile
+
+      - name: Push Orion image to quay.io
+        id: push-orion-to-quay
+        uses: redhat-actions/push-to-registry@v2
+        with:
+          image: ${{ steps.build-orion.outputs.image }}
+          tags: ${{ steps.build-orion.outputs.tags }}
+          registry: quay.io/cloud-bulldozer
+          username: ${{ secrets.QUAY_USER }}
+          password: ${{ secrets.QUAY_TOKEN }}
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..cbb96d2
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,49 @@
+FROM python:3.12.1-slim-bullseye
+# So that STDOUT/STDERR is printed
+ENV PYTHONUNBUFFERED="1"
+
+# We create the default user and group to run unprivileged
+ENV HUNTER_HOME /srv/hunter
+WORKDIR ${HUNTER_HOME}
+
+RUN groupadd --gid 8192 hunter && \
+    useradd --uid 8192 --shell /bin/false --create-home --no-log-init --gid hunter hunter && \
+    chown hunter:hunter ${HUNTER_HOME}
+
+# First let's just get things updated.
+# Install System dependencies
+RUN apt-get update --assume-yes && \
+    apt-get install -o 'Dpkg::Options::=--force-confnew' -y --force-yes -q \
+        git \
+        openssh-client \
+        gcc \
+        clang \
+        build-essential \
+        make \
+        curl \
+        virtualenv \
+    && rm -rf /var/lib/apt/lists/*
+
+# Get poetry package
+RUN curl -sSL https://install.python-poetry.org | python3 -
+# Adding poetry to PATH
+ENV PATH="/root/.local/bin/:$PATH"
+
+RUN git clone https://github.com/datastax-labs/hunter.git ${HUNTER_HOME}
+
+ENV PATH="${HUNTER_HOME}/bin:$PATH"
+
+RUN --mount=type=ssh \
+    virtualenv --python python venv && \
+    . venv/bin/activate && \
+    poetry install -v && \
+    mkdir -p bin && \
+    ln -s ../venv/bin/hunter ${HUNTER_HOME}/bin
+
+COPY --chown=hunter:hunter . orion
+
+RUN . venv/bin/activate && \
+    cd orion && \
+    pip install -r requirements.txt && \
+    python setup.py install && \
+    ln -s ../venv/bin/orion ${HUNTER_HOME}/bin

From 6ed1919ef5482b0a7812e87ffac22e96fa65de2d Mon Sep 17 00:00:00 2001
From: Paige Rubendall <64206430+paigerube14@users.noreply.github.com>
Date: Tue, 30 Jan 2024 17:20:37 -0500
Subject: [PATCH 3/4] Update fmatch version and avoid hard-coded metadata keys
 (#7)

* adding orion logging

Signed-off-by: Paige Rubendall

* adding updated fmatch

Signed-off-by: Paige Rubendall

* k list

Signed-off-by: Paige Rubendall

* fixing spaces

Signed-off-by: Paige Rubendall

---------

Signed-off-by: Paige Rubendall
---
 orion.py         | 24 ++++++------------------
 requirements.txt |  2 +-
 2 files changed, 9 insertions(+), 17 deletions(-)

diff --git a/orion.py b/orion.py
index f40b19a..1d455b0 100644
--- a/orion.py
+++ b/orion.py
@@ -52,7 +52,7 @@ def orion(config, debug, output):
         logger.error("An error occurred: %s", e)
         sys.exit(1)
     for test in data["tests"]:
-        metadata = get_metadata(test)
+        metadata = get_metadata(test, logger)
         logger.info("The test %s has started", test["name"])
         match = Matcher(index="perf_scale_ci", level=level)
         uuids = match.get_uuid_by_metadata(metadata)
@@ -108,7 +108,7 @@ def orion(config, debug, output):
         match.save_results(merged_df, csv_file_path=output)
 
 
-def get_metadata(test):
+def get_metadata(test, logger):
     """Gets metadata of the run from each test
 
     Args:
@@ -117,21 +117,13 @@ def get_metadata(test):
     Returns:
         dict: dictionary of the metadata
     """
-    metadata_columns = [
-        "platform",
-        "masterNodesType",
-        "masterNodesCount",
-        "workerNodesType",
-        "workerNodesCount",
-        "benchmark",
-        "ocpVersion",
-        "networkType",
-        "encrypted",
-        "fips",
-        "ipsec",
-    ]
-    metadata = {key: test[key] for key in metadata_columns if key in test}
+    metadata = {}
+    for k, v in test.items():
+        if k in ["metrics", "name"]:
+            continue
+        metadata[k] = v
     metadata["ocpVersion"] = str(metadata["ocpVersion"])
+    logger.debug("metadata: %s", metadata)
     return metadata
 
 
diff --git a/requirements.txt b/requirements.txt
index b98bd22..e3f3ac3 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,7 +3,7 @@ click==8.1.7
 elastic-transport==8.11.0
 elasticsearch==8.11.1
 elasticsearch7==7.13.0
-fmatch==0.0.2
+fmatch==0.0.3
 numpy==1.26.3
 pandas==2.1.4
 python-dateutil==2.8.2

From d8f933b617ad268c8eff5e8829b6a2d92ace785a Mon Sep 17 00:00:00 2001
From: Paige Rubendall <64206430+paigerube14@users.noreply.github.com>
Date: Thu, 1 Feb 2024 14:08:28 -0500
Subject: [PATCH 4/4] Add a more generic way of comparing metrics, with
 working ingress (#9)

Signed-off-by: Paige Rubendall
---
 README.md        |  45 +++++++++++++++-----
 orion.py         | 112 +++++++++++++++++++++++++++++------------------
 requirements.txt |   2 +-
 3 files changed, 107 insertions(+), 52 deletions(-)

diff --git a/README.md b/README.md
index b85a259..24d81fa 100644
--- a/README.md
+++ b/README.md
@@ -19,20 +19,43 @@ tests :
     # ipsec: false
 
     metrics :
-    - metric : podReadyLatency
-      metricType : latency
+    - name: podReadyLatency
+      metricName: podLatencyQuantilesMeasurement
+      quantileName: Ready
+      metric_of_interest: P99
+      not:
+        - jobConfig.name: "garbage-collection"
 
-    - metric : apiserverCPU
-      metricType : cpu
-      namespace: openshift-kube-apiserver
+    - name: apiserverCPU
+      metricName : containerCPU
+      labels.namespace: openshift-kube-apiserver
+      metric_of_interest: value
+      agg:
+        value: cpu
+        agg_type: avg
 
-    - metric: ovnCPU
-      metricType: cpu
-      namespace: openshift-ovn-kubernetes
+    - name: ovnCPU
+      metricName : containerCPU
+      labels.namespace: openshift-ovn-kubernetes
+      metric_of_interest: value
+      agg:
+        value: cpu
+        agg_type: avg
+
+    - name: etcdCPU
+      metricName : containerCPU
+      labels.namespace: openshift-etcd
+      metric_of_interest: value
+      agg:
+        value: cpu
+        agg_type: avg
 
-    - metric: etcdCPU
-      metricType: cpu
-      namespace: openshift-etcd
+    - name: etcdDisk
+      metricName : 99thEtcdDiskBackendCommitDurationSeconds
+      metric_of_interest: value
+      agg:
+        value: duration
+        agg_type: avg
 
 
 ```
diff --git a/orion.py b/orion.py
index 1d455b0..891d0bd 100644
--- a/orion.py
+++ b/orion.py
@@ -19,7 +19,6 @@ def cli():
     cli function to group commands
     """
 
-
 # pylint: disable=too-many-locals
 @click.command()
 @click.option("--config", default="config.yaml", help="Path to the configuration file")
@@ -60,46 +59,19 @@ def orion(config, debug, output):
             print("No UUID present for given metadata")
             sys.exit()
 
-        runs = match.match_kube_burner(uuids)
-        ids = match.filter_runs(runs, runs)
+        if metadata["benchmark"] == "k8s-netperf":
+            index = "k8s-netperf"
+            ids = uuids
+        elif metadata["benchmark"] == "ingress-perf":
+            index = "ingress-performance"
+            ids = uuids
+        else:
+            index = "ripsaw-kube-burner"
+            runs = match.match_kube_burner(uuids)
+            ids = match.filter_runs(runs, runs)
+
         metrics = test["metrics"]
-        dataframe_list = []
-
-        for metric in metrics:
-            logger.info("Collecting %s", metric["metric"])
-            if metric["metricType"] == "latency":
-                if metric["metric"] == "podReadyLatency":
-                    try:
-                        podl = match.burner_results("", ids, "ripsaw-kube-burner*")
-                        podl_df = match.convert_to_df(
-                            podl, columns=["uuid", "timestamp", "P99"]
-                        )
-                        dataframe_list.append(podl_df)
-                        logger.debug(podl_df)
-                    except Exception as e:  # pylint: disable=broad-exception-caught
-                        logger.error(
-                            "The namespace %s does not exist, exception %s",
-                            metric["namespace"],
-                            e,
-                        )
-
-            elif metric["metricType"] == "cpu":
-                try:
-                    cpu = match.burner_cpu_results(
-                        ids, metric["namespace"], "ripsaw-kube-burner*"
-                    )
-                    cpu_df = match.convert_to_df(cpu, columns=["uuid", "cpu_avg"])
-                    cpu_df = cpu_df.rename(
-                        columns={"cpu_avg": metric["metric"] + "_cpu_avg"}
-                    )
-                    dataframe_list.append(cpu_df)
-                    logger.debug(cpu_df)
-                except Exception as e:  # pylint: disable=broad-exception-caught
-                    logger.error(
-                        "The namespace %s does not exist, exception %s",
-                        metric["namespace"],
-                        e,
-                    )
+        dataframe_list = get_metric_data(ids, index, metrics, match, logger)
 
         merged_df = reduce(
             lambda left, right: pd.merge(left, right, on="uuid", how="inner"),
             dataframe_list,
         )
         match.save_results(merged_df, csv_file_path=output)
 
 
+def get_metric_data(ids, index, metrics, match, logger):
+    """Gets detailed metrics based on the metrics list in the config
+
+    Args:
+        ids (list): list of all uuids
+        index (str): ES index in which to find the data
+        metrics (list): metrics to gather data on
+        match (Matcher): current matcher instance
+        logger (logging.Logger): logger for diagnostic output
+
+    Returns:
+        list: list of dataframes, one per metric
+    """
+    dataframe_list = []
+    for metric in metrics:
+        metric_name = metric['name']
+        logger.info("Collecting %s", metric_name)
+        metric_of_interest = metric['metric_of_interest']
+
+        if "agg" in metric:
+            try:
+                cpu = match.get_agg_metric_query(ids, index, metric)
+                agg_value = metric['agg']['value']
+                agg_type = metric['agg']['agg_type']
+                agg_name = agg_value + "_" + agg_type
+                cpu_df = match.convert_to_df(cpu, columns=["uuid", agg_name])
+                cpu_df = cpu_df.rename(
+                    columns={agg_name: metric_name + "_" + agg_name}
+                )
+                dataframe_list.append(cpu_df)
+                logger.debug(cpu_df)
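+                # NOTE (illustrative): with agg value "cpu" and agg_type "avg",
+                # the aggregated column comes back as "cpu_avg" and is renamed
+                # to "<name>_cpu_avg" (e.g. "apiserverCPU_cpu_avg"), so the
+                # per-metric dataframes can be merged on uuid without clashing.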
+
+            except Exception as e:  # pylint: disable=broad-exception-caught
+                logger.error(
+                    "Couldn't get agg metrics %s, exception %s",
+                    metric_name,
+                    e,
+                )
+        else:
+            try:
+                podl = match.getResults("", ids, index, metric)
+                podl_df = match.convert_to_df(
+                    podl, columns=["uuid", "timestamp", metric_of_interest]
+                )
+                dataframe_list.append(podl_df)
+                logger.debug(podl_df)
+            except Exception as e:  # pylint: disable=broad-exception-caught
+                logger.error(
+                    "Couldn't get metrics %s, exception %s",
+                    metric_name,
+                    e,
+                )
+    return dataframe_list
+
 
 def get_metadata(test, logger):
     """Gets metadata of the run from each test
 
diff --git a/requirements.txt b/requirements.txt
index e3f3ac3..218fe88 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,7 +3,7 @@ click==8.1.7
 elastic-transport==8.11.0
 elasticsearch==8.11.1
 elasticsearch7==7.13.0
-fmatch==0.0.3
+fmatch==0.0.4
 numpy==1.26.3
 pandas==2.1.4
 python-dateutil==2.8.2
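
A note on the merge step that all four patches leave untouched: orion.py joins the per-metric dataframes with an inner merge on ```uuid``` before writing the CSV. Here is a minimal standalone sketch of that ```functools.reduce``` + ```pd.merge``` pattern; the two input dataframes are invented, simplified sample data, not real run output:

```
from functools import reduce

import pandas as pd

# Invented per-metric dataframes keyed by run uuid, simplified versions of
# what match.convert_to_df returns for a latency metric and a CPU metric.
podl_df = pd.DataFrame({"uuid": ["run-a", "run-b"], "P99": [1.2, 1.4]})
cpu_df = pd.DataFrame({"uuid": ["run-a", "run-b"], "apiserverCPU_cpu_avg": [0.8, 0.9]})

# Inner-join every dataframe on "uuid", as orion.py does, so only runs
# that produced every metric survive into the output CSV.
merged_df = reduce(
    lambda left, right: pd.merge(left, right, on="uuid", how="inner"),
    [podl_df, cpu_df],
)
print(merged_df)
```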