From 765ec5345060a01ace255df19d51e9af9728e448 Mon Sep 17 00:00:00 2001
From: Shashank Reddy Boyapally
Date: Wed, 24 Jan 2024 11:14:20 -0500
Subject: [PATCH 1/4] orion cli, pylint workflow and README (#2)

---
 .github/workflows/pylint.yml |  23 +++++
 .gitignore                   |   4 +
 README.md                    |  74 ++++++++++++++++
 orion.py                     | 161 +++++++++++++++++++++++++++++++++++
 requirements.txt             |  14 +++
 setup.py                     |  25 ++++++
 6 files changed, 301 insertions(+)
 create mode 100644 .github/workflows/pylint.yml
 create mode 100644 README.md
 create mode 100644 orion.py
 create mode 100644 requirements.txt
 create mode 100644 setup.py

diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml
new file mode 100644
index 0000000..9ded563
--- /dev/null
+++ b/.github/workflows/pylint.yml
@@ -0,0 +1,23 @@
+name: Pylint
+
+on: [push,pull_request]
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: ["3.8", "3.9", "3.10", "3.11"]
+    steps:
+    - uses: actions/checkout@v3
+    - name: Set up Python ${{ matrix.python-version }}
+      uses: actions/setup-python@v3
+      with:
+        python-version: ${{ matrix.python-version }}
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        pip install pylint
+    - name: Analysing the code with pylint
+      run: |
+        pylint -d C0103 $(git ls-files '*.py')
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index 68bc17f..850de9f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -158,3 +158,7 @@ cython_debug/
 # and can be added to the global gitignore or merged into this file. For a more nuclear
 # option (not recommended) you can uncomment the following to ignore the entire idea folder.
 #.idea/
+
+*.yaml
+*.csv
+.vscode/
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..b85a259
--- /dev/null
+++ b/README.md
@@ -0,0 +1,74 @@
+# Orion - CLI tool to find regressions
+Orion is a command-line tool for identifying regressions in perf-scale CPT runs, using the metadata provided with each run. The detection mechanism relies on [hunter](https://github.com/datastax-labs/hunter).
+
+Below is an illustrative example of the config and metadata that Orion can handle:
+
+```
+tests :
+  - name : aws-small-scale-cluster-density-v2
+    platform: AWS
+    masterNodesType: m6a.xlarge
+    masterNodesCount: 3
+    workerNodesType: m6a.xlarge
+    workerNodesCount: 24
+    benchmark: cluster-density-v2
+    ocpVersion: 4.15
+    networkType: OVNKubernetes
+    # encrypted: true
+    # fips: false
+    # ipsec: false
+
+    metrics :
+    - metric : podReadyLatency
+      metricType : latency
+
+    - metric : apiserverCPU
+      metricType : cpu
+      namespace: openshift-kube-apiserver
+
+    - metric: ovnCPU
+      metricType: cpu
+      namespace: openshift-ovn-kubernetes
+
+    - metric: etcdCPU
+      metricType: cpu
+      namespace: openshift-etcd
+
+
+```
+
+## Build Orion
+Building Orion is a straightforward process. Follow these commands:
+
+Clone the current repository using git clone.
+
+```
+>> git clone
+>> python -m venv venv
+>> source venv/bin/activate
+>> pip install -r requirements.txt
+>> export ES_SERVER=
+>> pip install .
+```
+## Run Orion
+Running Orion is as simple as building it. After following the build steps, run:
+```
+>> orion
+```
+At the moment, Orion supports the following options.
+
+Users can set the path to their config file with the ```--config``` flag.
+
+For enhanced troubleshooting and debugging, Orion supports the ```--debug``` flag, which enables detailed debug logs.
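+
+For example, a typical invocation that combines these flags might look like this (the config path here is just a placeholder, not a file shipped with this patch):
+
+```
+>> orion --config /path/to/config.yaml --debug
+```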
+
+Additionally, users can specify a custom path for the output CSV file using the ```--output``` flag, controlling where the generated CSV is stored.
+
+By combining run metadata with hunter's change point detection, Orion provides a robust regression detection tool for perf-scale CPT runs.
+
+
diff --git a/orion.py b/orion.py
new file mode 100644
index 0000000..f40b19a
--- /dev/null
+++ b/orion.py
@@ -0,0 +1,161 @@
+"""
+This is the cli file for orion, a tool to detect regressions using hunter
+"""
+# pylint: disable = import-error
+import sys
+from functools import reduce
+import logging
+import os
+
+import click
+import yaml
+import pandas as pd
+from fmatch.matcher import Matcher
+
+
+@click.group()
+def cli():
+    """
+    cli function to group commands
+    """
+
+
+# pylint: disable=too-many-locals
+@click.command()
+@click.option("--config", default="config.yaml", help="Path to the configuration file")
+@click.option("--output", default="output.csv", help="Path to save the output csv file")
+@click.option("--debug", is_flag=True, help="Enable debug logging")
+def orion(config, debug, output):
+    """Orion is the CLI tool to detect regressions across runs
+
+    Args:
+        config (str): path to the config file
+        debug (bool): enables debug logging
+        output (str): path to the output csv file
+    """
+    level = logging.DEBUG if debug else logging.INFO
+    logger = logging.getLogger("Orion")
+    logger = set_logging(level, logger)
+
+    if "ES_SERVER" not in os.environ:
+        logger.error("ES_SERVER environment variable not set")
+        sys.exit(1)
+
+    try:
+        with open(config, "r", encoding="utf-8") as file:
+            data = yaml.safe_load(file)
+            logger.debug("The %s file has successfully loaded", config)
+    except FileNotFoundError as e:
+        logger.error("Config file not found: %s", e)
+        sys.exit(1)
+    except Exception as e:  # pylint: disable=broad-exception-caught
+        logger.error("An error occurred: %s", e)
+        sys.exit(1)
+    for test in data["tests"]:
+        metadata = get_metadata(test)
+        logger.info("The test %s has started", test["name"])
+        match = Matcher(index="perf_scale_ci", level=level)
+        uuids = match.get_uuid_by_metadata(metadata)
+        if len(uuids) == 0:
+            print("No UUID present for given metadata")
+            sys.exit()
+
+        runs = match.match_kube_burner(uuids)
+        ids = match.filter_runs(runs, runs)
+        metrics = test["metrics"]
+        dataframe_list = []
+
+        for metric in metrics:
+            logger.info("Collecting %s", metric["metric"])
+            if metric["metricType"] == "latency":
+                if metric["metric"] == "podReadyLatency":
+                    try:
+                        podl = match.burner_results("", ids, "ripsaw-kube-burner*")
+                        podl_df = match.convert_to_df(
+                            podl, columns=["uuid", "timestamp", "P99"]
+                        )
+                        dataframe_list.append(podl_df)
+                        logger.debug(podl_df)
+                    except Exception as e:  # pylint: disable=broad-exception-caught
+                        logger.error(
+                            "The namespace %s does not exist, exception %s",
+                            metric["namespace"],
+                            e,
+                        )
+
+            elif metric["metricType"] == "cpu":
+                try:
+                    cpu = match.burner_cpu_results(
+                        ids, metric["namespace"], "ripsaw-kube-burner*"
+                    )
+                    cpu_df = match.convert_to_df(cpu, columns=["uuid", "cpu_avg"])
+                    cpu_df = cpu_df.rename(
+                        columns={"cpu_avg": metric["metric"] + "_cpu_avg"}
+                    )
+                    dataframe_list.append(cpu_df)
+                    logger.debug(cpu_df)
+                except Exception as e:  # pylint: disable=broad-exception-caught
+                    logger.error(
+                        "The namespace %s does not exist, exception %s",
+                        metric["namespace"],
+                        e,
+                    )
+
+        merged_df = reduce(
+            lambda left, right: pd.merge(left, right, on="uuid", how="inner"),
+            dataframe_list,
+        )
+        match.save_results(merged_df, csv_file_path=output)
+
+
+def get_metadata(test):
+    """Gets metadata of the run from each test
+
+    Args:
+        test (dict): test dictionary
+
+    Returns:
+        dict: dictionary of the metadata
+    """
+    metadata_columns = [
+        "platform",
+        "masterNodesType",
+        "masterNodesCount",
+        "workerNodesType",
+        "workerNodesCount",
+        "benchmark",
+        "ocpVersion",
+        "networkType",
+        "encrypted",
+        "fips",
+        "ipsec",
+    ]
+    metadata = {key: test[key] for key in metadata_columns if key in test}
+    metadata["ocpVersion"] = str(metadata["ocpVersion"])
+    return metadata
+
+
+def set_logging(level, logger):
+    """sets log level and format
+
+    Args:
+        level (int): level of the log
+        logger (logging.Logger): logger object
+
+    Returns:
+        logging.Logger: a formatted and level set logger
+    """
+    logger.setLevel(level)
+    handler = logging.StreamHandler(sys.stdout)
+    handler.setLevel(level)
+    formatter = logging.Formatter(
+        "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+    )
+    handler.setFormatter(formatter)
+    logger.addHandler(handler)
+    return logger
+
+
+if __name__ == "__main__":
+    cli.add_command(orion)
+    cli()
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..b98bd22
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,14 @@
+certifi==2023.11.17
+click==8.1.7
+elastic-transport==8.11.0
+elasticsearch==8.11.1
+elasticsearch7==7.13.0
+fmatch==0.0.2
+numpy==1.26.3
+pandas==2.1.4
+python-dateutil==2.8.2
+pytz==2023.3.post1
+PyYAML==6.0.1
+six==1.16.0
+tzdata==2023.4
+urllib3==1.26.18
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..be07410
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,25 @@
+# orion/setup.py
+"""
+setup.py for orion cli tool
+"""
+from setuptools import setup
+
+setup(
+    name='orion',
+    version='1.0',
+    py_modules=['orion'],
+    install_requires=[
+        'click',
+        'fmatch'
+    ],
+    entry_points={
+        'console_scripts': [
+            'orion = orion:orion',
+        ],
+    },
+    classifiers=[
+        'Programming Language :: Python :: 3',
+        'License :: OSI Approved :: MIT License',
+        'Operating System :: OS Independent',
+    ],
+)

From 7d90ee97b51428b28e9c20ae3953f03eb8458bde Mon Sep 17 00:00:00 2001
From: "Joe Talerico (rook)"
Date: Mon, 29 Jan 2024 06:48:56 -0500
Subject: [PATCH 2/4] Adding Dockerfile (#5)

This Dockerfile builds Hunter and Orion into a single container image.
Signed-off-by: Joe Talerico
Co-authored-by: Joe Talerico
---
 .github/workflows/build-push.yaml | 30 +++++++++++++++++++
 Dockerfile                        | 49 +++++++++++++++++++++++++++++++
 2 files changed, 79 insertions(+)
 create mode 100644 .github/workflows/build-push.yaml
 create mode 100644 Dockerfile

diff --git a/.github/workflows/build-push.yaml b/.github/workflows/build-push.yaml
new file mode 100644
index 0000000..1e50eca
--- /dev/null
+++ b/.github/workflows/build-push.yaml
@@ -0,0 +1,30 @@
+name: Build and Push Image
+on: [ push ]
+
+jobs:
+  build:
+    name: Build and push image
+    runs-on: ubuntu-20.04
+
+    steps:
+      - uses: actions/checkout@v2
+
+      - name: Build Orion Image
+        id: build-orion
+        uses: redhat-actions/buildah-build@v2
+        with:
+          image: orion
+          context: orion
+          tags: latest ${{ github.sha }}
+          containerfiles: |
+            ./Dockerfile
+
+      - name: Push Orion image to quay.io
+        id: push-orion-to-quay
+        uses: redhat-actions/push-to-registry@v2
+        with:
+          image: ${{ steps.build-orion.outputs.image }}
+          tags: ${{ steps.build-orion.outputs.tags }}
+          registry: quay.io/cloud-bulldozer
+          username: ${{ secrets.QUAY_USER }}
+          password: ${{ secrets.QUAY_TOKEN }}
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..cbb96d2
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,49 @@
+FROM python:3.12.1-slim-bullseye
+# So that STDOUT/STDERR is printed
+ENV PYTHONUNBUFFERED="1"
+
+# We create the default user and group to run unprivileged
+ENV HUNTER_HOME /srv/hunter
+WORKDIR ${HUNTER_HOME}
+
+RUN groupadd --gid 8192 hunter && \
+    useradd --uid 8192 --shell /bin/false --create-home --no-log-init --gid hunter hunter && \
+    chown hunter:hunter ${HUNTER_HOME}
+
+# First let's just get things updated.
+# Install System dependencies
+RUN apt-get update --assume-yes && \
+    apt-get install -o 'Dpkg::Options::=--force-confnew' -y --force-yes -q \
+        git \
+        openssh-client \
+        gcc \
+        clang \
+        build-essential \
+        make \
+        curl \
+        virtualenv \
+    && rm -rf /var/lib/apt/lists/*
+
+# Get poetry package
+RUN curl -sSL https://install.python-poetry.org | python3 -
+# Adding poetry to PATH
+ENV PATH="/root/.local/bin/:$PATH"
+
+RUN git clone https://github.com/datastax-labs/hunter.git ${HUNTER_HOME}
+
+ENV PATH="${HUNTER_HOME}/bin:$PATH"
+
+RUN --mount=type=ssh \
+    virtualenv --python python venv && \
+    . venv/bin/activate && \
+    poetry install -v && \
+    mkdir -p bin && \
+    ln -s ../venv/bin/hunter ${HUNTER_HOME}/bin
+
+COPY --chown=hunter:hunter . orion
+
+RUN . venv/bin/activate && \
+    cd orion && \
+    pip install -r requirements.txt && \
+    python setup.py install && \
+    ln -s ../venv/bin/orion ${HUNTER_HOME}/bin

From 6ed1919ef5482b0a7812e87ffac22e96fa65de2d Mon Sep 17 00:00:00 2001
From: Paige Rubendall <64206430+paigerube14@users.noreply.github.com>
Date: Tue, 30 Jan 2024 17:20:37 -0500
Subject: [PATCH 3/4] Update fmatch version and avoid hard-coded metadata keys
 (#7)

* adding orion logging

Signed-off-by: Paige Rubendall

* adding updated fmatch

Signed-off-by: Paige Rubendall

* k list

Signed-off-by: Paige Rubendall

* fixing spaces

Signed-off-by: Paige Rubendall

---------

Signed-off-by: Paige Rubendall
---
 orion.py         | 24 ++++++------------------
 requirements.txt |  2 +-
 2 files changed, 9 insertions(+), 17 deletions(-)

diff --git a/orion.py b/orion.py
index f40b19a..1d455b0 100644
--- a/orion.py
+++ b/orion.py
@@ -52,7 +52,7 @@ def orion(config, debug, output):
         logger.error("An error occurred: %s", e)
         sys.exit(1)
     for test in data["tests"]:
-        metadata = get_metadata(test)
+        metadata = get_metadata(test, logger)
         logger.info("The test %s has started", test["name"])
         match = Matcher(index="perf_scale_ci", level=level)
         uuids = match.get_uuid_by_metadata(metadata)
@@ -108,7 +108,7 @@ def orion(config, debug, output):
         match.save_results(merged_df, csv_file_path=output)
 
 
-def get_metadata(test):
+def get_metadata(test, logger):
     """Gets metadata of the run from each test
 
     Args:
@@ -117,21 +117,13 @@ def get_metadata(test):
     Returns:
         dict: dictionary of the metadata
     """
-    metadata_columns = [
-        "platform",
-        "masterNodesType",
-        "masterNodesCount",
-        "workerNodesType",
-        "workerNodesCount",
-        "benchmark",
-        "ocpVersion",
-        "networkType",
-        "encrypted",
-        "fips",
-        "ipsec",
-    ]
-    metadata = {key: test[key] for key in metadata_columns if key in test}
+    metadata = {}
+    for k, v in test.items():
+        if k in ["metrics", "name"]:
+            continue
+        metadata[k] = v
     metadata["ocpVersion"] = str(metadata["ocpVersion"])
+    logger.debug("metadata: %s", metadata)
     return metadata
 
 
diff --git a/requirements.txt b/requirements.txt
index b98bd22..e3f3ac3 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,7 +3,7 @@ click==8.1.7
 elastic-transport==8.11.0
 elasticsearch==8.11.1
 elasticsearch7==7.13.0
-fmatch==0.0.2
+fmatch==0.0.3
 numpy==1.26.3
 pandas==2.1.4
 python-dateutil==2.8.2

From d8f933b617ad268c8eff5e8829b6a2d92ace785a Mon Sep 17 00:00:00 2001
From: Paige Rubendall <64206430+paigerube14@users.noreply.github.com>
Date: Thu, 1 Feb 2024 14:08:28 -0500
Subject: [PATCH 4/4] Add a more generic way of comparing metrics, with
 working ingress (#9)

Signed-off-by: Paige Rubendall
---
 README.md        |  45 +++++++++++++++-----
 orion.py         | 112 +++++++++++++++++++++++++++++------------------
 requirements.txt |   2 +-
 3 files changed, 107 insertions(+), 52 deletions(-)

diff --git a/README.md b/README.md
index b85a259..24d81fa 100644
--- a/README.md
+++ b/README.md
@@ -19,20 +19,43 @@ tests :
     # ipsec: false
 
     metrics :
-    - metric : podReadyLatency
-      metricType : latency
+    - name: podReadyLatency
+      metricName: podLatencyQuantilesMeasurement
+      quantileName: Ready
+      metric_of_interest: P99
+      not:
+        - jobConfig.name: "garbage-collection"
 
-    - metric : apiserverCPU
-      metricType : cpu
-      namespace: openshift-kube-apiserver
+    - name: apiserverCPU
+      metricName : containerCPU
+      labels.namespace: openshift-kube-apiserver
+      metric_of_interest: value
+      agg:
+        value: cpu
+        agg_type: avg
 
-    - metric: ovnCPU
-      metricType: cpu
-      namespace: openshift-ovn-kubernetes
+    - name: ovnCPU
+      metricName : containerCPU
+      labels.namespace: openshift-ovn-kubernetes
+      metric_of_interest: value
+      agg:
+        value: cpu
+        agg_type: avg
+
+    - name: etcdCPU
+      metricName : containerCPU
+      labels.namespace: openshift-etcd
+      metric_of_interest: value
+      agg:
+        value: cpu
+        agg_type: avg
 
-    - metric: etcdCPU
-      metricType: cpu
-      namespace: openshift-etcd
+    - name: etcdDisk
+      metricName : 99thEtcdDiskBackendCommitDurationSeconds
+      metric_of_interest: value
+      agg:
+        value: duration
+        agg_type: avg
 
 
 ```
diff --git a/orion.py b/orion.py
index 1d455b0..891d0bd 100644
--- a/orion.py
+++ b/orion.py
@@ -19,7 +19,6 @@ def cli():
     cli function to group commands
     """
 
-
 # pylint: disable=too-many-locals
 @click.command()
 @click.option("--config", default="config.yaml", help="Path to the configuration file")
@@ -60,46 +59,19 @@ def orion(config, debug, output):
             print("No UUID present for given metadata")
             sys.exit()
 
-        runs = match.match_kube_burner(uuids)
-        ids = match.filter_runs(runs, runs)
+        if metadata["benchmark"] == "k8s-netperf":
+            index = "k8s-netperf"
+            ids = uuids
+        elif metadata["benchmark"] == "ingress-perf":
+            index = "ingress-performance"
+            ids = uuids
+        else:
+            index = "ripsaw-kube-burner"
+            runs = match.match_kube_burner(uuids)
+            ids = match.filter_runs(runs, runs)
+
         metrics = test["metrics"]
-        dataframe_list = []
-
-        for metric in metrics:
-            logger.info("Collecting %s", metric["metric"])
-            if metric["metricType"] == "latency":
-                if metric["metric"] == "podReadyLatency":
-                    try:
-                        podl = match.burner_results("", ids, "ripsaw-kube-burner*")
-                        podl_df = match.convert_to_df(
-                            podl, columns=["uuid", "timestamp", "P99"]
-                        )
-                        dataframe_list.append(podl_df)
-                        logger.debug(podl_df)
-                    except Exception as e:  # pylint: disable=broad-exception-caught
-                        logger.error(
-                            "The namespace %s does not exist, exception %s",
-                            metric["namespace"],
-                            e,
-                        )
-
-            elif metric["metricType"] == "cpu":
-                try:
-                    cpu = match.burner_cpu_results(
-                        ids, metric["namespace"], "ripsaw-kube-burner*"
-                    )
-                    cpu_df = match.convert_to_df(cpu, columns=["uuid", "cpu_avg"])
-                    cpu_df = cpu_df.rename(
-                        columns={"cpu_avg": metric["metric"] + "_cpu_avg"}
-                    )
-                    dataframe_list.append(cpu_df)
-                    logger.debug(cpu_df)
-                except Exception as e:  # pylint: disable=broad-exception-caught
-                    logger.error(
-                        "The namespace %s does not exist, exception %s",
-                        metric["namespace"],
-                        e,
-                    )
+        dataframe_list = get_metric_data(ids, index, metrics, match, logger)
 
         merged_df = reduce(
             lambda left, right: pd.merge(left, right, on="uuid", how="inner"),
             dataframe_list,
         )
         match.save_results(merged_df, csv_file_path=output)
 
 
+def get_metric_data(ids, index, metrics, match, logger):
+    """Gets detailed metrics based on the metrics list in the config
+
+    Args:
+        ids (list): list of all uuids
+        index (str): ES index in which to find the data
+        metrics (list): metrics to gather data on
+        match (Matcher): current matcher instance
+        logger (logging.Logger): logger for diagnostic output
+
+    Returns:
+        list: list of dataframes, one per metric
+    """
+    dataframe_list = []
+    for metric in metrics:
+        metric_name = metric['name']
+        logger.info("Collecting %s", metric_name)
+        metric_of_interest = metric['metric_of_interest']
+
+        if "agg" in metric:
+            try:
+                cpu = match.get_agg_metric_query(ids, index, metric)
+                agg_value = metric['agg']['value']
+                agg_type = metric['agg']['agg_type']
+                agg_name = agg_value + "_" + agg_type
+                cpu_df = match.convert_to_df(cpu, columns=["uuid", agg_name])
+                cpu_df = cpu_df.rename(
+                    columns={agg_name: metric_name + "_" + agg_name}
+                )
+                dataframe_list.append(cpu_df)
+                logger.debug(cpu_df)
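+                # NOTE (illustrative): with agg value "cpu" and agg_type "avg",
+                # the aggregated column comes back as "cpu_avg" and is renamed
+                # to "<name>_cpu_avg" (e.g. "apiserverCPU_cpu_avg"), so the
+                # per-metric dataframes can be merged on uuid without clashing.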
+
+            except Exception as e:  # pylint: disable=broad-exception-caught
+                logger.error(
+                    "Couldn't get agg metrics %s, exception %s",
+                    metric_name,
+                    e,
+                )
+        else:
+            try:
+                podl = match.getResults("", ids, index, metric)
+                podl_df = match.convert_to_df(
+                    podl, columns=["uuid", "timestamp", metric_of_interest]
+                )
+                dataframe_list.append(podl_df)
+                logger.debug(podl_df)
+            except Exception as e:  # pylint: disable=broad-exception-caught
+                logger.error(
+                    "Couldn't get metrics %s, exception %s",
+                    metric_name,
+                    e,
+                )
+    return dataframe_list
+
 
 def get_metadata(test, logger):
     """Gets metadata of the run from each test
 
diff --git a/requirements.txt b/requirements.txt
index e3f3ac3..218fe88 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,7 +3,7 @@ click==8.1.7
 elastic-transport==8.11.0
 elasticsearch==8.11.1
 elasticsearch7==7.13.0
-fmatch==0.0.3
+fmatch==0.0.4
 numpy==1.26.3
 pandas==2.1.4
 python-dateutil==2.8.2
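
A note on the merge step that all four patches leave untouched: orion.py joins the per-metric dataframes with an inner merge on ```uuid``` before writing the CSV. Here is a minimal standalone sketch of that ```functools.reduce``` + ```pd.merge``` pattern; the two input dataframes are invented, simplified sample data, not real run output:

```
from functools import reduce

import pandas as pd

# Invented per-metric dataframes keyed by run uuid, simplified versions of
# what match.convert_to_df returns for a latency metric and a CPU metric.
podl_df = pd.DataFrame({"uuid": ["run-a", "run-b"], "P99": [1.2, 1.4]})
cpu_df = pd.DataFrame({"uuid": ["run-a", "run-b"], "apiserverCPU_cpu_avg": [0.8, 0.9]})

# Inner-join every dataframe on "uuid", as orion.py does, so only runs
# that produced every metric survive into the output CSV.
merged_df = reduce(
    lambda left, right: pd.merge(left, right, on="uuid", how="inner"),
    [podl_df, cpu_df],
)
print(merged_df)
```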