From 8ac467fc60a783889470ede351cbd6ea4acbd331 Mon Sep 17 00:00:00 2001 From: Shashank Reddy Boyapally Date: Mon, 18 Mar 2024 16:17:32 -0400 Subject: [PATCH 01/12] daemon mode, documents update, refactor, multiple test support Signed-off-by: Shashank Reddy Boyapally --- README.md | 48 ++++++- orion.py | 135 ++++++------------- {utils => pkg}/__init__.py | 0 pkg/daemon.py | 35 +++++ pkg/logrus.py | 42 ++++++ pkg/runTest.py | 37 ++++++ pkg/utils.py | 262 +++++++++++++++++++++++++++++++++++++ requirements.txt | 4 +- setup.py | 4 +- 9 files changed, 466 insertions(+), 101 deletions(-) rename {utils => pkg}/__init__.py (100%) create mode 100644 pkg/daemon.py create mode 100644 pkg/logrus.py create mode 100644 pkg/runTest.py create mode 100644 pkg/utils.py diff --git a/README.md b/README.md index 4b47ff5..112a99e 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ # Orion - CLI tool to find regressions -Orion stands as a powerful command-line tool designed for identifying regressions within perf-scale CPT runs, leveraging metadata provided during the process. The detection mechanism relies on [hunter](https://github.com/datastax-labs/hunter). +Orion stands as a powerful command-line tool/Daemon designed for identifying regressions within perf-scale CPT runs, leveraging metadata provided during the process. The detection mechanism relies on [hunter](https://github.com/datastax-labs/hunter). Below is an illustrative example of the config and metadata that Orion can handle: @@ -78,9 +78,12 @@ Clone the current repository using git clone. >> pip install . ``` ## Run Orion -Executing Orion is as simple as building it. After following the build steps, run the following: +Executing Orion is as seamless as building it. With the latest enhancements, Orion introduces a versatile command-line option and a Daemon mode, empowering users to select the mode that aligns with their requirements. + +### Command-line mode +Running Orion in command-line mode is straightforward. Simply follow these instructions: ``` ->> orion +>> orion cmd-mode ``` At the moment, @@ -92,6 +95,45 @@ Activate Orion's regression detection tool for performance-scale CPT runs effort Additionally, users can specify a custom path for the output CSV file using the ```--output``` flag, providing control over the location where the generated CSV will be stored. +### Daemon mode +The core purpose of Daemon mode is to operate Orion as a self-contained server, dedicated to handling incoming requests. By sending a POST request accompanied by a configuration file, users can trigger change point detection on the provided metadata and metrics. Following the processing, the response is formatted in JSON, providing a structured output for seamless integration and analysis. To trigger daemon mode, just use the following command: + +``` +>> orion daemon-mode +``` + +Below is a sample output structure: the top level of the JSON contains the test name, while within each test, runs are organized into arrays. Each run includes succinct metadata alongside corresponding metrics for comprehensive analysis.
+``` +{ + "aws-small-scale-cluster-density-v2": [ + { + "uuid": "4cb3efec-609a-4ac5-985d-4cbbcbb11625", + "timestamp": 1704889895, + "metrics": { + "etcdCPU_cpu_avg": { + "value": 8.7663162253, + "percentage_change": 0 + }, + "ovnCPU_cpu_avg": { + "value": 2.8503958847, + "percentage_change": 0 + }, + "P99": { + "value": 13000, + "percentage_change": 0 + }, + "apiserverCPU_cpu_avg": { + "value": 10.2344511574, + "percentage_change": 0 + } + }, + "buildUrl": "https://prow.ci.openshift.org/view/gs/origin-ci-test/logs/periodic-ci-openshift-qe-ocp-qe-perfscale-ci-main-aws-4.16-nightly-x86-control-plane-24nodes/1745037917119582208", + "is_changepoint": false + }, + ] +} +``` + Orion's seamless integration with metadata and hunter ensures a robust regression detection tool for perf-scale CPT runs. diff --git a/orion.py b/orion.py index 064046e..f34b5d8 100644 --- a/orion.py +++ b/orion.py @@ -3,47 +3,44 @@ """ # pylint: disable = import-error +import logging import sys import warnings -from functools import reduce -import logging -import os -import re -import pyshorteners import click -import pandas as pd +import uvicorn +from pkg.logrus import SingletonLogger +from pkg.runTest import run -from fmatch.matcher import Matcher -from utils import orion_funcs +logger_instance = SingletonLogger(debug=False).logger warnings.filterwarnings("ignore", message="Unverified HTTPS request.*") @click.group() -# pylint: disable=unused-argument -def cli(max_content_width=120): +def cli(max_content_width=120): # pylint: disable=unused-argument """ cli function to group commands """ -# pylint: disable=too-many-locals, too-many-statements -@click.command() -@click.option("--uuid", default="", help="UUID to use as base for comparisons") -@click.option("--baseline", default="", help="Baseline UUID(s) to to compare against uuid") +# pylint: disable=too-many-locals +@cli.command(name="cmd-mode") @click.option("--config", default="config.yaml", help="Path to the configuration file") -@click.option("--output-path", default="output.csv", help="Path to save the output csv file") -@click.option("--debug", is_flag=True, help="log level ") -@click.option("--hunter-analyze",is_flag=True, help="run hunter analyze") +@click.option( + "--output-path", default="output.csv", help="Path to save the output csv file" +) +@click.option("--debug", default=False, is_flag=True, help="log level") +@click.option("--hunter-analyze", default=True, is_flag=True, help="run hunter analyze") @click.option( "-o", - "--output", + "--output-format", type=click.Choice(["json", "text"]), default="text", help="Choose output format (json or text)", ) -def orion(**kwargs): - """Orion is the cli tool to detect regressions over the runs +def cmd_analysis(config, debug, output_path, hunter_analyze, output_format): + """ + Orion runs on command line mode, and helps in detecting regressions \b Args: @@ -54,84 +51,32 @@ def orion(**kwargs): output (str): path to the output csv file hunter_analyze (bool): turns on hunter analysis of gathered uuid(s) data """ - - level = logging.DEBUG if kwargs["debug"] else logging.INFO - logger = logging.getLogger("Orion") - logger = orion_funcs.set_logging(level, logger) - data = orion_funcs.load_config(kwargs["config"],logger) - ES_URL=None - - if "ES_SERVER" in data.keys(): - ES_URL = data["ES_SERVER"] - else: - if "ES_SERVER" in os.environ: - ES_URL = os.environ.get("ES_SERVER") - else: - logger.error("ES_SERVER environment variable/config variable not set") - sys.exit(1) - shortener = pyshorteners.Shortener() - for test 
in data["tests"]: - benchmarkIndex=test['benchmarkIndex'] - uuid = kwargs["uuid"] - baseline = kwargs["baseline"] - fingerprint_index = test["index"] - match = Matcher(index=fingerprint_index, - level=level, ES_URL=ES_URL, verify_certs=False) - if uuid == "": - metadata = orion_funcs.get_metadata(test, logger) - else: - metadata = orion_funcs.filter_metadata(uuid,match,logger) - - logger.info("The test %s has started", test["name"]) - if baseline == "": - runs = match.get_uuid_by_metadata(metadata) - uuids = [run["uuid"] for run in runs] - buildUrls = {run["uuid"]: run["buildUrl"] for run in runs} - if len(uuids) == 0: - logging.info("No UUID present for given metadata") - sys.exit() - else: - uuids = [uuid for uuid in re.split(' |,',baseline) if uuid] - uuids.append(uuid) - buildUrls = orion_funcs.get_build_urls(fingerprint_index, uuids,match) - - fingerprint_index=benchmarkIndex - if metadata["benchmark.keyword"] in ["ingress-perf","k8s-netperf"] : - ids = uuids - else: - if baseline == "": - runs = match.match_kube_burner(uuids, fingerprint_index) - ids = match.filter_runs(runs, runs) - else: - ids = uuids - metrics = test["metrics"] - dataframe_list = orion_funcs.get_metric_data(ids, fingerprint_index, metrics, match, logger) - - for i, df in enumerate(dataframe_list): - if i != 0 and ('timestamp' in df.columns): - dataframe_list[i] = df.drop(columns=['timestamp']) - - merged_df = reduce( - lambda left, right: pd.merge(left, right, on="uuid", how="inner"), - dataframe_list, - ) - - shortener = pyshorteners.Shortener() - merged_df["buildUrl"] = merged_df["uuid"].apply( - lambda uuid: shortener.tinyurl.short(buildUrls[uuid])) #pylint: disable = cell-var-from-loop - csv_name = kwargs["output_path"].split(".")[0]+"-"+test['name']+".csv" - match.save_results( - merged_df, csv_file_path=csv_name - ) - - if kwargs["hunter_analyze"]: - orion_funcs.run_hunter_analyze(merged_df,test,kwargs["output"]) + level = logging.DEBUG if debug else logging.INFO + logger_instance.setLevel(level) + logger_instance.info("🏹 Starting Orion in command-line mode") + output = run(config, output_path, hunter_analyze, output_format) + for test_name, result_table in output.items(): + print(test_name) + print("-"*len(test_name)) + print(result_table) + +@cli.command(name="daemon-mode") +@click.option("--debug", default=False, is_flag=True, help="log level") +def rundaemon(debug): + """ + Orion runs on daemon mode on port 8000 + \b + """ + level = logging.DEBUG if debug else logging.INFO + logger_instance.setLevel(level) + logger_instance.info("🏹 Starting Orion in Daemon mode") + uvicorn.run("pkg.daemon:app", port=8000) if __name__ == "__main__": if len(sys.argv) <= 1: - cli.main(['--help']) + cli.main(["--help"]) else: - print(len(sys.argv)) - cli.add_command(orion) + cli.add_command(cmd_analysis) + cli.add_command(rundaemon) cli() diff --git a/utils/__init__.py b/pkg/__init__.py similarity index 100% rename from utils/__init__.py rename to pkg/__init__.py diff --git a/pkg/daemon.py b/pkg/daemon.py new file mode 100644 index 0000000..d471066 --- /dev/null +++ b/pkg/daemon.py @@ -0,0 +1,35 @@ +""" +Module to run orion in daemon mode +""" +import shutil +import os + +from fastapi import FastAPI, File, UploadFile +from pkg.logrus import SingletonLogger + +from . 
import runTest + +app = FastAPI() +logger_instance = SingletonLogger(debug=False).logger + + +@app.post("/daemon") +async def daemon(file: UploadFile = File(...)): + """starts listening on port 8000 on url /daemon + + Args: + file (UploadFile, optional): config file for the test. Defaults to File(...). + + Returns: + json: json object of the changepoints and metrics + """ + file_name, file_extension = os.path.splitext(file.filename) + new_file_name = f"{file_name}_copy{file_extension}" + with open(new_file_name, "wb") as buffer: + shutil.copyfileobj(file.file, buffer) + result = runTest.run(new_file_name, "output.csv", True, "json") + try: + os.remove(new_file_name) + except OSError as e: + logger_instance.error("error %s", e.strerror) + return result diff --git a/pkg/logrus.py b/pkg/logrus.py new file mode 100644 index 0000000..2f300fb --- /dev/null +++ b/pkg/logrus.py @@ -0,0 +1,42 @@ +""" +Logger for orion +""" +import logging +import sys + +class SingletonLogger: + """Singleton logger to set logging at one single place + + Returns: + _type_: _description_ + """ + _instance = None + + def __new__(cls, debug=False): + if cls._instance is None: + cls._instance = super().__new__(cls) + cls._instance._logger = cls._initialize_logger(debug) + return cls._instance + + @staticmethod + def _initialize_logger(debug): + level = logging.DEBUG if debug else logging.INFO + logger = logging.getLogger("Orion") + logger.setLevel(level) + handler = logging.StreamHandler(sys.stdout) + handler.setLevel(level) + formatter = logging.Formatter( + "%(asctime)s - %(filename)s-%(lineno)d - %(name)s - %(levelname)s - %(message)s" + ) + handler.setFormatter(formatter) + logger.addHandler(handler) + return logger + + @property + def logger(self): + """property to return logger, getter method + + Returns: + _type_: _description_ + """ + return self._logger # pylint: disable = no-member diff --git a/pkg/runTest.py b/pkg/runTest.py new file mode 100644 index 0000000..98ba007 --- /dev/null +++ b/pkg/runTest.py @@ -0,0 +1,37 @@ +""" +run test +""" +from fmatch.matcher import Matcher +from pkg.logrus import SingletonLogger +from pkg.utils import ( + run_hunter_analyze, + load_config, + get_es_url, + process_test +) + +logger_instance= SingletonLogger().logger + +def run(config, output_path, hunter_analyze,output_format): + """run method to start the tests + + Args: + config (_type_): file path to config file + debug (_type_): debug to be true or false + output_path (_type_): output path to save the data + hunter_analyze (_type_): changepoint detection through hunter. 
defaults to True + output_format (_type_): output to be table or json + + Returns: + _type_: _description_ + """ + data = load_config(config, logger_instance) + ES_URL = get_es_url(data,logger=logger_instance) + result_output = {} + for test in data["tests"]: + match = Matcher(index="perf_scale_ci",level=logger_instance.level, ES_URL=ES_URL) + result = process_test(test, match, logger_instance, output_path) + if hunter_analyze: + testname,result_data=run_hunter_analyze(result, test,output=output_format,matcher=match) + result_output[testname]=result_data + return result_output diff --git a/pkg/utils.py b/pkg/utils.py new file mode 100644 index 0000000..9169b93 --- /dev/null +++ b/pkg/utils.py @@ -0,0 +1,262 @@ +# pylint: disable=cyclic-import +""" +module for all utility functions orion uses +""" +# pylint: disable = import-error + +from functools import reduce +import json +import os +import sys + +import yaml +import pandas as pd + +from hunter.report import Report, ReportType +from hunter.series import Metric, Series + + +def run_hunter_analyze(merged_df, test, output, matcher): + """Start hunter analyze function + + Args: + merged_df (Dataframe): merged dataframe of all the metrics + test (dict): test dictionary with the each test information + """ + merged_df["timestamp"] = pd.to_datetime(merged_df["timestamp"]) + merged_df["timestamp"] = merged_df["timestamp"].astype(int) // 10**9 + metrics = { + column: Metric(1, 1.0) + for column in merged_df.columns + if column not in ["uuid", "timestamp"] + } + data = { + column: merged_df[column] + for column in merged_df.columns + if column not in ["uuid", "timestamp"] + } + attributes = { + column: merged_df[column] for column in merged_df.columns if column in ["uuid"] + } + series = Series( + test_name=test["name"], + branch=None, + time=list(merged_df["timestamp"]), + metrics=metrics, + data=data, + attributes=attributes, + ) + change_points = series.analyze().change_points_by_time + report = Report(series, change_points) + if output == "text": + output_table = report.produce_report( + test_name=test["name"], report_type=ReportType.LOG + ) + return test["name"],output_table + + if output == "json": + change_points_by_metric = series.analyze().change_points + output_json = parse_json_output(merged_df, change_points_by_metric,matcher=matcher) + return test["name"], output_json + return None + + +def parse_json_output(merged_df, change_points_by_metric,matcher): + """json output generator function + + Args: + merged_df (pd.Dataframe): the dataframe to be converted to json + change_points_by_metric (_type_): different change point + + Returns: + _type_: _description_ + """ + + df_json = merged_df.to_json(orient="records") + df_json = json.loads(df_json) + + for index, entry in enumerate(df_json): + entry["metrics"] = { + key: {"value": entry.pop(key), "percentage_change": 0} + for key in entry.keys() - {"uuid", "timestamp"} + } + entry["buildUrl"] = matcher.get_metadata_by_uuid(entry.get("uuid")).get( + "buildUrl" + ) + entry["is_changepoint"] = False + + for key in change_points_by_metric.keys(): + for change_point in change_points_by_metric[key]: + index = change_point.index + percentage_change = ( + (change_point.stats.mean_2 - change_point.stats.mean_1) + / change_point.stats.mean_1 + ) * 100 + df_json[index]["metrics"][key]["percentage_change"] = percentage_change + df_json[index]["is_changepoint"] = True + + return df_json + + +# pylint: disable=too-many-locals +def get_metric_data(ids, index, metrics, match, logger): + """Gets details 
of metrics based on metric yaml list + + Args: + ids (list): list of all uuids + index (dict): index in es of where to find data + metrics (dict): metrics to gather data on + match (Matcher): current matcher instance + logger (logger): log data to one output + + Returns: + dataframe_list: dataframe of the all metrics + """ + dataframe_list = [] + for metric in metrics: + metric_name = metric["name"] + logger.info("Collecting %s", metric_name) + metric_of_interest = metric["metric_of_interest"] + + if "agg" in metric.keys(): + try: + cpu = match.get_agg_metric_query(ids, index, metric) + agg_value = metric["agg"]["value"] + agg_type = metric["agg"]["agg_type"] + agg_name = agg_value + "_" + agg_type + cpu_df = match.convert_to_df(cpu, columns=["uuid", agg_name]) + cpu_df = cpu_df.rename(columns={agg_name: metric_name + "_" + agg_name}) + dataframe_list.append(cpu_df) + logger.debug(cpu_df) + + except Exception as e: # pylint: disable=broad-exception-caught + logger.error( + "Couldn't get agg metrics %s, exception %s", + metric_name, + e, + ) + else: + try: + podl = match.getResults("", ids, index, metric) + podl_df = match.convert_to_df( + podl, columns=["uuid", "timestamp", metric_of_interest] + ) + dataframe_list.append(podl_df) + logger.debug(podl_df) + except Exception as e: # pylint: disable=broad-exception-caught + logger.error( + "Couldn't get metrics %s, exception %s", + metric_name, + e, + ) + return dataframe_list + + +def get_metadata(test, logger): + """Gets metadata of the run from each test + + Args: + test (dict): test dictionary + + Returns: + dict: dictionary of the metadata + """ + metadata = test["metadata"] + metadata["ocpVersion"] = str(metadata["ocpVersion"]) + logger.debug("metadata" + str(metadata)) + return metadata + + +def load_config(config, logger): + """Loads config file + + Args: + config (str): path to config file + logger (Logger): logger + + Returns: + dict: dictionary of the config file + """ + try: + with open(config, "r", encoding="utf-8") as file: + data = yaml.safe_load(file) + logger.debug("The %s file has successfully loaded", config) + except FileNotFoundError as e: + logger.error("Config file not found: %s", e) + sys.exit(1) + except Exception as e: # pylint: disable=broad-exception-caught + logger.error("An error occurred: %s", e) + sys.exit(1) + return data + + +def get_es_url(data, logger): + """Gets es url from config or env + + Args: + data (_type_): config file data + logger (_type_): logger + + Returns: + str: es url + """ + if "ES_SERVER" in data.keys(): + return data["ES_SERVER"] + if "ES_SERVER" in os.environ: + return os.environ.get("ES_SERVER") + logger.error("ES_SERVER environment variable/config variable not set") + sys.exit(1) + + +def get_index_and_ids(metadata, uuids, match): + """returns the index to be used and runs as uuids + + Args: + metadata (_type_): metadata from config + uuids (_type_): uuids collected + match (_type_): Matcher object + + Returns: + _type_: index and uuids + """ + if metadata["benchmark.keyword"] == "k8s-netperf": + return "k8s-netperf", uuids + if metadata["benchmark.keyword"] == "ingress-perf": + return "ingress-performance", uuids + index = "ripsaw-kube-burner" + runs = match.match_kube_burner(uuids) + return index, match.filter_runs(runs, runs) + + +def process_test(test, match, logger, output): + """generate the dataframe for the test given + + Args: + test (_type_): test from process test + match (_type_): matcher object + logger (_type_): logger object + output (_type_): output file name + + Returns:
_type_: merged dataframe """ + metadata = get_metadata(test, logger) + logger.info("The test %s has started", test["name"]) + uuids = match.get_uuid_by_metadata(metadata) + if len(uuids) == 0: + print("No UUID present for given metadata") + sys.exit() + + index, ids = get_index_and_ids(metadata, uuids, match) + + metrics = test["metrics"] + dataframe_list = get_metric_data(ids, index, metrics, match, logger) + + merged_df = reduce( + lambda left, right: pd.merge(left, right, on="uuid", how="inner"), + dataframe_list, + ) + + output_file_path = output.split(".")[0] + "-" + test["name"] + ".csv" + match.save_results(merged_df, csv_file_path=output_file_path) + return merged_df diff --git a/requirements.txt b/requirements.txt index d3224dd..acc1754 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,4 +10,6 @@ PyYAML==6.0.1 six==1.16.0 tzdata==2023.4 urllib3==1.26.18 -pyshorteners==1.0.1 \ No newline at end of file +pyshorteners==1.0.1 +fastapi==0.110.0 +python-multipart==0.0.9 diff --git a/setup.py b/setup.py index 52fdafc..50689d7 100644 --- a/setup.py +++ b/setup.py @@ -14,11 +14,11 @@ ], entry_points={ 'console_scripts': [ - 'orion = orion:orion', + 'orion = orion:cli', ], }, packages=find_packages(), - package_data={'utils': ['utils.py'],'hunter': ['*.py']}, + package_data={'pkg': ['utils.py',"runTest.py","daemon.py","logrus.py"],'hunter': ['*.py']}, classifiers=[ 'Programming Language :: Python :: 3', 'License :: OSI Approved :: MIT License', From cf7d4ef3849294ff3503c4ff433c51c313830708 Mon Sep 17 00:00:00 2001 From: Shashank Reddy Boyapally Date: Wed, 20 Mar 2024 23:41:32 -0400 Subject: [PATCH 02/12] fixed logging and added uuid comparison Signed-off-by: Shashank Reddy Boyapally --- orion.py | 15 ++++---- pkg/daemon.py | 15 ++++++-- pkg/logrus.py | 11 +++--- pkg/runTest.py | 34 +++++++++--------- pkg/utils.py | 98 +++++++++++++++++++++++++++++++++++++++++--------- 5 files changed, 127 insertions(+), 46 deletions(-) diff --git a/orion.py b/orion.py index f34b5d8..132747f 100644 --- a/orion.py +++ b/orion.py @@ -12,7 +12,6 @@ from pkg.logrus import SingletonLogger from pkg.runTest import run -logger_instance = SingletonLogger(debug=False).logger warnings.filterwarnings("ignore", message="Unverified HTTPS request.*") @@ -38,7 +37,9 @@ def cli(max_content_width=120): # pylint: disable=unused-argument default="text", help="Choose output format (json or text)", ) -def cmd_analysis(config, debug, output_path, hunter_analyze, output_format): +@click.option("--uuid", default="", help="UUID to use as base for comparisons") +@click.option("--baseline", default="", help="Baseline UUID(s) to compare against uuid") +def cmd_analysis(**kwargs): """ Orion runs on command line mode, and helps in detecting regressions \b Args: @@ -54,84 +51,32 @@ def cmd_analysis(config, debug, output_path, hunter_analyze, output_format): output (str): path to the output csv file hunter_analyze (bool): turns on hunter analysis of gathered uuid(s) data """ - level = logging.DEBUG if kwargs['debug'] else logging.INFO + level = logging.DEBUG if kwargs['debug'] else logging.INFO + logger_instance = SingletonLogger(debug=level).logger logger_instance.info("🏹 Starting Orion in command-line mode") - output = run(config, output_path, hunter_analyze, output_format) + output = run(**kwargs) for test_name, result_table in output.items(): print(test_name) - print("-"*len(test_name)) + print("="*len(test_name)) print(result_table) @cli.command(name="daemon-mode") @click.option("--debug", default=False, is_flag=True, help="log level") def rundaemon(debug): """ Orion runs on daemon mode on port 8000
\b """ level = logging.DEBUG if debug else logging.INFO - logger_instance.setLevel(level) + logger_instance = SingletonLogger(debug=level).logger logger_instance.info("🏹 Starting Orion in Daemon mode") uvicorn.run("pkg.daemon:app", port=8000) diff --git a/pkg/daemon.py b/pkg/daemon.py index d471066..0c2ad4d 100644 --- a/pkg/daemon.py +++ b/pkg/daemon.py @@ -1,6 +1,7 @@ """ Module to run orion in daemon mode """ +import logging import shutil import os @@ -10,11 +11,11 @@ from . import runTest app = FastAPI() -logger_instance = SingletonLogger(debug=False).logger +logger_instance = SingletonLogger(debug=logging.INFO).logger @app.post("/daemon") -async def daemon(file: UploadFile = File(...)): +async def daemon(file: UploadFile = File(...), uuid: str = "", baseline: str = ""): """starts listening on port 8000 on url /daemon Args: @@ -27,7 +28,15 @@ async def daemon(file: UploadFile = File(...)): new_file_name = f"{file_name}_copy{file_extension}" with open(new_file_name, "wb") as buffer: shutil.copyfileobj(file.file, buffer) - result = runTest.run(new_file_name, "output.csv", True, "json") + argDict={ + 'config': new_file_name, + 'output_path': "output.csv", + 'hunter_analyze': True, + 'output_format': "json", + 'uuid':uuid, + 'baseline':baseline, + } + result = runTest.run(**argDict) try: os.remove(new_file_name) except OSError as e: diff --git a/pkg/logrus.py b/pkg/logrus.py index 2f300fb..d9c9539 100644 --- a/pkg/logrus.py +++ b/pkg/logrus.py @@ -1,18 +1,21 @@ """ Logger for orion """ + import logging import sys + class SingletonLogger: """Singleton logger to set logging at one single place Returns: _type_: _description_ """ + _instance = None - def __new__(cls, debug=False): + def __new__(cls, debug): if cls._instance is None: cls._instance = super().__new__(cls) cls._instance._logger = cls._initialize_logger(debug) @@ -20,13 +23,13 @@ def __new__(cls, debug=False): @staticmethod def _initialize_logger(debug): - level = logging.DEBUG if debug else logging.INFO + level = debug # if debug else logging.INFO logger = logging.getLogger("Orion") logger.setLevel(level) handler = logging.StreamHandler(sys.stdout) handler.setLevel(level) formatter = logging.Formatter( - "%(asctime)s - %(filename)s-%(lineno)d - %(name)s - %(levelname)s - %(message)s" + "%(asctime)s - %(name)s - %(levelname)s - file: %(filename)s - line: %(lineno)d - %(message)s" # pylint: disable = line-too-long ) handler.setFormatter(formatter) logger.addHandler(handler) @@ -39,4 +42,4 @@ def logger(self): Returns: _type_: _description_ """ - return self._logger # pylint: disable = no-member + return self._logger # pylint: disable = no-member diff --git a/pkg/runTest.py b/pkg/runTest.py index 98ba007..c355f59 100644 --- a/pkg/runTest.py +++ b/pkg/runTest.py @@ -1,23 +1,18 @@ """ run test """ + +import logging from fmatch.matcher import Matcher from pkg.logrus import SingletonLogger -from pkg.utils import ( - run_hunter_analyze, - load_config, - get_es_url, - process_test -) +from pkg.utils import run_hunter_analyze, load_config, get_es_url, process_test -logger_instance= SingletonLogger().logger -def run(config, output_path, hunter_analyze,output_format): +def run(**kwargs): """run method to start the tests Args: config (_type_): file path to config file - debug (_type_): debug to be true or false output_path (_type_): output path to save the data hunter_analyze (_type_): changepoint detection through hunter. 
defaults to True output_format (_type_): output to be table or json Returns: _type_: _description_ """ + logger_instance = SingletonLogger(debug=logging.INFO).logger - data = load_config(config, logger_instance) - ES_URL = get_es_url(data,logger=logger_instance) + data = load_config(kwargs["config"]) + ES_URL = get_es_url(data) result_output = {} for test in data["tests"]: - match = Matcher(index="perf_scale_ci",level=logger_instance.level, ES_URL=ES_URL) - result = process_test(test, match, logger_instance, output_path) - if hunter_analyze: - testname,result_data=run_hunter_analyze(result, test,output=output_format,matcher=match) - result_output[testname]=result_data + match = Matcher( + index="perf_scale_ci", level=logger_instance.level, ES_URL=ES_URL + ) + result = process_test( + test, match, kwargs["output_path"], kwargs["uuid"], kwargs["baseline"] + ) + if kwargs["hunter_analyze"]: + testname, result_data = run_hunter_analyze( + result, test, output=kwargs["output_format"], matcher=match + ) + result_output[testname] = result_data return result_output diff --git a/pkg/utils.py b/pkg/utils.py index 9169b93..778bb36 100644 --- a/pkg/utils.py +++ b/pkg/utils.py @@ -6,7 +6,9 @@ from functools import reduce import json +import logging import os +import re import sys import yaml import pandas as pd @@ -15,6 +17,11 @@ from hunter.report import Report, ReportType from hunter.series import Metric, Series +from pkg.logrus import SingletonLogger + + + + def run_hunter_analyze(merged_df, test, output, matcher): """Start hunter analyze function Args: @@ -71,7 +78,6 @@ def parse_json_output(merged_df, change_points_by_metric,matcher): Returns: _type_: _description_ """ - df_json = merged_df.to_json(orient="records") df_json = json.loads(df_json) @@ -99,7 +105,7 @@ def parse_json_output(merged_df, change_points_by_metric,matcher): # pylint: disable=too-many-locals -def get_metric_data(ids, index, metrics, match, logger): +def get_metric_data(ids, index, metrics, match): """Gets details of metrics based on metric yaml list Args: ids (list): list of all uuids @@ -112,6 +118,7 @@ def get_metric_data(ids, index, metrics, match): Returns: dataframe_list: dataframe of the all metrics """ + logger= SingletonLogger(debug=logging.INFO).logger dataframe_list = [] for metric in metrics: metric_name = metric["name"] @@ -152,7 +159,7 @@ def get_metric_data(ids, index, metrics, match): return dataframe_list -def get_metadata(test, logger): +def get_metadata(test): """Gets metadata of the run from each test Args: @@ -161,13 +168,14 @@ def get_metadata(test): Returns: dict: dictionary of the metadata """ + logger= SingletonLogger(debug=logging.INFO).logger metadata = test["metadata"] metadata["ocpVersion"] = str(metadata["ocpVersion"]) logger.debug("metadata" + str(metadata)) return metadata -def load_config(config, logger): +def load_config(config): """Loads config file Args: @@ -177,6 +185,7 @@ def load_config(config): Returns: dict: dictionary of the config file """ + logger= SingletonLogger(debug=logging.INFO).logger try: with open(config, "r", encoding="utf-8") as file: data = yaml.safe_load(file) @@ -190,7 +199,7 @@ def load_config(config): return data -def get_es_url(data, logger): +def get_es_url(data): """Gets es url from config or env Args: @@ -200,6 +209,7 @@ def get_es_url(data): Returns: str: es url """ + logger= SingletonLogger(debug=logging.INFO).logger if "ES_SERVER" in data.keys(): return data["ES_SERVER"] if "ES_SERVER" in
os.environ: @@ -208,7 +218,7 @@ def get_es_url(data): return os.environ.get("ES_SERVER") logger.error("ES_SERVER environment variable/config variable not set") sys.exit(1) -def get_index_and_ids(metadata, uuids, match): +def get_index_and_ids(metadata, uuids, match, baseline): """returns the index to be used and runs as uuids Args: metadata (_type_): metadata from config uuids (_type_): uuids collected match (_type_): Matcher object Returns: _type_: index and uuids """ if metadata["benchmark.keyword"] == "k8s-netperf": return "k8s-netperf", uuids if metadata["benchmark.keyword"] == "ingress-perf": return "ingress-performance", uuids index = "ripsaw-kube-burner" - runs = match.match_kube_burner(uuids) - return index, match.filter_runs(runs, runs) + if baseline == "": + runs = match.match_kube_burner(uuids) + ids = match.filter_runs(runs, runs) + else: + ids = uuids + return index, ids -def process_test(test, match, logger, output): +def process_test(test, match, output, uuid, baseline): """generate the dataframe for the test given Args: @@ -240,17 +254,25 @@ def process_test(test, match, output, uuid, baseline): Returns: _type_: merged dataframe """ - metadata = get_metadata(test, logger) + logger= SingletonLogger(debug=logging.INFO).logger + if uuid in ('', None): + metadata = get_metadata(test) + else: + metadata = filter_metadata(uuid,match) logger.info("The test %s has started", test["name"]) uuids = match.get_uuid_by_metadata(metadata) - if len(uuids) == 0: - print("No UUID present for given metadata") - sys.exit() - - index, ids = get_index_and_ids(metadata, uuids, match) + if baseline in ('', None): + uuids = match.get_uuid_by_metadata(metadata) + if len(uuids) == 0: + logger.error("No UUID present for given metadata") + sys.exit() + else: + uuids = re.split(' |,',baseline) + uuids.append(uuid) + index, ids = get_index_and_ids(metadata, uuids, match, baseline) metrics = test["metrics"] - dataframe_list = get_metric_data(ids, index, metrics, match, logger) + dataframe_list = get_metric_data(ids, index, metrics, match) merged_df = reduce( lambda left, right: pd.merge(left, right, on="uuid", how="inner"), dataframe_list, @@ -260,3 +282,47 @@ def process_test(test, match, output, uuid, baseline): output_file_path = output.split(".")[0] + "-" + test["name"] + ".csv" match.save_results(merged_df, csv_file_path=output_file_path) return merged_df + +def filter_metadata(uuid,match): + """Gets metadata of the run from each test + + Args: + uuid (str): str of uuid to find metadata of + match: the fmatch instance + + + Returns: + dict: dictionary of the metadata + """ + logger= SingletonLogger(debug=logging.INFO).logger + test = match.get_metadata_by_uuid(uuid) + metadata = { + 'platform': '', + 'clusterType': '', + 'masterNodesCount': 0, + 'workerNodesCount': 0, + 'infraNodesCount': 0, + 'masterNodesType': '', + 'workerNodesType': '', + 'infraNodesType': '', + 'totalNodesCount': 0, + 'ocpVersion': '', + 'networkType': '', + 'ipsec': '', + 'fips': '', + 'encrypted': '', + 'publish': '', + 'computeArch': '', + 'controlPlaneArch': '' + } + for k,v in test.items(): + if k not in metadata: + continue + metadata[k] = v + metadata['benchmark.keyword'] = test['benchmark'] + metadata["ocpVersion"] = str(metadata["ocpVersion"]) + + #Remove any keys that have blank values + no_blank_meta = {k: v for k, v in metadata.items() if v} + logger.debug('No blank metadata dict: ' + str(no_blank_meta)) + return no_blank_meta From abf710fa94de81bd174606eabc1f038aa31c102c Mon Sep 17 00:00:00 2001 From: Shashank Reddy Boyapally Date: Thu, 21 Mar 2024 11:20:52 -0400 Subject: [PATCH 03/12] readme and requirements update Signed-off-by: Shashank Reddy Boyapally --- README.md | 25 +++++++++++++++++++++++++ pkg/runTest.py | 1 + pkg/utils.py | 38 +++++++++++++++++++-------------------
requirements.txt | 1 + 4 files changed, 46 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index 112a99e..66733fd 100644 --- a/README.md +++ b/README.md @@ -101,6 +101,31 @@ The core purpose of Daemon mode is to operate Orion as a self-contained server, ``` >> orion daemon-mode ``` +**Querying a Request to the Daemon Service** + +To interact with the Daemon Service, you can send a POST request using `curl` with specific parameters. This allows you to submit a file along with additional information for processing. + +*Request URL* + +``` +POST http://127.0.0.1:8000/daemon +``` + +*Parameters* + +- uuid (optional): The uuid of the run you want to compare with similar runs. +- baseline (optional): The runs you want to compare. + +*Request Body* + +The request body should contain the file you want to submit for processing. Ensure that the file is in the proper format (e.g., YAML). + +Example +``` +curl -X POST 'http://127.0.0.1:8000/daemon?uuid=4cb3efec-609a-4ac5-985d-4cbbcbb11625' \ +--form 'file=@"/path/to/your/config.yaml"' +``` + Below is a sample output structure: the top level of the JSON contains the test name, while within each test, runs are organized into arrays. Each run includes succinct metadata alongside corresponding metrics for comprehensive analysis. ``` diff --git a/pkg/runTest.py b/pkg/runTest.py index c355f59..674a498 100644 --- a/pkg/runTest.py +++ b/pkg/runTest.py @@ -36,4 +36,5 @@ def run(**kwargs): result, test, output=kwargs["output_format"], matcher=match ) result_output[testname] = result_data + del match return result_output diff --git a/pkg/utils.py b/pkg/utils.py index 778bb36..8b1c216 100644 --- a/pkg/utils.py +++ b/pkg/utils.py @@ -118,11 +118,11 @@ def get_metric_data(ids, index, metrics, match): Returns: dataframe_list: dataframe of the all metrics """ - logger= SingletonLogger(debug=logging.INFO).logger + logger_instance= SingletonLogger(debug=logging.INFO).logger dataframe_list = [] for metric in metrics: metric_name = metric["name"] - logger.info("Collecting %s", metric_name) + logger_instance.info("Collecting %s", metric_name) metric_of_interest = metric["metric_of_interest"] if "agg" in metric.keys(): @@ -134,10 +134,10 @@ def get_metric_data(ids, index, metrics, match): cpu_df = match.convert_to_df(cpu, columns=["uuid", agg_name]) cpu_df = cpu_df.rename(columns={agg_name: metric_name + "_" + agg_name}) dataframe_list.append(cpu_df) - logger.debug(cpu_df) + logger_instance.debug(cpu_df) except Exception as e: # pylint: disable=broad-exception-caught - logger.error( + logger_instance.error( "Couldn't get agg metrics %s, exception %s", metric_name, e, @@ -149,9 +149,9 @@ def get_metric_data(ids, index, metrics, match): podl, columns=["uuid", "timestamp", metric_of_interest] ) dataframe_list.append(podl_df) - logger.debug(podl_df) + logger_instance.debug(podl_df) except Exception as e: # pylint: disable=broad-exception-caught - logger.error( + logger_instance.error( "Couldn't get metrics %s, exception %s", metric_name, e, @@ -168,10 +168,10 @@ def get_metadata(test): Returns: dict: dictionary of the metadata """ - logger= SingletonLogger(debug=logging.INFO).logger + logger_instance= SingletonLogger(debug=logging.INFO).logger metadata = test["metadata"] metadata["ocpVersion"] = str(metadata["ocpVersion"]) - logger.debug("metadata" + str(metadata)) + logger_instance.debug("metadata" + str(metadata)) return metadata @@ -185,16 +185,16 @@ def load_config(config): Returns: dict: dictionary of the config file """ - logger= 
SingletonLogger(debug=logging.INFO).logger + logger_instance= SingletonLogger(debug=logging.INFO).logger try: with open(config, "r", encoding="utf-8") as file: data = yaml.safe_load(file) - logger.debug("The %s file has successfully loaded", config) + logger_instance.debug("The %s file has successfully loaded", config) except FileNotFoundError as e: - logger.error("Config file not found: %s", e) + logger_instance.error("Config file not found: %s", e) sys.exit(1) except Exception as e: # pylint: disable=broad-exception-caught - logger.error("An error occurred: %s", e) + logger_instance.error("An error occurred: %s", e) sys.exit(1) return data @@ -209,12 +209,12 @@ def get_es_url(data): Returns: str: es url """ - logger= SingletonLogger(debug=logging.INFO).logger + logger_instance= SingletonLogger(debug=logging.INFO).logger if "ES_SERVER" in data.keys(): return data["ES_SERVER"] if "ES_SERVER" in os.environ: return os.environ.get("ES_SERVER") - logger.error("ES_SERVER environment variable/config variable not set") + logger_instance.error("ES_SERVER environment variable/config variable not set") sys.exit(1) @@ -254,17 +254,17 @@ def process_test(test, match, output, uuid, baseline): Returns: _type_: merged dataframe """ - logger= SingletonLogger(debug=logging.INFO).logger + logger_instance= SingletonLogger(debug=logging.INFO).logger if uuid in ('', None): metadata = get_metadata(test) else: metadata = filter_metadata(uuid,match) - logger.info("The test %s has started", test["name"]) + logger_instance.info("The test %s has started", test["name"]) uuids = match.get_uuid_by_metadata(metadata) if baseline in ('', None): uuids = match.get_uuid_by_metadata(metadata) if len(uuids) == 0: - logger.error("No UUID present for given metadata") + logger_instance.error("No UUID present for given metadata") sys.exit() else: uuids = re.split(' |,',baseline) @@ -294,7 +294,7 @@ def filter_metadata(uuid,match): Returns: dict: dictionary of the metadata """ - logger= SingletonLogger(debug=logging.INFO).logger + logger_instance= SingletonLogger(debug=logging.INFO).logger test = match.get_metadata_by_uuid(uuid) metadata = { 'platform': '', @@ -324,5 +324,5 @@ def filter_metadata(uuid,match): #Remove any keys that have blank values no_blank_meta = {k: v for k, v in metadata.items() if v} - logger.debug('No blank metadata dict: ' + str(no_blank_meta)) + logger_instance.debug('No blank metadata dict: ' + str(no_blank_meta)) return no_blank_meta diff --git a/requirements.txt b/requirements.txt index acc1754..0eb2549 100644 --- a/requirements.txt +++ b/requirements.txt @@ -13,3 +13,4 @@ urllib3==1.26.18 pyshorteners==1.0.1 fastapi==0.110.0 python-multipart==0.0.9 +uvicorn==0.28.0 From 10d230071bf83c3d83ba428c51449e2606702cc3 Mon Sep 17 00:00:00 2001 From: Shashank Reddy Boyapally Date: Mon, 25 Mar 2024 14:19:29 -0400 Subject: [PATCH 04/12] readme and utils changes Signed-off-by: Shashank Reddy Boyapally --- README.md | 4 ++-- orion.py | 4 ++-- pkg/runTest.py | 1 - pkg/utils.py | 9 +++++---- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 66733fd..c44e22e 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ # Orion - CLI tool to find regressions -Orion stands as a powerful command-line tool/Daemon designed for identifying regressions within perf-scale CPT runs, leveraging metadata provided during the process. The detection mechanism relies on [hunter](https://github.com/datastax-labs/hunter). 
+Orion stands as a powerful command-line tool/daemon designed for identifying regressions within perf-scale CPT runs, leveraging metadata provided during the process. The detection mechanism relies on [hunter](https://github.com/datastax-labs/hunter). Below is an illustrative example of the config and metadata that Orion can handle: @@ -114,7 +114,7 @@ POST http://127.0.0.1:8000/daemon *Parameters* - uuid (optional): The uuid of the run you want to compare with similar runs. -- baseline (optional): The runs you want to compare. +- baseline (optional): The runs you want to compare with. *Request Body* diff --git a/orion.py b/orion.py index 132747f..684eea7 100644 --- a/orion.py +++ b/orion.py @@ -23,7 +23,7 @@ def cli(max_content_width=120): # pylint: disable=unused-argument # pylint: disable=too-many-locals -@cli.command(name="cmd-mode") +@cli.command(name="cmd") @click.option("--config", default="config.yaml", help="Path to the configuration file") @click.option( "--output-path", default="output.csv", help="Path to save the output csv file" @@ -61,7 +61,7 @@ def cmd_analysis(**kwargs): print("="*len(test_name)) print(result_table) -@cli.command(name="daemon-mode") +@cli.command(name="daemon") @click.option("--debug", default=False, is_flag=True, help="log level") def rundaemon(debug): """ diff --git a/pkg/runTest.py b/pkg/runTest.py index 674a498..c355f59 100644 --- a/pkg/runTest.py +++ b/pkg/runTest.py @@ -36,5 +36,4 @@ def run(**kwargs): result, test, output=kwargs["output_format"], matcher=match ) result_output[testname] = result_data - del match return result_output diff --git a/pkg/utils.py b/pkg/utils.py index 8b1c216..07f884e 100644 --- a/pkg/utils.py +++ b/pkg/utils.py @@ -229,10 +229,11 @@ def get_index_and_ids(metadata, uuids, match, baseline): Returns: _type_: index and uuids """ - if metadata["benchmark.keyword"] == "k8s-netperf": - return "k8s-netperf", uuids - if metadata["benchmark.keyword"] == "ingress-perf": - return "ingress-performance", uuids + index_map={"k8s-netperf":"k8s-netperf", + "ingress-perf":"ingress-performance", + } + if metadata["benchmark.keyword"] in index_map.keys(): + return index_map[metadata["benchmark.keyword"]], uuids index = "ripsaw-kube-burner" if baseline == "": runs = match.match_kube_burner(uuids) From 3d3c72998c248e3f6a8f62b254185b9c55618246 Mon Sep 17 00:00:00 2001 From: Shashank Reddy Boyapally Date: Mon, 25 Mar 2024 14:22:50 -0400 Subject: [PATCH 05/12] fixed pylint error Signed-off-by: Shashank Reddy Boyapally --- pkg/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/utils.py b/pkg/utils.py index 07f884e..b38054c 100644 --- a/pkg/utils.py +++ b/pkg/utils.py @@ -232,7 +232,7 @@ def get_index_and_ids(metadata, uuids, match, baseline): index_map={"k8s-netperf":"k8s-netperf", "ingress-perf":"ingress-performance", } - if metadata["benchmark.keyword"] in index_map.keys(): + if metadata["benchmark.keyword"] in index_map: return index_map[metadata["benchmark.keyword"]], uuids index = "ripsaw-kube-burner" if baseline == "": From 994b7ff0372422e87a84c9660efc0107870de726 Mon Sep 17 00:00:00 2001 From: Shashank Reddy Boyapally Date: Mon, 25 Mar 2024 23:49:27 -0400 Subject: [PATCH 06/12] changed accordingly to fmatch 0.0.6, and filtering changepoints Signed-off-by: Shashank Reddy Boyapally --- pkg/daemon.py | 28 ++++++++++++++++++++-------- pkg/runTest.py | 2 +- pkg/utils.py | 27 +++++++++++++++------------ 3 files changed, 36 insertions(+), 21 deletions(-) diff --git a/pkg/daemon.py b/pkg/daemon.py index 0c2ad4d..c4d80cb 
100644 --- a/pkg/daemon.py +++ b/pkg/daemon.py @@ -1,6 +1,7 @@ """ Module to run orion in daemon mode """ + import logging import shutil import os @@ -15,7 +16,12 @@ @app.post("/daemon") -async def daemon(file: UploadFile = File(...), uuid: str = "", baseline: str = ""): +async def daemon( + file: UploadFile = File(...), + uuid: str = "", + baseline: str = "", + filter_changepoints="", +): """starts listening on port 8000 on url /daemon Args: @@ -28,15 +34,21 @@ async def daemon(file: UploadFile = File(...), uuid: str = "", baseline: str = " new_file_name = f"{file_name}_copy{file_extension}" with open(new_file_name, "wb") as buffer: shutil.copyfileobj(file.file, buffer) - argDict={ - 'config': new_file_name, - 'output_path': "output.csv", - 'hunter_analyze': True, - 'output_format': "json", - 'uuid':uuid, - 'baseline':baseline, + argDict = { + "config": new_file_name, + "output_path": "output.csv", + "hunter_analyze": True, + "output_format": "json", + "uuid": uuid, + "baseline": baseline, } + filter_changepoints = ( + True if filter_changepoints == "true" else False # pylint: disable = R1719 + ) result = runTest.run(**argDict) + if filter_changepoints: + for key, value in result.items(): + result[key] = list(filter(lambda x: x.get("is_changepoint", False), value)) try: os.remove(new_file_name) except OSError as e: diff --git a/pkg/runTest.py b/pkg/runTest.py index c355f59..661af6e 100644 --- a/pkg/runTest.py +++ b/pkg/runTest.py @@ -33,7 +33,7 @@ def run(**kwargs): ) if kwargs["hunter_analyze"]: testname, result_data = run_hunter_analyze( - result, test, output=kwargs["output_format"], matcher=match + result, test, output=kwargs["output_format"] ) result_output[testname] = result_data return result_output diff --git a/pkg/utils.py b/pkg/utils.py index b38054c..4db3163 100644 --- a/pkg/utils.py +++ b/pkg/utils.py @@ -16,6 +16,7 @@ from hunter.report import Report, ReportType from hunter.series import Metric, Series +import pyshorteners from pkg.logrus import SingletonLogger @@ -23,7 +24,7 @@ -def run_hunter_analyze(merged_df, test, output, matcher): +def run_hunter_analyze(merged_df, test, output): """Start hunter analyze function Args: @@ -35,15 +36,15 @@ def run_hunter_analyze(merged_df, test, output, matcher): metrics = { column: Metric(1, 1.0) for column in merged_df.columns - if column not in ["uuid", "timestamp"] + if column not in ["uuid","timestamp","buildUrl"] } data = { column: merged_df[column] for column in merged_df.columns - if column not in ["uuid", "timestamp"] + if column not in ["uuid","timestamp","buildUrl"] } attributes = { - column: merged_df[column] for column in merged_df.columns if column in ["uuid"] + column: merged_df[column] for column in merged_df.columns if column in ["uuid","buildUrl"] } series = Series( test_name=test["name"], @@ -63,12 +64,12 @@ def run_hunter_analyze(merged_df, test, output, matcher): if output == "json": change_points_by_metric = series.analyze().change_points - output_json = parse_json_output(merged_df, change_points_by_metric,matcher=matcher) + output_json = parse_json_output(merged_df, change_points_by_metric) return test["name"], output_json return None -def parse_json_output(merged_df, change_points_by_metric,matcher): +def parse_json_output(merged_df, change_points_by_metric): """json output generator function Args: @@ -84,11 +85,8 @@ def parse_json_output(merged_df, change_points_by_metric,matcher): for index, entry in enumerate(df_json): entry["metrics"] = { key: {"value": entry.pop(key), "percentage_change": 0} - for key in 
entry.keys() - {"uuid", "timestamp"} + for key in entry.keys() - {"uuid", "timestamp", "buildUrl"} } - entry["buildUrl"] = matcher.get_metadata_by_uuid(entry.get("uuid")).get( - "buildUrl" - ) entry["is_changepoint"] = False for key in change_points_by_metric.keys(): @@ -261,7 +259,9 @@ def process_test(test, match, output, uuid, baseline): else: metadata = filter_metadata(uuid,match) logger_instance.info("The test %s has started", test["name"]) - uuids = match.get_uuid_by_metadata(metadata) + runs = match.get_uuid_by_metadata(metadata) + uuids = [run["uuid"] for run in runs] + buildUrls = {run["uuid"]: run["buildUrl"] for run in runs} if baseline in ('', None): uuids = match.get_uuid_by_metadata(metadata) if len(uuids) == 0: @@ -279,7 +279,10 @@ def process_test(test, match, output, uuid, baseline): lambda left, right: pd.merge(left, right, on="uuid", how="inner"), dataframe_list, ) - + shortener = pyshorteners.Shortener() + merged_df["buildUrl"] = merged_df["uuid"].apply( + lambda uuid: shortener.tinyurl.short(buildUrls[uuid]) #pylint: disable = cell-var-from-loop + ) output_file_path = output.split(".")[0] + "-" + test["name"] + ".csv" match.save_results(merged_df, csv_file_path=output_file_path) return merged_df From 1be20713848e6be6c912d06062d0fa2d132db408 Mon Sep 17 00:00:00 2001 From: Shashank Reddy Boyapally Date: Wed, 27 Mar 2024 12:27:42 -0400 Subject: [PATCH 07/12] updated requirements.txt Signed-off-by: Shashank Reddy Boyapally --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 0eb2549..941393b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,5 +12,6 @@ tzdata==2023.4 urllib3==1.26.18 pyshorteners==1.0.1 fastapi==0.110.0 +pyshorteners==1.0.1 python-multipart==0.0.9 uvicorn==0.28.0 From 7be9fcb9347d992073edfb3c4c162bf5f3c9231e Mon Sep 17 00:00:00 2001 From: Shashank Reddy Boyapally Date: Tue, 9 Apr 2024 16:08:54 -0400 Subject: [PATCH 08/12] added generic indexes, daemon mode opinionated, and version as param Signed-off-by: Shashank Reddy Boyapally --- README.md | 1 + configs/small-scale-cluster-density.yml | 48 +++++++++++++++++++++++++ orion.py | 7 ++-- pkg/daemon.py | 40 ++++++++++++++------- pkg/runTest.py | 7 ++-- pkg/utils.py | 10 +++--- requirements.txt | 1 + setup.py | 4 ++- 8 files changed, 95 insertions(+), 23 deletions(-) create mode 100644 configs/small-scale-cluster-density.yml diff --git a/README.md b/README.md index c44e22e..c1251eb 100644 --- a/README.md +++ b/README.md @@ -6,6 +6,7 @@ Below is an illustrative example of the config and metadata that Orion can handl ``` tests : - name : aws-small-scale-cluster-density-v2 + index: ospst-perf-scale-ci-* metadata: platform: AWS masterNodesType: m6a.xlarge diff --git a/configs/small-scale-cluster-density.yml b/configs/small-scale-cluster-density.yml new file mode 100644 index 0000000..decaad6 --- /dev/null +++ b/configs/small-scale-cluster-density.yml @@ -0,0 +1,48 @@ +tests : + - name : aws-small-scale-cluster-density-v2 + index: ospst-perf-scale-ci-* + metadata: + platform: AWS + masterNodesType: m6a.xlarge + masterNodesCount: 3 + workerNodesType: m6a.xlarge + workerNodesCount: 24 + benchmark.keyword: cluster-density-v2 + ocpVersion: {{ version }} + networkType: OVNKubernetes + # encrypted: true + # fips: false + # ipsec: false + + metrics : + - name: podReadyLatency + metricName: podLatencyQuantilesMeasurement + quantileName: Ready + metric_of_interest: P99 + not: + jobConfig.name: "garbage-collection" + + - name: apiserverCPU + metricName : 
containerCPU + labels.namespace.keyword: openshift-kube-apiserver + metric_of_interest: value + agg: + value: cpu + agg_type: avg + + - name: ovnCPU + metricName : containerCPU + labels.namespace.keyword: openshift-ovn-kubernetes + metric_of_interest: value + agg: + value: cpu + agg_type: avg + + - name: etcdCPU + metricName : containerCPU + labels.namespace.keyword: openshift-etcd + metric_of_interest: value + agg: + value: cpu + agg_type: avg + diff --git a/orion.py b/orion.py index 684eea7..7769b78 100644 --- a/orion.py +++ b/orion.py @@ -6,11 +6,13 @@ import logging import sys import warnings - import click import uvicorn from pkg.logrus import SingletonLogger from pkg.runTest import run +from pkg.utils import load_config + +warnings.filterwarnings("ignore", message="Unverified HTTPS request.*") warnings.filterwarnings("ignore", message="Unverified HTTPS request.*") @@ -29,7 +31,7 @@ def cli(max_content_width=120): # pylint: disable=unused-argument "--output-path", default="output.csv", help="Path to save the output csv file" ) @click.option("--debug", default=False, is_flag=True, help="log level") -@click.option("--hunter-analyze", default=True, is_flag=True, help="run hunter analyze") +@click.option("--hunter-analyze", is_flag=True, help="run hunter analyze") @click.option( "-o", "--output-format", @@ -55,6 +57,7 @@ def cmd_analysis(**kwargs): level = logging.DEBUG if kwargs['debug'] else logging.INFO logger_instance = SingletonLogger(debug=level).logger logger_instance.info("🏹 Starting Orion in command-line mode") + kwargs['configMap']=load_config(kwargs["config"]) output = run(**kwargs) for test_name, result_table in output.items(): print(test_name) diff --git a/pkg/daemon.py b/pkg/daemon.py index c4d80cb..160b9b8 100644 --- a/pkg/daemon.py +++ b/pkg/daemon.py @@ -3,10 +3,10 @@ """ import logging -import shutil -import os -from fastapi import FastAPI, File, UploadFile +from fastapi import FastAPI +from jinja2 import Template +import yaml from pkg.logrus import SingletonLogger from . 
import runTest @app.post("/daemon") async def daemon( - file: UploadFile = File(...), + version: str = "4.15", uuid: str = "", baseline: str = "", filter_changepoints="", @@ -30,17 +30,18 @@ async def daemon( Returns: json: json object of the changepoints and metrics """ - file_name, file_extension = os.path.splitext(file.filename) - new_file_name = f"{file_name}_copy{file_extension}" - with open(new_file_name, "wb") as buffer: - shutil.copyfileobj(file.file, buffer) + config_file_name="configs/small-scale-cluster-density.yml" + parameters={ + "version": version + } argDict = { - "config": new_file_name, + "config": config_file_name, "output_path": "output.csv", "hunter_analyze": True, "output_format": "json", "uuid": uuid, "baseline": baseline, + "configMap": render_template(config_file_name, parameters) } filter_changepoints = ( True if filter_changepoints == "true" else False # pylint: disable = R1719 @@ -49,8 +50,21 @@ async def daemon( if filter_changepoints: for key, value in result.items(): result[key] = list(filter(lambda x: x.get("is_changepoint", False), value)) return result + +def render_template(file_name, parameters): + """replace parameters in the config file + + Args: + file_name (str): the config file + parameters (dict): parameters to be replaced + + Returns: + dict: configMap in dict + """ + with open(file_name, 'r', encoding="utf-8") as template_file: + template_content = template_file.read() + template = Template(template_content) + rendered_config_yaml = template.render(parameters) + rendered_config = yaml.safe_load(rendered_config_yaml) + return rendered_config diff --git a/pkg/runTest.py b/pkg/runTest.py index 661af6e..2feabb4 100644 --- a/pkg/runTest.py +++ b/pkg/runTest.py @@ -5,7 +5,7 @@ import logging from fmatch.matcher import Matcher from pkg.logrus import SingletonLogger -from pkg.utils import run_hunter_analyze, load_config, get_es_url, process_test +from pkg.utils import run_hunter_analyze, get_es_url, process_test def run(**kwargs): @@ -21,12 +21,13 @@ def run(**kwargs): _type_: _description_ """ logger_instance = SingletonLogger(debug=logging.INFO).logger - data = load_config(kwargs["config"]) + data = kwargs["configMap"] + ES_URL = get_es_url(data) result_output = {} for test in data["tests"]: match = Matcher( - index="perf_scale_ci", level=logger_instance.level, ES_URL=ES_URL + index=test["index"], level=logger_instance.level, ES_URL=ES_URL, verify_certs=False ) result = process_test( test, match, kwargs["output_path"], kwargs["uuid"], kwargs["baseline"] diff --git a/pkg/utils.py b/pkg/utils.py index 4db3163..2204e3e 100644 --- a/pkg/utils.py +++ b/pkg/utils.py @@ -131,6 +131,7 @@ def get_metric_data(ids, index, metrics, match): agg_name = agg_value + "_" + agg_type cpu_df = match.convert_to_df(cpu, columns=["uuid", agg_name]) cpu_df = cpu_df.rename(columns={agg_name: metric_name + "_" + agg_name}) + cpu_df = cpu_df.drop_duplicates() dataframe_list.append(cpu_df) logger_instance.debug(cpu_df) @@ -146,6 +147,7 @@ def get_metric_data(ids, index, metrics, match): podl_df = match.convert_to_df( podl, columns=["uuid", "timestamp", metric_of_interest] ) + podl_df=podl_df.drop_duplicates() dataframe_list.append(podl_df) logger_instance.debug(podl_df) except Exception as e: # pylint: disable=broad-exception-caught @@ -227,14 +229,14 @@ def get_index_and_ids(metadata, uuids, match, baseline): Returns: _type_: index and uuids """ -
index_map={"k8s-netperf":"k8s-netperf", - "ingress-perf":"ingress-performance", + index_map={"k8s-netperf":"ospst-k8s-netperf", + "ingress-perf":"ospst-ingress-performance", } if metadata["benchmark.keyword"] in index_map: return index_map[metadata["benchmark.keyword"]], uuids - index = "ripsaw-kube-burner" + index = "ospst-ripsaw-kube-burner*" if baseline == "": - runs = match.match_kube_burner(uuids) + runs = match.match_kube_burner(uuids,index) ids = match.filter_runs(runs, runs) else: ids = uuids diff --git a/requirements.txt b/requirements.txt index 941393b..f217a61 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,6 +4,7 @@ click==8.1.7 elastic-transport==8.11.0 elasticsearch==7.13.0 fmatch==0.0.7 +Jinja2==3.1.3 python-dateutil==2.8.2 pytz==2023.3.post1 PyYAML==6.0.1 diff --git a/setup.py b/setup.py index 50689d7..778ea28 100644 --- a/setup.py +++ b/setup.py @@ -18,7 +18,9 @@ ], }, packages=find_packages(), - package_data={'pkg': ['utils.py',"runTest.py","daemon.py","logrus.py"],'hunter': ['*.py']}, + package_data={'pkg': ['utils.py',"runTest.py","daemon.py","logrus.py"], + 'hunter': ['*.py'], + 'configs':['*.yml']}, classifiers=[ 'Programming Language :: Python :: 3', 'License :: OSI Approved :: MIT License', From 8e1b95bb76ceff933b5c9419b805e80a56ea74be Mon Sep 17 00:00:00 2001 From: Shashank Reddy Boyapally Date: Tue, 28 May 2024 16:10:21 -0400 Subject: [PATCH 09/12] enabled templating for daemon mode, docs update, opinionated daemon mode, rebased, fixed trivial bugs Signed-off-by: Shashank Reddy Boyapally --- README.md | 69 ++++-- configs/__init__.py | 0 configs/small-scale-cluster-density.yml | 9 +- configs/small-scale-node-density-cni.yml | 58 +++++ examples/small-scale-node-density-cni.yaml | 2 +- orion.py | 27 +-- pkg/daemon.py | 50 +++- pkg/runTest.py | 2 + pkg/utils.py | 66 +++-- setup.py | 3 +- utils/orion_funcs.py | 269 --------------------- 11 files changed, 216 insertions(+), 339 deletions(-) create mode 100644 configs/__init__.py create mode 100644 configs/small-scale-node-density-cni.yml delete mode 100644 utils/orion_funcs.py diff --git a/README.md b/README.md index c1251eb..71c0cc0 100644 --- a/README.md +++ b/README.md @@ -84,7 +84,7 @@ Executing Orion is as seamless as its building it. With the latest enhancements, ### Command-line mode Running Orion in command-line Mode is straightforward. Simply follow these instructions: ``` ->> orion cmd-mode +>> orion cmd --hunter-analyze ``` At the moment, @@ -97,14 +97,14 @@ Activate Orion's regression detection tool for performance-scale CPT runs effort Additionally, users can specify a custom path for the output CSV file using the ```--output``` flag, providing control over the location where the generated CSV will be stored. ### Daemon mode -The core purpose of Daemon mode is to operate Orion as a self-contained server, dedicated to handling incoming requests. By sending a POST request accompanied by a configuration file, users can trigger change point detection on the provided metadata and metrics. Following the processing, the response is formatted in JSON, providing a structured output for seamless integration and analysis. To trigger daemon mode just use the following commands +The core purpose of Daemon mode is to operate Orion as a self-contained server, dedicated to handling incoming requests. By sending a POST request accompanied by a test name of predefined tests, users can trigger change point detection on the provided metadata and metrics. 
 
 At the moment,
@@ -97,14 +97,14 @@ Activate Orion's regression detection tool for performance-scale CPT runs effort
 Additionally, users can specify a custom path for the output CSV file using the ```--output``` flag, providing control over the location where the generated CSV will be stored.
 
 ### Daemon mode
-The core purpose of Daemon mode is to operate Orion as a self-contained server, dedicated to handling incoming requests. By sending a POST request accompanied by a configuration file, users can trigger change point detection on the provided metadata and metrics. Following the processing, the response is formatted in JSON, providing a structured output for seamless integration and analysis. To trigger daemon mode just use the following commands
+The core purpose of Daemon mode is to operate Orion as a self-contained server, dedicated to handling incoming requests. By sending a POST request accompanied by the name of a predefined test, users can trigger change point detection on the provided metadata and metrics. Following the processing, the response is formatted in JSON, providing a structured output for seamless integration and analysis. To trigger daemon mode, use the following command:
 
 ```
->> orion daemon-mode
+>> orion daemon
 ```
 
-**Querying a Request to the Daemon Service**
+**Querying a Test Request to the Daemon Service**
 
-To interact with the Daemon Service, you can send a POST request using `curl` with specific parameters. This allows you to submit a file along with additional information for processing.
+To interact with the Daemon Service, you can send a POST request using `curl` with specific parameters.
 
 *Request URL*
 
@@ -116,15 +116,14 @@ POST http://127.0.0.1:8000/daemon
 
 - uuid (optional): The uuid of the run you want to compare with similar runs.
 - baseline (optional): The runs you want to compare with.
+- version (optional): The ocpVersion you want to use for metadata; defaults to `4.15`
+- filter_changepoints (optional): set to `true` if you only want changepoints to show up in the response
+- test_name (optional): name of the test you want to run; defaults to `small-scale-cluster-density`
 
-*Request Body*
-
-The request body should contain the file you want to submit for processing. Ensure that the file is in the proper format (e.g., YAML).
 
 Example
 ```
-curl -X POST 'http://127.0.0.1:8000/daemon?uuid=4cb3efec-609a-4ac5-985d-4cbbcbb11625' \
---form 'file=@"/path/to/your/config.yaml"'
+curl -L -X POST 'http://127.0.0.1:8000/daemon?filter_changepoints=true&version=4.14&test_name=small-scale-node-density-cni'
 ```
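+
+For illustration, the same request can be issued from Python; a minimal sketch using the `requests` library (hypothetical client code, assuming the daemon is listening locally on port 8000):
+
+```python
+import requests
+
+# Query parameters mirror the curl example above.
+response = requests.post(
+    "http://127.0.0.1:8000/daemon",
+    params={
+        "filter_changepoints": "true",
+        "version": "4.14",
+        "test_name": "small-scale-node-density-cni",
+    },
+)
+print(response.json())
+```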
 
@@ -135,25 +134,25 @@ Below is a sample output structure: the top level of the JSON contains the test
 {
     "uuid": "4cb3efec-609a-4ac5-985d-4cbbcbb11625",
     "timestamp": 1704889895,
+    "buildUrl": "https://tinyurl.com/2ya4ka9z",
     "metrics": {
-        "etcdCPU_cpu_avg": {
-            "value": 8.7663162253,
+        "ovnCPU_avg": {
+            "value": 2.8503958847,
             "percentage_change": 0
         },
-        "ovnCPU_cpu_avg": {
-            "value": 2.8503958847,
+        "apiserverCPU_avg": {
+            "value": 10.2344511574,
             "percentage_change": 0
         },
-        "P99": {
-            "value": 13000,
+        "etcdCPU_avg": {
+            "value": 8.7663162253,
             "percentage_change": 0
         },
-        "apiserverCPU_cpu_avg": {
-            "value": 10.2344511574,
+        "P99": {
+            "value": 13000,
             "percentage_change": 0
         }
     },
-    "buildUrl": "https://prow.ci.openshift.org/view/gs/origin-ci-test/logs/periodic-ci-openshift-qe-ocp-qe-perfscale-ci-main-aws-4.16-nightly-x86-control-plane-24nodes/1745037917119582208",
     "is_changepoint": false
 },
@@ -161,6 +160,34 @@ Below is a sample output structure: the top level of the JSON contains the test
 ```
 
+**Querying List of Tests Available to the Daemon Service**
+
+To list the tests available, you can send a GET request using `curl`.
+
+*Request URL*
+
+```
+GET http://127.0.0.1:8000/daemon/options
+```
+
+*Request Body*
+
+This endpoint requires no request body.
+
+Example
+```
+curl -L 'http://127.0.0.1:8000/daemon/options'
+```
+
+Below is a sample output structure: it contains the list of predefined (opinionated) test configurations available to the daemon
+```
+{
+    "options": [
+        "small-scale-cluster-density",
+        "small-scale-node-density-cni"
+    ]
+}
+```
 Orion's seamless integration with metadata and hunter ensures a robust regression detection tool for perf-scale CPT runs.
 
@@ -179,5 +206,7 @@ tests :
       agg_type: avg
 ```
 
-Orion provides flexibility if you know the comparison uuid you want to compare among, use the ```--baseline``` flag. This should only be used in conjunction when setting uuid. Similar to the uuid section mentioned above, you'll have to set a metrics section to specify the data points you want to collect on
+Orion provides flexibility: if you know the comparison uuids you want to compare against, use the ```--baseline``` flag. This should only be used in conjunction with the `--uuid` option. Similar to the uuid section mentioned above, you'll have to set a metrics section to specify the data points you want to collect on.
+
+The `--uuid` and `--baseline` options are available in both cmd and daemon mode.
diff --git a/configs/__init__.py b/configs/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/configs/small-scale-cluster-density.yml b/configs/small-scale-cluster-density.yml
index decaad6..7eed915 100644
--- a/configs/small-scale-cluster-density.yml
+++ b/configs/small-scale-cluster-density.yml
@@ -1,6 +1,7 @@
 tests :
   - name : aws-small-scale-cluster-density-v2
     index: ospst-perf-scale-ci-*
+    benchmarkIndex: ospst-ripsaw-kube-burner*
     metadata:
       platform: AWS
       masterNodesType: m6a.xlarge
@@ -45,4 +46,10 @@ tests :
       agg:
         value: cpu
         agg_type: avg
-
+
+    - name: etcdDisk
+      metricName : 99thEtcdDiskBackendCommitDurationSeconds
+      metric_of_interest: value
+      agg:
+        value: duration
+        agg_type: avg
diff --git a/configs/small-scale-node-density-cni.yml b/configs/small-scale-node-density-cni.yml
new file mode 100644
index 0000000..fcf7dbe
--- /dev/null
+++ b/configs/small-scale-node-density-cni.yml
@@ -0,0 +1,58 @@
+tests :
+  - name : aws-small-scale-node-density-cni
+    index: ospst-perf-scale-ci-*
+    benchmarkIndex: ospst-ripsaw-kube-burner*
+    metadata:
+      platform: AWS
+      masterNodesType: m6a.xlarge
+      masterNodesCount: 3
+      workerNodesType: m6a.xlarge
+      workerNodesCount: 6
+      infraNodesCount: 3
+      benchmark.keyword: node-density-cni
+      ocpVersion: {{ version }}
+      networkType: OVNKubernetes
+      infraNodesType: r5.2xlarge
+      # encrypted: true
+      # fips: false
+      # ipsec: false
+
+    metrics :
+    - name: podReadyLatency
+      metricName: podLatencyQuantilesMeasurement
+      quantileName: Ready
+      metric_of_interest: P99
+      not:
+        jobConfig.name: "garbage-collection"
+
+    - name: apiserverCPU
+      metricName : containerCPU
+      labels.namespace.keyword: openshift-kube-apiserver
+      metric_of_interest: value
+      agg:
+        value: cpu
+        agg_type: avg
+
+    - name: ovnCPU
+      metricName : containerCPU
+      labels.namespace.keyword: openshift-ovn-kubernetes
+      metric_of_interest: value
+      agg:
+        value: cpu
+        agg_type: avg
+
+    - name: etcdCPU
+      metricName : containerCPU
+      labels.namespace.keyword: openshift-etcd
+      metric_of_interest: value
+      agg:
+        value: cpu
+        agg_type: avg
+
+    - name: etcdDisk
+      metricName : 99thEtcdDiskBackendCommitDurationSeconds
+      metric_of_interest: value
+      agg:
+        value: duration
+        agg_type: avg
+
diff --git a/examples/small-scale-node-density-cni.yaml b/examples/small-scale-node-density-cni.yaml
index 28536d1..64b99f4 100644
--- a/examples/small-scale-node-density-cni.yaml
+++ b/examples/small-scale-node-density-cni.yaml
@@ -10,7 +10,7 @@ tests :
       workerNodesCount: 6
       infraNodesCount: 3
       benchmark.keyword: node-density-cni
-      ocpVersion: 4.15
+      ocpVersion: 4.14
       networkType: OVNKubernetes
       infraNodesType: r5.2xlarge
       # encrypted: true
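A note on the `{{ version }}` placeholders above: the packaged configs are Jinja2 templates, and the daemon substitutes request parameters into them before parsing the YAML (see `render_template` in pkg/daemon.py). A minimal sketch of that render-then-load flow, using an inline template instead of a packaged file:

```python
import yaml
from jinja2 import Template

# Inline stand-in for a packaged config such as configs/small-scale-node-density-cni.yml
template_content = "tests:\n  - name: example-test\n    metadata:\n      ocpVersion: {{ version }}\n"

rendered_yaml = Template(template_content).render({"version": "4.14"})
config_map = yaml.safe_load(rendered_yaml)
print(config_map)  # {'tests': [{'name': 'example-test', 'metadata': {'ocpVersion': 4.14}}]}
```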
diff --git a/orion.py b/orion.py
index 7769b78..223f49f 100644
--- a/orion.py
+++ b/orion.py
@@ -15,12 +15,10 @@
 
 warnings.filterwarnings("ignore", message="Unverified HTTPS request.*")
 
-warnings.filterwarnings("ignore", message="Unverified HTTPS request.*")
-
 @click.group()
 def cli(max_content_width=120):  # pylint: disable=unused-argument
     """
-    cli function to group commands
+    Orion is a tool which can run change point detection for a set of runs using statistical models
     """
 
@@ -40,30 +38,27 @@ def cli(max_content_width=120):  # pylint: disable=unused-argument
     help="Choose output format (json or text)",
 )
 @click.option("--uuid", default="", help="UUID to use as base for comparisons")
-@click.option("--baseline", default="", help="Baseline UUID(s) to to compare against uuid")
+@click.option(
+    "--baseline", default="", help="Baseline UUID(s) to compare against uuid"
+)
 def cmd_analysis(**kwargs):
     """
     Orion runs on command line mode, and helps in detecting regressions
-
-    \b
-    Args:
-        uuid (str): gather metrics based on uuid
-        baseline (str): baseline uuid to compare against uuid (uuid must be set when using baseline)
-        config (str): path to the config file
-        debug (bool): lets you log debug mode
-        output (str): path to the output csv file
-        hunter_analyze (bool): turns on hunter analysis of gathered uuid(s) data
     """
-    level = logging.DEBUG if kwargs['debug'] else logging.INFO
+    level = logging.DEBUG if kwargs["debug"] else logging.INFO
     logger_instance = SingletonLogger(debug=level).logger
     logger_instance.info("🏹 Starting Orion in command-line mode")
-    kwargs['configMap']=load_config(kwargs["config"])
+    kwargs["configMap"] = load_config(kwargs["config"])
     output = run(**kwargs)
+    if output is None:
+        logger_instance.error("Terminating test")
+        sys.exit(0)
     for test_name, result_table in output.items():
         print(test_name)
-        print("="*len(test_name))
+        print("=" * len(test_name))
         print(result_table)
 
+
 @cli.command(name="daemon")
 @click.option("--debug", default=False, is_flag=True, help="log level")
 def rundaemon(debug):
diff --git a/pkg/daemon.py b/pkg/daemon.py
index 160b9b8..7a77a0e 100644
--- a/pkg/daemon.py
+++ b/pkg/daemon.py
@@ -3,9 +3,11 @@
 """
 
 import logging
+import os
 
-from fastapi import FastAPI
+from fastapi import FastAPI, HTTPException
 from jinja2 import Template
+import pkg_resources
 import yaml
 from pkg.logrus import SingletonLogger
 
@@ -21,6 +23,7 @@ async def daemon(
     uuid: str = "",
     baseline: str = "",
     filter_changepoints="",
+    test_name="small-scale-cluster-density",
 ):
     """starts listening on port 8000 on url /daemon
 
@@ -30,10 +33,8 @@ async def daemon(
     Returns:
         json: json object of the changepoints and metrics
     """
-    config_file_name="configs/small-scale-cluster-density.yml"
-    parameters={
-        "version": version
-    }
+    parameters = {"version": version}
+    config_file_name=test_name+".yml"
     argDict = {
         "config": config_file_name,
         "output_path": "output.csv",
@@ -41,18 +42,48 @@ async def daemon(
         "output_format": "json",
         "uuid": uuid,
         "baseline": baseline,
-        "configMap": render_template(config_file_name, parameters)
+        "configMap": render_template(config_file_name, parameters),
     }
     filter_changepoints = (
-        True if filter_changepoints == "true" else False  # pylint: disable = R1719
+        True if filter_changepoints == "true" else False  # pylint: disable = R1719
     )
     result = runTest.run(**argDict)
+    if result is None:
+        return {"Error":"No UUID with given metadata"}
     if filter_changepoints:
         for key, value in result.items():
             result[key] = list(filter(lambda x: x.get("is_changepoint", False), value))
     return result
 
+
+@app.get("/daemon/options")
+async def get_options():
+    """Lists all the tests available in daemon mode
+
+    Raises:
+        HTTPException: Config not found
+        HTTPException: cannot find files for config
+
+    Returns:
+        config: list of files
+    """
+    config_dir = pkg_resources.resource_filename("configs", "")
+    if not 
os.path.isdir(config_dir): + raise HTTPException(status_code=404, detail="Config directory not found") + try: + files = [ + os.path.splitext(file)[0] + for file in os.listdir(config_dir) + if file != "__init__.py" + and not file.endswith(".pyc") + and file != "__pycache__" + ] + return {"options": files} + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) from e + + +def render_template(test_name, parameters): """replace parameters in the config file Args: @@ -62,7 +93,8 @@ def render_template(file_name, parameters): Returns: dict: configMap in dict """ - with open(file_name, 'r', encoding="utf-8") as template_file: + config_path = pkg_resources.resource_filename("configs", test_name) + with open(config_path, "r", encoding="utf-8") as template_file: template_content = template_file.read() template = Template(template_content) rendered_config_yaml = template.render(parameters) diff --git a/pkg/runTest.py b/pkg/runTest.py index 2feabb4..0a19401 100644 --- a/pkg/runTest.py +++ b/pkg/runTest.py @@ -32,6 +32,8 @@ def run(**kwargs): result = process_test( test, match, kwargs["output_path"], kwargs["uuid"], kwargs["baseline"] ) + if result is None: + return None if kwargs["hunter_analyze"]: testname, result_data = run_hunter_analyze( result, test, output=kwargs["output_format"] diff --git a/pkg/utils.py b/pkg/utils.py index 2204e3e..87ff013 100644 --- a/pkg/utils.py +++ b/pkg/utils.py @@ -43,9 +43,8 @@ def run_hunter_analyze(merged_df, test, output): for column in merged_df.columns if column not in ["uuid","timestamp","buildUrl"] } - attributes = { - column: merged_df[column] for column in merged_df.columns if column in ["uuid","buildUrl"] - } + attributes={column: merged_df[column] + for column in merged_df.columns if column in ["uuid","buildUrl"]} series = Series( test_name=test["name"], branch=None, @@ -60,7 +59,7 @@ def run_hunter_analyze(merged_df, test, output): output_table = report.produce_report( test_name=test["name"], report_type=ReportType.LOG ) - return test["name"],output_table + return test["name"], output_table if output == "json": change_points_by_metric = series.analyze().change_points @@ -129,9 +128,9 @@ def get_metric_data(ids, index, metrics, match): agg_value = metric["agg"]["value"] agg_type = metric["agg"]["agg_type"] agg_name = agg_value + "_" + agg_type - cpu_df = match.convert_to_df(cpu, columns=["uuid", agg_name]) - cpu_df = cpu_df.rename(columns={agg_name: metric_name + "_" + agg_name}) - cpu_df = cpu_df.drop_duplicates() + cpu_df = match.convert_to_df(cpu, columns=["uuid", "timestamp", agg_name]) + cpu_df= cpu_df.drop_duplicates(subset=['uuid'],keep='first') + cpu_df = cpu_df.rename(columns={agg_name: metric_name + "_" + agg_type}) dataframe_list.append(cpu_df) logger_instance.debug(cpu_df) @@ -147,6 +146,8 @@ def get_metric_data(ids, index, metrics, match): podl_df = match.convert_to_df( podl, columns=["uuid", "timestamp", metric_of_interest] ) + podl_df = podl_df.rename( + columns={metric_of_interest: metric_name+"_"+metric_of_interest}) podl_df=podl_df.drop_duplicates() dataframe_list.append(podl_df) logger_instance.debug(podl_df) @@ -218,7 +219,7 @@ def get_es_url(data): sys.exit(1) -def get_index_and_ids(metadata, uuids, match, baseline): +def get_ids_from_index(metadata, fingerprint_index, uuids, match, baseline): """returns the index to be used and runs as uuids Args: @@ -229,18 +230,31 @@ def get_index_and_ids(metadata, uuids, match, baseline): Returns: _type_: index and uuids """ - index_map={"k8s-netperf":"ospst-k8s-netperf", - 
"ingress-perf":"ospst-ingress-performance", - } - if metadata["benchmark.keyword"] in index_map: - return index_map[metadata["benchmark.keyword"]], uuids - index = "ospst-ripsaw-kube-burner*" + if metadata["benchmark.keyword"] in ["ingress-perf","k8s-netperf"] : + return uuids if baseline == "": - runs = match.match_kube_burner(uuids,index) + runs = match.match_kube_burner(uuids,fingerprint_index) ids = match.filter_runs(runs, runs) else: ids = uuids - return index, ids + return ids + +def get_build_urls(index, uuids,match): + """Gets metadata of the run from each test + to get the build url + + Args: + uuids (list): str list of uuid to find build urls of + match: the fmatch instance + + + Returns: + dict: dictionary of the metadata + """ + + test = match.getResults("",uuids,index,{}) + buildUrls = {run["uuid"]: run["buildUrl"] for run in test} + return buildUrls def process_test(test, match, output, uuid, baseline): @@ -256,6 +270,8 @@ def process_test(test, match, output, uuid, baseline): _type_: merged dataframe """ logger_instance= SingletonLogger(debug=logging.INFO).logger + benchmarkIndex=test['benchmarkIndex'] + fingerprint_index=test['index'] if uuid in ('', None): metadata = get_metadata(test) else: @@ -265,17 +281,25 @@ def process_test(test, match, output, uuid, baseline): uuids = [run["uuid"] for run in runs] buildUrls = {run["uuid"]: run["buildUrl"] for run in runs} if baseline in ('', None): - uuids = match.get_uuid_by_metadata(metadata) + runs = match.get_uuid_by_metadata(metadata) + uuids = [run["uuid"] for run in runs] + buildUrls = {run["uuid"]: run["buildUrl"] for run in runs} if len(uuids) == 0: logger_instance.error("No UUID present for given metadata") - sys.exit() + return None else: - uuids = re.split(' |,',baseline) + uuids = [uuid for uuid in re.split(' |,',baseline) if uuid] uuids.append(uuid) - index, ids = get_index_and_ids(metadata, uuids, match, baseline) + buildUrls = get_build_urls(fingerprint_index, uuids,match) + fingerprint_index=benchmarkIndex + ids = get_ids_from_index(metadata, fingerprint_index, uuids, match, baseline) metrics = test["metrics"] - dataframe_list = get_metric_data(ids, index, metrics, match) + dataframe_list = get_metric_data(ids, fingerprint_index, metrics, match) + + for i, df in enumerate(dataframe_list): + if i != 0 and ('timestamp' in df.columns): + dataframe_list[i] = df.drop(columns=['timestamp']) merged_df = reduce( lambda left, right: pd.merge(left, right, on="uuid", how="inner"), diff --git a/setup.py b/setup.py index 778ea28..bfbb8ce 100644 --- a/setup.py +++ b/setup.py @@ -19,8 +19,7 @@ }, packages=find_packages(), package_data={'pkg': ['utils.py',"runTest.py","daemon.py","logrus.py"], - 'hunter': ['*.py'], - 'configs':['*.yml']}, + 'configs':['*.yml','*.yaml']}, classifiers=[ 'Programming Language :: Python :: 3', 'License :: OSI Approved :: MIT License', diff --git a/utils/orion_funcs.py b/utils/orion_funcs.py deleted file mode 100644 index 995a2ff..0000000 --- a/utils/orion_funcs.py +++ /dev/null @@ -1,269 +0,0 @@ -# pylint: disable=cyclic-import -""" -module for all utility functions orion uses -""" -# pylint: disable = import-error - -import json -import logging -import sys - -import yaml -import pandas as pd - -from hunter.report import Report, ReportType -from hunter.series import Metric, Series - - -def run_hunter_analyze(merged_df, test, output): - """Start hunter analyze function - - Args: - merged_df (Dataframe): merged dataframe of all the metrics - test (dict): test dictionary with the each test information - 
""" - merged_df["timestamp"] = pd.to_datetime(merged_df["timestamp"]) - merged_df["timestamp"] = merged_df["timestamp"].astype(int) // 10**9 - metrics = { - column: Metric(1, 1.0) - for column in merged_df.columns - if column not in ["uuid","timestamp","buildUrl"] - } - data = { - column: merged_df[column] - for column in merged_df.columns - if column not in ["uuid","timestamp","buildUrl"] - } - attributes={column: merged_df[column] - for column in merged_df.columns if column in ["uuid","buildUrl"]} - series = Series( - test_name=test["name"], - branch=None, - time=list(merged_df["timestamp"]), - metrics=metrics, - data=data, - attributes=attributes, - ) - change_points = series.analyze().change_points_by_time - report = Report(series, change_points) - if output == "text": - output_table = report.produce_report( - test_name="test", report_type=ReportType.LOG - ) - print(output_table) - elif output == "json": - change_points_by_metric = series.analyze().change_points - output_json = parse_json_output(merged_df, change_points_by_metric) - print(json.dumps(output_json, indent=4)) - - -def parse_json_output(merged_df, change_points_by_metric): - """json output generator function - - Args: - merged_df (pd.Dataframe): the dataframe to be converted to json - change_points_by_metric (_type_): different change point - - Returns: - _type_: _description_ - """ - df_json = merged_df.to_json(orient="records") - df_json = json.loads(df_json) - - for index, entry in enumerate(df_json): - entry["metrics"] = { - key: {"value": entry.pop(key), "percentage_change": 0} - for key in entry.keys() - {"uuid", "timestamp", "buildUrl"} - } - entry["is_changepoint"] = False - - for key in change_points_by_metric.keys(): - for change_point in change_points_by_metric[key]: - index = change_point.index - percentage_change = ( - (change_point.stats.mean_2 - change_point.stats.mean_1) - / change_point.stats.mean_1 - ) * 100 - df_json[index]["metrics"][key]["percentage_change"] = percentage_change - df_json[index]["is_changepoint"] = True - - return df_json - - -# pylint: disable=too-many-locals -def get_metric_data(ids, index, metrics, match, logger): - """Gets details metrics basked on metric yaml list - - Args: - ids (list): list of all uuids - index (dict): index in es of where to find data - metrics (dict): metrics to gather data on - match (Matcher): current matcher instance - logger (logger): log data to one output - - Returns: - dataframe_list: dataframe of the all metrics - """ - dataframe_list = [] - for metric in metrics: - metric_name = metric["name"] - logger.info("Collecting %s", metric_name) - metric_of_interest = metric["metric_of_interest"] - - if "agg" in metric.keys(): - try: - cpu = match.get_agg_metric_query(ids, index, metric) - agg_value = metric["agg"]["value"] - agg_type = metric["agg"]["agg_type"] - agg_name = agg_value + "_" + agg_type - cpu_df = match.convert_to_df(cpu, columns=["uuid", "timestamp", agg_name]) - cpu_df= cpu_df.drop_duplicates(subset=['uuid'],keep='first') - cpu_df = cpu_df.rename(columns={agg_name: metric_name + "_" + agg_type}) - dataframe_list.append(cpu_df) - logger.debug(cpu_df) - - except Exception as e: # pylint: disable=broad-exception-caught - logger.error( - "Couldn't get agg metrics %s, exception %s", - metric_name, - e, - ) - else: - try: - podl = match.getResults("", ids, index, metric) - podl_df = match.convert_to_df( - podl, columns=["uuid", "timestamp", metric_of_interest] - ) - podl_df= podl_df.drop_duplicates(subset=['uuid'],keep='first') - podl_df = 
podl_df.rename(columns={metric_of_interest: - metric_name + "_" + metric_of_interest}) - dataframe_list.append(podl_df) - logger.debug(podl_df) - except Exception as e: # pylint: disable=broad-exception-caught - logger.error( - "Couldn't get metrics %s, exception %s", - metric_name, - e, - ) - return dataframe_list - - -def get_metadata(test, logger): - """Gets metadata of the run from each test - - Args: - test (dict): test dictionary - - Returns: - dict: dictionary of the metadata - """ - metadata = test["metadata"] - metadata["ocpVersion"] = str(metadata["ocpVersion"]) - logger.debug("metadata" + str(metadata)) - return metadata - -def get_build_urls(index, uuids,match): - """Gets metadata of the run from each test - to get the build url - - Args: - uuids (list): str list of uuid to find build urls of - match: the fmatch instance - - - Returns: - dict: dictionary of the metadata - """ - - test = match.getResults("",uuids,index,{}) - buildUrls = {run["uuid"]: run["buildUrl"] for run in test} - return buildUrls - -def filter_metadata(uuid,match,logger): - """Gets metadata of the run from each test - - Args: - uuid (str): str of uuid ot find metadata of - match: the fmatch instance - - - Returns: - dict: dictionary of the metadata - """ - - test = match.get_metadata_by_uuid(uuid) - metadata = { - 'platform': '', - 'clusterType': '', - 'masterNodesCount': 0, - 'workerNodesCount': 0, - 'infraNodesCount': 0, - 'masterNodesType': '', - 'workerNodesType': '', - 'infraNodesType': '', - 'totalNodesCount': 0, - 'ocpVersion': '', - 'networkType': '', - 'ipsec': '', - 'fips': '', - 'encrypted': '', - 'publish': '', - 'computeArch': '', - 'controlPlaneArch': '' - } - for k,v in test.items(): - if k not in metadata: - continue - metadata[k] = v - metadata['benchmark.keyword'] = test['benchmark'] - metadata["ocpVersion"] = str(metadata["ocpVersion"]) - - #Remove any keys that have blank values - no_blank_meta = {k: v for k, v in metadata.items() if v} - logger.debug('No blank metadata dict: ' + str(no_blank_meta)) - return no_blank_meta - - - -def set_logging(level, logger): - """sets log level and format - - Args: - level (_type_): level of the log - logger (_type_): logger object - - Returns: - logging.Logger: a formatted and level set logger - """ - logger.setLevel(level) - handler = logging.StreamHandler(sys.stdout) - handler.setLevel(level) - formatter = logging.Formatter( - "%(asctime)s [%(name)s:%(filename)s:%(lineno)d] %(levelname)s: %(message)s" - ) - handler.setFormatter(formatter) - logger.addHandler(handler) - return logger - - -def load_config(config, logger): - """Loads config file - - Args: - config (str): path to config file - logger (Logger): logger - - Returns: - dict: dictionary of the config file - """ - try: - with open(config, "r", encoding="utf-8") as file: - data = yaml.safe_load(file) - logger.debug("The %s file has successfully loaded", config) - except FileNotFoundError as e: - logger.error("Config file not found: %s", e) - sys.exit(1) - except Exception as e: # pylint: disable=broad-exception-caught - logger.error("An error occurred: %s", e) - sys.exit(1) - return data From 627caaf6cc067f3397d5173ea19829d8988c06e8 Mon Sep 17 00:00:00 2001 From: Shashank Reddy Boyapally Date: Fri, 31 May 2024 14:13:23 -0400 Subject: [PATCH 10/12] docs update Signed-off-by: Shashank Reddy Boyapally --- README.md | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 71c0cc0..f21dab9 100644 --- a/README.md +++ b/README.md @@ -7,6 +7,7 @@ 
Below is an illustrative example of the config and metadata that Orion can handl
 tests :
   - name : aws-small-scale-cluster-density-v2
     index: ospst-perf-scale-ci-*
+    benchmarkIndex: ospst-ripsaw-kube-burner*
     metadata:
       platform: AWS
       masterNodesType: m6a.xlarge
@@ -14,7 +15,7 @@ tests :
       workerNodesType: m6a.xlarge
       workerNodesCount: 24
       benchmark.keyword: cluster-density-v2
-      ocpVersion: 4.15
+      ocpVersion: {{ version }}
       networkType: OVNKubernetes
       # encrypted: true
       # fips: false
@@ -51,14 +52,14 @@ tests :
       agg:
         value: cpu
         agg_type: avg
-  
-    - name: etcdDisck
+
+    - name: etcdDisk
       metricName : 99thEtcdDiskBackendCommitDurationSeconds
       metric_of_interest: value
       agg:
         value: duration
         agg_type: avg
-  
+
 ```
 
 ## Build Orion
 Building Orion is a straightforward process. Follow these commands:
 
 **Note: Orion Compatibility**
 
-Orion currently supports Python versions `3.8.x`, `3.9.x`, `3.10.x`, and `3.11.x`. Please be aware that using other Python versions might lead to dependency conflicts caused by hunter, creating a challenging situation known as "dependency hell." It's crucial to highlight that Python `3.12.x` may result in errors due to the removal of distutils, a dependency used by numpy. This information is essential to ensure a smooth experience with Orion and avoid potential compatibility issues.
+Orion currently supports Python version `3.11.x`. Using other Python versions may lead to dependency conflicts caused by hunter, a situation commonly known as "dependency hell"; in particular, Python `3.12.x` can fail because numpy depends on distutils, which was removed in that release.
 
 Clone the current repository using git clone.
``` >> git clone ->> pip install venv +>> python3 -m venv venv >> source venv/bin/activate >> pip install -r requirements.txt >> export ES_SERVER = @@ -196,7 +197,9 @@ Orion's seamless integration with metadata and hunter ensures a robust regressio ``` tests : - - name : current-uuid-etcd-duration + - name : aws-small-scale-cluster-density-v2 + index: ospst-perf-scale-ci-* + benchmarkIndex: ospst-ripsaw-kube-burner* metrics : - name: etcdDisck metricName : 99thEtcdDiskBackendCommitDurationSeconds From 33250ac32b852db4c791849f741772047ab7b3ee Mon Sep 17 00:00:00 2001 From: Shashank Reddy Boyapally Date: Fri, 31 May 2024 14:39:52 -0400 Subject: [PATCH 11/12] mentioned templates Signed-off-by: Shashank Reddy Boyapally --- configs/small-scale-cluster-density.yml | 1 + configs/small-scale-node-density-cni.yml | 1 + 2 files changed, 2 insertions(+) diff --git a/configs/small-scale-cluster-density.yml b/configs/small-scale-cluster-density.yml index 7eed915..63646b2 100644 --- a/configs/small-scale-cluster-density.yml +++ b/configs/small-scale-cluster-density.yml @@ -1,3 +1,4 @@ +# This is a template file tests : - name : aws-small-scale-cluster-density-v2 index: ospst-perf-scale-ci-* diff --git a/configs/small-scale-node-density-cni.yml b/configs/small-scale-node-density-cni.yml index fcf7dbe..48e3622 100644 --- a/configs/small-scale-node-density-cni.yml +++ b/configs/small-scale-node-density-cni.yml @@ -1,3 +1,4 @@ +# This is a template file tests : - name : aws-small-scale-node-density-cni index: ospst-perf-scale-ci-* From 306515eecfae5f125ec8e309bc0658380c99aef5 Mon Sep 17 00:00:00 2001 From: Shashank Reddy Boyapally Date: Fri, 31 May 2024 15:26:28 -0400 Subject: [PATCH 12/12] increased url shortener timeout to 10s Signed-off-by: Shashank Reddy Boyapally --- pkg/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/utils.py b/pkg/utils.py index 87ff013..bf606bf 100644 --- a/pkg/utils.py +++ b/pkg/utils.py @@ -305,7 +305,7 @@ def process_test(test, match, output, uuid, baseline): lambda left, right: pd.merge(left, right, on="uuid", how="inner"), dataframe_list, ) - shortener = pyshorteners.Shortener() + shortener = pyshorteners.Shortener(timeout=10) merged_df["buildUrl"] = merged_df["uuid"].apply( lambda uuid: shortener.tinyurl.short(buildUrls[uuid]) #pylint: disable = cell-var-from-loop )
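
A closing note on this last change: `pyshorteners` forwards `timeout` to its underlying HTTP requests, so each TinyURL call now waits up to 10 seconds rather than failing on slow responses. A usage sketch (assuming network access to the TinyURL service; the long URL is illustrative):

```python
import pyshorteners

# Allow up to 10 seconds per shortening request instead of the library default.
shortener = pyshorteners.Shortener(timeout=10)
short_url = shortener.tinyurl.short("https://example.com/a/very/long/build/url")
print(short_url)
```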