diff --git a/examples/small-scale-cluster-density.yaml b/examples/small-scale-cluster-density.yaml
index 703f1b5..c2b13b3 100644
--- a/examples/small-scale-cluster-density.yaml
+++ b/examples/small-scale-cluster-density.yaml
@@ -1,5 +1,7 @@
 tests :
   - name : aws-small-scale-cluster-density-v2
+    index: ospst-perf-scale-ci-*
+    benchmarkIndex: ospst-ripsaw-kube-burner*
     metadata:
       platform: AWS
       masterNodesType: m6a.xlarge
diff --git a/examples/small-scale-node-density-cni.yaml b/examples/small-scale-node-density-cni.yaml
index 1821f1a..28536d1 100644
--- a/examples/small-scale-node-density-cni.yaml
+++ b/examples/small-scale-node-density-cni.yaml
@@ -1,5 +1,7 @@
 tests :
   - name : aws-small-scale-node-density-cni
+    index: ospst-perf-scale-ci-*
+    benchmarkIndex: ospst-ripsaw-kube-burner*
     metadata:
       platform: AWS
       masterNodesType: m6a.xlarge
diff --git a/orion.py b/orion.py
index a65ee5d..ce0e6ea 100644
--- a/orion.py
+++ b/orion.py
@@ -1,12 +1,15 @@
 """
 This is the cli file for orion, tool to detect regressions using hunter
 """
+
 # pylint: disable = import-error
 import sys
+import warnings
 from functools import reduce
 import logging
 import os
 import re
+import pyshorteners
 
 import click
 import pandas as pd
@@ -14,6 +17,7 @@ from fmatch.matcher import Matcher
 
 from utils import orion_funcs
 
+warnings.filterwarnings("ignore", message="Unverified HTTPS request.*")
 
 @click.group()
 # pylint: disable=unused-argument
@@ -22,7 +26,8 @@ def cli(max_content_width=120):
     cli function to group commands
     """
 
-# pylint: disable=too-many-locals
+
+# pylint: disable=too-many-locals, too-many-statements
 @click.command()
 @click.option("--uuid", default="", help="UUID to use as base for comparisons")
 @click.option("--baseline", default="", help="Baseline UUID(s) to to compare against uuid")
@@ -50,18 +55,20 @@ def orion(**kwargs):
     ES_URL=None
 
     if "ES_SERVER" in data.keys():
-        ES_URL = data['ES_SERVER']
+        ES_URL = data["ES_SERVER"]
     else:
-        if 'ES_SERVER' in os.environ:
-            ES_URL=os.environ.get("ES_SERVER")
+        if "ES_SERVER" in os.environ:
+            ES_URL = os.environ.get("ES_SERVER")
         else:
             logger.error("ES_SERVER environment variable/config variable not set")
             sys.exit(1)
-
+    shortener = pyshorteners.Shortener()
     for test in data["tests"]:
+        benchmarkIndex=test['benchmarkIndex']
         uuid = kwargs["uuid"]
         baseline = kwargs["baseline"]
-        match = Matcher(index="perf_scale_ci", level=level, ES_URL=ES_URL)
+        match = Matcher(index="ospst-perf-scale-ci-*",
+                        level=level, ES_URL=ES_URL, verify_certs=False)
         if uuid == "":
             metadata = orion_funcs.get_metadata(test, logger)
         else:
@@ -69,23 +76,21 @@
         logger.info("The test %s has started", test["name"])
 
         if baseline == "":
-            uuids = match.get_uuid_by_metadata(metadata)
+            runs = match.get_uuid_by_metadata(metadata)
+            uuids = [run["uuid"] for run in runs]
+            buildUrls = {run["uuid"]: run["buildUrl"] for run in runs}
             if len(uuids) == 0:
                 logging.info("No UUID present for given metadata")
                 sys.exit()
         else:
             uuids = [uuid for uuid in re.split(' |,',baseline) if uuid]
             uuids.append(uuid)
-        if metadata["benchmark.keyword"] == "k8s-netperf" :
-            index = "k8s-netperf"
-            ids = uuids
-        elif metadata["benchmark.keyword"] == "ingress-perf" :
-            index = "ingress-performance"
+        index=benchmarkIndex
+        if metadata["benchmark.keyword"] in ["ingress-perf","k8s-netperf"] :
             ids = uuids
         else:
-            index = "ripsaw-kube-burner"
             if baseline == "":
-                runs = match.match_kube_burner(uuids)
+                runs = match.match_kube_burner(uuids, index)
                 ids = match.filter_runs(runs, runs)
             else:
                 ids = uuids
@@ -93,18 +98,25 @@ def orion(**kwargs):
         metrics = test["metrics"]
         dataframe_list = orion_funcs.get_metric_data(ids, index, metrics, match, logger)
 
+        for i, df in enumerate(dataframe_list):
+            if i != 0:
+                dataframe_list[i] = df.drop(columns=['timestamp'])
+
         merged_df = reduce(
             lambda left, right: pd.merge(left, right, on="uuid", how="inner"),
             dataframe_list,
         )
+        shortener = pyshorteners.Shortener()
+        merged_df["buildUrl"] = merged_df["uuid"].apply(
+            lambda uuid: shortener.tinyurl.short(buildUrls[uuid])) #pylint: disable = cell-var-from-loop
 
         csv_name = kwargs["output"].split(".")[0]+"-"+test['name']+".csv"
         match.save_results(
             merged_df, csv_file_path=csv_name
         )
 
         if kwargs["hunter_analyze"]:
-            orion_funcs.run_hunter_analyze(merged_df,test)
+            _ = orion_funcs.run_hunter_analyze(merged_df,test)
 
 
 if __name__ == "__main__":
diff --git a/requirements.txt b/requirements.txt
index e298835..d3224dd 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,10 +3,11 @@ certifi==2023.11.17
 click==8.1.7
 elastic-transport==8.11.0
 elasticsearch==7.13.0
-fmatch==0.0.5
+fmatch==0.0.7
 python-dateutil==2.8.2
 pytz==2023.3.post1
 PyYAML==6.0.1
 six==1.16.0
 tzdata==2023.4
 urllib3==1.26.18
+pyshorteners==1.0.1
\ No newline at end of file
diff --git a/utils/orion_funcs.py b/utils/orion_funcs.py
index 58f2730..1359367 100644
--- a/utils/orion_funcs.py
+++ b/utils/orion_funcs.py
@@ -25,11 +25,12 @@ def run_hunter_analyze(merged_df,test):
     merged_df["timestamp"] = merged_df["timestamp"].astype(int) // 10**9
     metrics = {column: Metric(1, 1.0)
                for column in merged_df.columns
-               if column not in ["uuid","timestamp"]}
+               if column not in ["uuid","timestamp","buildUrl"]}
     data = {column: merged_df[column]
             for column in merged_df.columns
-            if column not in ["uuid","timestamp"]}
-    attributes={column: merged_df[column] for column in merged_df.columns if column in ["uuid"]}
+            if column not in ["uuid","timestamp","buildUrl"]}
+    attributes={column: merged_df[column]
+                for column in merged_df.columns if column in ["uuid","buildUrl"]}
     series=Series(
         test_name=test["name"],
         branch=None,
@@ -42,6 +43,7 @@
     report=Report(series,change_points)
     output = report.produce_report(test_name="test",report_type=ReportType.LOG)
     print(output)
+    return change_points
 
 # pylint: disable=too-many-locals
 def get_metric_data(ids, index, metrics, match, logger):
@@ -71,7 +73,7 @@ def get_metric_data(ids, index, metrics, match, logger):
             agg_value = metric['agg']['value']
             agg_type = metric['agg']['agg_type']
             agg_name = agg_value + "_" + agg_type
-            cpu_df = match.convert_to_df(cpu, columns=["uuid", agg_name])
+            cpu_df = match.convert_to_df(cpu, columns=["uuid","timestamp", agg_name])
             cpu_df = cpu_df.rename(
                 columns={agg_name: metric_name+ "_" + agg_name}
             )
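
Note on the dataframe merge in orion.py: with the convert_to_df change in utils/orion_funcs.py, every per-metric dataframe now carries a timestamp column, so all but the first must drop it before the reduce/merge. A minimal sketch of the failure mode being avoided, using made-up metric column names (not real Orion metrics):

# Sketch only: why all but the first dataframe drop "timestamp"
# before the reduce/merge in orion.py. Column names are illustrative.
from functools import reduce

import pandas as pd

df_a = pd.DataFrame({"uuid": ["u1", "u2"],
                     "timestamp": [1700000000, 1700003600],
                     "podReadyLatency_avg": [3.1, 3.4]})
df_b = pd.DataFrame({"uuid": ["u1", "u2"],
                     "timestamp": [1700000000, 1700003600],
                     "apiserverCPU_avg": [0.8, 0.9]})

# Keeping "timestamp" in both frames would yield timestamp_x/timestamp_y
# after the merge, leaving no plain "timestamp" column for
# run_hunter_analyze() to convert to epoch seconds.
dataframe_list = [df_a, df_b.drop(columns=["timestamp"])]
merged_df = reduce(
    lambda left, right: pd.merge(left, right, on="uuid", how="inner"),
    dataframe_list,
)
print(merged_df.columns.tolist())
# ['uuid', 'timestamp', 'podReadyLatency_avg', 'apiserverCPU_avg']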
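
Note on the buildUrl column: each run's build URL is shortened through pyshorteners' TinyURL backend before being written to the CSV and passed to hunter as an attribute. A standalone sketch, with a hypothetical buildUrls mapping standing in for the one built from match.get_uuid_by_metadata():

# Sketch only: the buildUrl shortening used in orion.py. The buildUrls
# dict here is hypothetical; the real one maps run UUIDs to buildUrl
# fields returned from Elasticsearch.
import pyshorteners

buildUrls = {"abc-123": "https://jenkins.example.com/job/perf-scale/1234/"}

shortener = pyshorteners.Shortener()
# tinyurl.short() makes a network call to the TinyURL API.
print(shortener.tinyurl.short(buildUrls["abc-123"]))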