From c810e06f714be313481498e137a0c9df6ba60de3 Mon Sep 17 00:00:00 2001 From: Shashank Reddy Boyapally Date: Fri, 2 Feb 2024 12:29:31 -0500 Subject: [PATCH 1/5] hunter integration and refactor Signed-off-by: Shashank Reddy Boyapally --- README.md | 4 ++ orion.py | 131 ++++++---------------------------- requirements.txt | 35 ++++++++- setup.py | 4 +- utils/__init__.py | 0 utils/orion_funcs.py | 164 +++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 225 insertions(+), 113 deletions(-) create mode 100644 utils/__init__.py create mode 100644 utils/orion_funcs.py diff --git a/README.md b/README.md index 24d81fa..19e3407 100644 --- a/README.md +++ b/README.md @@ -84,8 +84,12 @@ Orion provides flexibility in configuring its behavior by allowing users to set For enhanced troubleshooting and debugging, Orion supports the ```--debug``` flag, enabling the generation of detailed debug logs. +Activate Orion's regression detection tool for performance-scale CPT runs effortlessly with the ```--hunter-analyze``` flag. This seamlessly integrates with metadata and hunter, ensuring a robust and efficient regression detection process. + Additionally, users can specify a custom path for the output CSV file using the ```--output``` flag, providing control over the location where the generated CSV will be stored. + + Orion's seamless integration with metadata and hunter ensures a robust regression detection tool for perf-scale CPT runs. 
diff --git a/orion.py b/orion.py index 891d0bd..b0e24c3 100644 --- a/orion.py +++ b/orion.py @@ -8,9 +8,11 @@ import os import click -import yaml import pandas as pd + from fmatch.matcher import Matcher +from utils.orion_funcs import run_hunter_analyze, get_metadata, \ + set_logging, load_config, get_metric_data @click.group() @@ -24,7 +26,8 @@ def cli(): @click.option("--config", default="config.yaml", help="Path to the configuration file") @click.option("--output", default="output.csv", help="Path to save the output csv file") @click.option("--debug", is_flag=True, help="log level ") -def orion(config, debug, output): +@click.option("--hunter-analyze",is_flag=True, help="run hunter analyze") +def orion(config, debug, output,hunter_analyze): """Orion is the cli tool to detect regressions over the runs Args: @@ -35,25 +38,22 @@ def orion(config, debug, output): level = logging.DEBUG if debug else logging.INFO logger = logging.getLogger("Orion") logger = set_logging(level, logger) + data = load_config(config,logger) + ES_URL=None + + if "ES_SERVER" in data.keys(): + ES_URL = data['ES_SERVER'] + else: + if 'ES_SERVER' in os.environ: + ES_URL=os.environ.get("ES_SERVER") + else: + logger.error("ES_SERVER environment variable/config variable not set") + sys.exit(1) - if "ES_SERVER" not in os.environ: - logger.error("ES_SERVER environment variable not set") - sys.exit(1) - - try: - with open(config, "r", encoding="utf-8") as file: - data = yaml.safe_load(file) - logger.debug("The %s file has successfully loaded", config) - except FileNotFoundError as e: - logger.error("Config file not found: %s", e) - sys.exit(1) - except Exception as e: # pylint: disable=broad-exception-caught - logger.error("An error occurred: %s", e) - sys.exit(1) for test in data["tests"]: metadata = get_metadata(test, logger) logger.info("The test %s has started", test["name"]) - match = Matcher(index="perf_scale_ci", level=level) + match = Matcher(index="perf_scale_ci", level=level, ES_URL=ES_URL) 
uuids = match.get_uuid_by_metadata(metadata) if len(uuids) == 0: print("No UUID present for given metadata") @@ -77,103 +77,12 @@ def orion(config, debug, output): lambda left, right: pd.merge(left, right, on="uuid", how="inner"), dataframe_list, ) - match.save_results(merged_df, csv_file_path=output) - - -def get_metric_data(ids, index, metrics, match, logger): - """Gets details metrics basked on metric yaml list + match.save_results(merged_df, csv_file_path=output.split(".")[0]+"-"+test['name']+".csv") - Args: - ids (list): list of all uuids - index (dict): index in es of where to find data - metrics (dict): metrics to gather data on - match (Matcher): current matcher instance - logger (logger): log data to one output - - Returns: - dataframe_list: dataframe of the all metrics - """ - dataframe_list = [] - for metric in metrics: - metric_name = metric['name'] - logger.info("Collecting %s", metric_name) - metric_of_interest = metric['metric_of_interest'] - - if "agg" in metric.keys(): - try: - cpu = match.get_agg_metric_query( - ids, index, metric - ) - agg_value = metric['agg']['value'] - agg_type = metric['agg']['agg_type'] - agg_name = agg_value + "_" + agg_type - cpu_df = match.convert_to_df(cpu, columns=["uuid", agg_name]) - cpu_df = cpu_df.rename( - columns={agg_name: metric_name+ "_" + agg_name} - ) - dataframe_list.append(cpu_df) - logger.debug(cpu_df) - - except Exception as e: # pylint: disable=broad-exception-caught - logger.error( - "Couldn't get agg metrics %s, exception %s", - metric_name, - e, - ) - else: - try: - podl = match.getResults("", ids, index, metric) - podl_df = match.convert_to_df( - podl, columns=["uuid", "timestamp", metric_of_interest] - ) - dataframe_list.append(podl_df) - logger.debug(podl_df) - except Exception as e: # pylint: disable=broad-exception-caught - logger.error( - "Couldn't get metrics %s, exception %s", - metric_name, - e, - ) - return dataframe_list - -def get_metadata(test,logger): - """Gets metadata of the run from 
each test + if hunter_analyze: + run_hunter_analyze(merged_df,test) - Args: - test (dict): test dictionary - Returns: - dict: dictionary of the metadata - """ - metadata = {} - for k,v in test.items(): - if k in ["metrics","name"]: - continue - metadata[k] = v - metadata["ocpVersion"] = str(metadata["ocpVersion"]) - logger.debug('metadata' + str(metadata)) - return metadata - - -def set_logging(level, logger): - """sets log level and format - - Args: - level (_type_): level of the log - logger (_type_): logger object - - Returns: - logging.Logger: a formatted and level set logger - """ - logger.setLevel(level) - handler = logging.StreamHandler(sys.stdout) - handler.setLevel(level) - formatter = logging.Formatter( - "%(asctime)s - %(name)s - %(levelname)s - %(message)s" - ) - handler.setFormatter(formatter) - logger.addHandler(handler) - return logger if __name__ == "__main__": diff --git a/requirements.txt b/requirements.txt index 218fe88..39850b0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,14 +1,47 @@ +astroid==3.0.2 certifi==2023.11.17 +charset-normalizer==3.3.2 click==8.1.7 +dateparser==1.2.0 +DateTime==5.4 +decorator==5.1.1 +dill==0.3.7 elastic-transport==8.11.0 elasticsearch==8.11.1 elasticsearch7==7.13.0 +expandvars==0.6.5 +gevent==23.9.1 +greenlet==3.0.3 +hunter @ git+https://github.com/datastax-labs/hunter.git@8ff166979d000780ad548e49f006ef2a15d54123 +idna==3.6 +isort==5.13.2 +mccabe==0.7.0 +more-itertools==8.14.0 +numpy==1.24.0 fmatch==0.0.4 -numpy==1.26.3 pandas==2.1.4 +platformdirs==4.1.0 +pylint==3.0.3 +pystache==0.6.5 python-dateutil==2.8.2 pytz==2023.3.post1 PyYAML==6.0.1 +regex==2023.12.25 +requests==2.31.0 +ruamel.yaml==0.17.21 +ruamel.yaml.clib==0.2.8 +scipy==1.12.0 +signal-processing-algorithms==1.3.5 six==1.16.0 +slack_sdk==3.26.2 +structlog==19.2.0 +tabulate==0.8.10 +tomlkit==0.12.3 +typed-ast==1.5.5 +typing-extensions==3.10.0.2 tzdata==2023.4 +tzlocal==5.2 urllib3==1.26.18 +validators==0.18.2 +zope.event==5.0 +zope.interface==6.1 
diff --git a/setup.py b/setup.py index be07410..52fdafc 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ """ setup.py for orion cli tool """ -from setuptools import setup +from setuptools import setup, find_packages setup( name='orion', @@ -17,6 +17,8 @@ 'orion = orion:orion', ], }, + packages=find_packages(), + package_data={'utils': ['utils.py'],'hunter': ['*.py']}, classifiers=[ 'Programming Language :: Python :: 3', 'License :: OSI Approved :: MIT License', diff --git a/utils/__init__.py b/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/utils/orion_funcs.py b/utils/orion_funcs.py new file mode 100644 index 0000000..f0fdd4b --- /dev/null +++ b/utils/orion_funcs.py @@ -0,0 +1,164 @@ +# pylint: disable=cyclic-import +""" +module for all utility functions orion uses +""" +# pylint: disable = import-error + +import logging +import sys + +import yaml +import pandas as pd + +from hunter.report import Report, ReportType +from hunter.series import Metric, Series + + +def run_hunter_analyze(merged_df,test): + """Start hunter analyze function + + Args: + merged_df (Dataframe): merged dataframe of all the metrics + test (dict): test dictionary with the each test information + """ + merged_df["timestamp"] = pd.to_datetime(merged_df["timestamp"]) + merged_df["timestamp"] = merged_df["timestamp"].astype(int) // 10**9 + metrics = {column: Metric(1, 1.0) + for column in merged_df.columns + if column not in ["uuid","timestamp"]} + data = {column: merged_df[column] + for column in merged_df.columns + if column not in ["uuid","timestamp"]} + attributes={column: merged_df[column] for column in merged_df.columns if column in ["uuid"]} + series=Series( + test_name=test["name"], + branch=None, + time=list(merged_df["timestamp"]), + metrics=metrics, + data=data, + attributes=attributes + ) + change_points=series.analyze().change_points_by_time + report=Report(series,change_points) + output = 
report.produce_report(test_name="test",report_type=ReportType.LOG) + print(output) + +# pylint: disable=too-many-locals +def get_metric_data(ids, index, metrics, match, logger): + """Gets details metrics basked on metric yaml list + + Args: + ids (list): list of all uuids + index (dict): index in es of where to find data + metrics (dict): metrics to gather data on + match (Matcher): current matcher instance + logger (logger): log data to one output + + Returns: + dataframe_list: dataframe of the all metrics + """ + dataframe_list = [] + for metric in metrics: + metric_name = metric['name'] + logger.info("Collecting %s", metric_name) + metric_of_interest = metric['metric_of_interest'] + + if "agg" in metric.keys(): + try: + cpu = match.get_agg_metric_query( + ids, index, metric + ) + agg_value = metric['agg']['value'] + agg_type = metric['agg']['agg_type'] + agg_name = agg_value + "_" + agg_type + cpu_df = match.convert_to_df(cpu, columns=["uuid", agg_name]) + cpu_df = cpu_df.rename( + columns={agg_name: metric_name+ "_" + agg_name} + ) + dataframe_list.append(cpu_df) + logger.debug(cpu_df) + + except Exception as e: # pylint: disable=broad-exception-caught + logger.error( + "Couldn't get agg metrics %s, exception %s", + metric_name, + e, + ) + else: + try: + podl = match.getResults("", ids, index, metric) + podl_df = match.convert_to_df( + podl, columns=["uuid", "timestamp", metric_of_interest] + ) + dataframe_list.append(podl_df) + logger.debug(podl_df) + except Exception as e: # pylint: disable=broad-exception-caught + logger.error( + "Couldn't get metrics %s, exception %s", + metric_name, + e, + ) + return dataframe_list + + +def get_metadata(test,logger): + """Gets metadata of the run from each test + + Args: + test (dict): test dictionary + + Returns: + dict: dictionary of the metadata + """ + metadata = {} + for k,v in test.items(): + if k in ["metrics","name"]: + continue + metadata[k] = v + metadata["ocpVersion"] = str(metadata["ocpVersion"]) + 
logger.debug('metadata' + str(metadata)) + return metadata + + + +def set_logging(level, logger): + """sets log level and format + + Args: + level (_type_): level of the log + logger (_type_): logger object + + Returns: + logging.Logger: a formatted and level set logger + """ + logger.setLevel(level) + handler = logging.StreamHandler(sys.stdout) + handler.setLevel(level) + formatter = logging.Formatter( + "%(asctime)s - %(name)s - %(levelname)s - %(message)s" + ) + handler.setFormatter(formatter) + logger.addHandler(handler) + return logger + +def load_config(config,logger): + """Loads config file + + Args: + config (str): path to config file + logger (Logger): logger + + Returns: + dict: dictionary of the config file + """ + try: + with open(config, "r", encoding="utf-8") as file: + data = yaml.safe_load(file) + logger.debug("The %s file has successfully loaded", config) + except FileNotFoundError as e: + logger.error("Config file not found: %s", e) + sys.exit(1) + except Exception as e: # pylint: disable=broad-exception-caught + logger.error("An error occurred: %s", e) + sys.exit(1) + return data From 8e9c9e658b8ba1ba4937395e357360351658d33d Mon Sep 17 00:00:00 2001 From: Shashank Reddy Boyapally Date: Mon, 5 Feb 2024 15:20:55 -0500 Subject: [PATCH 2/5] updated version windows for requirements and updated pylint workflow Signed-off-by: Shashank Reddy Boyapally --- .github/workflows/pylint.yml | 2 ++ requirements.txt | 19 ++++++++++--------- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml index 9ded563..383186f 100644 --- a/.github/workflows/pylint.yml +++ b/.github/workflows/pylint.yml @@ -18,6 +18,8 @@ jobs: run: | python -m pip install --upgrade pip pip install pylint + pip install -r requirements.txt + pip install . 
- name: Analysing the code with pylint run: | pylint -d C0103 $(git ls-files '*.py') \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 39850b0..16505ee 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -astroid==3.0.2 +astroid==3.0.3 certifi==2023.11.17 charset-normalizer==3.3.2 click==8.1.7 @@ -10,6 +10,7 @@ elastic-transport==8.11.0 elasticsearch==8.11.1 elasticsearch7==7.13.0 expandvars==0.6.5 +fmatch==0.0.4 gevent==23.9.1 greenlet==3.0.3 hunter @ git+https://github.com/datastax-labs/hunter.git@8ff166979d000780ad548e49f006ef2a15d54123 @@ -17,9 +18,8 @@ idna==3.6 isort==5.13.2 mccabe==0.7.0 more-itertools==8.14.0 -numpy==1.24.0 -fmatch==0.0.4 -pandas==2.1.4 +numpy +pandas platformdirs==4.1.0 pylint==3.0.3 pystache==0.6.5 @@ -28,17 +28,18 @@ pytz==2023.3.post1 PyYAML==6.0.1 regex==2023.12.25 requests==2.31.0 -ruamel.yaml==0.17.21 -ruamel.yaml.clib==0.2.8 -scipy==1.12.0 -signal-processing-algorithms==1.3.5 +ruamel.yaml +ruamel.yaml.clib +scipy +signal-processing-algorithms==1.3.4 six==1.16.0 slack_sdk==3.26.2 structlog==19.2.0 tabulate==0.8.10 +tomli==2.0.1 tomlkit==0.12.3 typed-ast==1.5.5 -typing-extensions==3.10.0.2 +typing_extensions==4.9.0 tzdata==2023.4 tzlocal==5.2 urllib3==1.26.18 From e865a890d0260f3bb8d0957cafe13050f52202b0 Mon Sep 17 00:00:00 2001 From: Shashank Reddy Boyapally Date: Mon, 5 Feb 2024 15:44:51 -0500 Subject: [PATCH 3/5] added compatibility notes to Readme Signed-off-by: Shashank Reddy Boyapally --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index 19e3407..986e8d4 100644 --- a/README.md +++ b/README.md @@ -63,6 +63,10 @@ tests : ## Build Orion Building Orion is a straightforward process. Follow these commands: +**Note: Orion Compatibility** + +Orion currently supports Python versions `3.8.x`, `3.9.x`, `3.10.x`, and `3.11.x`. 
Please be aware that other Python versions may run into dependency conflicts caused by hunter ("dependency hell"). In particular, Python `3.12.x` may fail because distutils, a dependency used by numpy, was removed from the standard library in that release. Use one of the supported versions to avoid these compatibility issues. + Clone the current repository using git clone. ``` From ecd3ec2ad7b85ab6e5c5828f8cf7aac337f658db Mon Sep 17 00:00:00 2001 From: Shashank Reddy Boyapally Date: Wed, 7 Feb 2024 16:11:51 -0500 Subject: [PATCH 4/5] reduced requirements.txt Signed-off-by: Shashank Reddy Boyapally --- requirements.txt | 39 ++------------------------------------- 1 file changed, 2 insertions(+), 37 deletions(-) diff --git a/requirements.txt b/requirements.txt index 16505ee..0c7d487 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,48 +1,13 @@ -astroid==3.0.3 +hunter @ git+https://github.com/datastax-labs/hunter.git@8ff166979d000780ad548e49f006ef2a15d54123 certifi==2023.11.17 -charset-normalizer==3.3.2 click==8.1.7 -dateparser==1.2.0 -DateTime==5.4 -decorator==5.1.1 -dill==0.3.7 elastic-transport==8.11.0 elasticsearch==8.11.1 elasticsearch7==7.13.0 -expandvars==0.6.5 fmatch==0.0.4 -gevent==23.9.1 -greenlet==3.0.3 -hunter @ git+https://github.com/datastax-labs/hunter.git@8ff166979d000780ad548e49f006ef2a15d54123 -idna==3.6 -isort==5.13.2 -mccabe==0.7.0 -more-itertools==8.14.0 -numpy -pandas -platformdirs==4.1.0 -pylint==3.0.3 -pystache==0.6.5 python-dateutil==2.8.2 pytz==2023.3.post1 PyYAML==6.0.1 -regex==2023.12.25 -requests==2.31.0 -ruamel.yaml -ruamel.yaml.clib -scipy -signal-processing-algorithms==1.3.4 six==1.16.0 -slack_sdk==3.26.2 -structlog==19.2.0 -tabulate==0.8.10 -tomli==2.0.1 -tomlkit==0.12.3 -typed-ast==1.5.5 -typing_extensions==4.9.0 tzdata==2023.4 -tzlocal==5.2 -urllib3==1.26.18 -validators==0.18.2 -zope.event==5.0 -zope.interface==6.1 +urllib3==1.26.18 \ No newline at 
end of file From 3ede92cffc65270f7e6ab6c7b3e5a618a0f6049b Mon Sep 17 00:00:00 2001 From: Shashank Reddy Boyapally Date: Fri, 9 Feb 2024 14:19:21 -0500 Subject: [PATCH 5/5] pylinting only 3.11 Signed-off-by: Shashank Reddy Boyapally --- .github/workflows/pylint.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml index 383186f..d8c23ef 100644 --- a/.github/workflows/pylint.yml +++ b/.github/workflows/pylint.yml @@ -7,7 +7,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.8", "3.9", "3.10", "3.11"] + python-version: ["3.11"] steps: - uses: actions/checkout@v3 - name: Set up Python ${{ matrix.python-version }}