From c598081ddd0f5b47f326a3bad3f741262f59714d Mon Sep 17 00:00:00 2001
From: Shashank Reddy Boyapally
Date: Fri, 9 Feb 2024 14:46:16 -0500
Subject: [PATCH] hunter integration and refactor (#11)

* hunter integration and refactor

Signed-off-by: Shashank Reddy Boyapally

* updated version windows for requirements and updated pylint workflow

Signed-off-by: Shashank Reddy Boyapally

* added compatibility notes to Readme

Signed-off-by: Shashank Reddy Boyapally

* reduced requirements.txt

Signed-off-by: Shashank Reddy Boyapally

* pylinting only 3.11

Signed-off-by: Shashank Reddy Boyapally

---------

Signed-off-by: Shashank Reddy Boyapally
---
 .github/workflows/pylint.yml |   4 +-
 README.md                    |   8 ++
 orion.py                     | 131 +++++----------------------
 requirements.txt             |   5 +-
 setup.py                     |   4 +-
 utils/__init__.py            |   0
 utils/orion_funcs.py         | 164 +++++++++++++++++++++++++++++++++++
 7 files changed, 200 insertions(+), 116 deletions(-)
 create mode 100644 utils/__init__.py
 create mode 100644 utils/orion_funcs.py

diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml
index 9ded563..d8c23ef 100644
--- a/.github/workflows/pylint.yml
+++ b/.github/workflows/pylint.yml
@@ -7,7 +7,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ["3.8", "3.9", "3.10", "3.11"]
+        python-version: ["3.11"]
     steps:
     - uses: actions/checkout@v3
     - name: Set up Python ${{ matrix.python-version }}
@@ -18,6 +18,8 @@
       run: |
         python -m pip install --upgrade pip
         pip install pylint
+        pip install -r requirements.txt
+        pip install .
     - name: Analysing the code with pylint
       run: |
         pylint -d C0103 $(git ls-files '*.py')
\ No newline at end of file
diff --git a/README.md b/README.md
index 24d81fa..986e8d4 100644
--- a/README.md
+++ b/README.md
@@ -63,6 +63,10 @@ tests :
 ## Build Orion
 Building Orion is a straightforward process. Follow these commands:
 
+**Note: Orion Compatibility**
+
+Orion currently supports Python versions `3.8.x`, `3.9.x`, `3.10.x`, and `3.11.x`. Other Python versions can run into dependency conflicts pulled in by hunter, and Python `3.12.x` in particular may fail because it removes distutils, which numpy still requires here.
+
 Clone the current repository using git clone.
 
 ```
@@ -84,8 +88,10 @@ Orion provides flexibility in configuring its behavior by allowing users to set
 
 For enhanced troubleshooting and debugging, Orion supports the ```--debug``` flag, enabling the generation of detailed debug logs.
 
+Run Orion's regression detection over performance-scale CPT runs with the ```--hunter-analyze``` flag, which feeds the collected metrics and their metadata through hunter's change-point analysis.
+
 Additionally, users can specify a custom path for the output CSV file using the ```--output``` flag, providing control over the location where the generated CSV will be stored.
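A note on how ```--output``` now behaves: the orion.py hunk below derives one CSV per test from the supplied path. A minimal sketch of that naming rule (the test name is illustrative; the expression mirrors the one in the hunk):

```python
# Per-test CSV naming used in orion.py below: the stem of the
# --output path, a dash, the test name, then the .csv extension.
output = "output.csv"            # value of the --output option
test = {"name": "node-density"}  # illustrative test entry from config.yaml

csv_file_path = output.split(".")[0] + "-" + test["name"] + ".csv"
print(csv_file_path)  # -> output-node-density.csv
```

Because the stem comes from a naive split on the first ".", a relative path such as ```./out.csv``` collapses to an empty stem; paths whose only dot is the extension behave as expected.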
diff --git a/orion.py b/orion.py
index 891d0bd..b0e24c3 100644
--- a/orion.py
+++ b/orion.py
@@ -8,9 +8,11 @@
 import os
 
 import click
-import yaml
 import pandas as pd
+
 from fmatch.matcher import Matcher
+from utils.orion_funcs import run_hunter_analyze, get_metadata, \
+    set_logging, load_config, get_metric_data
 
 
 @click.group()
@@ -24,7 +26,8 @@ def cli():
 @click.option("--config", default="config.yaml", help="Path to the configuration file")
 @click.option("--output", default="output.csv", help="Path to save the output csv file")
 @click.option("--debug", is_flag=True, help="log level ")
-def orion(config, debug, output):
+@click.option("--hunter-analyze", is_flag=True, help="run hunter analyze")
+def orion(config, debug, output, hunter_analyze):
     """Orion is the cli tool to detect regressions over the runs
 
     Args:
@@ -35,25 +38,22 @@ def orion(config, debug, output):
     level = logging.DEBUG if debug else logging.INFO
     logger = logging.getLogger("Orion")
     logger = set_logging(level, logger)
+    data = load_config(config, logger)
+    ES_URL = None
+
+    if "ES_SERVER" in data.keys():
+        ES_URL = data['ES_SERVER']
+    else:
+        if 'ES_SERVER' in os.environ:
+            ES_URL = os.environ.get("ES_SERVER")
+        else:
+            logger.error("ES_SERVER environment variable/config variable not set")
+            sys.exit(1)
 
-    if "ES_SERVER" not in os.environ:
-        logger.error("ES_SERVER environment variable not set")
-        sys.exit(1)
-
-    try:
-        with open(config, "r", encoding="utf-8") as file:
-            data = yaml.safe_load(file)
-            logger.debug("The %s file has successfully loaded", config)
-    except FileNotFoundError as e:
-        logger.error("Config file not found: %s", e)
-        sys.exit(1)
-    except Exception as e: # pylint: disable=broad-exception-caught
-        logger.error("An error occurred: %s", e)
-        sys.exit(1)
     for test in data["tests"]:
         metadata = get_metadata(test, logger)
         logger.info("The test %s has started", test["name"])
-        match = Matcher(index="perf_scale_ci", level=level)
+        match = Matcher(index="perf_scale_ci", level=level, ES_URL=ES_URL)
         uuids = match.get_uuid_by_metadata(metadata)
         if len(uuids) == 0:
             print("No UUID present for given metadata")
@@ -77,103 +77,12 @@ def orion(config, debug, output):
             lambda left, right: pd.merge(left, right, on="uuid", how="inner"),
             dataframe_list,
         )
-        match.save_results(merged_df, csv_file_path=output)
-
-
-def get_metric_data(ids, index, metrics, match, logger):
-    """Gets details metrics basked on metric yaml list
+        match.save_results(merged_df, csv_file_path=output.split(".")[0] + "-" + test['name'] + ".csv")
 
-    Args:
-        ids (list): list of all uuids
-        index (dict): index in es of where to find data
-        metrics (dict): metrics to gather data on
-        match (Matcher): current matcher instance
-        logger (logger): log data to one output
-
-    Returns:
-        dataframe_list: dataframe of the all metrics
-    """
-    dataframe_list = []
-    for metric in metrics:
-        metric_name = metric['name']
-        logger.info("Collecting %s", metric_name)
-        metric_of_interest = metric['metric_of_interest']
-
-        if "agg" in metric.keys():
-            try:
-                cpu = match.get_agg_metric_query(
-                    ids, index, metric
-                )
-                agg_value = metric['agg']['value']
-                agg_type = metric['agg']['agg_type']
-                agg_name = agg_value + "_" + agg_type
-                cpu_df = match.convert_to_df(cpu, columns=["uuid", agg_name])
-                cpu_df = cpu_df.rename(
-                    columns={agg_name: metric_name+ "_" + agg_name}
-                )
-                dataframe_list.append(cpu_df)
-                logger.debug(cpu_df)
-
-            except Exception as e: # pylint: disable=broad-exception-caught
-                logger.error(
-                    "Couldn't get agg metrics %s, exception %s",
-                    metric_name,
-                    e,
-                )
-        else:
-            try:
-                podl = match.getResults("", ids, index, metric)
-                podl_df = match.convert_to_df(
-                    podl, columns=["uuid", "timestamp", metric_of_interest]
-                )
-                dataframe_list.append(podl_df)
-                logger.debug(podl_df)
-            except Exception as e: # pylint: disable=broad-exception-caught
-                logger.error(
-                    "Couldn't get metrics %s, exception %s",
-                    metric_name,
-                    e,
-                )
-    return dataframe_list
-
-def get_metadata(test,logger):
-    """Gets metadata of the run from each test
+        if hunter_analyze:
+            run_hunter_analyze(merged_df, test)
 
-    Args:
-        test (dict): test dictionary
-
-    Returns:
-        dict: dictionary of the metadata
-    """
-    metadata = {}
-    for k,v in test.items():
-        if k in ["metrics","name"]:
-            continue
-        metadata[k] = v
-    metadata["ocpVersion"] = str(metadata["ocpVersion"])
-    logger.debug('metadata' + str(metadata))
-    return metadata
-
-
-def set_logging(level, logger):
-    """sets log level and format
-
-    Args:
-        level (_type_): level of the log
-        logger (_type_): logger object
-
-    Returns:
-        logging.Logger: a formatted and level set logger
-    """
-    logger.setLevel(level)
-    handler = logging.StreamHandler(sys.stdout)
-    handler.setLevel(level)
-    formatter = logging.Formatter(
-        "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
-    )
-    handler.setFormatter(formatter)
-    logger.addHandler(handler)
-    return logger
 
 
 if __name__ == "__main__":
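The ES_SERVER lookup added in the hunk above gives the config file precedence over the environment. A condensed restatement for clarity (the helper name ```resolve_es_url``` is introduced here for illustration only; the patch inlines this logic in ```orion()```):

```python
import os
import sys

def resolve_es_url(data, logger):
    """Mirror of the inlined lookup: the config file's ES_SERVER key
    wins, the environment is the fallback, and neither being set is fatal."""
    if "ES_SERVER" in data:
        return data["ES_SERVER"]
    if "ES_SERVER" in os.environ:
        return os.environ["ES_SERVER"]
    logger.error("ES_SERVER environment variable/config variable not set")
    sys.exit(1)
```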
diff --git a/requirements.txt b/requirements.txt
index 218fe88..0c7d487 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,14 +1,13 @@
+hunter @ git+https://github.com/datastax-labs/hunter.git@8ff166979d000780ad548e49f006ef2a15d54123
 certifi==2023.11.17
 click==8.1.7
 elastic-transport==8.11.0
 elasticsearch==8.11.1
 elasticsearch7==7.13.0
 fmatch==0.0.4
-numpy==1.26.3
-pandas==2.1.4
 python-dateutil==2.8.2
 pytz==2023.3.post1
 PyYAML==6.0.1
 six==1.16.0
 tzdata==2023.4
-urllib3==1.26.18
+urllib3==1.26.18
\ No newline at end of file
diff --git a/setup.py b/setup.py
index be07410..52fdafc 100644
--- a/setup.py
+++ b/setup.py
@@ -2,7 +2,7 @@
 """
 setup.py for orion cli tool
 """
-from setuptools import setup
+from setuptools import setup, find_packages
 
 setup(
     name='orion',
@@ -17,6 +17,8 @@
             'orion = orion:orion',
         ],
     },
+    packages=find_packages(),
+    package_data={'utils': ['utils.py'], 'hunter': ['*.py']},
     classifiers=[
         'Programming Language :: Python :: 3',
         'License :: OSI Approved :: MIT License',
diff --git a/utils/__init__.py b/utils/__init__.py
new file mode 100644
index 0000000..e69de29
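Before the new module below, one conversion worth seeing in isolation: ```run_hunter_analyze``` hands hunter plain epoch seconds rather than pandas timestamps. The same two-step conversion on a toy frame:

```python
import pandas as pd

# Parse the timestamp column, then integer-divide the nanosecond
# representation down to whole seconds since the epoch.
df = pd.DataFrame({"timestamp": ["2024-02-09 14:46:16"]})
df["timestamp"] = pd.to_datetime(df["timestamp"])
df["timestamp"] = df["timestamp"].astype(int) // 10**9
print(df["timestamp"].iloc[0])  # -> 1707489976
```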
diff --git a/utils/orion_funcs.py b/utils/orion_funcs.py
new file mode 100644
index 0000000..f0fdd4b
--- /dev/null
+++ b/utils/orion_funcs.py
@@ -0,0 +1,164 @@
+# pylint: disable=cyclic-import
+"""
+module for all utility functions orion uses
+"""
+# pylint: disable = import-error
+
+import logging
+import sys
+
+import yaml
+import pandas as pd
+
+from hunter.report import Report, ReportType
+from hunter.series import Metric, Series
+
+
+def run_hunter_analyze(merged_df, test):
+    """Starts the hunter analyze function
+
+    Args:
+        merged_df (DataFrame): merged dataframe of all the metrics
+        test (dict): test dictionary with each test's information
+    """
+    merged_df["timestamp"] = pd.to_datetime(merged_df["timestamp"])
+    merged_df["timestamp"] = merged_df["timestamp"].astype(int) // 10**9
+    metrics = {column: Metric(1, 1.0)
+               for column in merged_df.columns
+               if column not in ["uuid", "timestamp"]}
+    data = {column: merged_df[column]
+            for column in merged_df.columns
+            if column not in ["uuid", "timestamp"]}
+    attributes = {column: merged_df[column]
+                  for column in merged_df.columns if column in ["uuid"]}
+    series = Series(
+        test_name=test["name"],
+        branch=None,
+        time=list(merged_df["timestamp"]),
+        metrics=metrics,
+        data=data,
+        attributes=attributes,
+    )
+    change_points = series.analyze().change_points_by_time
+    report = Report(series, change_points)
+    output = report.produce_report(test_name="test", report_type=ReportType.LOG)
+    print(output)
+
+
+# pylint: disable=too-many-locals
+def get_metric_data(ids, index, metrics, match, logger):
+    """Gets detailed metrics based on the metric yaml list
+
+    Args:
+        ids (list): list of all uuids
+        index (dict): index in es of where to find data
+        metrics (dict): metrics to gather data on
+        match (Matcher): current matcher instance
+        logger (logger): log data to one output
+
+    Returns:
+        dataframe_list: list of dataframes, one per metric
+    """
+    dataframe_list = []
+    for metric in metrics:
+        metric_name = metric['name']
+        logger.info("Collecting %s", metric_name)
+        metric_of_interest = metric['metric_of_interest']
+
+        if "agg" in metric.keys():
+            try:
+                cpu = match.get_agg_metric_query(
+                    ids, index, metric
+                )
+                agg_value = metric['agg']['value']
+                agg_type = metric['agg']['agg_type']
+                agg_name = agg_value + "_" + agg_type
+                cpu_df = match.convert_to_df(cpu, columns=["uuid", agg_name])
+                cpu_df = cpu_df.rename(
+                    columns={agg_name: metric_name + "_" + agg_name}
+                )
+                dataframe_list.append(cpu_df)
+                logger.debug(cpu_df)
+
+            except Exception as e:  # pylint: disable=broad-exception-caught
+                logger.error(
+                    "Couldn't get agg metrics %s, exception %s",
+                    metric_name,
+                    e,
+                )
+        else:
+            try:
+                podl = match.getResults("", ids, index, metric)
+                podl_df = match.convert_to_df(
+                    podl, columns=["uuid", "timestamp", metric_of_interest]
+                )
+                dataframe_list.append(podl_df)
+                logger.debug(podl_df)
+            except Exception as e:  # pylint: disable=broad-exception-caught
+                logger.error(
+                    "Couldn't get metrics %s, exception %s",
+                    metric_name,
+                    e,
+                )
+    return dataframe_list
+
+
+def get_metadata(test, logger):
+    """Gets metadata of the run from each test
+
+    Args:
+        test (dict): test dictionary
+
+    Returns:
+        dict: dictionary of the metadata
+    """
+    metadata = {}
+    for k, v in test.items():
+        if k in ["metrics", "name"]:
+            continue
+        metadata[k] = v
+    metadata["ocpVersion"] = str(metadata["ocpVersion"])
+    logger.debug("metadata: %s", metadata)
+    return metadata
+
+
+def set_logging(level, logger):
+    """Sets log level and format
+
+    Args:
+        level (int): level of the log
+        logger (logging.Logger): logger object
+
+    Returns:
+        logging.Logger: a formatted and level-set logger
+    """
+    logger.setLevel(level)
+    handler = logging.StreamHandler(sys.stdout)
+    handler.setLevel(level)
+    formatter = logging.Formatter(
+        "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+    )
+    handler.setFormatter(formatter)
+    logger.addHandler(handler)
+    return logger
+
+
+def load_config(config, logger):
+    """Loads the config file
+
+    Args:
+        config (str): path to config file
+        logger (Logger): logger
+
+    Returns:
+        dict: dictionary of the config file
+    """
+    try:
+        with open(config, "r", encoding="utf-8") as file:
+            data = yaml.safe_load(file)
+            logger.debug("The %s file has successfully loaded", config)
+    except FileNotFoundError as e:
+        logger.error("Config file not found: %s", e)
+        sys.exit(1)
+    except Exception as e:  # pylint: disable=broad-exception-caught
+        logger.error("An error occurred: %s", e)
+        sys.exit(1)
+    return data
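Taken together, the new helpers form a short per-test pipeline. A hypothetical driver under assumed data (the dataframe values and test name below are made up; in Orion the frame comes from merging the ```get_metric_data``` results):

```python
import pandas as pd

from utils.orion_funcs import run_hunter_analyze

# Stand-in for the merged per-test dataframe built in orion():
# uuid and timestamp are required; every other column becomes a hunter Metric.
merged_df = pd.DataFrame({
    "uuid": ["a1", "b2", "c3", "d4"],
    "timestamp": ["2024-02-01", "2024-02-02", "2024-02-03", "2024-02-04"],
    "podReadyLatency_avg": [3.1, 3.2, 3.1, 9.8],  # step change on the last run
})
test = {"name": "example-test"}  # illustrative test entry

run_hunter_analyze(merged_df, test)  # prints hunter's change-point report
```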