Json output (#21)
* added output flag

Signed-off-by: Shashank Reddy Boyapally <[email protected]>

* json output

Signed-off-by: Shashank Reddy Boyapally <[email protected]>

* pylint error fixed and formatted

Signed-off-by: Shashank Reddy Boyapally <[email protected]>

* removed prints

Signed-off-by: Shashank Reddy Boyapally <[email protected]>

* rebased, handled duplicates, changed table column names

Signed-off-by: Shashank Reddy Boyapally <[email protected]>

* rebased

Signed-off-by: Shashank Reddy Boyapally <[email protected]>

* fixed pylint errors

Signed-off-by: Shashank Reddy Boyapally <[email protected]>

---------

Signed-off-by: Shashank Reddy Boyapally <[email protected]>
Authored by shashank-boyapally on May 22, 2024
1 parent 4546fd6 · commit daad3af
Showing 2 changed files with 92 additions and 42 deletions.
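With these changes in place, a run that writes per-test CSVs and prints the analysis as JSON might be invoked roughly as follows (entry point and paths are illustrative and depend on how Orion is installed):

    orion --config config.yaml --output-path results.csv --hunter-analyze -o json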
orion.py (28 changes: 17 additions & 11 deletions)
@@ -32,9 +32,16 @@ def cli(max_content_width=120):
@click.option("--uuid", default="", help="UUID to use as base for comparisons")
@click.option("--baseline", default="", help="Baseline UUID(s) to to compare against uuid")
@click.option("--config", default="config.yaml", help="Path to the configuration file")
@click.option("--output", default="output.csv", help="Path to save the output csv file")
@click.option("--output-path", default="output.csv", help="Path to save the output csv file")
@click.option("--debug", is_flag=True, help="log level ")
@click.option("--hunter-analyze",is_flag=True, help="run hunter analyze")
@click.option(
"-o",
"--output",
type=click.Choice(["json", "text"]),
default="text",
help="Choose output format (json or text)",
)
def orion(**kwargs):
"""Orion is the cli tool to detect regressions over the runs
@@ -67,8 +74,8 @@ def orion(**kwargs):
benchmarkIndex=test['benchmarkIndex']
uuid = kwargs["uuid"]
baseline = kwargs["baseline"]
index = "ospst-perf-scale-ci-*"
match = Matcher(index=index,
fingerprint_index = test["index"]
match = Matcher(index=fingerprint_index,
level=level, ES_URL=ES_URL, verify_certs=False)
if uuid == "":
metadata = orion_funcs.get_metadata(test, logger)
@@ -86,23 +93,22 @@ def orion(**kwargs):
else:
uuids = [uuid for uuid in re.split(' |,',baseline) if uuid]
uuids.append(uuid)
buildUrls = orion_funcs.get_build_urls(index, uuids,match)
buildUrls = orion_funcs.get_build_urls(fingerprint_index, uuids,match)

index=benchmarkIndex
fingerprint_index=benchmarkIndex
if metadata["benchmark.keyword"] in ["ingress-perf","k8s-netperf"] :
ids = uuids
else:
if baseline == "":
runs = match.match_kube_burner(uuids, index)
runs = match.match_kube_burner(uuids, fingerprint_index)
ids = match.filter_runs(runs, runs)
else:
ids = uuids

metrics = test["metrics"]
dataframe_list = orion_funcs.get_metric_data(ids, index, metrics, match, logger)
dataframe_list = orion_funcs.get_metric_data(ids, fingerprint_index, metrics, match, logger)

for i, df in enumerate(dataframe_list):
if i != 0:
if i != 0 and ('timestamp' in df.columns):
dataframe_list[i] = df.drop(columns=['timestamp'])

merged_df = reduce(
@@ -113,13 +119,13 @@ def orion(**kwargs):
shortener = pyshorteners.Shortener()
merged_df["buildUrl"] = merged_df["uuid"].apply(
lambda uuid: shortener.tinyurl.short(buildUrls[uuid])) #pylint: disable = cell-var-from-loop
csv_name = kwargs["output"].split(".")[0]+"-"+test['name']+".csv"
csv_name = kwargs["output_path"].split(".")[0]+"-"+test['name']+".csv"
match.save_results(
merged_df, csv_file_path=csv_name
)

if kwargs["hunter_analyze"]:
_ = orion_funcs.run_hunter_analyze(merged_df,test)
orion_funcs.run_hunter_analyze(merged_df,test,kwargs["output"])


if __name__ == "__main__":
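A minimal sketch of how the per-test CSV name is derived from the renamed --output-path option above (the test name is a hypothetical placeholder, not taken from the repository's config):

    output_path = "results.csv"                  # value passed via --output-path
    test_name = "small-scale-cluster-density"    # hypothetical test name from config.yaml
    csv_name = output_path.split(".")[0] + "-" + test_name + ".csv"
    print(csv_name)  # results-small-scale-cluster-density.csv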
utils/orion_funcs.py (106 changes: 75 additions & 31 deletions)
@@ -4,6 +4,7 @@
"""
# pylint: disable = import-error

import json
import logging
import sys

@@ -14,7 +15,7 @@
from hunter.series import Metric, Series


def run_hunter_analyze(merged_df,test):
def run_hunter_analyze(merged_df, test, output):
"""Start hunter analyze function
Args:
@@ -23,27 +24,71 @@ def run_hunter_analyze(merged_df,test):
"""
merged_df["timestamp"] = pd.to_datetime(merged_df["timestamp"])
merged_df["timestamp"] = merged_df["timestamp"].astype(int) // 10**9
metrics = {column: Metric(1, 1.0)
for column in merged_df.columns
if column not in ["uuid","timestamp","buildUrl"]}
data = {column: merged_df[column]
for column in merged_df.columns
if column not in ["uuid","timestamp","buildUrl"]}
metrics = {
column: Metric(1, 1.0)
for column in merged_df.columns
if column not in ["uuid","timestamp","buildUrl"]
}
data = {
column: merged_df[column]
for column in merged_df.columns
if column not in ["uuid","timestamp","buildUrl"]
}
attributes={column: merged_df[column]
for column in merged_df.columns if column in ["uuid","buildUrl"]}
series=Series(
series = Series(
test_name=test["name"],
branch=None,
time=list(merged_df["timestamp"]),
metrics=metrics,
data=data,
attributes=attributes
attributes=attributes,
)
change_points=series.analyze().change_points_by_time
report=Report(series,change_points)
output = report.produce_report(test_name="test",report_type=ReportType.LOG)
print(output)
return change_points
change_points = series.analyze().change_points_by_time
report = Report(series, change_points)
if output == "text":
output_table = report.produce_report(
test_name="test", report_type=ReportType.LOG
)
print(output_table)
elif output == "json":
change_points_by_metric = series.analyze().change_points
output_json = parse_json_output(merged_df, change_points_by_metric)
print(json.dumps(output_json, indent=4))


def parse_json_output(merged_df, change_points_by_metric):
"""json output generator function
Args:
merged_df (pd.Dataframe): the dataframe to be converted to json
change_points_by_metric (dict): change points grouped by metric name
Returns:
list: per-run records with metric values, percentage change, and changepoint flags
"""
df_json = merged_df.to_json(orient="records")
df_json = json.loads(df_json)

for index, entry in enumerate(df_json):
entry["metrics"] = {
key: {"value": entry.pop(key), "percentage_change": 0}
for key in entry.keys() - {"uuid", "timestamp", "buildUrl"}
}
entry["is_changepoint"] = False

for key in change_points_by_metric.keys():
for change_point in change_points_by_metric[key]:
index = change_point.index
percentage_change = (
(change_point.stats.mean_2 - change_point.stats.mean_1)
/ change_point.stats.mean_1
) * 100
df_json[index]["metrics"][key]["percentage_change"] = percentage_change
df_json[index]["is_changepoint"] = True

return df_json


# pylint: disable=too-many-locals
def get_metric_data(ids, index, metrics, match, logger):
@@ -61,22 +106,19 @@ def get_metric_data(ids, index, metrics, match, logger):
"""
dataframe_list = []
for metric in metrics:
metric_name = metric['name']
metric_name = metric["name"]
logger.info("Collecting %s", metric_name)
metric_of_interest = metric['metric_of_interest']
metric_of_interest = metric["metric_of_interest"]

if "agg" in metric.keys():
try:
cpu = match.get_agg_metric_query(
ids, index, metric
)
agg_value = metric['agg']['value']
agg_type = metric['agg']['agg_type']
cpu = match.get_agg_metric_query(ids, index, metric)
agg_value = metric["agg"]["value"]
agg_type = metric["agg"]["agg_type"]
agg_name = agg_value + "_" + agg_type
cpu_df = match.convert_to_df(cpu, columns=["uuid","timestamp", agg_name])
cpu_df = cpu_df.rename(
columns={agg_name: metric_name+ "_" + agg_name}
)
cpu_df = match.convert_to_df(cpu, columns=["uuid", "timestamp", agg_name])
cpu_df= cpu_df.drop_duplicates(subset=['uuid'],keep='first')
cpu_df = cpu_df.rename(columns={agg_name: metric_name + "_" + agg_type})
dataframe_list.append(cpu_df)
logger.debug(cpu_df)

@@ -92,6 +134,9 @@ def get_metric_data(ids, index, metrics, match, logger):
podl_df = match.convert_to_df(
podl, columns=["uuid", "timestamp", metric_of_interest]
)
podl_df= podl_df.drop_duplicates(subset=['uuid'],keep='first')
podl_df = podl_df.rename(columns={metric_of_interest:
metric_name + "_" + metric_of_interest})
dataframe_list.append(podl_df)
logger.debug(podl_df)
except Exception as e: # pylint: disable=broad-exception-caught
@@ -103,7 +148,7 @@ def get_metric_data(ids, index, metrics, match, logger):
return dataframe_list


def get_metadata(test,logger):
def get_metadata(test, logger):
"""Gets metadata of the run from each test
Args:
@@ -112,12 +157,11 @@ def get_metadata(test,logger):
Returns:
dict: dictionary of the metadata
"""
metadata=test['metadata']
metadata = test["metadata"]
metadata["ocpVersion"] = str(metadata["ocpVersion"])
logger.debug('metadata' + str(metadata))
logger.debug("metadata" + str(metadata))
return metadata


def get_build_urls(index, uuids,match):
"""Gets metadata of the run from each test
to get the build url
@@ -135,7 +179,6 @@ def get_build_urls(index, uuids,match):
buildUrls = {run["uuid"]: run["buildUrl"] for run in test}
return buildUrls


def filter_metadata(uuid,match,logger):
"""Gets metadata of the run from each test
Expand Down Expand Up @@ -202,7 +245,8 @@ def set_logging(level, logger):
logger.addHandler(handler)
return logger

def load_config(config,logger):

def load_config(config, logger):
"""Loads config file
Args:
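For reference, a hedged sketch of the output emitted with -o json, following the structure built by parse_json_output above (UUIDs, metric names, and values are illustrative):

    [
        {
            "uuid": "abc123-example-uuid",
            "timestamp": 1716336000,
            "buildUrl": "https://tinyurl.com/example",
            "metrics": {
                "podReadyLatency_P99": {
                    "value": 12345.0,
                    "percentage_change": 0
                }
            },
            "is_changepoint": false
        }
    ]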
