Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor #55

Merged
merged 3 commits into from
Aug 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions orion.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import logging
import sys
import warnings
from typing import Any
import click
import uvicorn
from fmatch.logrus import SingletonLogger
Expand All @@ -26,7 +27,7 @@ class MutuallyExclusiveOption(click.Option):
click (Option): _description_
"""

def __init__(self, *args, **kwargs):
def __init__(self, *args: tuple, **kwargs: dict[str, dict]) -> None:
self.mutually_exclusive = set(kwargs.pop("mutually_exclusive", []))
help = kwargs.get("help", "") # pylint: disable=redefined-builtin
if self.mutually_exclusive:
Expand All @@ -46,7 +47,7 @@ def handle_parse_result(self, ctx, opts, args):
return super().handle_parse_result(ctx, opts, args)


def validate_anomaly_options(ctx, param, value): # pylint: disable = W0613
def validate_anomaly_options(ctx, param, value: Any) -> Any: # pylint: disable = W0613
""" validate options so that can only be used with certain flags
"""
if value or (
Expand Down Expand Up @@ -130,7 +131,7 @@ def cmd_analysis(**kwargs):
@cli.command(name="daemon")
@click.option("--debug", default=False, is_flag=True, help="log level")
@click.option("--port", default=8080, help="set port")
def rundaemon(debug, port):
def rundaemon(debug: bool, port: int):
"""
Orion runs on daemon mode
\b
Expand Down
47 changes: 0 additions & 47 deletions pkg/algorithm.py

This file was deleted.

8 changes: 8 additions & 0 deletions pkg/algorithms/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"""
Init for pkg.algorithms module
"""

from .edivisive.edivisive import EDivisive
from .isolationforest.isolationForest import IsolationForestWeightedMean
from .algorithmFactory import AlgorithmFactory
from .algorithm import Algorithm
177 changes: 177 additions & 0 deletions pkg/algorithms/algorithm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
"""Module for Generic Algorithm class"""

from abc import ABC, abstractmethod
from itertools import groupby
import json
from typing import Any, Dict, List, Tuple, Union
import pandas as pd
from fmatch.matcher import Matcher
from hunter.report import Report, ReportType
from hunter.series import Series, Metric, ChangePoint, ChangePointGroup
import pkg.constants as cnsts


from pkg.utils import json_to_junit


class Algorithm(ABC):
    """Generic base class for the algorithms created by the algorithm factory.

    Subclasses implement :meth:`_analyze`; this class converts the analysis
    result into JSON, text or JUnit output.
    """

    def __init__(  # pylint: disable = too-many-arguments
        self,
        matcher: Matcher,
        dataframe: pd.DataFrame,
        test: dict,
        options: dict,
        metrics_config: dict[str, dict],
    ) -> None:
        """Store the inputs shared by every algorithm.

        Args:
            matcher (Matcher): matcher instance, kept for use by subclasses
            dataframe (pd.DataFrame): data to analyze; must contain a
                "timestamp" column and one column per configured metric
            test (dict): test definition; its "name" key labels the output
            options (dict): options forwarded to the junit conversion
            metrics_config (dict[str, dict]): per-metric settings keyed by
                dataframe column name (e.g. "direction")
        """
        self.matcher = matcher
        self.dataframe = dataframe
        self.test = test
        self.options = options
        self.metrics_config = metrics_config

    def output_json(self) -> Tuple[str, str]:
        """Method to output json output

        Returns:
            Tuple[str, str]: returns test_name and json output
        """
        _, change_points_by_metric = self._analyze()
        dataframe_json = json.loads(self.dataframe.to_json(orient="records"))

        # Move every configured metric column under a "metrics" mapping and
        # start each record as "not a changepoint".
        for entry in dataframe_json:
            entry["metrics"] = {
                key: {"value": entry.pop(key), "percentage_change": 0}
                for key in self.metrics_config
            }
            entry["is_changepoint"] = False

        for key, value in change_points_by_metric.items():
            for change_point in value:
                index = change_point.index
                stats = change_point.stats
                percentage_change = (
                    (stats.mean_2 - stats.mean_1) / stats.mean_1
                ) * 100
                direction = self.metrics_config[key]["direction"]
                # Flag the record only when the change goes in the configured
                # direction; a direction of 0 accepts changes either way.
                if percentage_change * direction > 0 or direction == 0:
                    dataframe_json[index]["metrics"][key][
                        "percentage_change"
                    ] = percentage_change
                    dataframe_json[index]["is_changepoint"] = True

        return self.test["name"], json.dumps(dataframe_json, indent=2)

    def output_text(self) -> Tuple[str, str]:
        """Outputs the data in text/tabular format

        Returns:
            Tuple[str, str]: returns test_name and the report produced with
            ReportType.LOG
        """
        series, change_points_by_metric = self._analyze()
        change_points_by_time = self.group_change_points_by_time(
            series, change_points_by_metric
        )
        report = Report(series, change_points_by_time)
        output_table = report.produce_report(
            test_name=self.test["name"], report_type=ReportType.LOG
        )
        return self.test["name"], output_table

    def output_junit(self) -> Tuple[str, str]:
        """Output junit format

        The JSON output is produced first and then converted with
        json_to_junit.

        Returns:
            Tuple[str, str]: returns test_name and junit output
        """
        test_name, data_json = self.output_json()
        data_json = json.loads(data_json)
        data_junit = json_to_junit(
            test_name=test_name,
            data_json=data_json,
            metrics_config=self.metrics_config,
            options=self.options,
        )
        return test_name, data_junit

    @abstractmethod
    def _analyze(self) -> Tuple[Series, Dict[str, List[ChangePoint]]]:
        """Analyze algorithm

        Returns:
            Tuple[Series, Dict[str, List[ChangePoint]]]: the analyzed series
            and the detected change points keyed by metric name
        """

    def group_change_points_by_time(
        self, series: Series, change_points: Dict[str, List[ChangePoint]]
    ) -> List[ChangePointGroup]:
        """Return changepoint by time

        Args:
            series (Series): Series of data
            change_points (Dict[str, List[ChangePoint]]): change points keyed
                by metric name

        Returns:
            List[ChangePointGroup]: one group per distinct index, ordered by
            index, holding every metric change detected at that index
        """
        changes: List[ChangePoint] = []
        for metric_changes in change_points.values():
            changes.extend(metric_changes)

        # groupby only merges adjacent items, so sort by the grouping key first.
        changes.sort(key=lambda c: c.index)
        points = []
        for k, g in groupby(changes, key=lambda c: c.index):
            # NOTE(review): for k == 0, "k - 1" wraps around to the last
            # sample; confirm change points are never reported at index 0.
            cp = ChangePointGroup(
                index=k,
                time=series.time[k],
                prev_time=series.time[k - 1],
                attributes=series.attributes_at(k),
                prev_attributes=series.attributes_at(k - 1),
                changes=list(g),
            )
            points.append(cp)

        return points

    def setup_series(self) -> Series:
        """Build the Series object from the dataframe and metrics config

        Returns:
            Series: series named after the test, with one metric per
            configured column and the "uuid"/"buildUrl" columns (when
            present) as attributes
        """
        # A metric without an explicit "direction" defaults to 1.
        metrics = {
            column: Metric(value.get("direction", 1), 1.0)
            for column, value in self.metrics_config.items()
        }
        data = {column: self.dataframe[column] for column in self.metrics_config}
        attributes = {
            column: self.dataframe[column]
            for column in self.dataframe.columns
            if column in ["uuid", "buildUrl"]
        }
        series = Series(
            test_name=self.test["name"],
            branch=None,
            time=list(self.dataframe["timestamp"]),
            metrics=metrics,
            data=data,
            attributes=attributes,
        )

        return series

    def output(self, output_format: str) -> Union[Any, None]:
        """Method to select output method

        Args:
            output_format (str): format of the output, compared against
                cnsts.JSON, cnsts.TEXT and cnsts.JUNIT

        Raises:
            ValueError: In case of unmatched output

        Returns:
            Tuple[str, str]: test name and the output in the chosen format
        """
        if output_format == cnsts.JSON:
            return self.output_json()
        if output_format == cnsts.TEXT:
            return self.output_text()
        if output_format == cnsts.JUNIT:
            return self.output_junit()
        # f-string so the offending format actually appears in the message.
        raise ValueError(f"Unsupported output format {output_format} selected")
11 changes: 7 additions & 4 deletions pkg/algorithmFactory.py → pkg/algorithms/algorithmFactory.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,17 @@
"""
Algorithm Factory to choose available algorithms
"""
from pkg.edivisive import EDivisive
from pkg.isolationForest import IsolationForestWeightedMean
from fmatch.matcher import Matcher
import pandas as pd
import pkg.constants as cnsts
from .edivisive import EDivisive
from .isolationforest import IsolationForestWeightedMean

class AlgorithmFactory: # pylint: disable= too-few-public-methods, too-many-arguments

class AlgorithmFactory: # pylint: disable= too-few-public-methods, too-many-arguments, line-too-long
"""Algorithm Factory to choose algorithm
"""
def instantiate_algorithm(self, algorithm, matcher, dataframe, test, options, metrics_config):
def instantiate_algorithm(self, algorithm: str, matcher: Matcher, dataframe:pd.DataFrame, test: dict, options: dict, metrics_config: dict[str,dict]):
"""Algorithm instantiation method

Args:
Expand Down
4 changes: 4 additions & 0 deletions pkg/algorithms/edivisive/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
"""
Init for E-Divisive Algorithm
"""
from .edivisive import EDivisive
29 changes: 29 additions & 0 deletions pkg/algorithms/edivisive/edivisive.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
"""EDivisive Algorithm from hunter"""

# pylint: disable = line-too-long
import pandas as pd
from pkg.algorithms.algorithm import Algorithm


class EDivisive(Algorithm):
    """EDivisive change point detection implemented on top of hunter

    Args:
        Algorithm (Algorithm): base class providing the output helpers
    """

    def _analyze(self):
        """Run the analysis and keep only changes in the configured direction

        The "timestamp" column of the dataframe is overwritten in place with
        integer epoch seconds before the series is built.

        Returns:
            tuple: the series and the change points keyed by metric name
        """
        parsed_timestamps = pd.to_datetime(self.dataframe["timestamp"])
        self.dataframe["timestamp"] = parsed_timestamps.astype(int) // 10**9
        series = self.setup_series()
        change_points_by_metric = series.analyze().change_points

        def against_direction(metric, change_point):
            # True when the mean moved opposite to the metric's direction:
            # a decrease for direction 1, an increase for direction -1.
            return (
                self.metrics_config[metric]["direction"] == 1
                and change_point.stats.mean_1 > change_point.stats.mean_2
            ) or (
                self.metrics_config[metric]["direction"] == -1
                and change_point.stats.mean_1 < change_point.stats.mean_2
            )

        # filter by direction; slice assignment mutates the existing lists
        for metric, changepoint_list in change_points_by_metric.items():
            changepoint_list[:] = [
                change_point
                for change_point in changepoint_list
                if not against_direction(metric, change_point)
            ]

        return series, change_points_by_metric
4 changes: 4 additions & 0 deletions pkg/algorithms/isolationforest/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
"""
init for isolation forest
"""
from .isolationForest import IsolationForestWeightedMean
Loading
Loading