cloud-bulldozer · vishnuchalla · Aug 1, 2024 · Jul 15, 2024 · Jul 29, 2024 · Jul 29, 2024
diff --git a/orion.py b/orion.py
@@ -6,6 +6,7 @@
 import logging
 import sys
 import warnings
+from typing import Any
 import click
 import uvicorn
 from fmatch.logrus import SingletonLogger
@@ -26,7 +27,7 @@ class MutuallyExclusiveOption(click.Option):
         click (Option): _description_
     """
 
-    def __init__(self, *args, **kwargs):
+    def __init__(self, *args: tuple, **kwargs: dict[str, dict]) -> None:
         self.mutually_exclusive = set(kwargs.pop("mutually_exclusive", []))
         help = kwargs.get("help", "")  # pylint: disable=redefined-builtin
         if self.mutually_exclusive:
@@ -46,7 +47,7 @@ def handle_parse_result(self, ctx, opts, args):
         return super().handle_parse_result(ctx, opts, args)
 
 
-def validate_anomaly_options(ctx, param, value): # pylint: disable = W0613
+def validate_anomaly_options(ctx, param, value: Any) -> Any: # pylint: disable = W0613
     """ validate options so that can only be used with certain flags
     """
     if value or (
@@ -130,7 +131,7 @@ def cmd_analysis(**kwargs):
 @cli.command(name="daemon")
 @click.option("--debug", default=False, is_flag=True, help="log level")
 @click.option("--port", default=8080, help="set port")
-def rundaemon(debug, port):
+def rundaemon(debug: bool, port: int):
     """
     Orion runs on daemon mode
     \b

diff --git a/pkg/algorithm.py b/pkg/algorithm.py
diff --git a/pkg/algorithms/__init__.py b/pkg/algorithms/__init__.py
@@ -0,0 +1,8 @@
+"""
+Init for the pkg.algorithms package
+"""
+
+from .edivisive.edivisive import EDivisive
+from .isolationforest.isolationForest import IsolationForestWeightedMean
+from .algorithmFactory import AlgorithmFactory
+from .algorithm import Algorithm
diff --git a/pkg/algorithms/algorithm.py b/pkg/algorithms/algorithm.py
@@ -0,0 +1,177 @@
+"""Module for Generic Algorithm class"""
+
+from abc import ABC, abstractmethod
+from itertools import groupby
+import json
+from typing import Any, Dict, List, Tuple, Union
+import pandas as pd
+from fmatch.matcher import Matcher
+from hunter.report import Report, ReportType
+from hunter.series import Series, Metric, ChangePoint, ChangePointGroup
+import pkg.constants as cnsts
+
+
+from pkg.utils import json_to_junit
+
+
+class Algorithm(ABC):
+    """Generic Algorithm class for algorithm factory
+
+    Subclasses implement ``_analyze``; the methods here render its result as
+    JSON, text or JUnit output.
+    """
+
+    def __init__(  # pylint: disable = too-many-arguments
+        self,
+        matcher: Matcher,
+        dataframe: pd.DataFrame,
+        test: dict,
+        options: dict,
+        metrics_config: dict[str, dict],
+    ) -> None:
+        """Store the inputs shared by the analysis and output methods
+
+        Args:
+            matcher (Matcher): fmatch matcher, kept on the instance
+            dataframe (pd.DataFrame): data to analyze; a "timestamp" column and
+                one column per metrics_config key are read from it
+            test (dict): test definition; its "name" entry labels the output
+            options (dict): options handed to json_to_junit
+            metrics_config (dict[str, dict]): settings per metric, keyed by
+                dataframe column name; the "direction" entry is read
+        """
+        self.matcher = matcher
+        self.dataframe = dataframe
+        self.test = test
+        self.options = options
+        self.metrics_config = metrics_config
+
+    def output_json(self) -> Tuple[str, str]:
+        """Method to output json output
+
+        Every dataframe row becomes one record holding a "metrics" mapping
+        (value and percentage_change per configured metric) and an
+        "is_changepoint" flag.
+
+        Returns:
+            Tuple[str, str]: returns test_name and json output
+        """
+        _, change_points_by_metric = self._analyze()
+        records = json.loads(self.dataframe.to_json(orient="records"))
+
+        # Move each metric column under "metrics" with a neutral default.
+        for entry in records:
+            entry["metrics"] = {
+                key: {"value": entry.pop(key), "percentage_change": 0}
+                for key in self.metrics_config
+            }
+            entry["is_changepoint"] = False
+
+        for key, change_points in change_points_by_metric.items():
+            for change_point in change_points:
+                stats = change_point.stats
+                direction = self.metrics_config[key]["direction"]
+                # NOTE(review): a zero mean_1 is not handled here - confirm
+                # whether the analysis can ever produce one.
+                percentage_change = (
+                    (stats.mean_2 - stats.mean_1) / stats.mean_1
+                ) * 100
+                # direction 0 reports every change; otherwise only changes
+                # whose sign matches the configured direction are reported.
+                if percentage_change * direction > 0 or direction == 0:
+                    record = records[change_point.index]
+                    record["metrics"][key]["percentage_change"] = percentage_change
+                    record["is_changepoint"] = True
+
+        return self.test["name"], json.dumps(records, indent=2)
+
+    def output_text(self) -> Tuple[str, str]:
+        """Outputs the data in text/tabular format
+
+        Returns:
+            Tuple[str, str]: test name and the report produced by hunter
+        """
+        series, change_points_by_metric = self._analyze()
+        change_points_by_time = self.group_change_points_by_time(
+            series, change_points_by_metric
+        )
+        report = Report(series, change_points_by_time)
+        output_table = report.produce_report(
+            test_name=self.test["name"], report_type=ReportType.LOG
+        )
+        return self.test["name"], output_table
+
+    def output_junit(self) -> Tuple[str, str]:
+        """Output junit format
+
+        Builds the JSON output first and passes it to json_to_junit.
+
+        Returns:
+            Tuple[str, str]: test name and the value returned by json_to_junit
+        """
+        test_name, data_json = self.output_json()
+        data_junit = json_to_junit(
+            test_name=test_name,
+            data_json=json.loads(data_json),
+            metrics_config=self.metrics_config,
+            options=self.options,
+        )
+        return test_name, data_junit
+
+    @abstractmethod
+    def _analyze(self) -> Tuple[Series, Dict[str, List[ChangePoint]]]:
+        """Analyze algorithm
+
+        Returns:
+            Tuple[Series, Dict[str, List[ChangePoint]]]: the analyzed series
+                and the detected change points keyed by metric name
+        """
+
+    def group_change_points_by_time(
+        self, series: Series, change_points: Dict[str, List[ChangePoint]]
+    ) -> List[ChangePointGroup]:
+        """Return changepoint by time
+
+        Args:
+            series (Series): Series of data
+            change_points (Dict[str, List[ChangePoint]]): change points keyed
+                by metric name
+
+        Returns:
+            List[ChangePointGroup]: one group per distinct index, in ascending
+                index order, holding every change point found at that index
+        """
+        changes: List[ChangePoint] = [
+            change
+            for metric_changes in change_points.values()
+            for change in metric_changes
+        ]
+        # groupby only merges adjacent items, so sort by the same key first.
+        changes.sort(key=lambda c: c.index)
+
+        # NOTE(review): for k == 0 the "previous" lookups use k - 1 == -1 -
+        # confirm a change point can never sit at index 0.
+        return [
+            ChangePointGroup(
+                index=k,
+                time=series.time[k],
+                prev_time=series.time[k - 1],
+                attributes=series.attributes_at(k),
+                prev_attributes=series.attributes_at(k - 1),
+                changes=list(g),
+            )
+            for k, g in groupby(changes, key=lambda c: c.index)
+        ]
+
+    def setup_series(self) -> Series:
+        """Build the hunter Series from the dataframe and the metrics config
+
+        Returns:
+            Series: series named after the test, with one metric per
+                metrics_config key and the "uuid"/"buildUrl" columns (when
+                present) attached as attributes
+        """
+        # A metric without an explicit "direction" falls back to 1 here.
+        metrics = {
+            column: Metric(value.get("direction", 1), 1.0)
+            for column, value in self.metrics_config.items()
+        }
+        data = {column: self.dataframe[column] for column in self.metrics_config}
+        attributes = {
+            column: self.dataframe[column]
+            for column in self.dataframe.columns
+            if column in ["uuid", "buildUrl"]
+        }
+        return Series(
+            test_name=self.test["name"],
+            branch=None,
+            time=list(self.dataframe["timestamp"]),
+            metrics=metrics,
+            data=data,
+            attributes=attributes,
+        )
+
+    def output(self, output_format: str) -> Tuple[str, str]:
+        """Method to select output method
+
+        Args:
+            output_format (str): format of the output
+
+        Raises:
+            ValueError: In case of unmatched output
+
+        Returns:
+            Tuple[str, str]: test name and the rendered output
+        """
+        if output_format == cnsts.JSON:
+            return self.output_json()
+        if output_format == cnsts.TEXT:
+            return self.output_text()
+        if output_format == cnsts.JUNIT:
+            return self.output_junit()
+        # f-string so the rejected format actually appears in the message
+        raise ValueError(f"Unsupported output format {output_format} selected")
diff --git a/pkg/algorithmFactory.py → pkg/algorithms/algorithmFactory.py b/pkg/algorithmFactory.py → pkg/algorithms/algorithmFactory.py
@@ -1,14 +1,17 @@
 """
 Algorithm Factory to choose available algorithms
 """
-from pkg.edivisive import EDivisive
-from pkg.isolationForest import IsolationForestWeightedMean
+from fmatch.matcher import Matcher
+import pandas as pd
 import pkg.constants as cnsts
+from .edivisive import EDivisive
+from .isolationforest import IsolationForestWeightedMean
 
-class AlgorithmFactory: # pylint: disable= too-few-public-methods, too-many-arguments
+
+class AlgorithmFactory: # pylint: disable= too-few-public-methods, too-many-arguments, line-too-long
     """Algorithm Factory to choose algorithm
     """
-    def instantiate_algorithm(self, algorithm, matcher, dataframe, test, options, metrics_config):
+    def instantiate_algorithm(self, algorithm: str, matcher: Matcher, dataframe:pd.DataFrame, test: dict, options: dict, metrics_config: dict[str,dict]):
         """Algorithm instantiation method
 
         Args:

diff --git a/pkg/algorithms/edivisive/__init__.py b/pkg/algorithms/edivisive/__init__.py
@@ -0,0 +1,4 @@
+"""
+Init for E-Divisive Algorithm
+"""
+from .edivisive import EDivisive
diff --git a/pkg/algorithms/edivisive/edivisive.py b/pkg/algorithms/edivisive/edivisive.py
@@ -0,0 +1,29 @@
+"""EDivisive Algorithm from hunter"""
+
+# pylint: disable = line-too-long
+import pandas as pd
+from pkg.algorithms.algorithm import Algorithm
+
+
+class EDivisive(Algorithm):
+    """Implementation of the EDivisive algorithm using hunter
+
+    Inherits the output handling from Algorithm and supplies ``_analyze``.
+    """
+
+    def _analyze(self):
+        """Detect change points and drop the ones against the metric direction
+
+        The "timestamp" column of the dataframe is rewritten in place as
+        integers (seconds, assuming nanosecond-resolution datetimes).
+
+        Returns:
+            tuple: the series built by setup_series and the remaining change
+                points keyed by metric name
+        """
+        # Convert only once per instance: parsing the already converted
+        # integers again would read them as nanoseconds and collapse them.
+        if not getattr(self, "_timestamp_converted", False):
+            timestamps = pd.to_datetime(self.dataframe["timestamp"])
+            # Explicit int64: plain ``int`` can map to a 32-bit type on some platforms.
+            self.dataframe["timestamp"] = timestamps.astype("int64") // 10**9
+            self._timestamp_converted = True  # pylint: disable = attribute-defined-outside-init
+        series = self.setup_series()
+        change_points_by_metric = series.analyze().change_points
+
+        # filter by direction; slice assignment keeps the list objects that
+        # series.analyze() returned
+        for metric, changepoint_list in change_points_by_metric.items():
+            changepoint_list[:] = [
+                change_point
+                for change_point in changepoint_list
+                if not self._is_against_direction(metric, change_point)
+            ]
+
+        return series, change_points_by_metric
+
+    def _is_against_direction(self, metric, change_point) -> bool:
+        """Tell whether a change point moves opposite to the metric direction"""
+        direction = self.metrics_config[metric]["direction"]
+        stats = change_point.stats
+        return (direction == 1 and stats.mean_1 > stats.mean_2) or (
+            direction == -1 and stats.mean_1 < stats.mean_2
+        )
diff --git a/pkg/algorithms/isolationforest/__init__.py b/pkg/algorithms/isolationforest/__init__.py
@@ -0,0 +1,4 @@
+"""
+init for isolation forest
+"""
+from .isolationForest import IsolationForestWeightedMean