From fa27298373771bf728da6b6810c8723cf1dfbf20 Mon Sep 17 00:00:00 2001 From: Patrick <55019140+pattplatt@users.noreply.github.com> Date: Fri, 20 Dec 2024 16:47:06 +0100 Subject: [PATCH 1/2] [ENH] Added ROCKAD anomaly detector to aeon (#2376) * Added ROCKAD anomaly detector to aeon * Added ROCKAD to anomaly_detection.rst * Empty commit for CI * Automatic `pre-commit` fixes * Fix newline at end of file * adapted code to fit refactored Rocket arguments * added "capability:multithreading": True to _tags * Catch power transform and disable it if it results in error * Added fallback when power transform fails, added ValueError if number of windows is smaller than n_neighbors and other general improvements * Added private attribute for power_transform activation/deactivation * added tests for kneighbors check, adapted univariate and multivariate tests for changes in ROCKAD * Removed pandas, set rocket normalise default to False, use transposed data for power transform to prevent bracket error, cleaned up code. Set standardize=True on power transform so that StandardScaler can be removed. 
"""ROCKAD anomaly detector."""

__all__ = ["ROCKAD"]

import warnings
from typing import Optional

import numpy as np
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import PowerTransformer
from sklearn.utils import resample

from aeon.anomaly_detection.base import BaseAnomalyDetector
from aeon.transformations.collection.convolution_based import Rocket
from aeon.utils.windowing import reverse_windowing, sliding_windows


class ROCKAD(BaseAnomalyDetector):
    """
    ROCKET-based Anomaly Detector (ROCKAD).

    ROCKAD leverages the ROCKET transformation for feature extraction from
    time series data and applies the scikit-learn k-nearest neighbours (k-NN)
    approach with bootstrap aggregation for robust anomaly detection.
    After windowing, the data gets transformed into the ROCKET feature space.
    Then the windows are compared based on the feature space by
    finding the nearest neighbours.

    The per-window score is the mean distance to the ``n_neighbors`` nearest
    training windows, averaged over all bootstrap estimators. Window scores are
    mapped back to time points and min-max scaled to ``[0, 1]``.

    Parameters
    ----------
    n_estimators : int, default=10
        Number of k-NN estimators to use in the bootstrap aggregation.
    n_kernels : int, default=100
        Number of kernels to use in the ROCKET transformation.
    normalise : bool, default=False
        Forwarded to the ``normalise`` argument of the ROCKET transformer.
    n_neighbors : int, default=5
        Number of neighbours to use for the k-NN algorithm.
    metric : str, default="euclidean"
        Distance metric handed to ``NearestNeighbors``. The estimators are built
        with ``algorithm="kd_tree"``, so the metric has to be one that
        scikit-learn accepts for that algorithm.
    power_transform : bool, default=True
        Whether to apply a power transformation (scikit-learn
        ``PowerTransformer`` with its default settings) to the features.
        If fitting the transformer fails, a ``UserWarning`` is issued and the
        untransformed features are used instead.
    window_size : int, default=10
        Size of the sliding window for segmenting input time series data.
    stride : int, default=1
        Step size for moving the sliding window over the time series data.
    n_jobs : int, default=1
        Number of parallel jobs to use for the k-NN algorithm and ROCKET
        transformation.
    random_state : int or None, default=42
        Random seed for reproducibility. Estimator ``i`` draws its bootstrap
        sample with seed ``random_state + i``. With ``None`` the bootstrap
        samples are drawn without a fixed seed.

    Attributes
    ----------
    rocket_transformer_ : Optional[Rocket]
        Instance of the ROCKET transformer used to extract features, set after
        fitting.
    list_baggers_ : Optional[list[NearestNeighbors]]
        List containing k-NN estimators used for anomaly scoring, set after
        fitting.
    power_transformer_ : Optional[PowerTransformer]
        Fitted power transformer. ``None`` before fitting, when
        ``power_transform`` is False, or when fitting the transformer failed.
    """

    _tags = {
        "capability:univariate": True,
        "capability:multivariate": True,
        "capability:missing_values": False,
        "capability:multithreading": True,
        "fit_is_empty": False,
    }

    def __init__(
        self,
        n_estimators=10,
        n_kernels=100,
        normalise=False,
        n_neighbors=5,
        metric="euclidean",
        power_transform=True,
        window_size: int = 10,
        stride: int = 1,
        n_jobs=1,
        random_state=42,
    ):

        self.n_estimators = n_estimators
        self.n_kernels = n_kernels
        self.normalise = normalise
        self.n_neighbors = n_neighbors
        self.n_jobs = n_jobs
        self.metric = metric
        self.power_transform = power_transform
        self.window_size = window_size
        self.stride = stride
        self.random_state = random_state

        self.rocket_transformer_: Optional[Rocket] = None
        self.list_baggers_: Optional[list[NearestNeighbors]] = None
        self.power_transformer_: Optional[PowerTransformer] = None

        super().__init__(axis=0)

    def _fit(self, X: np.ndarray, y: Optional[np.ndarray] = None) -> "ROCKAD":
        """Validate the parameters against ``X`` and fit on its sliding windows.

        Parameters
        ----------
        X : np.ndarray
            Training series with time points along axis 0.
        y : np.ndarray, optional
            Not used by this method.

        Returns
        -------
        ROCKAD
            The fitted detector (``self``).
        """
        self._check_params(X)
        # Time is on axis 0 because the detector is constructed with axis=0.
        _X, _ = sliding_windows(
            X, window_size=self.window_size, stride=self.stride, axis=0
        )
        # _X holds one row per window.
        self._inner_fit(_X)

        return self

    def _check_params(self, X: np.ndarray) -> None:
        """Check ``window_size``, ``stride`` and ``n_neighbors`` against ``X``.

        Parameters
        ----------
        X : np.ndarray
            Series with time points along axis 0.

        Raises
        ------
        ValueError
            If the window size is not in ``[1, len(X)]``, if the stride is not
            in ``[1, window_size]``, or if the series yields fewer windows than
            ``n_neighbors``.
        """
        if self.window_size < 1 or self.window_size > X.shape[0]:
            raise ValueError(
                "The window size must be at least 1 and at most the length of the "
                "time series."
            )

        if self.stride < 1 or self.stride > self.window_size:
            raise ValueError(
                "The stride must be at least 1 and at most the window size."
            )

        # Computed once and reused in the message. A window count equal to
        # n_neighbors is accepted; the wording is kept for compatibility with
        # callers matching on the message.
        n_windows = int((X.shape[0] - self.window_size) / self.stride + 1)
        if n_windows < self.n_neighbors:
            raise ValueError(
                f"Window count ({n_windows}) "
                f"has to be larger than n_neighbors ({self.n_neighbors}). "
                "Please choose a smaller n_neighbors value or increase window count "
                "by choosing a smaller window size or smaller stride."
            )

    def _inner_fit(self, X: np.ndarray) -> None:
        """Fit ROCKET, the optional power transformer and the k-NN ensemble.

        Parameters
        ----------
        X : np.ndarray
            Windowed training data, one row per window.
        """
        self.rocket_transformer_ = Rocket(
            n_kernels=self.n_kernels,
            normalise=self.normalise,
            n_jobs=self.n_jobs,
            random_state=self.random_state,
        )
        # One feature vector per window, cast to float64 for scikit-learn.
        Xt = self.rocket_transformer_.fit_transform(X)
        Xt = Xt.astype(np.float64)

        # Reset so that a refit never reuses a transformer from an earlier fit.
        self.power_transformer_ = None
        Xtp = Xt
        if self.power_transform:
            transformer = PowerTransformer()
            try:
                Xtp = transformer.fit_transform(Xt)
            except Exception:
                # Deliberate best effort: fall back to the raw ROCKET features
                # and remember (via power_transformer_ = None) that prediction
                # must skip the transform as well.
                warnings.warn(
                    "Power Transform failed and thus has been disabled. "
                    "Try increasing the window size.",
                    UserWarning,
                    stacklevel=2,
                )
                Xtp = Xt
            else:
                self.power_transformer_ = transformer

        self.list_baggers_ = []

        for idx_estimator in range(self.n_estimators):
            estimator = NearestNeighbors(
                n_neighbors=self.n_neighbors,
                n_jobs=self.n_jobs,
                metric=self.metric,
                algorithm="kd_tree",
            )
            # A distinct seed per estimator gives distinct bootstrap samples.
            # With random_state=None there is nothing to offset, so the draw is
            # left unseeded instead of failing on ``None + int``.
            if self.random_state is None:
                seed = None
            else:
                seed = self.random_state + idx_estimator

            # Bootstrap sample with as many rows as there are windows.
            Xtp_scaled_sample = resample(
                Xtp,
                replace=True,
                n_samples=None,
                random_state=seed,
                stratify=None,
            )

            estimator.fit(Xtp_scaled_sample)
            self.list_baggers_.append(estimator)

    def _predict(self, X) -> np.ndarray:
        """Score every time point of ``X`` with the fitted detector.

        Parameters
        ----------
        X : np.ndarray
            Series with time points along axis 0.

        Returns
        -------
        np.ndarray
            Min-max scaled anomaly score per time point.
        """
        _X, padding = sliding_windows(
            X, window_size=self.window_size, stride=self.stride, axis=0
        )

        return self._inner_predict(_X, padding)

    def _fit_predict(self, X: np.ndarray, y: Optional[np.ndarray] = None) -> np.ndarray:
        """Fit on ``X`` and score it, windowing the series only once.

        Parameters
        ----------
        X : np.ndarray
            Series with time points along axis 0.
        y : np.ndarray, optional
            Not used by this method.

        Returns
        -------
        np.ndarray
            Min-max scaled anomaly score per time point.
        """
        self._check_params(X)
        _X, padding = sliding_windows(
            X, window_size=self.window_size, stride=self.stride, axis=0
        )

        self._inner_fit(_X)
        return self._inner_predict(_X, padding)

    def _inner_predict(self, X: np.ndarray, padding: int) -> np.ndarray:
        """Turn window scores into min-max scaled point scores.

        Parameters
        ----------
        X : np.ndarray
            Windowed data, one row per window.
        padding : int
            Padding value returned by ``sliding_windows`` for this data.

        Returns
        -------
        np.ndarray
            One score per time point, scaled to ``[0, 1]``. All zeros when every
            point receives the same raw score.
        """
        anomaly_scores = self._predict_proba(X)

        # Each point takes the NaN-ignoring mean of the windows covering it.
        point_anomaly_scores = reverse_windowing(
            anomaly_scores, self.window_size, np.nanmean, self.stride, padding
        )

        lowest = point_anomaly_scores.min()
        score_range = point_anomaly_scores.max() - lowest
        if score_range == 0:
            # Identical scores carry no ranking information; dividing by the
            # zero range would turn every score into NaN.
            return np.zeros_like(point_anomaly_scores)

        return (point_anomaly_scores - lowest) / score_range

    def _predict_proba(self, X):
        """Compute the raw anomaly score of every window.

        Despite the name, the returned values are distances, not probabilities.

        Parameters
        ----------
        X : np.ndarray
            Windowed data, one row per window.

        Returns
        -------
        np.ndarray
            One score per window: the mean distance to the ``n_neighbors``
            nearest training windows, averaged over all estimators.
        """
        y_scores = np.zeros((len(X), self.n_estimators))

        # Apply the same feature pipeline that was used during fitting.
        Xt = self.rocket_transformer_.transform(X)
        Xt = Xt.astype(np.float64)

        if self.power_transformer_ is not None:
            Xtp = self.power_transformer_.transform(Xt)
        else:
            Xtp = Xt

        for idx, bagger in enumerate(self.list_baggers_):
            distances, _ = bagger.kneighbors(Xtp)
            # Mean distance to the nearest neighbours of each window.
            y_scores[:, idx] = distances.mean(axis=1)

        # Average over the ensemble to get the final score for each window.
        return y_scores.mean(axis=1)
"""Tests for the ROCKAD anomaly detector."""

import numpy as np
import pytest
from sklearn.utils import check_random_state

from aeon.anomaly_detection import ROCKAD


def test_rockad_univariate():
    """ROCKAD scores a univariate series and peaks inside the injected anomaly."""
    rng = check_random_state(seed=2)
    series = rng.normal(size=(100,))
    # Shift a short segment downwards to create the anomaly.
    series[50:58] -= 5

    settings = {
        "n_estimators": 100,
        "n_kernels": 10,
        "n_neighbors": 9,
        "power_transform": True,
        "window_size": 20,
        "stride": 1,
    }
    scores = ROCKAD(**settings).fit_predict(series, axis=0)

    assert scores.shape == (100,)
    assert scores.dtype == np.float64
    peak = np.argmax(scores)
    assert 50 <= peak <= 58


def test_rockad_multivariate():
    """ROCKAD scores a multivariate series and peaks inside the large anomaly."""
    rng = check_random_state(seed=2)
    series = rng.normal(size=(100, 3))
    # Large shift in channel 0, tiny shift in channel 1.
    series[50:58, 0] -= 5
    series[87:90, 1] += 0.1

    settings = {
        "n_estimators": 1000,
        "n_kernels": 100,
        "n_neighbors": 20,
        "power_transform": True,
        "window_size": 10,
        "stride": 1,
    }
    scores = ROCKAD(**settings).fit_predict(series, axis=0)

    assert scores.shape == (100,)
    assert scores.dtype == np.float64
    peak = np.argmax(scores)
    assert 50 <= peak <= 58


def test_rockad_incorrect_input():
    """ROCKAD rejects invalid parameters and warns on power transform failure."""
    rng = check_random_state(seed=2)
    series = rng.normal(size=(100,))

    # Window size below the allowed minimum.
    with pytest.raises(ValueError, match="The window size must be at least 1"):
        ROCKAD(window_size=0).fit_predict(series)

    # Stride below the allowed minimum.
    with pytest.raises(ValueError, match="The stride must be at least 1"):
        ROCKAD(stride=0).fit_predict(series)

    # A single window cannot serve the default number of neighbours.
    too_few_windows = r"Window count .* has to be larger than n_neighbors .*"
    with pytest.raises(ValueError, match=too_few_windows):
        ROCKAD(stride=1, window_size=100).fit_predict(series)

    # Short windows make the power transform fail, which must only warn.
    transform_failed = r"Power Transform failed and thus has been disabled."
    with pytest.warns(UserWarning, match=transform_failed):
        ROCKAD(stride=1, window_size=5).fit_predict(series)
name: Install dependencies + uses: nick-fields/retry@v3 + with: + timeout_minutes: 30 + max_attempts: 3 + command: > + python -m pip install .[all_extras${{ inputs.additional_extras != '' && ',' || '' }}${{ inputs.additional_extras }}] diff --git a/.github/workflows/periodic_tests.yml b/.github/workflows/periodic_tests.yml index 998ec1c887..64e297d68f 100644 --- a/.github/workflows/periodic_tests.yml +++ b/.github/workflows/periodic_tests.yml @@ -65,12 +65,9 @@ jobs: python_version: "3.10" restore_cache: "false" - - name: Install dependencies - uses: nick-fields/retry@v3 + - uses: ./.github/actions/cpu_all_extras with: - timeout_minutes: 30 - max_attempts: 3 - command: python -m pip install .[all_extras,binder,dev] + additional_extras: "dev,binder" - name: Run example notebooks run: .github/utilities/run_examples.sh false @@ -180,12 +177,9 @@ jobs: python_version: ${{ matrix.python-version }} restore_cache: "false" - - name: Install aeon and dependencies - uses: nick-fields/retry@v3 + - uses: ./.github/actions/cpu_all_extras with: - timeout_minutes: 30 - max_attempts: 3 - command: python -m pip install .[all_extras,dev] + additional_extras: "dev" - name: Show dependencies run: python -m pip list @@ -215,12 +209,9 @@ jobs: - name: Disable Numba JIT run: echo "NUMBA_DISABLE_JIT=1" >> $GITHUB_ENV - - name: Install aeon and dependencies - uses: nick-fields/retry@v3 + - uses: ./.github/actions/cpu_all_extras with: - timeout_minutes: 30 - max_attempts: 3 - command: python -m pip install .[all_extras,unstable_extras,dev] + additional_extras: "unstable_extras,dev" - name: Show dependencies run: python -m pip list diff --git a/.github/workflows/pr_examples.yml b/.github/workflows/pr_examples.yml index 5679830cf9..adc266319d 100644 --- a/.github/workflows/pr_examples.yml +++ b/.github/workflows/pr_examples.yml @@ -38,12 +38,9 @@ jobs: runner_os: ${{ runner.os }} python_version: "3.10" - - name: Install aeon and dependencies - uses: nick-fields/retry@v3 + - uses: 
./.github/actions/cpu_all_extras with: - timeout_minutes: 30 - max_attempts: 3 - command: python -m pip install .[all_extras,binder,dev] + additional_extras: "dev,binder" - name: Run example notebooks run: .github/utilities/run_examples.sh ${{ github.event_name == 'pull_request' && !contains(github.event.pull_request.labels.*.name, 'full examples run') }} diff --git a/.github/workflows/pr_pytest.yml b/.github/workflows/pr_pytest.yml index 4370bf7221..ae1c792243 100644 --- a/.github/workflows/pr_pytest.yml +++ b/.github/workflows/pr_pytest.yml @@ -88,12 +88,9 @@ jobs: runner_os: ${{ runner.os }} python_version: ${{ matrix.python-version }} - - name: Install aeon and dependencies - uses: nick-fields/retry@v3 + - uses: ./.github/actions/cpu_all_extras with: - timeout_minutes: 30 - max_attempts: 3 - command: python -m pip install .[all_extras,dev] + additional_extras: "dev" - name: Show dependencies run: python -m pip list @@ -120,12 +117,9 @@ jobs: - name: Disable Numba JIT run: echo "NUMBA_DISABLE_JIT=1" >> $GITHUB_ENV - - name: Install aeon and dependencies - uses: nick-fields/retry@v3 + - uses: ./.github/actions/cpu_all_extras with: - timeout_minutes: 30 - max_attempts: 3 - command: python -m pip install .[all_extras,unstable_extras,dev] + additional_extras: "unstable_extras,dev" - name: Show dependencies run: python -m pip list