V0.2.0 (#15)

* non-deterministic and dep defaults * collection func * example output * example output * examples and non-deterministic tests * test file rename * test file rename * test deterministic * interval bugfix
time-series-machine-learning · Jul 21, 2023 · 3cfb4e2 · 3cfb4e2
1 parent 555b045
commit 3cfb4e2
Show file tree

Hide file tree

Showing 11 changed files with 73 additions and 59 deletions.
diff --git a/tsml/feature_based/_catch22.py b/tsml/feature_based/_catch22.py
@@ -52,7 +52,7 @@ class Catch22Classifier(ClassifierMixin, BaseTimeSeriesEstimator):
         while to process for large values.
     replace_nans : bool, optional, default=True
         Replace NaN or inf values from the Catch22 transform with 0.
-    use_pycatch22 : bool, optional, default=False
+    use_pycatch22 : bool, optional, default=True
         Wraps the C based pycatch22 implementation for tsml.
         (https://github.com/DynamicsAndNeuralSystems/pycatch22). This requires the
         ``pycatch22`` package to be installed if True.
@@ -125,7 +125,7 @@ def __init__(
         catch24=True,
         outlier_norm=False,
         replace_nans=True,
-        use_pycatch22=False,
+        use_pycatch22=True,
         estimator=None,
         random_state=None,
         n_jobs=1,
@@ -385,8 +385,8 @@ class Catch22Regressor(RegressorMixin, BaseTimeSeriesEstimator):
     >>> reg.fit(X, y)
     Catch22Regressor(...)
     >>> reg.predict(X)
-    array([0.42955043, 1.31287811, 1.03757454, 0.68456511, 0.61327938,
-           1.2048977 , 0.56586089, 1.1263876 ])
+    array([0.44505834, 1.28376726, 1.09799075, 0.64209462, 0.59410108,
+           1.1746538 , 0.70590611, 1.13361721])
     """
 
     def __init__(

diff --git a/tsml/feature_based/_fpca.py b/tsml/feature_based/_fpca.py
@@ -74,17 +74,6 @@ class FPCAClassifier(ClassifierMixin, BaseTimeSeriesEstimator):
     --------
     FPCATransformer
     FPCARegressor
-
-    Examples
-    --------
-    >>> from tsml.feature_based import FPCAClassifier
-    >>> from tsml.utils.testing import generate_3d_test_data
-    >>> X, y = generate_3d_test_data(n_samples=8, series_length=10, random_state=0)
-    >>> clf = FPCAClassifier(random_state=0, n_components=6)
-    >>> clf.fit(X, y)
-    FPCAClassifier(...)
-    >>> clf.predict(X)
-    array([0, 1, 1, 0, 0, 1, 0, 1])
     """
 
     def __init__(
@@ -302,19 +291,6 @@ class FPCARegressor(RegressorMixin, BaseTimeSeriesEstimator):
     --------
     FPCATransformer
     FPCAClassifier
-
-    Examples
-    --------
-    >>> from tsml.feature_based import FPCARegressor
-    >>> from tsml.utils.testing import generate_3d_test_data
-    >>> X, y = generate_3d_test_data(n_samples=8, series_length=10,
-    ...                              regression_target=True, random_state=0)
-    >>> reg = FPCARegressor(random_state=0, n_components=6)
-    >>> reg.fit(X, y)
-    FPCARegressor(...)
-    >>> reg.predict(X)
-    array([0.31804196, 1.4151935 , 1.06572351, 0.68621331, 0.56749254,
-           1.26541066, 0.52730157, 1.09266818])
     """
 
     def __init__(

diff --git a/tsml/hybrid/_rist.py b/tsml/hybrid/_rist.py
@@ -67,6 +67,13 @@ class RISTClassifier(ClassifierMixin, BaseTimeSeriesEstimator):
         A list or tuple of transformers will extract intervals from
         all transformations concatenate the output. Including None in the list or tuple
         will use the series as is for interval extraction.
+    use_pycatch22 : bool, optional, default=True
+        Wraps the C based pycatch22 implementation for tsml.
+        (https://github.com/DynamicsAndNeuralSystems/pycatch22). This requires the
+        ``pycatch22`` package to be installed if True.
+    use_pyfftw : bool, default=True
+        Whether to use the pyfftw library for FFT calculations. Requires the pyfftw
+        package to be installed.
     estimator : sklearn classifier, default=None
         An sklearn estimator to be built using the transformed data. Defaults to an
         ExtraTreesClassifier with 200 trees.
@@ -117,17 +124,17 @@ def __init__(
         n_intervals=None,
         n_shapelets=None,
         series_transformers="default",
-        use_pyfftw=False,
-        use_pycatch22=False,
+        use_pycatch22=True,
+        use_pyfftw=True,
         estimator=None,
         n_jobs=1,
         random_state=None,
     ):
         self.n_intervals = n_intervals
         self.n_shapelets = n_shapelets
         self.series_transformers = series_transformers
-        self.use_pyfftw = use_pyfftw
         self.use_pycatch22 = use_pycatch22
+        self.use_pyfftw = use_pyfftw
         self.estimator = estimator
         self.random_state = random_state
         self.n_jobs = n_jobs
@@ -251,6 +258,7 @@ def predict_proba(self, X: Union[np.ndarray, List[np.ndarray]]) -> np.ndarray:
     def _more_tags(self) -> dict:
         return {
             "optional_dependency": self.use_pycatch22 or self.use_pyfftw,
+            "non_deterministic": True,
         }
 
     @classmethod
@@ -315,6 +323,13 @@ class RISTRegressor(RegressorMixin, BaseTimeSeriesEstimator):
         A list or tuple of transformers will extract intervals from
         all transformations concatenate the output. Including None in the list or tuple
         will use the series as is for interval extraction.
+    use_pycatch22 : bool, optional, default=True
+        Wraps the C based pycatch22 implementation for tsml.
+        (https://github.com/DynamicsAndNeuralSystems/pycatch22). This requires the
+        ``pycatch22`` package to be installed if True.
+    use_pyfftw : bool, default=True
+        Whether to use the pyfftw library for FFT calculations. Requires the pyfftw
+        package to be installed.
     estimator : sklearn classifier, default=None
         An sklearn estimator to be built using the transformed data. Defaults to an
         ExtraTreesRegressor with 200 trees.
@@ -361,17 +376,17 @@ def __init__(
         n_intervals=None,
         n_shapelets=None,
         series_transformers="default",
-        use_pyfftw=False,
-        use_pycatch22=False,
+        use_pycatch22=True,
+        use_pyfftw=True,
         estimator=None,
         n_jobs=1,
         random_state=None,
     ):
         self.n_intervals = n_intervals
         self.n_shapelets = n_shapelets
         self.series_transformers = series_transformers
-        self.use_pyfftw = use_pyfftw
         self.use_pycatch22 = use_pycatch22
+        self.use_pyfftw = use_pyfftw
         self.estimator = estimator
         self.random_state = random_state
         self.n_jobs = n_jobs
@@ -458,6 +473,7 @@ def predict(self, X: Union[np.ndarray, List[np.ndarray]]) -> np.ndarray:
     def _more_tags(self) -> dict:
         return {
             "optional_dependency": self.use_pycatch22 or self.use_pyfftw,
+            "non_deterministic": True,
         }
 
     @classmethod

diff --git a/tsml/shapelet_based/_rdst.py b/tsml/shapelet_based/_rdst.py
@@ -119,8 +119,7 @@ class RDSTClassifier(ClassifierMixin, BaseTimeSeriesEstimator):
     >>> clf = RDSTClassifier(random_state=0)
     >>> clf.fit(X, y)
     RDSTClassifier(...)
-    >>> clf.predict(X)
-    array([0, 1, 1, 0, 0, 1, 0, 1])
+    >>> pred = clf.predict(X)
     """
 
     def __init__(
@@ -292,6 +291,9 @@ def predict_proba(self, X: Union[np.ndarray, List[np.ndarray]]) -> np.ndarray:
                 dists[i, self.class_dictionary_[preds[i]]] = 1
             return dists
 
+    def _more_tags(self) -> dict:
+        return {"non_deterministic": True}
+
     @classmethod
     def get_test_params(
         cls, parameter_set: Union[str, None] = None
@@ -401,9 +403,7 @@ class RDSTRegressor(RegressorMixin, BaseTimeSeriesEstimator):
     >>> reg = RDSTRegressor(random_state=0)
     >>> reg.fit(X, y)
     RDSTRegressor(...)
-    >>> reg.predict(X)
-    array([0.31798367, 1.41426266, 1.06414746, 0.69247204, 0.56660161,
-           1.26538904, 0.52324829, 1.09394045])
+    >>> pred = reg.predict(X)
     """
 
     def __init__(
@@ -516,6 +516,9 @@ def predict(self, X: Union[np.ndarray, List[np.ndarray]]) -> np.ndarray:
 
         return self._estimator.predict(X_t)
 
+    def _more_tags(self) -> dict:
+        return {"non_deterministic": True}
+
     @classmethod
     def get_test_params(
         cls, parameter_set: Union[str, None] = None

diff --git a/tsml/tests/estimator_checks.py → tsml/tests/test_estimator_checks.py b/tsml/tests/estimator_checks.py → tsml/tests/test_estimator_checks.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 """Checks for all estimators in tsml."""
 
 __author__ = ["MatthewMiddlehurst"]
@@ -17,7 +16,7 @@
     check_set_params,
 )
 
-import tsml.tests._sklearn_checks as patched_checks
+import tsml.tests.test_estimators_sklearn as patched_checks
 import tsml.utils.testing as test_utils
 from tsml.base import _clone_estimator
 from tsml.utils._tags import _safe_tags
@@ -32,6 +31,7 @@ def _yield_all_time_series_checks(estimator):
         warnings.warn(
             f"Explicit SKIP via _skip_test tag for estimator {name}.",
             SkipTestWarning,
+            stacklevel=2,
         )
         return
 
@@ -56,7 +56,6 @@ def _yield_all_time_series_checks(estimator):
 
 
 def _yield_checks(estimator):
-    """sklearn"""
     tags = _safe_tags(estimator)
 
     yield check_no_attributes_set_in_init
@@ -198,11 +197,11 @@ def check_estimator_input_types(name, estimator_orig):
     # test a single function with this priority
     def _get_func(est):
         if hasattr(est, "predict_proba"):
-            return getattr(est, "predict_proba")
+            return est.predict_proba
         elif hasattr(est, "predict"):
-            return getattr(est, "predict")
+            return est.predict
         elif hasattr(est, "transform"):
-            return getattr(est, "transform")
+            return est.transform
 
     X, y = test_utils.generate_3d_test_data()
     first_result = None
@@ -251,31 +250,37 @@ def _get_func(est):
 
 @ignore_warnings(category=FutureWarning)
 def check_fit3d_predict2d(name, estimator_orig):
+    """Todo."""
     pass
 
 
 @ignore_warnings(category=FutureWarning)
 def check_estimator_cannot_handle_multivariate_data(name, estimator_orig):
+    """Todo."""
     pass
 
 
 @ignore_warnings(category=FutureWarning)
 def check_estimator_handles_multivariate_data(name, estimator_orig):
+    """Todo."""
     pass
 
 
 @ignore_warnings(category=FutureWarning)
 def check_estimator_cannot_handle_unequal_data(name, estimator_orig):
+    """Todo."""
     pass
 
 
 @ignore_warnings(category=FutureWarning)
 def check_estimator_handles_unequal_data(name, estimator_orig):
+    """Todo."""
     pass
 
 
 @ignore_warnings(category=FutureWarning)
 def check_n_features_unequal(name, estimator_orig):
+    """Todo."""
     pass
 
 

diff --git a/tsml/tests/_sklearn_checks.py → tsml/tests/test_estimators_sklearn.py b/tsml/tests/_sklearn_checks.py → tsml/tests/test_estimators_sklearn.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 """Patched estimator checks originating from scikit-learn."""
 
 __author__ = ["MatthewMiddlehurst"]
@@ -955,6 +954,7 @@ def check_supervised_y_2d(name, estimator_orig):
         warnings.simplefilter("always", DataConversionWarning)
         warnings.simplefilter("ignore", RuntimeWarning)
         estimator.fit(X, y[:, np.newaxis])
+
     y_pred_2d = estimator.predict(X)
     msg = "expected 1 DataConversionWarning, got: %s" % ", ".join(
         [str(w_x) for w_x in w]
@@ -966,6 +966,9 @@ def check_supervised_y_2d(name, estimator_orig):
         " was passed when a 1d array was expected" in msg
     )
 
+    if _safe_tags(estimator_orig, key="non_deterministic"):
+        raise SkipTest(name + " is non deterministic")
+
     assert_allclose(y_pred.ravel(), y_pred_2d.ravel())
 
 
@@ -1077,6 +1080,10 @@ def check_regressors_int(name, regressor_orig):
     pred1 = regressor_1.predict(X)
     regressor_2.fit(X, y.astype(float))
     pred2 = regressor_2.predict(X)
+
+    if _safe_tags(regressor_orig, key="non_deterministic"):
+        raise SkipTest(name + " is non deterministic")
+
     assert_allclose(pred1, pred2, atol=1e-2, err_msg=name)
 
 
@@ -1223,6 +1230,10 @@ def _check_estimators_data_not_an_array(name, estimator_orig, X, y, obj_type):
     pred1 = estimator_1.predict(X_)
     estimator_2.fit(X, y)
     pred2 = estimator_2.predict(X)
+
+    if _safe_tags(estimator_orig, key="non_deterministic"):
+        raise SkipTest(name + " is non deterministic")
+
     assert_allclose(pred1, pred2, atol=1e-2, err_msg=name)
 
 

diff --git a/tsml/transformations/_catch22.py b/tsml/transformations/_catch22.py
@@ -82,7 +82,7 @@ class Catch22Transformer(TransformerMixin, BaseTimeSeriesEstimator):
         while to process for large values.
     replace_nans : bool, optional, default=False
         Replace NaN or inf values from the Catch22 transform with 0.
-    use_pycatch22 : bool, optional, default=False
+    use_pycatch22 : bool, optional, default=True
         Wraps the C based pycatch22 implementation for tsml.
         (https://github.com/DynamicsAndNeuralSystems/pycatch22). This requires the
         ``pycatch22`` package to be installed if True.
@@ -126,12 +126,12 @@ class Catch22Transformer(TransformerMixin, BaseTimeSeriesEstimator):
     >>> tnf.fit(X)
     Catch22Transformer(...)
     >>> print(tnf.transform(X)[0])
-    [1.15639532e+00 1.31700575e+00 3.00000000e+00 2.00000000e-01
-     0.00000000e+00 1.00000000e+00 2.00000000e+00 1.10933565e-32
-     1.96349541e+00 5.10744398e-01 2.33853577e-01 3.89048349e-01
-     2.00000000e+00 1.00000000e+00 4.00000000e+00 1.88915916e+00
-     1.00000000e+00 1.70859420e-01 0.00000000e+00 0.00000000e+00
-     2.46913580e-02 0.00000000e+00]
+    [6.27596874e-02 3.53871087e-01 4.00000000e+00 7.00000000e-01
+     2.00000000e-01 5.66227710e-01 2.00000000e+00 3.08148791e-34
+     1.96349541e+00 9.99913411e-01 1.39251594e+00 3.89048349e-01
+     2.00000000e+00 1.00000000e+00 3.00000000e+00 2.04319187e+00
+     1.00000000e+00 2.44474814e-01 0.00000000e+00 0.00000000e+00
+     8.23045267e-03 0.00000000e+00]
     """
 
     def __init__(
@@ -140,7 +140,7 @@ def __init__(
         catch24=False,
         outlier_norm=False,
         replace_nans=False,
-        use_pycatch22=False,
+        use_pycatch22=True,
         n_jobs=1,
         parallel_backend=None,
     ):

diff --git a/tsml/transformations/_interval_extraction.py b/tsml/transformations/_interval_extraction.py
@@ -465,7 +465,7 @@ def set_features_to_transform(self, arr, raise_error=True):
             else:
                 length += 1
 
-        if len(arr) != length * self.n_intervals or not all(
+        if len(arr) != length * self.n_intervals_ or not all(
             isinstance(b, bool) for b in arr
         ):
             if raise_error:

diff --git a/tsml/transformations/_shapelet_transform.py b/tsml/transformations/_shapelet_transform.py
@@ -1004,7 +1004,10 @@ def _check_input_params(self):
             self.threshold_percentiles_ = np.asarray(self.threshold_percentiles_)
 
     def _more_tags(self) -> dict:
-        return {"requires_y": True}
+        return {
+            "requires_y": True,
+            "non_deterministic": True,
+        }
 
     @classmethod
     def get_test_params(cls, parameter_set="default"):

diff --git a/tsml/transformations/tests/test_interval_extraction.py b/tsml/transformations/tests/test_interval_extraction.py
@@ -53,4 +53,4 @@ def test_supervised_transformers():
     )
     X_t = sit.fit_transform(X, y)
 
-    assert X_t.shape == (X.shape[0], 7)
+    assert X_t.shape == (X.shape[0], 8)