diff --git a/docs/Tutorial_AB_Joins.ipynb b/docs/Tutorial_AB_Joins.ipynb index a5c4cbe18..4401f48fa 100644 --- a/docs/Tutorial_AB_Joins.ipynb +++ b/docs/Tutorial_AB_Joins.ipynb @@ -289,6 +289,25 @@ "print(f'The motif is located at index {vanilla_ice_motif_index} of \"Ice Ice Baby\"')" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[comment]: <> (Myst)\n", + ":::{admonition} **Added after STUMPY version 1.12.0**\n", + ":class: note\n", + "\n", + "In place of array slicing (i.e., `mp[:, 0]`, `mp[:, 1]`), the matrix profile distances can be accessed directly through the `P_` attribute and the matrix profile indices can be accessed through the `I_` attribute:\n", + "\n", + "```\n", + "mp = stumpy.stump(T, m)\n", + "print(mp.P_, mp.I_) # print the matrix profile and the matrix profile indices \n", + "```\n", + "\n", + "Additionally, the left and right matrix profile indices can also be accessed through the `left_I_` and `right_I_` attributes, respectively.\n", + ":::" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -366,7 +385,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.6" + "version": "3.12.3" } }, "nbformat": 4, diff --git a/docs/Tutorial_Fast_Approximate_Matrix_Profiles.ipynb b/docs/Tutorial_Fast_Approximate_Matrix_Profiles.ipynb index 6633e10f6..acdc7792c 100644 --- a/docs/Tutorial_Fast_Approximate_Matrix_Profiles.ipynb +++ b/docs/Tutorial_Fast_Approximate_Matrix_Profiles.ipynb @@ -257,6 +257,23 @@ " plt.show()" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[comment]: <> (Myst)\n", + ":::{admonition} **Added after STUMPY version 1.12.0**\n", + ":class: note\n", + "\n", + "In place of array slicing (i.e., `mp[:, 0]`, `mp[:, 1]`), the matrix profile distances can be accessed directly through the `P_` attribute and the matrix profile indices can be accessed through the `I_` attribute:\n", + "\n", + "```\n", + "mp = stumpy.stump(T, m)\n", + "print(mp.P_, 
mp.I_) # print the matrix profile and the matrix profile indices \n", + "```\n", + ":::" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -486,7 +503,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.6" + "version": "3.12.3" } }, "nbformat": 4, diff --git a/docs/Tutorial_Matrix_Profiles_For_Streaming_Data.ipynb b/docs/Tutorial_Matrix_Profiles_For_Streaming_Data.ipynb index 9ddf94f25..a8e8b3026 100644 --- a/docs/Tutorial_Matrix_Profiles_For_Streaming_Data.ipynb +++ b/docs/Tutorial_Matrix_Profiles_For_Streaming_Data.ipynb @@ -191,6 +191,25 @@ "I_full = mp[:, 1]" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[comment]: <> (Myst)\n", + ":::{admonition} **Added after STUMPY version 1.12.0**\n", + ":class: note\n", + "\n", + "In place of array slicing (i.e., `mp[:, 0]`, `mp[:, 1]`), the matrix profile distances can be accessed directly through the `P_` attribute and the matrix profile indices can be accessed through the `I_` attribute:\n", + "\n", + "```\n", + "mp = stumpy.stump(T, m)\n", + "print(mp.P_, mp.I_) # print the matrix profile and the matrix profile indices \n", + "```\n", + "\n", + "Additionally, the left and right matrix profile indices can also be accessed through the `left_I_` and `right_I_` attributes, respectively.\n", + ":::" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -166458,7 +166477,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.6" + "version": "3.12.3" } }, "nbformat": 4, diff --git a/docs/Tutorial_STUMPY_Basics.ipynb b/docs/Tutorial_STUMPY_Basics.ipynb index 013af775e..e2a199192 100644 --- a/docs/Tutorial_STUMPY_Basics.ipynb +++ b/docs/Tutorial_STUMPY_Basics.ipynb @@ -392,6 +392,25 @@ "mp[motif_idx, 0]" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[comment]: <> (Myst)\n", + ":::{admonition} **Added after STUMPY version 1.12.0**\n", + ":class: note\n", + "\n", + "In place of 
array slicing (i.e., `mp[:, 0]`, `mp[:, 1]`), the matrix profile distances can be accessed directly through the `P_` attribute and the matrix profile indices can be accessed through the `I_` attribute:\n", + "\n", + "```\n", + "mp = stumpy.stump(T, m)\n", + "print(mp.P_, mp.I_) # print the matrix profile and the matrix profile indices \n", + "```\n", + "\n", + "Additionally, the left and right matrix profile indices can also be accessed through the `left_I_` and `right_I_` attributes, respectively.\n", + ":::" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -985,7 +1004,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.8" + "version": "3.12.3" } }, "nbformat": 4, diff --git a/docs/Tutorial_Semantic_Segmentation.ipynb b/docs/Tutorial_Semantic_Segmentation.ipynb index aeece3f82..8cc714b7c 100644 --- a/docs/Tutorial_Semantic_Segmentation.ipynb +++ b/docs/Tutorial_Semantic_Segmentation.ipynb @@ -320,6 +320,25 @@ "plt.show()" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[comment]: <> (Myst)\n", + ":::{admonition} **Added after STUMPY version 1.12.0**\n", + ":class: note\n", + "\n", + "In place of array slicing (i.e., `mp[:, 0]`, `mp[:, 1]`), the matrix profile distances can be accessed directly through the `P_` attribute and the matrix profile indices can be accessed through the `I_` attribute:\n", + "\n", + "```\n", + "mp = stumpy.stump(T, m)\n", + "print(mp.P_, mp.I_) # print the matrix profile and the matrix profile indices \n", + "```\n", + "\n", + "Additionally, the left and right matrix profile indices can also be accessed through the `left_I_` and `right_I_` attributes, respectively.\n", + ":::" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -373,6 +392,21 @@ "plt.show()" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "
\n", + "Starting after STUMPY version 1.12.0, in place of `mp[:, 2]` and `mp[:, 3]`, you can access the left and right matrix profile indices through the `left_I_` and `right_I_` attributes, respectively:\n", + "

\n", + "\n", + "```\n", + "mp = stumpy.stump(T, m)\n", + "print(mp.left_I_, mp.right_I_) # print the left and right matrix profile indices \n", + "```\n", + "
" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -56949,7 +56983,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.6" + "version": "3.10.12" } }, "nbformat": 4, diff --git a/docs/Tutorial_Time_Series_Chains.ipynb b/docs/Tutorial_Time_Series_Chains.ipynb index e58e4b0ed..66b8ba77f 100644 --- a/docs/Tutorial_Time_Series_Chains.ipynb +++ b/docs/Tutorial_Time_Series_Chains.ipynb @@ -467,6 +467,25 @@ "all_chain_set, unanchored_chain = stumpy.allc(mp[:, 2], mp[:, 3])" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[comment]: <> (Myst)\n", + ":::{admonition} **Added after STUMPY version 1.12.0**\n", + ":class: note\n", + "\n", + "In place of array slicing (i.e., `mp[:, 2]`, `mp[:, 3]`), the left and right matrix profile indices can also be accessed through the `left_I_` and `right_I_` attributes, respectively.\n", + "\n", + "```\n", + "mp = stumpy.stump(T, m)\n", + "print(mp.left_I_, mp.right_I_) # print the left and right matrix profile indices \n", + "```\n", + "\n", + "Additionally, you can also access the matrix profile distances directly through the `P_` attribute and the matrix profile indices can be accessed through the `I_` attribute.\n", + ":::" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -586,7 +605,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.4" + "version": "3.12.3" } }, "nbformat": 4, diff --git a/docs/conf.py b/docs/conf.py index f5e6681ca..f34e70442 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -52,8 +52,8 @@ "sphinx.ext.intersphinx", "sphinx.ext.mathjax", "sphinx.ext.viewcode", - "nbsphinx", "numpydoc", + "myst_nb", ] # Add any paths that contain templates here, relative to this directory. 
@@ -63,7 +63,11 @@ # You can specify multiple suffix as a list of string: # # source_suffix = ['.rst', '.md'] -source_suffix = ".rst" +source_suffix = [ + ".rst", + ".md", + ".ipynb", +] # The master toctree document. master_doc = "index" @@ -214,7 +218,11 @@ # -- Options for intersphinx extension --------------------------------------- # Example configuration for intersphinx: refer to the Python standard library. -intersphinx_mapping = {"https://docs.python.org/": None} +intersphinx_mapping = {"python": ("https://docs.python.org/3", None)} # Napoleon settings (see https://www.sphinx-doc.org/en/master/usage/extensions/napoleon.html#configuration) # noqa: E501 napoleon_numpy_docstring = True + +myst_enable_extensions = [ + "colon_fence", +] diff --git a/docs/images/performance.png b/docs/images/performance.png index c23e6616f..f2d6d0003 100644 Binary files a/docs/images/performance.png and b/docs/images/performance.png differ diff --git a/docs/requirements.txt b/docs/requirements.txt index fba61ac6e..db9817084 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,5 +1,5 @@ sphinx>=3.5.3 -nbsphinx pydata-sphinx-theme ipython numpydoc +myst-nb diff --git a/docs/setup.sh b/docs/setup.sh index 57c20113b..f935c8bcc 100755 --- a/docs/setup.sh +++ b/docs/setup.sh @@ -1,4 +1,5 @@ #!/bin/bash make html +#sphinx-build -nW --keep-going -b html . ./_build/html python -m http.server diff --git a/environment.yml b/environment.yml index a0c11353b..367420178 100644 --- a/environment.yml +++ b/environment.yml @@ -17,10 +17,11 @@ dependencies: - lxml>=4.5.2 - twine>=3.2.0 - sphinx>=3.5.3 - - nbsphinx>=0.8.1 - pydata-sphinx-theme>=0.5.2 - scikit-learn>=0.21.3 - numpydoc>=1.1.0 - build>=0.7.0 - pytest-check-links>=0.7.1 - isort>=5.11.0 + - jupyterlab-myst>=2.0.0 + - myst-nb>=1.0.0 diff --git a/stumpy/aamp.py b/stumpy/aamp.py index 6c78fe92d..edc2da7af 100644 --- a/stumpy/aamp.py +++ b/stumpy/aamp.py @@ -7,6 +7,7 @@ from numba import njit, prange from . 
import config, core +from .mparray import mparray @njit( @@ -378,6 +379,12 @@ def aamp(T_A, m, T_B=None, ignore_trivial=True, p=2.0, k=1): equivalently, out[:, -2] and out[:, -1]) correspond to the top-1 left matrix profile indices and the top-1 right matrix profile indices, respectively. + For convenience, the matrix profile (distances) and matrix profile indices can + also be accessed via their corresponding named array attributes, `.P_` and + `.I_`, respectively. Similarly, the corresponding left matrix profile indices + and right matrix profile indices may also be accessed via the `.left_I_` and + `.right_I_` array attributes. + Notes ----- `arXiv:1901.05708 \ @@ -431,4 +438,4 @@ def aamp(T_A, m, T_B=None, ignore_trivial=True, p=2.0, k=1): core._check_P(out[:, 0]) - return out + return mparray(out, m, k, config.STUMPY_EXCL_ZONE_DENOM) diff --git a/stumpy/aamped.py b/stumpy/aamped.py index fbbe365b0..5eb7f9d56 100644 --- a/stumpy/aamped.py +++ b/stumpy/aamped.py @@ -7,6 +7,7 @@ from . import config, core from .aamp import _aamp +from .mparray import mparray def _dask_aamped( @@ -212,6 +213,12 @@ def aamped(client, T_A, m, T_B=None, ignore_trivial=True, p=2.0, k=1): equivalently, out[:, -2] and out[:, -1]) correspond to the top-1 left matrix profile indices and the top-1 right matrix profile indices, respectively. + For convenience, the matrix profile (distances) and matrix profile indices can + also be accessed via their corresponding named array attributes, `.P_` and + `.I_`, respectively. Similarly, the corresponding left matrix profile indices + and right matrix profile indices may also be accessed via the `.left_I_` and + `.right_I_` array attributes. 
+ Notes ----- `arXiv:1901.05708 \ @@ -264,4 +271,4 @@ def aamped(client, T_A, m, T_B=None, ignore_trivial=True, p=2.0, k=1): core._check_P(out[:, 0]) - return out + return mparray(out, m, k, config.STUMPY_EXCL_ZONE_DENOM) diff --git a/stumpy/chains.py b/stumpy/chains.py index ae96a7def..e2cd215d9 100644 --- a/stumpy/chains.py +++ b/stumpy/chains.py @@ -52,6 +52,11 @@ def atsc(IL, IR, j): >>> mp = stumpy.stump(np.array([584., -11., 23., 79., 1001., 0., -19.]), m=3) >>> stumpy.atsc(mp[:, 2], mp[:, 3], 1) array([1, 3]) + + # Alternative example using named attributes + >>> mp = stumpy.stump(np.array([584., -11., 23., 79., 1001., 0., -19.]), m=3) + >>> stumpy.atsc(mp.left_I_, mp.right_I_, 1) + array([1, 3]) """ C = deque([j]) for i in range(IL.size): @@ -118,6 +123,11 @@ def allc(IL, IR): >>> mp = stumpy.stump(np.array([584., -11., 23., 79., 1001., 0., -19.]), m=3) >>> stumpy.allc(mp[:, 2], mp[:, 3]) ([array([1, 3]), array([2]), array([0, 4])], array([0, 4])) + + # Alternative example using named attributes + >>> mp = stumpy.stump(np.array([584., -11., 23., 79., 1001., 0., -19.]), m=3) + >>> stumpy.allc(mp.left_I_, mp.right_I_) + ([array([1, 3]), array([2]), array([0, 4])], array([0, 4])) """ L = np.ones(IL.size, dtype=np.int64) S = set() # type: ignore diff --git a/stumpy/floss.py b/stumpy/floss.py index 7a7ea6720..dbd1bd449 100644 --- a/stumpy/floss.py +++ b/stumpy/floss.py @@ -290,6 +290,11 @@ def fluss(I, L, n_regimes, excl_factor=5, custom_iac=None): >>> mp = stumpy.stump(np.array([584., -11., 23., 79., 1001., 0., -19.]), m=3) >>> stumpy.fluss(mp[:, 0], 3, 2) (array([1., 1., 1., 1., 1.]), array([0])) + + # Alternative example using named attributes + >>> mp = stumpy.stump(np.array([584., -11., 23., 79., 1001., 0., -19.]), m=3) + >>> stumpy.fluss(mp.P_, 3, 2) + (array([1., 1., 1., 1., 1.]), array([0])) """ cac = _cac(I, L, bidirectional=True, excl_factor=excl_factor, custom_iac=custom_iac) regime_locs = _rea(cac, n_regimes, L, excl_factor=excl_factor) diff --git 
a/stumpy/gpu_aamp.py b/stumpy/gpu_aamp.py index 68d2c836b..a4708a5fc 100644 --- a/stumpy/gpu_aamp.py +++ b/stumpy/gpu_aamp.py @@ -9,6 +9,7 @@ from numba import cuda from . import config, core +from .mparray import mparray @cuda.jit( @@ -496,6 +497,12 @@ def gpu_aamp(T_A, m, T_B=None, ignore_trivial=True, device_id=0, p=2.0, k=1): equivalently, out[:, -2] and out[:, -1]) correspond to the top-1 left matrix profile indices and the top-1 right matrix profile indices, respectively. + For convenience, the matrix profile (distances) and matrix profile indices can + also be accessed via their corresponding named array attributes, `.P_` and + `.I_`, respectively. Similarly, the corresponding left matrix profile indices + and right matrix profile indices may also be accessed via the `.left_I_` and + `.right_I_` array attributes. + Notes ----- `arXiv:1901.05708 \ @@ -706,4 +713,4 @@ def gpu_aamp(T_A, m, T_B=None, ignore_trivial=True, device_id=0, p=2.0, k=1): core._check_P(out[:, 0]) - return out + return mparray(out, m, k, config.STUMPY_EXCL_ZONE_DENOM) diff --git a/stumpy/gpu_stump.py b/stumpy/gpu_stump.py index 7f103b7ee..8f3f4336e 100644 --- a/stumpy/gpu_stump.py +++ b/stumpy/gpu_stump.py @@ -10,6 +10,7 @@ from . import config, core from .gpu_aamp import gpu_aamp +from .mparray import mparray @cuda.jit( @@ -573,6 +574,12 @@ def gpu_stump( equivalently, out[:, -2] and out[:, -1]) correspond to the top-1 left matrix profile indices and the top-1 right matrix profile indices, respectively. + For convenience, the matrix profile (distances) and matrix profile indices can + also be accessed via their corresponding named array attributes, `.P_` and + `.I_`, respectively. Similarly, the corresponding left matrix profile indices + and right matrix profile indices may also be accessed via the `.left_I_` and + `.right_I_` array attributes. See examples below. 
+ See Also -------- stumpy.stump : Compute the z-normalized matrix profile @@ -615,15 +622,20 @@ def gpu_stump( >>> from numba import cuda >>> if __name__ == "__main__": ... all_gpu_devices = [device.id for device in cuda.list_devices()] - ... stumpy.gpu_stump( + ... mp = stumpy.gpu_stump( ... np.array([584., -11., 23., 79., 1001., 0., -19.]), ... m=3, ... device_id=all_gpu_devices) - array([[0.11633857113691416, 4, -1, 4], - [2.694073918063438, 3, -1, 3], - [3.0000926340485923, 0, 0, 4], - [2.694073918063438, 1, 1, -1], - [0.11633857113691416, 0, 0, -1]], dtype=object) + >>> mp + mparray([[0.11633857113691416, 4, -1, 4], + [2.694073918063438, 3, -1, 3], + [3.0000926340485923, 0, 0, 4], + [2.694073918063438, 1, 1, -1], + [0.11633857113691416, 0, 0, -1]], dtype=object) + >>> mp.P_ + mparray([0.11633857, 2.69407392, 3.00009263, 2.69407392, 0.11633857]) + >>> mp.I_ + mparray([4, 3, 0, 1, 0]) """ if T_B is None: # Self join! T_B = T_A @@ -838,4 +850,4 @@ def gpu_stump( core._check_P(out[:, 0]) - return out + return mparray(out, m, k, config.STUMPY_EXCL_ZONE_DENOM) diff --git a/stumpy/motifs.py b/stumpy/motifs.py index 790bc83a9..69decdd7c 100644 --- a/stumpy/motifs.py +++ b/stumpy/motifs.py @@ -298,6 +298,14 @@ def motifs( ... mp[:, 0], ... max_distance=2.0) (array([[0. , 0.11633857]]), array([[0, 4]])) + + # Alternative example using named attributes + >>> mp = stumpy.stump(np.array([584., -11., 23., 79., 1001., 0., -19.]), m=3) + >>> stumpy.motifs( + ... np.array([584., -11., 23., 79., 1001., 0., -19.]), + ... mp.P_, + ... max_distance=2.0) + (array([[0. 
import numpy as np


class mparray(np.ndarray):
    """
    A matrix profile convenience class that subclasses the numpy ndarray

    Parameters
    ----------
    input_array : ndarray
        The input `numpy` array to be subclassed

    m : int
        Window size

    k : int
        The number of top `k` smallest distances used to construct the
        matrix profile.

    excl_zone_denom : int
        The denominator used in computing the exclusion zone

    Attributes
    ----------
    P_ : numpy.ndarray
        The (top-k) matrix profile, i.e., the first `k` columns of this 2D array
        cast to `numpy.float64`. When `k=1`, the first (and only) matrix profile
        column is returned as a flattened 1D array. When `k > 1`, the output has
        exactly `k` columns consisting of the top-k matrix profile.

    I_ : numpy.ndarray
        The (top-k) matrix profile indices, i.e., columns `k` through `2 * k - 1`
        of this 2D array cast to `numpy.int64`. When `k=1`, the first (and only)
        matrix profile indices column is returned as a flattened 1D array. When
        `k > 1`, the output has exactly `k` columns consisting of the top-k
        matrix profile indices.

    left_I_ : numpy.ndarray
        The left (top-1) matrix profile indices, i.e., column `2 * k` of this
        2D array cast to `numpy.int64`

    right_I_ : numpy.ndarray
        The right (top-1) matrix profile indices, i.e., column `2 * k + 1` of this
        2D array cast to `numpy.int64`
    """

    def __new__(cls, input_array, m, k, excl_zone_denom):
        """
        Create the ndarray instance of our type by view casting `input_array`.
        This goes through the standard ndarray machinery but returns an object
        of our type. It also triggers a call to `mparray.__array_finalize__`.

        Parameters
        ----------
        cls : class
            The base class

        input_array : ndarray
            The input `numpy` array to be subclassed

        m : int
            Window size

        k : int
            The number of top `k` smallest distances used to construct the
            matrix profile

        excl_zone_denom : int
            The denominator used in computing the exclusion zone

        Returns
        -------
        obj : mparray
            A view of `input_array` with `_m`, `_k`, and `_excl_zone_denom` attached
        """
        obj = np.asarray(input_array).view(cls)
        obj._m = m
        obj._k = k
        obj._excl_zone_denom = excl_zone_denom
        # All new attributes will also need to be added to the `__array_finalize__`
        # function below so that "new-from-template" objects (e.g., an array slice)
        # will also contain the same new attributes
        return obj

    def __array_finalize__(self, obj):
        """
        Finalize the array by copying the `mparray` metadata from its source

        Parameters
        ----------
        obj : object
            The array that `self` was derived from (e.g., the array being viewed
            or the `mparray` being sliced). This is `None` when the instance is
            created through the explicit ndarray constructor.

        Returns
        -------
        None
        """
        if obj is None:  # pragma: no cover
            return
        # The lines below ensure that child objects that are created from a slice
        # of an `mparray` will also inherit the attributes from the parent `mparray`.
        # When `obj` does not carry an attribute, it is set to `None` instead.
        self._m = getattr(obj, "_m", None)
        self._k = getattr(obj, "_k", None)
        self._excl_zone_denom = getattr(obj, "_excl_zone_denom", None)

    def _P(self):
        """
        Matrix profile values

        Parameters
        ----------
        None

        Returns
        -------
        P : mparray
            The first `k` columns as `numpy.float64`, flattened to 1D when `k=1`
        """
        P = self[:, : self._k]
        if self._k == 1:
            P = P.flatten()

        return P.astype(np.float64)

    def _I(self):
        """
        Nearest neighbor indices

        Parameters
        ----------
        None

        Returns
        -------
        I : mparray
            Columns `k` through `2 * k - 1` as `numpy.int64`, flattened to 1D
            when `k=1`
        """
        I = self[:, self._k : 2 * self._k]  # noqa: E741
        if self._k == 1:
            I = I.flatten()  # noqa: E741

        return I.astype(np.int64)

    def _left_I(self):
        """
        Left nearest neighbor indices

        Parameters
        ----------
        None

        Returns
        -------
        left_I : mparray
            Column `2 * k` as a 1D `numpy.int64` array
        """
        # A single column is selected and so the result is 1D for every `k`
        return self[:, 2 * self._k].astype(np.int64)

    def _right_I(self):
        """
        Right nearest neighbor indices

        Parameters
        ----------
        None

        Returns
        -------
        right_I : mparray
            Column `2 * k + 1` as a 1D `numpy.int64` array
        """
        # A single column is selected and so the result is 1D for every `k`
        return self[:, 2 * self._k + 1].astype(np.int64)

    @property
    def P_(self):
        """
        Matrix profile values

        Parameters
        ----------
        None
        """
        return self._P()

    @property
    def I_(self):
        """
        Nearest neighbor indices

        Parameters
        ----------
        None
        """
        return self._I()

    @property
    def left_I_(self):
        """
        Left nearest neighbor indices

        Parameters
        ----------
        None
        """
        return self._left_I()

    @property
    def right_I_(self):
        """
        Right nearest neighbor indices

        Parameters
        ----------
        None
        """
        return self._right_I()
+ See Also -------- stumpy.stumped : Compute the z-normalized matrix profile with a distributed dask @@ -652,12 +659,17 @@ def stump( -------- >>> import stumpy >>> import numpy as np - >>> stumpy.stump(np.array([584., -11., 23., 79., 1001., 0., -19.]), m=3) - array([[0.11633857113691416, 4, -1, 4], - [2.694073918063438, 3, -1, 3], - [3.0000926340485923, 0, 0, 4], - [2.694073918063438, 1, 1, -1], - [0.11633857113691416, 0, 0, -1]], dtype=object) + >>> mp = stumpy.stump(np.array([584., -11., 23., 79., 1001., 0., -19.]), m=3) + >>> mp + mparray([[0.11633857113691416, 4, -1, 4], + [2.694073918063438, 3, -1, 3], + [3.0000926340485923, 0, 0, 4], + [2.694073918063438, 1, 1, -1], + [0.11633857113691416, 0, 0, -1]], dtype=object) + >>> mp.P_ + mparray([0.11633857, 2.69407392, 3.00009263, 2.69407392, 0.11633857]) + >>> mp.I_ + mparray([4, 3, 0, 1, 0]) """ if T_B is None: ignore_trivial = True @@ -734,4 +746,4 @@ def stump( core._check_P(out[:, 0]) - return out + return mparray(out, m, k, config.STUMPY_EXCL_ZONE_DENOM) diff --git a/stumpy/stumped.py b/stumpy/stumped.py index 2fff6959b..a89adafe4 100644 --- a/stumpy/stumped.py +++ b/stumpy/stumped.py @@ -7,6 +7,7 @@ from . import config, core from .aamped import aamped +from .mparray import mparray from .stump import _stump @@ -298,6 +299,12 @@ def stumped( equivalently, out[:, -2] and out[:, -1]) correspond to the top-1 left matrix profile indices and the top-1 right matrix profile indices, respectively. + For convenience, the matrix profile (distances) and matrix profile indices can + also be accessed via their corresponding named array attributes, `.P_` and + `.I_`, respectively. Similarly, the corresponding left matrix profile indices + and right matrix profile indices may also be accessed via the `.left_I_` and + `.right_I_` array attributes. See examples below. 
import naive
import numpy as np
import numpy.testing as npt
import pandas as pd
import pytest

from stumpy import aamp, config, stump
from stumpy.mparray import mparray

test_data = [
    (
        np.array([9, 8100, -60, 7], dtype=np.float64),
        np.array([584, -11, 23, 79, 1001, 0, -19], dtype=np.float64),
    ),
    (
        np.random.uniform(-1000, 1000, [8]).astype(np.float64),
        np.random.uniform(-1000, 1000, [64]).astype(np.float64),
    ),
]

kNN = [1, 2, 3, 4]


def _assert_named_attributes(ref_mp, comp_mp, k):
    # Compare each named `mparray` attribute of `comp_mp` against the
    # corresponding column slice(s) of the reference array `ref_mp`
    npt.assert_almost_equal(np.squeeze(ref_mp[:, :k]), comp_mp.P_)
    npt.assert_almost_equal(np.squeeze(ref_mp[:, k : 2 * k]), comp_mp.I_)
    npt.assert_almost_equal(np.squeeze(ref_mp[:, 2 * k]), comp_mp.left_I_)
    npt.assert_almost_equal(np.squeeze(ref_mp[:, 2 * k + 1]), comp_mp.right_I_)


@pytest.mark.parametrize("T_A, T_B", test_data)
def test_mparray_init(T_A, T_B):
    # Test different `mparray` initialization approaches
    m = 3
    k = 2
    arr = stump(T_B, m, ignore_trivial=True, k=k)
    mp = mparray(arr, m, k, config.STUMPY_EXCL_ZONE_DENOM)
    assert mp._m == m
    assert mp._k == k
    assert mp._excl_zone_denom == config.STUMPY_EXCL_ZONE_DENOM

    slice_mp = mp[1:, :]  # Initialize "new-from-template"
    assert slice_mp._m == m
    assert slice_mp._k == k
    # Check the sliced object (not the parent) so that attribute inheritance
    # through `__array_finalize__` is actually exercised
    assert slice_mp._excl_zone_denom == config.STUMPY_EXCL_ZONE_DENOM


@pytest.mark.parametrize("T_A, T_B", test_data)
@pytest.mark.parametrize("k", kNN)
def test_mparray_self_join(T_A, T_B, k):
    m = 3
    zone = int(np.ceil(m / 4))

    # z-normalized self-join: numpy array input, then pandas Series input
    ref_mp = naive.stump(T_B, m, exclusion_zone=zone, k=k)
    comp_mp = stump(T_B, m, ignore_trivial=True, k=k)
    naive.replace_inf(ref_mp)
    naive.replace_inf(comp_mp)
    _assert_named_attributes(ref_mp, comp_mp, k)

    comp_mp = stump(pd.Series(T_B), m, ignore_trivial=True, k=k)
    naive.replace_inf(comp_mp)
    _assert_named_attributes(ref_mp, comp_mp, k)

    # non-normalized self-join: numpy array input, then pandas Series input
    ref_mp = naive.aamp(T_B, m, exclusion_zone=zone, k=k)
    comp_mp = aamp(T_B, m, ignore_trivial=True, k=k)
    naive.replace_inf(ref_mp)
    naive.replace_inf(comp_mp)
    _assert_named_attributes(ref_mp, comp_mp, k)

    comp_mp = aamp(pd.Series(T_B), m, ignore_trivial=True, k=k)
    naive.replace_inf(comp_mp)
    _assert_named_attributes(ref_mp, comp_mp, k)


@pytest.mark.parametrize("T_A, T_B", test_data)
@pytest.mark.parametrize("k", kNN)
def test_mparray_A_B_join(T_A, T_B, k):
    m = 3

    # z-normalized AB-join: numpy array input, then pandas Series input
    ref_mp = naive.stump(T_A, m, T_B=T_B, k=k)
    comp_mp = stump(T_A, m, T_B, ignore_trivial=False, k=k)
    naive.replace_inf(ref_mp)
    naive.replace_inf(comp_mp)
    _assert_named_attributes(ref_mp, comp_mp, k)

    comp_mp = stump(pd.Series(T_A), m, pd.Series(T_B), ignore_trivial=False, k=k)
    naive.replace_inf(comp_mp)
    _assert_named_attributes(ref_mp, comp_mp, k)

    # non-normalized AB-join: numpy array input, then pandas Series input
    ref_mp = naive.aamp(T_A, m, T_B=T_B, k=k)
    comp_mp = aamp(T_A, m, T_B, ignore_trivial=False, k=k)
    naive.replace_inf(ref_mp)
    naive.replace_inf(comp_mp)
    _assert_named_attributes(ref_mp, comp_mp, k)

    comp_mp = aamp(pd.Series(T_A), m, pd.Series(T_B), ignore_trivial=False, k=k)
    naive.replace_inf(comp_mp)
    _assert_named_attributes(ref_mp, comp_mp, k)