diff --git a/stumpy/aamp.py b/stumpy/aamp.py index 7443db774..ca253c9af 100644 --- a/stumpy/aamp.py +++ b/stumpy/aamp.py @@ -255,7 +255,8 @@ def _aamp( return np.power(P[0, :, :], 1.0 / p), I[0, :, :] -def aamp(T_A, m, T_B=None, ignore_trivial=True, p=2.0): +def aamp(T_A, m, T_B=None, ignore_trivial=True, p=2.0, k=1): + # function needs to be changed to return top-k matrix profile """ Compute the non-normalized (i.e., without z-normalization) matrix profile @@ -282,6 +283,11 @@ def aamp(T_A, m, T_B=None, ignore_trivial=True, p=2.0): p : float, default 2.0 The p-norm to apply for computing the Minkowski distance. + k : int, default 1 + The number of top `k` smallest distances used to construct the matrix profile. + Note that this will increase the total computational time and memory usage + when k > 1. + Returns ------- out : numpy.ndarray diff --git a/stumpy/aamped.py b/stumpy/aamped.py index ed7251a48..3dd346275 100644 --- a/stumpy/aamped.py +++ b/stumpy/aamped.py @@ -12,7 +12,8 @@ logger = logging.getLogger(__name__) -def aamped(dask_client, T_A, m, T_B=None, ignore_trivial=True, p=2.0): +def aamped(dask_client, T_A, m, T_B=None, ignore_trivial=True, p=2.0, k=1): + # function needs to be revised to return top-k matrix profile """ Compute the non-normalized (i.e., without z-normalization) matrix profile @@ -46,6 +47,11 @@ def aamped(dask_client, T_A, m, T_B=None, ignore_trivial=True, p=2.0): p : float, default 2.0 The p-norm to apply for computing the Minkowski distance. + k : int, default 1 + The number of top `k` smallest distances used to construct the matrix profile. + Note that this will increase the total computational time and memory usage + when k > 1. + Returns ------- out : numpy.ndarray diff --git a/stumpy/aampi.py b/stumpy/aampi.py index be3e7bd6d..1953aa61f 100644 --- a/stumpy/aampi.py +++ b/stumpy/aampi.py @@ -8,6 +8,7 @@ class aampi: + # needs to be enhanced to support top-k matrix profile """ Compute an incremental non-normalized (i.e., without z-normalization) matrix profile for streaming data @@ -28,6 +29,11 @@ class aampi: p : float, default 2.0 The p-norm to apply for computing the Minkowski distance. + k : int, default 1 + The number of top `k` smallest distances used to construct the matrix profile. + Note that this will increase the total computational time and memory usage + when k > 1. + Attributes ---------- P_ : numpy.ndarray @@ -62,7 +68,7 @@ class aampi: Note that we have extended this algorithm for AB-joins as well. """ - def __init__(self, T, m, egress=True, p=2.0): + def __init__(self, T, m, egress=True, p=2.0, k=1): """ Initialize the `stumpi` object @@ -81,6 +87,12 @@ def __init__(self, T, m, egress=True, p=2.0): p : float, default 2.0 The p-norm to apply for computing the Minkowski distance. + + + k : int, default 1 + The number of top `k` smallest distances used to construct the matrix + profile. Note that this will increase the total computational time and + memory usage when k > 1. """ self._T = core._preprocess(T) core.check_window_size(m, max_size=self._T.shape[-1]) diff --git a/stumpy/core.py b/stumpy/core.py index 3f05c60c0..ef99a0746 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -209,6 +209,20 @@ def _gpu_aamp_stimp_driver_not_found(*args, **kwargs): # pragma: no cover driver_not_found() +def _gpu_searchsorted_left_driver_not_found(*args, **kwargs): # pragma: no cover + """ + Dummy function to raise CudaSupportError driver not found error. 
+ """ + driver_not_found() + + +def _gpu_searchsorted_right_driver_not_found(*args, **kwargs): # pragma: no cover + """ + Dummy function to raise CudaSupportError driver not found error. + """ + driver_not_found() + + def get_pkg_name(): # pragma: no cover """ Return package name. @@ -240,7 +254,7 @@ def rolling_window(a, window): return np.lib.stride_tricks.as_strided(a, shape=shape, strides=strides) -def z_norm(a, axis=0): +def z_norm(a, axis=0, threshold=config.STUMPY_STDDEV_THRESHOLD): """ Calculate the z-normalized input array `a` by subtracting the mean and dividing by the standard deviation along a given axis. @@ -253,13 +267,16 @@ def z_norm(a, axis=0): axis : int, default 0 NumPy array axis + threshold : float, default to config.STUMPY_STDDEV_THRESHOLD + A non-nan std value being less than `threshold` will be replaced with 1.0 + Returns ------- output : numpy.ndarray An array with z-normalized values computed along a specified axis. """ std = np.std(a, axis, keepdims=True) - std[std == 0] = 1 + std[np.less(std, threshold, where=~np.isnan(std))] = 1.0 return (a - np.mean(a, axis, keepdims=True)) / std @@ -2559,6 +2576,201 @@ def _select_P_ABBA_value(P_ABBA, k, custom_func=None): return MPdist +@njit +def _merge_topk_PI(PA, PB, IA, IB): + """ + Merge two top-k matrix profiles `PA` and `PB`, and update `PA` (in place). + When the inputs are 1D arrays, PA[i] is updated if it is greater than PB[i] and + IA[i] != IB[i]. In such case, PA[i] and IA[i] are replaced with PB[i] and IB[i], + respectively. (Note that it might happen that IA[i]==IB[i] but PA[i] != PB[i]. + This situation can occur if there is slight imprecision in numerical calculations. + In that case, we do not update PA[i] and IA[i]. While updating PA[i] and IA[i] + is harmless in this case, we avoid doing that so to be consistent with the merging + process when the inputs are 2D arrays) + When the inputs are 2D arrays, we always prioritize the values of `PA` over the + values of `PB` in case of ties. (i.e., values from `PB` are always inserted to + the right of values from `PA`). Also, update `IA` accordingly. In case of + overlapping values between two arrays IA[i] and IB[i], the ones in IB[i] (and + their corresponding values in PB[i]) are ignored throughout the updating process + of IA[i] (and PA[i]). + + Unlike `_merge_topk_ρI`, where `top-k` largest values are kept, this function + keeps `top-k` smallest values. + + Parameters + ---------- + PA : numpy.ndarray + A (top-k) matrix profile where values in each row are sorted in ascending + order. `PA` must be 1- or 2-dimensional. + + PB : numpy.ndarray + A (top-k) matrix profile where values in each row are sorted in ascending + order. `PB` must have the same shape as `PA`. + + IA : numpy.ndarray + A (top-k) matrix profile indices corresponding to `PA` + + IB : numpy.ndarray + A (top-k) matrix profile indices corresponding to `PB` + + Returns + ------- + None + """ + if PA.ndim == 1: + mask = (PB < PA) & (IB != IA) + PA[mask] = PB[mask] + IA[mask] = IB[mask] + else: + k = PA.shape[1] + tmp_P = np.empty(k, dtype=np.float64) + tmp_I = np.empty(k, dtype=np.int64) + for i in range(PA.shape[0]): + overlap = set(IB[i]).intersection(set(IA[i])) + aj, bj = 0, 0 + idx = 0 + # 2 * k iterations are required to traverse both A and B if needed. 
+ for _ in range(2 * k):
+ if idx >= k:
+ break
+ if bj < k and PB[i, bj] < PA[i, aj]:
+ if IB[i, bj] not in overlap:
+ tmp_P[idx] = PB[i, bj]
+ tmp_I[idx] = IB[i, bj]
+ idx += 1
+ bj += 1
+ else:
+ tmp_P[idx] = PA[i, aj]
+ tmp_I[idx] = IA[i, aj]
+ idx += 1
+ aj += 1
+
+ PA[i] = tmp_P
+ IA[i] = tmp_I
+
+
+@njit
+def _merge_topk_ρI(ρA, ρB, IA, IB):
+ """
+ Merge two top-k Pearson profiles `ρA` and `ρB`, and update `ρA` (in place).
+ When the inputs are 1D arrays, ρA[i] is updated if it is less than ρB[i] and
+ IA[i] != IB[i]. In such a case, ρA[i] and IA[i] are replaced with ρB[i] and IB[i],
+ respectively. (Note that it might happen that IA[i]==IB[i] but ρA[i] != ρB[i].
+ This situation can occur if there is slight imprecision in numerical calculations.
+ In that case, we do not update ρA[i] and IA[i]. While updating ρA[i] and IA[i]
+ is harmless in this case, we avoid doing that so as to be consistent with the
+ merging process when the inputs are 2D arrays.)
+ When the inputs are 2D arrays, we always prioritize the values of `ρA` over
+ the values of `ρB` in case of ties (i.e., values from `ρB` are always inserted
+ to the left of values from `ρA`). Also, update `IA` accordingly. In case of
+ overlapping values between the two arrays IA[i] and IB[i], the ones in IB[i] (and
+ their corresponding values in ρB[i]) are ignored throughout the updating process
+ of IA[i] (and ρA[i]).
+
+ Unlike `_merge_topk_PI`, where the `top-k` smallest values are kept, this function
+ keeps the `top-k` largest values.
+
+ Parameters
+ ----------
+ ρA : numpy.ndarray
+ A (top-k) Pearson profile where values in each row are sorted in ascending
+ order. `ρA` must be 1- or 2-dimensional.
+
+ ρB : numpy.ndarray
+ A (top-k) Pearson profile, where values in each row are sorted in ascending
+ order. `ρB` must have the same shape as `ρA`.
+
+ IA : numpy.ndarray
+ The (top-k) matrix profile indices corresponding to `ρA`
+
+ IB : numpy.ndarray
+ The (top-k) matrix profile indices corresponding to `ρB`
+
+ Returns
+ -------
+ None
+ """
+ if ρA.ndim == 1:
+ mask = (ρB > ρA) & (IB != IA)
+ ρA[mask] = ρB[mask]
+ IA[mask] = IB[mask]
+ else:
+ k = ρA.shape[1]
+ tmp_ρ = np.empty(k, dtype=np.float64)
+ tmp_I = np.empty(k, dtype=np.int64)
+ last_idx = k - 1
+ for i in range(len(ρA)):
+ overlap = set(IB[i]).intersection(set(IA[i]))
+ aj, bj = last_idx, last_idx
+ idx = last_idx
+ # 2 * k iterations are required to traverse both A and B if needed.
+ for _ in range(2 * k):
+ if idx < 0:
+ break
+ if bj >= 0 and ρB[i, bj] > ρA[i, aj]:
+ if IB[i, bj] not in overlap:
+ tmp_ρ[idx] = ρB[i, bj]
+ tmp_I[idx] = IB[i, bj]
+ idx -= 1
+ bj -= 1
+ else:
+ tmp_ρ[idx] = ρA[i, aj]
+ tmp_I[idx] = IA[i, aj]
+ idx -= 1
+ aj -= 1
+
+ ρA[i] = tmp_ρ
+ IA[i] = tmp_I
+
+
+@njit
+def _shift_insert_at_index(a, idx, v, shift="right"):
+ """
+ If `shift=right` (default), all elements in `a[idx:]` are shifted to the right by
+ one element, `v` is inserted at index `idx`, and the last element is discarded.
+ If `shift=left`, all elements in `a[:idx]` are shifted to the left by one element,
+ `v` is inserted at index `idx-1`, and the first element is discarded. In both cases,
+ `a` is updated in place and its length remains unchanged.
+
+ Note that all unrecognized `shift` inputs will default to `shift=right`.
+
+ Parameters
+ ----------
+ a : numpy.ndarray
+ A 1d array
+
+ idx : int
+ The index at which the value `v` should be inserted. This can be any
+ integer number from `0` to `len(a)`. When `idx=len(a)` and `shift="right"`,
+ OR when `idx=0` and `shift="left"`, then no change will occur on
+ the input array `a`.
+
+ v : float
+ The value that should be inserted into array `a` at index `idx`
+
+ shift : str, default "right"
+ Indicates whether the shifting of elements should be towards
+ the right or left. If `shift="right"` (default), all elements in `a[idx:]`
+ are shifted to the right by one element. If `shift="left"`, all elements
+ in `a[:idx]` are shifted to the left by one element.
+
+ Returns
+ -------
+ None
+ """
+ if shift == "left":
+ if 0 < idx <= len(a):
+ a[: idx - 1] = a[1:idx]
+ # elements were shifted to the left, thus the insertion index becomes
+ # `idx-1`
+ a[idx - 1] = v
+ else:
+ if 0 <= idx < len(a):
+ a[idx + 1 :] = a[idx:-1]
+ a[idx] = v
+
+
 def _check_P(P, threshold=1e-6):
 """
 Check if the 1-dimensional matrix profile values are too small and
diff --git a/stumpy/gpu_aamp.py b/stumpy/gpu_aamp.py
index b4508a473..7bf783525 100644
--- a/stumpy/gpu_aamp.py
+++ b/stumpy/gpu_aamp.py
@@ -339,7 +339,9 @@ def _gpu_aamp(
 return profile_fname, indices_fname
-def gpu_aamp(T_A, m, T_B=None, ignore_trivial=True, device_id=0, p=2.0):
+def gpu_aamp(T_A, m, T_B=None, ignore_trivial=True, device_id=0, p=2.0, k=1):
+ # function needs to be revised to return (top-k) matrix profile and
+ # matrix profile indices
 """
 Compute the non-normalized (i.e., without z-normalization) matrix profile
 with one or more GPU devices
@@ -375,6 +377,11 @@ def gpu_aamp(T_A, m, T_B=None, ignore_trivial=True, device_id=0, p=2.0):
 p : float, default 2.0
 The p-norm to apply for computing the Minkowski distance.
+ k : int, default 1
+ The number of top `k` smallest distances used to construct the matrix profile.
+ Note that this will increase the total computational time and memory usage
+ when k > 1.
+
 Returns
 -------
 out : numpy.ndarray
diff --git a/stumpy/gpu_stump.py b/stumpy/gpu_stump.py
index e50546cca..d0890cceb 100644
--- a/stumpy/gpu_stump.py
+++ b/stumpy/gpu_stump.py
@@ -15,9 +15,100 @@
 logger = logging.getLogger(__name__)
+@cuda.jit(device=True)
+def _gpu_searchsorted_left(a, v, bfs, nlevel):
+ """
+ A device function, equivalent to numpy.searchsorted(a, v, side='left')
+
+ Parameters
+ ----------
+ a : numpy.ndarray
+ 1-dim array sorted in ascending order.
+
+ v : float
+ Value to insert into array `a`
+
+ bfs : numpy.ndarray
+ The breadth-first-search indices where the missing leaves of its corresponding
+ binary search tree are filled with -1.
+
+ nlevel : int
+ The number of levels in the binary search tree from which the array
+ `bfs` is obtained.
+
+ Returns
+ -------
+ idx : int
+ The index of the insertion point
+ """
+ n = a.shape[0]
+ idx = 0
+ for level in range(nlevel):
+ if v <= a[bfs[idx]]:
+ next_idx = 2 * idx + 1
+ else:
+ next_idx = 2 * idx + 2
+
+ if level == nlevel - 1 or bfs[next_idx] < 0:
+ if v <= a[bfs[idx]]:
+ idx = max(bfs[idx], 0)
+ else:
+ idx = min(bfs[idx] + 1, n)
+ break
+ idx = next_idx
+
+ return idx
+
+
+@cuda.jit(device=True)
+def _gpu_searchsorted_right(a, v, bfs, nlevel):
+ """
+ A device function, equivalent to numpy.searchsorted(a, v, side='right')
+
+ Parameters
+ ----------
+ a : numpy.ndarray
+ 1-dim array sorted in ascending order.
+
+ v : float
+ Value to insert into array `a`
+
+ bfs : numpy.ndarray
+ The breadth-first-search indices where the missing leaves of its corresponding
+ binary search tree are filled with -1.
+ + nlevel : int + The number of levels in the binary search tree from which the array + `bfs` is obtained. + + Returns + ------- + idx : int + The index of the insertion point + """ + n = a.shape[0] + idx = 0 + for level in range(nlevel): + if v < a[bfs[idx]]: + next_idx = 2 * idx + 1 + else: + next_idx = 2 * idx + 2 + + if level == nlevel - 1 or bfs[next_idx] < 0: + if v < a[bfs[idx]]: + idx = max(bfs[idx], 0) + else: + idx = min(bfs[idx] + 1, n) + break + idx = next_idx + + return idx + + @cuda.jit( "(i8, f8[:], f8[:], i8, f8[:], f8[:], f8[:], f8[:], f8[:]," - "f8[:], f8[:], i8, b1, i8, f8[:, :], i8[:, :], b1)" + "f8[:], f8[:], i8, b1, i8, f8[:, :], f8[:], f8[:], i8[:, :], i8[:], i8[:]," + "b1, i8[:], i8, i2)" ) def _compute_and_update_PI_kernel( i, @@ -31,12 +122,19 @@ def _compute_and_update_PI_kernel( Σ_T, μ_Q, σ_Q, - k, + w, ignore_trivial, excl_zone, profile, + profile_L, + profile_R, indices, + indices_L, + indices_R, compute_QT, + bfs, + nlevel, + k, ): """ A Numba CUDA kernel to update the matrix profile and matrix profile indices @@ -44,7 +142,7 @@ def _compute_and_update_PI_kernel( Parameters ---------- i : int - sliding window `i` + Sliding window `i` T_A : numpy.ndarray The time series or sequence for which to compute the dot product @@ -79,7 +177,7 @@ def _compute_and_update_PI_kernel( σ_Q : numpy.ndarray Standard deviation of the query sequence, `Q` - k : int + w : int The total number of sliding windows to iterate over ignore_trivial : bool @@ -91,18 +189,39 @@ def _compute_and_update_PI_kernel( sliding window profile : numpy.ndarray - Matrix profile. The first column consists of the global matrix profile, - the second column consists of the left matrix profile, and the third - column consists of the right matrix profile. + The (top-k) matrix profile, sorted in ascending order per row + + profile_L : numpy.ndarray + The (top-1) left matrix profile + + profile_R : numpy.ndarray + The (top-1) right matrix profile indices : numpy.ndarray - The first column consists of the matrix profile indices, the second - column consists of the left matrix profile indices, and the third - column consists of the right matrix profile indices. + The (top-k) matrix profile indices + + indices_L : numpy.ndarray + The (top-1) left matrix profile indices + + indices_R : numpy.ndarray + The (top-1) right matrix profile indices compute_QT : bool A boolean flag for whether or not to compute QT + bfs : numpy.ndarray + The breadth-first-search indices where the missing leaves of its corresponding + binary search tree are filled with -1. + + nlevel : int + The number of levels in the binary search tree from which the array + `bfs` is obtained. + + k : int + The number of top `k` smallest distances used to construct the matrix profile. + Note that this will increase the total computational time and memory usage + when k > 1. 
+ Returns ------- None @@ -126,7 +245,7 @@ def _compute_and_update_PI_kernel( for j in range(start, QT_out.shape[0], stride): zone_start = max(0, j - excl_zone) - zone_stop = min(k, j + excl_zone) + zone_stop = min(w, j + excl_zone) if compute_QT: QT_out[j] = ( @@ -157,16 +276,21 @@ def _compute_and_update_PI_kernel( if ignore_trivial: if i <= zone_stop and i >= zone_start: p_norm = np.inf - if p_norm < profile[j, 1] and i < j: - profile[j, 1] = p_norm - indices[j, 1] = i - if p_norm < profile[j, 2] and i > j: - profile[j, 2] = p_norm - indices[j, 2] = i + if p_norm < profile_L[j] and i < j: + profile_L[j] = p_norm + indices_L[j] = i + if p_norm < profile_R[j] and i > j: + profile_R[j] = p_norm + indices_R[j] = i - if p_norm < profile[j, 0]: - profile[j, 0] = p_norm - indices[j, 0] = i + if p_norm < profile[j, -1]: + idx = _gpu_searchsorted_right(profile[j], p_norm, bfs, nlevel) + for g in range(k - 1, idx, -1): + profile[j, g] = profile[j, g - 1] + indices[j, g] = indices[j, g - 1] + + profile[j, idx] = p_norm + indices[j, idx] = i def _gpu_stump( @@ -181,10 +305,11 @@ def _gpu_stump( QT_first_fname, μ_Q_fname, σ_Q_fname, - k, + w, ignore_trivial=True, range_start=1, device_id=0, + k=1, ): """ A Numba CUDA version of STOMP for parallel computation of the @@ -235,7 +360,7 @@ def _gpu_stump( The file name for the standard deviation of the query sequence, `Q`, relative to the current sliding window - k : int + w : int The total number of sliding windows to iterate over ignore_trivial : bool @@ -249,6 +374,11 @@ def _gpu_stump( device_id : int The (GPU) device number to use. The default value is `0`. + k : int + The number of top `k` smallest distances used to construct the matrix profile. + Note that this will increase the total computational time and memory usage + when k > 1. + Returns ------- profile_fname : str @@ -289,7 +419,7 @@ def _gpu_stump( Note that left and right matrix profiles are only available for self-joins. """ threads_per_block = config.STUMPY_THREADS_PER_BLOCK - blocks_per_grid = math.ceil(k / threads_per_block) + blocks_per_grid = math.ceil(w / threads_per_block) T_A = np.load(T_A_fname, allow_pickle=False) T_B = np.load(T_B_fname, allow_pickle=False) @@ -300,6 +430,10 @@ def _gpu_stump( μ_Q = np.load(μ_Q_fname, allow_pickle=False) σ_Q = np.load(σ_Q_fname, allow_pickle=False) + device_bfs = cuda.to_device(core._bfs_indices(k, fill_value=-1)) + nlevel = np.floor(np.log2(k) + 1).astype(np.int64) + # number of levels in binary search tree from which `bfs` is constructed. 
+ with cuda.gpus[device_id]: device_T_A = cuda.to_device(T_A) device_QT_odd = cuda.to_device(QT) @@ -316,11 +450,22 @@ def _gpu_stump( device_M_T = cuda.to_device(M_T) device_Σ_T = cuda.to_device(Σ_T) - profile = np.full((k, 3), np.inf, dtype=np.float64) - indices = np.full((k, 3), -1, dtype=np.int64) + profile = np.full((w, k), np.inf, dtype=np.float64) + indices = np.full((w, k), -1, dtype=np.int64) + + profile_L = np.full(w, np.inf, dtype=np.float64) + indices_L = np.full(w, -1, dtype=np.int64) + + profile_R = np.full(w, np.inf, dtype=np.float64) + indices_R = np.full(w, -1, dtype=np.int64) device_profile = cuda.to_device(profile) + device_profile_L = cuda.to_device(profile_L) + device_profile_R = cuda.to_device(profile_R) device_indices = cuda.to_device(indices) + device_indices_L = cuda.to_device(indices_L) + device_indices_R = cuda.to_device(indices_R) + _compute_and_update_PI_kernel[blocks_per_grid, threads_per_block]( range_start - 1, device_T_A, @@ -333,12 +478,19 @@ def _gpu_stump( device_Σ_T, device_μ_Q, device_σ_Q, - k, + w, ignore_trivial, excl_zone, device_profile, + device_profile_L, + device_profile_R, device_indices, + device_indices_L, + device_indices_R, False, + device_bfs, + nlevel, + k, ) for i in range(range_start, range_stop): @@ -354,27 +506,52 @@ def _gpu_stump( device_Σ_T, device_μ_Q, device_σ_Q, - k, + w, ignore_trivial, excl_zone, device_profile, + device_profile_L, + device_profile_R, device_indices, + device_indices_L, + device_indices_R, True, + device_bfs, + nlevel, + k, ) profile = device_profile.copy_to_host() + profile_L = device_profile_L.copy_to_host() + profile_R = device_profile_R.copy_to_host() indices = device_indices.copy_to_host() - profile = np.sqrt(profile) + indices_L = device_indices_L.copy_to_host() + indices_R = device_indices_R.copy_to_host() + + profile[:, :] = np.sqrt(profile) + profile_L[:] = np.sqrt(profile_L) + profile_R[:] = np.sqrt(profile_R) profile_fname = core.array_to_temp_file(profile) + profile_L_fname = core.array_to_temp_file(profile_L) + profile_R_fname = core.array_to_temp_file(profile_R) indices_fname = core.array_to_temp_file(indices) + indices_L_fname = core.array_to_temp_file(indices_L) + indices_R_fname = core.array_to_temp_file(indices_R) - return profile_fname, indices_fname + return ( + profile_fname, + profile_L_fname, + profile_R_fname, + indices_fname, + indices_L_fname, + indices_R_fname, + ) @core.non_normalized(gpu_aamp) def gpu_stump( - T_A, m, T_B=None, ignore_trivial=True, device_id=0, normalize=True, p=2.0 + T_A, m, T_B=None, ignore_trivial=True, device_id=0, normalize=True, p=2.0, k=1 ): """ Compute the z-normalized matrix profile with one or more GPU devices @@ -417,13 +594,24 @@ def gpu_stump( The p-norm to apply for computing the Minkowski distance. This parameter is ignored when `normalize == True`. + k : int, default 1 + The number of top `k` smallest distances used to construct the matrix profile. + Note that this will increase the total computational time and memory usage + when k > 1. + Returns ------- out : numpy.ndarray - The first column consists of the matrix profile, the second column - consists of the matrix profile indices, the third column consists of - the left matrix profile indices, and the fourth column consists of - the right matrix profile indices. 
+ When k = 1 (default), the first column consists of the matrix profile, + the second column consists of the matrix profile indices, the third column + consists of the left matrix profile indices, and the fourth column consists + of the right matrix profile indices. However, when k > 1, the output array + will contain exactly 2 * k + 2 columns. The first k columns (i.e., out[:, :k]) + consists of the top-k matrix profile, the next set of k columns + (i.e., out[:, k:2k]) consists of the corresponding top-k matrix profile + indices, and the last two columns (i.e., out[:, 2k] and out[:, 2k+1] or, + equivalently, out[:, -2] and out[:, -1]) correspond to the top-1 left + matrix profile indices and the top-1 right matrix profile indices, respectively. See Also -------- @@ -505,7 +693,7 @@ def gpu_stump( logger.warning("Try setting `ignore_trivial = False`.") n = T_B.shape[0] - k = T_A.shape[0] - m + 1 + w = T_A.shape[0] - m + 1 l = n - m + 1 excl_zone = int( np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM) @@ -518,8 +706,6 @@ def gpu_stump( μ_Q_fname = core.array_to_temp_file(μ_Q) σ_Q_fname = core.array_to_temp_file(σ_Q) - out = np.empty((k, 4), dtype=object) - if isinstance(device_id, int): device_ids = [device_id] else: @@ -528,6 +714,12 @@ def gpu_stump( profile = [None] * len(device_ids) indices = [None] * len(device_ids) + profile_L = [None] * len(device_ids) + indices_L = [None] * len(device_ids) + + profile_R = [None] * len(device_ids) + indices_R = [None] * len(device_ids) + for _id in device_ids: with cuda.gpus[_id]: if ( @@ -571,16 +763,24 @@ def gpu_stump( QT_first_fname, μ_Q_fname, σ_Q_fname, - k, + w, ignore_trivial, start + 1, device_ids[idx], + k, ), ) else: # Execute last chunk in parent process # Only parent process is executed when a single GPU is requested - profile[idx], indices[idx] = _gpu_stump( + ( + profile[idx], + profile_L[idx], + profile_R[idx], + indices[idx], + indices_L[idx], + indices_R[idx], + ) = _gpu_stump( T_A_fname, T_B_fname, m, @@ -592,10 +792,11 @@ def gpu_stump( QT_first_fname, μ_Q_fname, σ_Q_fname, - k, + w, ignore_trivial, start + 1, device_ids[idx], + k, ) # Clean up process pool for multi-GPU request @@ -606,7 +807,14 @@ def gpu_stump( # Collect results from spawned child processes if they exist for idx, result in enumerate(results): if result is not None: - profile[idx], indices[idx] = result.get() + ( + profile[idx], + profile_L[idx], + profile_R[idx], + indices[idx], + indices_L[idx], + indices_R[idx], + ) = result.get() os.remove(T_A_fname) os.remove(T_B_fname) @@ -621,22 +829,44 @@ def gpu_stump( for idx in range(len(device_ids)): profile_fname = profile[idx] + profile_L_fname = profile_L[idx] + profile_R_fname = profile_R[idx] indices_fname = indices[idx] + indices_L_fname = indices_L[idx] + indices_R_fname = indices_R[idx] + profile[idx] = np.load(profile_fname, allow_pickle=False) + profile_L[idx] = np.load(profile_L_fname, allow_pickle=False) + profile_R[idx] = np.load(profile_R_fname, allow_pickle=False) indices[idx] = np.load(indices_fname, allow_pickle=False) + indices_L[idx] = np.load(indices_L_fname, allow_pickle=False) + indices_R[idx] = np.load(indices_R_fname, allow_pickle=False) + os.remove(profile_fname) + os.remove(profile_L_fname) + os.remove(profile_R_fname) os.remove(indices_fname) - - for i in range(1, len(device_ids)): - # Update all matrix profiles and matrix profile indices - # (global, left, right) and store in profile[0] and indices[0] - for col in range(profile[0].shape[1]): # pragma: no cover - cond = profile[0][:, col] < 
profile[i][:, col] - profile[0][:, col] = np.where(cond, profile[0][:, col], profile[i][:, col]) - indices[0][:, col] = np.where(cond, indices[0][:, col], indices[i][:, col]) - - out[:, 0] = profile[0][:, 0] - out[:, 1:4] = indices[0][:, :] + os.remove(indices_L_fname) + os.remove(indices_R_fname) + + for i in range(1, len(device_ids)): # pragma: no cover + # Update (top-k) matrix profile and matrix profile indices + core._merge_topk_PI(profile[0], profile[i], indices[0], indices[i]) + + # Update (top-1) left matrix profile and matrix profile indices + mask = profile_L[0] < profile_L[i] + profile_L[0][mask] = profile_L[i][mask] + indices_L[0][mask] = indices_L[i][mask] + + # Update (top-1) right matrix profile and matrix profile indices + mask = profile_R[0] < profile_R[i] + profile_R[0][mask] = profile_R[i][mask] + indices_R[0][mask] = indices_R[i][mask] + + out = np.empty((w, 2 * k + 2), dtype=object) # last two columns are to store + # (top-1) left/right matrix profile indices + out[:, :k] = profile[0] + out[:, k:] = np.column_stack((indices[0], indices_L[0], indices_R[0])) core._check_P(out[:, 0]) diff --git a/stumpy/scraamp.py b/stumpy/scraamp.py index 0f0fc41f2..e81c267c3 100644 --- a/stumpy/scraamp.py +++ b/stumpy/scraamp.py @@ -14,6 +14,79 @@ logger = logging.getLogger(__name__) +def _preprocess_prescraamp(T_A, m, T_B=None, s=None): + """ + Performs several preprocessings and returns outputs that are needed for the + non-normalized preSCRIMP algorithm. + + Parameters + ---------- + T_A : numpy.ndarray + The time series or sequence for which to compute the matrix profile + + m : int + Window size + + T_B : numpy.ndarray, default None + The time series or sequence that will be used to annotate T_A. For every + subsequence in T_A, its nearest neighbor in T_B will be recorded. + + s : int, default None + The sampling interval that defaults to + `int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM))` + + Returns + ------- + T_A : numpy.ndarray + A copy of the time series input `T_A`, where all NaN and inf values + are replaced with zero. + + T_B : numpy.ndarray + A copy of the time series input `T_B`, where all NaN and inf values + are replaced with zero. If the input `T_B` is not provided (default), + this array is just a copy of `T_A`. 
+ + T_A_subseq_isfinite : numpy.ndarray + A boolean array that indicates whether a subsequence in `T_A` contains a + `np.nan`/`np.inf` value (False) + + T_B_subseq_isfinite : numpy.ndarray + A boolean array that indicates whether a subsequence in `T_B` contains a + `np.nan`/`np.inf` value (False) + + indices : numpy.ndarray + The subsequence indices to compute `prescrump` for + + s : int + The sampling interval that defaults to + `int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM))` + + excl_zone : int + The half width for the exclusion zone + """ + if T_B is None: + T_B = T_A + excl_zone = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM)) + else: + excl_zone = None + + T_A, T_A_subseq_isfinite = core.preprocess_non_normalized(T_A, m) + T_B, T_B_subseq_isfinite = core.preprocess_non_normalized(T_B, m) + + n_A = T_A.shape[0] + l = n_A - m + 1 + + if s is None: # pragma: no cover + if excl_zone is not None: # self-join + s = excl_zone + else: # AB-join + s = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM)) + + indices = np.random.permutation(range(0, l, s)).astype(np.int64) + + return (T_A, T_B, T_A_subseq_isfinite, T_B_subseq_isfinite, indices, s, excl_zone) + + @njit(fastmath=True) def _compute_PI( T_A, @@ -272,7 +345,8 @@ def _prescraamp( return np.power(P_NORM[0], 1.0 / p), I[0] -def prescraamp(T_A, m, T_B=None, s=None, p=2.0): +def prescraamp(T_A, m, T_B=None, s=None, p=2.0, k=1): + # this function should be modified so that it can return top-k matrix profile """ A convenience wrapper around the Numba JIT-compiled parallelized `_prescraamp` function which computes the approximate matrix profile according to the @@ -297,6 +371,11 @@ def prescraamp(T_A, m, T_B=None, s=None, p=2.0): p : float, default 2.0 The p-norm to apply for computing the Minkowski distance. + k : int, default 1 + The number of top `k` smallest distances used to construct the matrix profile. + Note that this will increase the total computational time and memory usage + when k > 1. + Returns ------- P : numpy.ndarray @@ -312,25 +391,16 @@ def prescraamp(T_A, m, T_B=None, s=None, p=2.0): See Algorithm 2 """ - if T_B is None: - T_B = T_A - excl_zone = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM)) - else: - excl_zone = None - - T_A, T_A_subseq_isfinite = core.preprocess_non_normalized(T_A, m) - T_B, T_B_subseq_isfinite = core.preprocess_non_normalized(T_B, m) - - n_A = T_A.shape[0] - l = n_A - m + 1 - - if s is None: # pragma: no cover - if excl_zone is not None: # self-join - s = excl_zone - else: # AB-join - s = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM)) + ( + T_A, + T_B, + T_A_subseq_isfinite, + T_B_subseq_isfinite, + indices, + s, + excl_zone, + ) = _preprocess_prescraamp(T_A, m, T_B=T_B, s=s) - indices = np.random.permutation(range(0, l, s)).astype(np.int64) P, I = _prescraamp( T_A, T_B, @@ -387,6 +457,11 @@ class scraamp: p : float, default 2.0 The p-norm to apply for computing the Minkowski distance. + k : int, default 1 + The number of top `k` smallest distances used to construct the matrix profile. + Note that this will increase the total computational time and memory usage + when k > 1. + Attributes ---------- P_ : numpy.ndarray @@ -422,6 +497,7 @@ def __init__( pre_scraamp=False, s=None, p=2.0, + k=1, # this function needs to be modified for top-k ): """ Initialize the `scraamp` object @@ -458,6 +534,11 @@ def __init__( p : float, default 2.0 The p-norm to apply for computing the Minkowski distance. + + k : int, default 1 + The number of top `k` smallest distances used to construct the matrix + profile. 
Note that this will increase the total computational time and
+ memory usage when k > 1.
 """
 self._ignore_trivial = ignore_trivial
 self._p = p
@@ -520,9 +601,38 @@ def __init__(
 if pre_scraamp:
 if self._ignore_trivial:
- P, I = prescraamp(T_A, m, s=s, p=p)
+ (
+ T_A,
+ T_B,
+ T_A_subseq_isfinite,
+ T_B_subseq_isfinite,
+ indices,
+ s,
+ excl_zone,
+ ) = _preprocess_prescraamp(T_A, m, s=s)
 else:
- P, I = prescraamp(T_A, m, T_B=T_B, s=s, p=p)
+ (
+ T_A,
+ T_B,
+ T_A_subseq_isfinite,
+ T_B_subseq_isfinite,
+ indices,
+ s,
+ excl_zone,
+ ) = _preprocess_prescraamp(T_A, m, T_B=T_B, s=s)
+
+ P, I = _prescraamp(
+ T_A,
+ T_B,
+ m,
+ T_A_subseq_isfinite,
+ T_B_subseq_isfinite,
+ p,
+ indices,
+ s,
+ excl_zone,
+ )
+
 for i in range(P.shape[0]):
 if self._P[i, 0] > P[i]:
 self._P[i, 0] = P[i]
diff --git a/stumpy/scrump.py b/stumpy/scrump.py
index 3ec55ef7c..f874966e2 100644
--- a/stumpy/scrump.py
+++ b/stumpy/scrump.py
@@ -14,6 +14,83 @@
 logger = logging.getLogger(__name__)
+def _preprocess_prescrump(T_A, m, T_B=None, s=None):
+ """
+ Performs several preprocessing steps and returns outputs that are needed for
+ the prescrump algorithm.
+
+ Parameters
+ ----------
+ T_A : numpy.ndarray
+ The time series or sequence for which to compute the matrix profile
+
+ m : int
+ Window size
+
+ T_B : numpy.ndarray, default None
+ The time series or sequence that will be used to annotate T_A. For every
+ subsequence in T_A, its nearest neighbor in T_B will be recorded.
+
+ s : int, default None
+ The sampling interval that defaults to
+ `int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM))`
+
+ Returns
+ -------
+ T_A : numpy.ndarray
+ A copy of the time series input `T_A`, where all NaN and inf values
+ are replaced with zero.
+
+ T_B : numpy.ndarray
+ A copy of the time series input `T_B`, where all NaN and inf values
+ are replaced with zero. If the input `T_B` is not provided (default),
+ this array is just a copy of `T_A`.
+
+ μ_Q : numpy.ndarray
+ Sliding window mean for `T_A`
+
+ σ_Q : numpy.ndarray
+ Sliding window standard deviation for `T_A`
+
+ M_T : numpy.ndarray
+ Sliding window mean for `T_B`
+
+ Σ_T : numpy.ndarray
+ Sliding window standard deviation for `T_B`
+
+ indices : numpy.ndarray
+ The subsequence indices to compute `prescrump` for
+
+ s : int
+ The sampling interval that defaults to
+ `int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM))`
+
+ excl_zone : int
+ The half width for the exclusion zone
+ """
+ if T_B is None:
+ T_B = T_A
+ excl_zone = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM))
+ else:
+ excl_zone = None
+
+ T_A, μ_Q, σ_Q = core.preprocess(T_A, m)
+ T_B, M_T, Σ_T = core.preprocess(T_B, m)
+
+ n_A = T_A.shape[0]
+ l = n_A - m + 1
+
+ if s is None: # pragma: no cover
+ if excl_zone is not None: # self-join
+ s = excl_zone
+ else: # AB-join
+ s = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM))
+
+ indices = np.random.permutation(range(0, l, s)).astype(np.int64)
+
+ return (T_A, T_B, μ_Q, σ_Q, M_T, Σ_T, indices, s, excl_zone)
+
+
 @njit(fastmath=True)
 def _compute_PI(
 T_A,
@@ -31,10 +108,11 @@ def _compute_PI(
 P_squared,
 I,
 excl_zone=None,
+ k=1,
 ):
 """
- Compute (Numba JIT-compiled) and update the squared matrix profile distance
- and matrix profile indces according to the preSCRIMP algorithm
+ Compute (Numba JIT-compiled) and update the squared (top-k) matrix profile
+ distance and matrix profile indices according to the preSCRIMP algorithm.
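+
+ (Each row of `P_squared` and its companion `I` is kept sorted in ascending
+ order of distance via `core._shift_insert_at_index`; see `core.py` above.)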
Parameters ---------- @@ -49,16 +127,16 @@ def _compute_PI( Window size μ_Q : numpy.ndarray - Sliding window mean for T_A + Sliding window mean for `T_A` σ_Q : numpy.ndarray - Sliding window standard deviation for T_A + Sliding window standard deviation for `T_A` M_T : numpy.ndarray - Sliding window mean for T_B + Sliding window mean for `T_B` Σ_T : numpy.ndarray - Sliding window standard deviation for T_B + Sliding window standard deviation for `T_B` indices : numpy.ndarray The subsequence indices to compute `prescrump` for @@ -77,14 +155,19 @@ def _compute_PI( `int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM))` P_squared : numpy.ndarray - The squared matrix profile + The squared (top-k) matrix profile I : numpy.ndarray - The matrix profile indices + The (top-k) matrix profile indices excl_zone : int The half width for the exclusion zone relative to the `i`. + k : int, default 1 + The number of top `k` smallest distances used to construct the matrix profile. + Note that this will increase the total computational time and memory usage + when k > 1. + Returns ------- None @@ -103,67 +186,139 @@ def _compute_PI( for i in indices[start:stop]: Q = T_A[i : i + m] QT[:] = core._sliding_dot_product(Q, T_B) - # Update P[i] relative to all T[j : j + m] squared_distance_profile[:] = core._mass(Q, T_B, QT, μ_Q[i], σ_Q[i], M_T, Σ_T) squared_distance_profile[:] = np.square(squared_distance_profile) if excl_zone is not None: - zone_start = max(0, i - excl_zone) - zone_stop = min(w, i + excl_zone) - squared_distance_profile[zone_start : zone_stop + 1] = np.inf - - # only for self-join - mask = squared_distance_profile < P_squared[thread_idx] - P_squared[thread_idx][mask] = squared_distance_profile[mask] - I[thread_idx][mask] = i - - I[thread_idx, i] = np.argmin(squared_distance_profile) - P_squared[thread_idx, i] = squared_distance_profile[I[thread_idx, i]] - if P_squared[thread_idx, i] == np.inf: # pragma: no cover - I[thread_idx, i] = -1 - else: - j = I[thread_idx, i] - # Given the squared distance, work backwards and compute QT - QT_j = (m - P_squared[thread_idx, i] / 2.0) * (Σ_T[j] * σ_Q[i]) + ( - m * M_T[j] * μ_Q[i] + core._apply_exclusion_zone(squared_distance_profile, i, excl_zone, np.inf) + + nn_i = np.argmin(squared_distance_profile) + if ( + squared_distance_profile[nn_i] < P_squared[thread_idx, i, -1] + and nn_i not in I[thread_idx, i] + ): + idx = np.searchsorted( + P_squared[thread_idx, i], + squared_distance_profile[nn_i], + side="right", + ) + core._shift_insert_at_index( + P_squared[thread_idx, i], idx, squared_distance_profile[nn_i] + ) + core._shift_insert_at_index(I[thread_idx, i], idx, nn_i) + + if P_squared[thread_idx, i, 0] == np.inf: # pragma: no cover + I[thread_idx, i, 0] = -1 + continue + + j = nn_i + # Given the squared distance, work backwards and compute QT + QT_j = (m - P_squared[thread_idx, i, 0] / 2.0) * (Σ_T[j] * σ_Q[i]) + ( + m * M_T[j] * μ_Q[i] + ) + QT_j_prime = QT_j + # Update top-k for both subsequences `S[i+g] = T[i+g:i+g+m]`` and + # `S[j+g] = T[j+g:j+g+m]` (i.e., the right neighbors of `T[i : i+m]` and + # `T[j:j+m]`) by using the distance between `S[i+g]` and `S[j+g]` + for g in range(1, min(s, l - i, w - j)): + QT_j = ( + QT_j + - T_B[j + g - 1] * T_A[i + g - 1] + + T_B[j + g + m - 1] * T_A[i + g + m - 1] + ) + D_squared = core._calculate_squared_distance( + m, + QT_j, + M_T[j + g], + Σ_T[j + g], + μ_Q[i + g], + σ_Q[i + g], ) - QT_j_prime = QT_j - for k in range(1, min(s, l - i, w - j)): - QT_j = ( - QT_j - - T_B[j + k - 1] * T_A[i + k - 1] - + T_B[j + k + m - 1] 
* T_A[i + k + m - 1] + if ( + D_squared < P_squared[thread_idx, i + g, -1] + and (j + g) not in I[thread_idx, i + g] + ): + idx = np.searchsorted( + P_squared[thread_idx, i + g], D_squared, side="right" ) - D_squared = core._calculate_squared_distance( - m, - QT_j, - M_T[j + k], - Σ_T[j + k], - μ_Q[i + k], - σ_Q[i + k], + core._shift_insert_at_index( + P_squared[thread_idx, i + g], idx, D_squared ) - if D_squared < P_squared[thread_idx, i + k]: - P_squared[thread_idx, i + k] = D_squared - I[thread_idx, i + k] = j + k - if excl_zone is not None and D_squared < P_squared[thread_idx, j + k]: - P_squared[thread_idx, j + k] = D_squared - I[thread_idx, j + k] = i + k - QT_j = QT_j_prime - for k in range(1, min(s, i + 1, j + 1)): - QT_j = QT_j - T_B[j - k + m] * T_A[i - k + m] + T_B[j - k] * T_A[i - k] - D_squared = core._calculate_squared_distance( - m, - QT_j, - M_T[j - k], - Σ_T[j - k], - μ_Q[i - k], - σ_Q[i - k], + core._shift_insert_at_index(I[thread_idx, i + g], idx, j + g) + + if ( + excl_zone is not None + and D_squared < P_squared[thread_idx, j + g, -1] + and (i + g) not in I[thread_idx, j + g] + ): + idx = np.searchsorted( + P_squared[thread_idx, j + g], D_squared, side="right" ) - if D_squared < P_squared[thread_idx, i - k]: - P_squared[thread_idx, i - k] = D_squared - I[thread_idx, i - k] = j - k - if excl_zone is not None and D_squared < P_squared[thread_idx, j - k]: - P_squared[thread_idx, j - k] = D_squared - I[thread_idx, j - k] = i - k + core._shift_insert_at_index( + P_squared[thread_idx, j + g], idx, D_squared + ) + core._shift_insert_at_index(I[thread_idx, j + g], idx, i + g) + + QT_j = QT_j_prime + # Update top-k for both subsequences `S[i-g] = T[i-g:i-g+m]` and + # `S[j-g] = T[j-g:j-g+m]` (i.e., the left neighbors of `T[i : i+m]` and + # `T[j:j+m]`) by using the distance between `S[i-g]` and `S[j-g]` + for g in range(1, min(s, i + 1, j + 1)): + QT_j = QT_j - T_B[j - g + m] * T_A[i - g + m] + T_B[j - g] * T_A[i - g] + D_squared = core._calculate_squared_distance( + m, + QT_j, + M_T[j - g], + Σ_T[j - g], + μ_Q[i - g], + σ_Q[i - g], + ) + if ( + D_squared < P_squared[thread_idx, i - g, -1] + and (j - g) not in I[thread_idx, i - g] + ): + idx = np.searchsorted( + P_squared[thread_idx, i - g], D_squared, side="right" + ) + core._shift_insert_at_index( + P_squared[thread_idx, i - g], idx, D_squared + ) + core._shift_insert_at_index(I[thread_idx, i - g], idx, j - g) + + if ( + excl_zone is not None + and D_squared < P_squared[thread_idx, j - g, -1] + and (i - g) not in I[thread_idx, j - g] + ): + idx = np.searchsorted( + P_squared[thread_idx, j - g], D_squared, side="right" + ) + core._shift_insert_at_index( + P_squared[thread_idx, j - g], idx, D_squared + ) + core._shift_insert_at_index(I[thread_idx, j - g], idx, i - g) + + # In the case of a self-join, the calculated distance profile can also be + # used to refine the top-k for all non-trivial subsequences + if excl_zone is not None: + # Note that the squared distance, `squared_distance_profile[j]`, + # between subsequences `S_i = T[i : i + m]` and `S_j = T[j : j + m]` + # can be used to update the top-k for BOTH subsequence `i` and + # subsequence `j`. We update the latter here. 
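+ # `side="right"` in the `np.searchsorted` call below places a new
+ # distance after any existing, equal distances, so earlier-recorded
+ # neighbors keep their priority within each sorted row.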
+ + indices = np.flatnonzero( + squared_distance_profile < P_squared[thread_idx, :, -1] + ) + for j in indices: + if i not in I[thread_idx, j]: + idx = np.searchsorted( + P_squared[thread_idx, j], + squared_distance_profile[j], + side="right", + ) + core._shift_insert_at_index( + P_squared[thread_idx, j], idx, squared_distance_profile[j] + ) + core._shift_insert_at_index(I[thread_idx, j], idx, i) @njit( @@ -183,6 +338,7 @@ def _prescrump( indices, s, excl_zone=None, + k=1, ): """ A Numba JIT-compiled implementation of the preSCRIMP algorithm. @@ -200,16 +356,17 @@ def _prescrump( Window size μ_Q : numpy.ndarray - Sliding window mean for T_A + Sliding window mean for `T_A` σ_Q : numpy.ndarray - Sliding window standard deviation for T_A + Sliding window standard deviation for `T_A` M_T : numpy.ndarray - Sliding window mean for T_B + Sliding window mean for `T_B` Σ_T : numpy.ndarray - Sliding window standard deviation for T_B + Sliding window standard deviation for `T_B` + indices : numpy.ndarray The subsequence indices to compute `prescrump` for @@ -231,13 +388,23 @@ def _prescrump( excl_zone : int The half width for the exclusion zone relative to the `i`. + k : int, default 1 + The number of top `k` smallest distances used to construct the matrix profile. + Note that this will increase the total computational time and memory usage + when k > 1. + Returns ------- out1 : numpy.ndarray - Matrix profile + The (top-k) matrix profile. When k=1 (default), the first (and only) column + in this 2D array consists of the matrix profile. When k > 1, the output + has exactly `k` columns consisting of the top-k matrix profile. out2 : numpy.ndarray - Matrix profile indices + The (top-k) matrix profile indices. When k=1 (default), the first (and only) + column in this 2D array consists of the matrix profile indices. When k > 1, + the output has exactly `k` columns consisting of the top-k matrix profile + indices. Notes ----- @@ -248,8 +415,8 @@ def _prescrump( """ n_threads = numba.config.NUMBA_NUM_THREADS l = T_A.shape[0] - m + 1 - P_squared = np.full((n_threads, l), np.inf, dtype=np.float64) - I = np.full((n_threads, l), -1, dtype=np.int64) + P_squared = np.full((n_threads, l, k), np.inf, dtype=np.float64) + I = np.full((n_threads, l, k), -1, dtype=np.int64) idx_ranges = core._get_ranges(len(indices), n_threads, truncate=False) for thread_idx in prange(n_threads): @@ -269,23 +436,21 @@ def _prescrump( P_squared, I, excl_zone, + k, ) for thread_idx in range(1, n_threads): - for i in range(l): - if P_squared[thread_idx, i] < P_squared[0, i]: - P_squared[0, i] = P_squared[thread_idx, i] - I[0, i] = I[thread_idx, i] + core._merge_topk_PI(P_squared[0], P_squared[thread_idx], I[0], I[thread_idx]) return np.sqrt(P_squared[0]), I[0] @core.non_normalized(scraamp.prescraamp) -def prescrump(T_A, m, T_B=None, s=None, normalize=True, p=2.0): +def prescrump(T_A, m, T_B=None, s=None, normalize=True, p=2.0, k=1): """ - A convenience wrapper around the Numba JIT-compiled parallelized `_prescrump` - function which computes the approximate matrix profile according to the preSCRIMP - algorithm + A convenience wrapper around the Numba JIT-compiled parallelized + `_prescrump` function which computes the approximate (top-k) matrix + profile according to the preSCRIMP algorithm. Parameters ---------- @@ -312,13 +477,23 @@ def prescrump(T_A, m, T_B=None, s=None, normalize=True, p=2.0): The p-norm to apply for computing the Minkowski distance. This parameter is ignored when `normalize == True`. 
+ k : int, default 1
+ The number of top `k` smallest distances used to construct the matrix profile.
+ Note that this will increase the total computational time and memory usage
+ when k > 1.
+
 Returns
 -------
 P : numpy.ndarray
- Matrix profile
+ The (top-k) matrix profile. When k = 1 (default), this is a 1D array
+ consisting of the matrix profile. When k > 1, the output is a 2D array that
+ has exactly `k` columns consisting of the top-k matrix profile.
 I : numpy.ndarray
- Matrix profile indices
+ The (top-k) matrix profile indices. When k = 1 (default), this is a 1D array
+ consisting of the matrix profile indices. When k > 1, the output is a 2D
+ array that has exactly `k` columns consisting of the top-k matrix profile
+ indices.
 Notes
 -----
@@ -327,25 +502,10 @@ def prescrump(T_A, m, T_B=None, s=None, normalize=True, p=2.0):
 See Algorithm 2
 """
- if T_B is None:
- T_B = T_A
- excl_zone = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM))
- else:
- excl_zone = None
-
- T_A, μ_Q, σ_Q = core.preprocess(T_A, m)
- T_B, M_T, Σ_T = core.preprocess(T_B, m)
-
- n_A = T_A.shape[0]
- l = n_A - m + 1
-
- if s is None: # pragma: no cover
- if excl_zone is not None: # self-join
- s = excl_zone
- else: # AB-join
- s = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM))
+ T_A, T_B, μ_Q, σ_Q, M_T, Σ_T, indices, s, excl_zone = _preprocess_prescrump(
+ T_A, m, T_B=T_B, s=s
+ )
- indices = np.random.permutation(range(0, l, s)).astype(np.int64)
 P, I = _prescrump(
 T_A,
 T_B,
@@ -357,9 +517,13 @@ def prescrump(T_A, m, T_B=None, s=None, normalize=True, p=2.0):
 indices,
 s,
 excl_zone,
+ k,
 )
- return P, I
+ if k == 1:
+ return P.flatten().astype(np.float64), I.flatten().astype(np.int64)
+ else:
+ return P, I
@core.non_normalized(
 scraamp.prescraamp
)
@@ -413,22 +577,40 @@ class scrump:
 The p-norm to apply for computing the Minkowski distance. This parameter
 is ignored when `normalize == True`.
+ k : int, default 1
+ The number of top `k` smallest distances used to construct the matrix profile.
+ Note that this will increase the total computational time and memory usage
+ when k > 1.
+
 Attributes
 ----------
 P_ : numpy.ndarray
- The updated matrix profile
+ The updated (top-k) matrix profile. When `k=1` (default), this output is
+ a 1D array consisting of the matrix profile. When `k > 1`, the output
+ is a 2D array that has exactly `k` columns consisting of the top-k matrix
+ profile.
 I_ : numpy.ndarray
- The updated matrix profile indices
+ The updated (top-k) matrix profile indices. When `k=1` (default), this output is
+ a 1D array consisting of the matrix profile indices. When `k > 1`, the output
+ is a 2D array that has exactly `k` columns consisting of the top-k matrix
+ profile indices.
+
+ left_I_ : numpy.ndarray
+ The updated left (top-1) matrix profile indices
+
+ right_I_ : numpy.ndarray
+ The updated right (top-1) matrix profile indices
+
 Methods
 -------
 update()
 Update the matrix profile and the matrix profile indices by computing
 additional new distances (limited by `percentage`) that make up the full
- distance matrix. Each output contains three columns that correspond to
- the matrix profile, the left matrix profile, and the right matrix profile,
- respectively.
+ distance matrix. It updates the (top-k) matrix profile, (top-1) left
+ matrix profile, (top-1) right matrix profile, (top-k) matrix profile indices,
+ (top-1) left matrix profile indices, and (top-1) right matrix profile indices.
See Also -------- @@ -476,6 +658,7 @@ def __init__( s=None, normalize=True, p=2.0, + k=1, ): """ Initialize the `scrump` object @@ -518,6 +701,11 @@ def __init__( p : float, default 2.0 The p-norm to apply for computing the Minkowski distance. This parameter is ignored when `normalize == True`. + + k : int, default 1 + The number of top `k` smallest distances used to construct the matrix + profile. Note that this will increase the total computational time and + memory usage when k > 1. """ self._ignore_trivial = ignore_trivial @@ -574,11 +762,15 @@ def __init__( self._n_A = self._T_A.shape[0] self._n_B = self._T_B.shape[0] self._l = self._n_A - self._m + 1 + self._k = k - self._P = np.empty((self._l, 3), dtype=np.float64) - self._I = np.empty((self._l, 3), dtype=np.int64) - self._P[:, :] = np.inf - self._I[:, :] = -1 + self._P = np.full((self._l, self._k), np.inf, dtype=np.float64) + self._PL = np.full(self._l, np.inf, dtype=np.float64) + self._PR = np.full(self._l, np.inf, dtype=np.float64) + + self._I = np.full((self._l, self._k), -1, dtype=np.int64) + self._IL = np.full(self._l, -1, dtype=np.int64) + self._IR = np.full(self._l, -1, dtype=np.int64) self._excl_zone = int(np.ceil(self._m / config.STUMPY_EXCL_ZONE_DENOM)) if s is None: @@ -589,13 +781,32 @@ def __init__( if pre_scrump: if self._ignore_trivial: - P, I = prescrump(T_A, m, s=s) + ( + T_A, + T_B, + μ_Q, + σ_Q, + M_T, + Σ_T, + indices, + s, + excl_zone, + ) = _preprocess_prescrump(T_A, m, s=s) else: - P, I = prescrump(T_A, m, T_B=T_B, s=s) - for i in range(P.shape[0]): - if self._P[i, 0] > P[i]: - self._P[i, 0] = P[i] - self._I[i, 0] = I[i] + ( + T_A, + T_B, + μ_Q, + σ_Q, + M_T, + Σ_T, + indices, + s, + excl_zone, + ) = _preprocess_prescrump(T_A, m, T_B=T_B, s=s) + + P, I = _prescrump(T_A, T_B, m, μ_Q, σ_Q, M_T, Σ_T, indices, s, excl_zone, k) + core._merge_topk_PI(self._P, P, self._I, I) if self._ignore_trivial: self._diags = np.random.permutation( @@ -626,14 +837,14 @@ def __init__( def update(self): """ - Update the matrix profile and the matrix profile indices by computing - additional new distances (limited by `percentage`) that make up the full - distance matrix. + Update the (top-k) matrix profile and the (top-k) matrix profile indices by + computing additional new distances (limited by `percentage`) that make up + the full distance matrix. 
""" if self._chunk_idx < self._n_chunks: start_idx, stop_idx = self._chunk_diags_ranges[self._chunk_idx] - P, I = _stump( + P, PL, PR, I, IL, IR = _stump( self._T_A, self._T_B, self._m, @@ -649,48 +860,60 @@ def update(self): self._T_B_subseq_isconstant, self._diags[start_idx:stop_idx], self._ignore_trivial, + self._k, ) - # Update matrix profile and indices - for i in range(self._P.shape[0]): - if self._P[i, 0] > P[i, 0]: - self._P[i, 0] = P[i, 0] - self._I[i, 0] = I[i, 0] - # left matrix profile and left matrix profile indices - if self._P[i, 1] > P[i, 1]: - self._P[i, 1] = P[i, 1] - self._I[i, 1] = I[i, 1] - # right matrix profile and right matrix profile indices - if self._P[i, 2] > P[i, 2]: - self._P[i, 2] = P[i, 2] - self._I[i, 2] = I[i, 2] + # Update (top-k) matrix profile and indices + core._merge_topk_PI(self._P, P, self._I, I) + + # update left matrix profile and indices + mask = PL < self._PL + self._PL[mask] = PL[mask] + self._IL[mask] = IL[mask] + + # update right matrix profile and indices + mask = PR < self._PR + self._PR[mask] = PR[mask] + self._IR[mask] = IR[mask] self._chunk_idx += 1 @property def P_(self): """ - Get the updated matrix profile + Get the updated (top-k) matrix profile. When `k=1` (default), this output + is a 1D array consisting of the updated matrix profile. When `k > 1`, the + output is a 2D array that has exactly `k` columns consisting of the updated + top-k matrix profile. """ - return self._P[:, 0].astype(np.float64) + if self._k == 1: + return self._P.flatten().astype(np.float64) + else: + return self._P.astype(np.float64) @property def I_(self): """ - Get the updated matrix profile indices + Get the updated (top-k) matrix profile indices. When `k=1` (default), this + output is a 1D array consisting of the updated matrix profile indices. When + `k > 1`, the output is a 2D array that has exactly `k` columns consisting + of the updated top-k matrix profile indices. """ - return self._I[:, 0].astype(np.int64) + if self._k == 1: + return self._I.flatten().astype(np.int64) + else: + return self._I.astype(np.int64) @property def left_I_(self): """ - Get the updated left matrix profile indices + Get the updated left (top-1) matrix profile indices """ - return self._I[:, 1].astype(np.int64) + return self._IL.astype(np.int64) @property def right_I_(self): """ - Get the updated right matrix profile indices + Get the updated right (top-1) matrix profile indices """ - return self._I[:, 2].astype(np.int64) + return self._IR.astype(np.int64) diff --git a/stumpy/stimp.py b/stumpy/stimp.py index 1c285f116..c377a69aa 100644 --- a/stumpy/stimp.py +++ b/stumpy/stimp.py @@ -214,6 +214,7 @@ def update(self): ignore_trivial=True, percentage=self._percentage, pre_scrump=self._pre_scrump, + k=1, ) approx.update() self._PAN[ diff --git a/stumpy/stump.py b/stumpy/stump.py index fed857649..49657370a 100644 --- a/stumpy/stump.py +++ b/stumpy/stump.py @@ -40,13 +40,18 @@ def _compute_diagonal( diags_stop_idx, thread_idx, ρ, + ρL, + ρR, I, + IL, + IR, ignore_trivial, + k, ): """ - Compute (Numba JIT-compiled) and update the Pearson correlation, ρ, and I - sequentially along individual diagonals using a single thread and avoiding race - conditions + Compute (Numba JIT-compiled) and update the (top-k) Pearson correlation (ρ), + ρL, ρR, I, IL, and IR sequentially along individual diagonals using a single + thread and avoiding race conditions. 
Parameters ---------- @@ -116,15 +121,32 @@ def _compute_diagonal( The thread index ρ : numpy.ndarray - The Pearson correlations + The (top-k) Pearson correlations, sorted in ascending order per row + + ρL : numpy.ndarray + The top-1 left Pearson correlations + + ρR : numpy.ndarray + The top-1 right Pearson correlations I : numpy.ndarray - The matrix profile indices + The (top-k) matrix profile indices + + IL : numpy.ndarray + The top-1 left matrix profile indices + + IR : numpy.ndarray + The top-1 right matrix profile indices ignore_trivial : bool Set to `True` if this is a self-join. Otherwise, for AB-join, set this to `False`. Default is `True`. + k : int + The number of top `k` smallest distances used to construct the matrix profile. + Note that this will increase the total computational time and memory usage + when k > 1. + Returns ------- None @@ -155,16 +177,16 @@ def _compute_diagonal( uint64_m = np.uint64(m) for diag_idx in range(diags_start_idx, diags_stop_idx): - k = diags[diag_idx] + g = diags[diag_idx] - if k >= 0: - iter_range = range(0, min(n_A - m + 1, n_B - m + 1 - k)) + if g >= 0: + iter_range = range(0, min(n_A - m + 1, n_B - m + 1 - g)) else: - iter_range = range(-k, min(n_A - m + 1, n_B - m + 1 - k)) + iter_range = range(-g, min(n_A - m + 1, n_B - m + 1 - g)) for i in iter_range: uint64_i = np.uint64(i) - uint64_j = np.uint64(i + k) + uint64_j = np.uint64(i + g) if uint64_i == 0 or uint64_j == 0: cov = ( @@ -196,25 +218,39 @@ def _compute_diagonal( if T_B_subseq_isconstant[uint64_j] and T_A_subseq_isconstant[uint64_i]: pearson = 1.0 + # `ρ[thread_idx, i, :]` is sorted ascendingly and MUST be updated + # when the newly-calculated `pearson` value becomes greater than the + # first (i.e. smallest) element in this array. Note that a higher + # pearson value corresponds to a lower distance. if pearson > ρ[thread_idx, uint64_i, 0]: - ρ[thread_idx, uint64_i, 0] = pearson - I[thread_idx, uint64_i, 0] = uint64_j + idx = np.searchsorted(ρ[thread_idx, uint64_i], pearson) + core._shift_insert_at_index( + ρ[thread_idx, uint64_i], idx, pearson, shift="left" + ) + core._shift_insert_at_index( + I[thread_idx, uint64_i], idx, uint64_j, shift="left" + ) if ignore_trivial: # self-joins only if pearson > ρ[thread_idx, uint64_j, 0]: - ρ[thread_idx, uint64_j, 0] = pearson - I[thread_idx, uint64_j, 0] = uint64_i + idx = np.searchsorted(ρ[thread_idx, uint64_j], pearson) + core._shift_insert_at_index( + ρ[thread_idx, uint64_j], idx, pearson, shift="left" + ) + core._shift_insert_at_index( + I[thread_idx, uint64_j], idx, uint64_i, shift="left" + ) if uint64_i < uint64_j: # left pearson correlation and left matrix profile index - if pearson > ρ[thread_idx, uint64_j, 1]: - ρ[thread_idx, uint64_j, 1] = pearson - I[thread_idx, uint64_j, 1] = uint64_i + if pearson > ρL[thread_idx, uint64_j]: + ρL[thread_idx, uint64_j] = pearson + IL[thread_idx, uint64_j] = uint64_i # right pearson correlation and right matrix profile index - if pearson > ρ[thread_idx, uint64_i, 2]: - ρ[thread_idx, uint64_i, 2] = pearson - I[thread_idx, uint64_i, 2] = uint64_j + if pearson > ρR[thread_idx, uint64_i]: + ρR[thread_idx, uint64_i] = pearson + IR[thread_idx, uint64_i] = uint64_j return @@ -241,11 +277,13 @@ def _stump( T_B_subseq_isconstant, diags, ignore_trivial, + k, ): """ A Numba JIT-compiled version of STOMPopt with Pearson correlations for parallel - computation of the matrix profile, matrix profile indices, left matrix profile - indices, and right matrix profile indices. 
+ computation of the (top-k) matrix profile, the (top-k) matrix profile indices, + the top-1 left matrix profile and its matrix profile index, and the top-1 right + matrix profile and its matrix profile index. Parameters ---------- @@ -300,15 +338,30 @@ def _stump( Set to `True` if this is a self-join. Otherwise, for AB-join, set this to `False`. Default is `True`. + k : int + The number of top `k` smallest distances used to construct the matrix profile. + Note that this will increase the total computational time and memory usage + when k > 1. + Returns ------- profile : numpy.ndarray - Matrix profile + The (top-k) matrix profile indices : numpy.ndarray - The first column consists of the matrix profile indices, the second - column consists of the left matrix profile indices, and the third - column consists of the right matrix profile indices. + The (top-k) matrix profile indices + + left profile : numpy.ndarray + The (top-1) left matrix profile + + left indices : numpy.ndarray + The (top-1) left matrix profile indices + + right profile : numpy.ndarray + The (top-1) right matrix profile + + right indices : numpy.ndarray + The (top-1) right matrix profile indices Notes ----- @@ -359,8 +412,15 @@ def _stump( n_B = T_B.shape[0] l = n_A - m + 1 n_threads = numba.config.NUMBA_NUM_THREADS - ρ = np.full((n_threads, l, 3), -np.inf, dtype=np.float64) - I = np.full((n_threads, l, 3), -1, dtype=np.int64) + + ρ = np.full((n_threads, l, k), np.NINF, dtype=np.float64) + I = np.full((n_threads, l, k), -1, dtype=np.int64) + + ρL = np.full((n_threads, l), np.NINF, dtype=np.float64) + IL = np.full((n_threads, l), -1, dtype=np.int64) + + ρR = np.full((n_threads, l), np.NINF, dtype=np.float64) + IR = np.full((n_threads, l), -1, dtype=np.int64) ndist_counts = core._count_diagonal_ndist(diags, m, n_A, n_B) diags_ranges = core._get_array_ranges(ndist_counts, n_threads, False) @@ -383,7 +443,8 @@ def _stump( cov_d[:] = cov_d - μ_Q_m_1 for thread_idx in prange(n_threads): - # Compute and update cov, I within a single thread to avoiding race conditions + # Compute and update pearson correlations and matrix profile indices + # within a single thread and avoiding race conditions _compute_diagonal( T_A, T_B, @@ -405,47 +466,64 @@ def _stump( diags_ranges[thread_idx, 1], thread_idx, ρ, + ρL, + ρR, I, + IL, + IR, ignore_trivial, + k, ) # Reduction of results from all threads for thread_idx in range(1, n_threads): - for i in prange(l): - if ρ[0, i, 0] < ρ[thread_idx, i, 0]: - ρ[0, i, 0] = ρ[thread_idx, i, 0] - I[0, i, 0] = I[thread_idx, i, 0] - # left pearson correlation and left matrix profile indices - if ρ[0, i, 1] < ρ[thread_idx, i, 1]: - ρ[0, i, 1] = ρ[thread_idx, i, 1] - I[0, i, 1] = I[thread_idx, i, 1] - # right pearson correlation and right matrix profile indices - if ρ[0, i, 2] < ρ[thread_idx, i, 2]: - ρ[0, i, 2] = ρ[thread_idx, i, 2] - I[0, i, 2] = I[thread_idx, i, 2] - - # Convert pearson correlations to distances - p_norm = np.abs(2 * m * (1 - ρ[0, :, :])) + # update top-k arrays + core._merge_topk_ρI(ρ[0], ρ[thread_idx], I[0], I[thread_idx]) + + # update left matrix profile and matrix profile indices + mask = ρL[0] < ρL[thread_idx] + ρL[0][mask] = ρL[thread_idx][mask] + IL[0][mask] = IL[thread_idx][mask] + + # update right matrix profile and matrix profile indices + mask = ρR[0] < ρR[thread_idx] + ρR[0][mask] = ρR[thread_idx][mask] + IR[0][mask] = IR[thread_idx][mask] + + # Reverse top-k rho (and its associated I) to be in descending order and + # then convert from Pearson correlations to Euclidean distances 
(ascending order)
+    p_norm = np.abs(2 * m * (1 - ρ[0, :, ::-1]))
+    I = I[0, :, ::-1]
+
+    p_norm_L = np.abs(2 * m * (1 - ρL[0, :]))
+    p_norm_R = np.abs(2 * m * (1 - ρR[0, :]))
+
     for i in prange(p_norm.shape[0]):
-        if p_norm[i, 0] < config.STUMPY_P_NORM_THRESHOLD:
-            p_norm[i, 0] = 0.0
-        if p_norm[i, 1] < config.STUMPY_P_NORM_THRESHOLD:
-            p_norm[i, 1] = 0.0
-        if p_norm[i, 2] < config.STUMPY_P_NORM_THRESHOLD:
-            p_norm[i, 2] = 0.0
+        for j in range(p_norm.shape[1]):
+            if p_norm[i, j] < config.STUMPY_P_NORM_THRESHOLD:
+                p_norm[i, j] = 0.0
+
+        if p_norm_L[i] < config.STUMPY_P_NORM_THRESHOLD:
+            p_norm_L[i] = 0.0
+
+        if p_norm_R[i] < config.STUMPY_P_NORM_THRESHOLD:
+            p_norm_R[i] = 0.0
 
+    P = np.sqrt(p_norm)
+    PL = np.sqrt(p_norm_L)
+    PR = np.sqrt(p_norm_R)
 
-    return P[:, :], I[0, :, :]
+    return P, PL, PR, I, IL[0], IR[0]
 
 
 @core.non_normalized(aamp)
-def stump(T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0):
+def stump(T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0, k=1):
     """
     Compute the z-normalized matrix profile
 
     This is a convenience wrapper around the Numba JIT-compiled parallelized
-    `_stump` function which computes the matrix profile according to STOMPopt with
-    Pearson correlations.
+    `_stump` function which computes the (top-k) matrix profile according to
+    STOMPopt with Pearson correlations.
 
     Parameters
     ----------
@@ -473,13 +551,25 @@ def stump(T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0):
         The p-norm to apply for computing the Minkowski distance. This parameter is
         ignored when `normalize == True`.
 
+    k : int, default 1
+        The number of top `k` smallest distances used to construct the matrix profile.
+        Note that this will increase the total computational time and memory usage
+        when k > 1. If you have access to a GPU device, then you may be able to
+        leverage `gpu_stump` for better performance and scalability.
+
     Returns
     -------
     out : numpy.ndarray
-        The first column consists of the matrix profile, the second column
-        consists of the matrix profile indices, the third column consists of
-        the left matrix profile indices, and the fourth column consists of
-        the right matrix profile indices.
+        When k = 1 (default), the first column consists of the matrix profile,
+        the second column consists of the matrix profile indices, the third column
+        consists of the left matrix profile indices, and the fourth column consists
+        of the right matrix profile indices. However, when k > 1, the output array
+        will contain exactly 2 * k + 2 columns. The first k columns (i.e., out[:, :k])
+        consist of the top-k matrix profile, the next set of k columns
+        (i.e., out[:, k:2k]) consists of the corresponding top-k matrix profile
+        indices, and the last two columns (i.e., out[:, 2k] and out[:, 2k+1] or,
+        equivalently, out[:, -2] and out[:, -1]) correspond to the top-1 left
+        matrix profile indices and the top-1 right matrix profile indices, respectively.
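To make the column layout above concrete, here is a minimal usage sketch (the input series, window size, and `k` value are made up purely for illustration):

import numpy as np
import stumpy

T = np.random.rand(1_000)
m, k = 50, 3  # hypothetical window size and number of neighbors

out = stumpy.stump(T, m, k=k)  # shape: (len(T) - m + 1, 2 * k + 2)
P = out[:, :k].astype(np.float64)       # top-k matrix profile, sorted per row
I = out[:, k : 2 * k].astype(np.int64)  # top-k matrix profile indices
left_I = out[:, -2].astype(np.int64)    # top-1 left matrix profile indices
right_I = out[:, -1].astype(np.int64)   # top-1 right matrix profile indices

The `astype` calls reflect the fact that `out` mixes float distances and integer indices in a single `dtype=object` array.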
 See Also
 --------
@@ -593,14 +683,13 @@ def stump(T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0):
     l = n_A - m + 1
 
     excl_zone = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM))
-    out = np.empty((l, 4), dtype=object)
 
     if ignore_trivial:
         diags = np.arange(excl_zone + 1, n_A - m + 1, dtype=np.int64)
     else:
         diags = np.arange(-(n_A - m + 1) + 1, n_B - m + 1, dtype=np.int64)
 
-    P, I = _stump(
+    P, PL, PR, I, IL, IR = _stump(
         T_A,
         T_B,
         m,
@@ -616,10 +705,13 @@ def stump(T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0):
         T_B_subseq_isconstant,
         diags,
         ignore_trivial,
+        k,
     )
 
-    out[:, 0] = P[:, 0]
-    out[:, 1:] = I
+    # The last two columns store the left and right matrix profile indices
+    out = np.empty((l, 2 * k + 2), dtype=object)
+    out[:, :k] = P
+    out[:, k:] = np.column_stack((I, IL, IR))
 
     core._check_P(out[:, 0])
 
diff --git a/stumpy/stumped.py b/stumpy/stumped.py
index 696d49080..0b95c5b3a 100644
--- a/stumpy/stumped.py
+++ b/stumpy/stumped.py
@@ -14,12 +14,14 @@
 
 @core.non_normalized(aamped)
-def stumped(dask_client, T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0):
+def stumped(
+    dask_client, T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0, k=1
+):
     """
-    Compute the z-normalized matrix profile with a distributed dask cluster
+    Compute the z-normalized (top-k) matrix profile with a distributed dask cluster
 
     This is a highly distributed implementation around the Numba JIT-compiled
-    parallelized `_stump` function which computes the matrix profile according
+    parallelized `_stump` function which computes the (top-k) matrix profile according
     to STOMPopt with Pearson correlations.
 
     Parameters
     ----------
@@ -54,13 +56,25 @@ def stumped(dask_client, T_A, m, T_B=None, ignore_trivial=True, normalize=True,
         The p-norm to apply for computing the Minkowski distance. This parameter is
         ignored when `normalize == True`.
 
+    k : int, default 1
+        The number of top `k` smallest distances used to construct the matrix profile.
+        Note that this will increase the total computational time and memory usage
+        when k > 1. If you have access to a GPU device, then you may be able to
+        leverage `gpu_stump` for better performance and scalability.
+
     Returns
     -------
     out : numpy.ndarray
-        The first column consists of the matrix profile, the second column
-        consists of the matrix profile indices, the third column consists of
-        the left matrix profile indices, and the fourth column consists of
-        the right matrix profile indices.
+        When k = 1 (default), the first column consists of the matrix profile,
+        the second column consists of the matrix profile indices, the third column
+        consists of the left matrix profile indices, and the fourth column consists
+        of the right matrix profile indices. However, when k > 1, the output array
+        will contain exactly 2 * k + 2 columns. The first k columns (i.e., out[:, :k])
+        consist of the top-k matrix profile, the next set of k columns
+        (i.e., out[:, k:2k]) consists of the corresponding top-k matrix profile
+        indices, and the last two columns (i.e., out[:, 2k] and out[:, 2k+1] or,
+        equivalently, out[:, -2] and out[:, -1]) correspond to the top-1 left
+        matrix profile indices and the top-1 right matrix profile indices, respectively.
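The distributed call follows the same `2 * k + 2` column convention; a minimal sketch (assuming a throwaway local Dask cluster, spun up here only for illustration):

import numpy as np
from dask.distributed import Client
import stumpy

if __name__ == "__main__":
    with Client() as dask_client:  # starts a local cluster; any Dask client works
        T = np.random.rand(10_000)
        out = stumpy.stumped(dask_client, T, m=50, k=3)
        P, I = out[:, :3], out[:, 3:6]            # top-k profile and indices
        left_I, right_I = out[:, -2], out[:, -1]  # top-1 left/right indices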
See Also -------- @@ -183,7 +197,6 @@ def stumped(dask_client, T_A, m, T_B=None, ignore_trivial=True, normalize=True, l = n_A - m + 1 excl_zone = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM)) - out = np.empty((l, 4), dtype=object) hosts = list(dask_client.ncores().keys()) nworkers = len(hosts) @@ -248,20 +261,31 @@ def stumped(dask_client, T_A, m, T_B=None, ignore_trivial=True, normalize=True, T_B_subseq_isconstant_future, diags_futures[i], ignore_trivial, + k, ) ) results = dask_client.gather(futures) - profile, indices = results[0] + profile, profile_L, profile_R, indices, indices_L, indices_R = results[0] + for i in range(1, len(hosts)): - P, I = results[i] - for col in range(P.shape[1]): # pragma: no cover - cond = P[:, col] < profile[:, col] - profile[:, col] = np.where(cond, P[:, col], profile[:, col]) - indices[:, col] = np.where(cond, I[:, col], indices[:, col]) - - out[:, 0] = profile[:, 0] - out[:, 1:4] = indices + P, PL, PR, I, IL, IR = results[i] + # Update top-k matrix profile and matrix profile indices + core._merge_topk_PI(profile, P, indices, I) + + # Update top-1 left matrix profile and matrix profile index + mask = PL < profile_L + profile_L[mask] = PL[mask] + indices_L[mask] = IL[mask] + + # Update top-1 right matrix profile and matrix profile index + mask = PR < profile_R + profile_R[mask] = PR[mask] + indices_R[mask] = IR[mask] + + out = np.empty((l, 2 * k + 2), dtype=object) + out[:, :k] = profile + out[:, k:] = np.column_stack((indices, indices_L, indices_R)) core._check_P(out[:, 0]) diff --git a/stumpy/stumpi.py b/stumpy/stumpi.py index a3318eaa0..82fac2da8 100644 --- a/stumpy/stumpi.py +++ b/stumpy/stumpi.py @@ -36,19 +36,30 @@ class stumpi: The p-norm to apply for computing the Minkowski distance. This parameter is ignored when `normalize == True`. + k : int, default 1 + The number of top `k` smallest distances used to construct the matrix profile. + Note that this will increase the total computational time and memory usage + when k > 1. + Attributes ---------- P_ : numpy.ndarray - The updated matrix profile for `T` + The updated (top-k) matrix profile for `T`. When `k=1` (default), the first + (and only) column in this 2D array consists of the matrix profile. When + `k > 1`, the output has exactly `k` columns consisting of the top-k matrix + profile. I_ : numpy.ndarray - The updated matrix profile indices for `T` + The updated (top-k) matrix profile indices for `T`. When `k=1` (default), + the first (and only) column in this 2D array consists of the matrix profile + indices. When `k > 1`, the output has exactly `k` columns consisting of the + top-k matrix profile indices. left_P_ : numpy.ndarray - The updated left matrix profile for `T` + The updated left (top-1) matrix profile for `T` left_I_ : numpy.ndarray - The updated left matrix profile indices for `T` + The updated left (top-1) matrix profile indices for `T` T_ : numpy.ndarray The updated time series or sequence for which the matrix profile and matrix @@ -81,7 +92,7 @@ class stumpi: array([-1, 0, 1, 2]) """ - def __init__(self, T, m, egress=True, normalize=True, p=2.0): + def __init__(self, T, m, egress=True, normalize=True, p=2.0, k=1): """ Initialize the `stumpi` object @@ -106,34 +117,41 @@ def __init__(self, T, m, egress=True, normalize=True, p=2.0): p : float, default 2.0 The p-norm to apply for computing the Minkowski distance. This parameter is ignored when `normalize == True`. + + k : int, default 1 + The number of top `k` smallest distances used to construct the matrix + profile. 
Note that this will increase the total computational time and + memory usage when `k > 1`. """ self._T = core._preprocess(T) core.check_window_size(m, max_size=self._T.shape[-1]) self._m = m + self._k = k + self._n = self._T.shape[0] self._excl_zone = int(np.ceil(self._m / config.STUMPY_EXCL_ZONE_DENOM)) self._T_isfinite = np.isfinite(self._T) self._egress = egress - mp = stump(self._T, self._m) - self._P = mp[:, 0].astype(np.float64) - self._I = mp[:, 1].astype(np.int64) - self._left_I = mp[:, 2].astype(np.int64) - self._left_P = np.empty(self._P.shape, dtype=np.float64) - self._left_P[:] = np.inf + mp = stump(self._T, self._m, k=self._k) + self._P = mp[:, :k].astype(np.float64) + self._I = mp[:, k : 2 * k].astype(np.int64) + + self._left_I = mp[:, 2 * k].astype(np.int64) + self._left_P = np.full_like(self._left_I, np.inf, dtype=np.float64) self._T, self._M_T, self._Σ_T = core.preprocess(self._T, self._m) # Retrieve the left matrix profile values - # Since each matrix profile value is the minimum between the left and right - # matrix profile values, we can save time by re-computing only the left matrix - # profile value when the matrix profile index is equal to the right matrix - # profile index. - mask = self._left_I == self._I - self._left_P[mask] = self._P[mask] + # Since each (top-1) matrix profile value is the minimum between the left + # and right matrix profile values, we can save time by re-computing only + # the left matrix profile value when the (top-1) matrix profile index is + # equal to the right matrix profile index. + mask = self._left_I == self._I[:, 0] + self._left_P[mask] = self._P[mask, 0] # Only re-compute the `i`-th left matrix profile value, `self._left_P[i]`, - # when `self._I[i] != self._left_I[i]` + # when `self._left_I[i] != self._I[i, 0]` for i in np.flatnonzero(self._left_I >= 0 & ~mask): j = self._left_I[i] QT = np.dot(self._T[i : i + self._m], self._T[j : j + self._m]) @@ -156,7 +174,7 @@ def __init__(self, T, m, egress=True, normalize=True, p=2.0): def update(self, t): """ Append a single new data point, `t`, to the existing time series `T` and update - the matrix profile and matrix profile indices. + the (top-k) matrix profile and matrix profile indices. 
     Parameters
     ----------
@@ -179,8 +197,8 @@ def update(self, t):
 
     def _update_egress(self, t):
         """
-        Ingress a new data point, egress the oldest data point, and update the matrix
-        profile and matrix profile indices
+        Ingress a new data point, egress the oldest data point, and update the (top-k)
+        matrix profile and matrix profile indices
         """
         self._n = self._T.shape[0]
         l = self._n - self._m + 1 - 1  # Subtract 1 due to egress
@@ -229,28 +247,37 @@ def _update_egress(self, t):
 
         core.apply_exclusion_zone(D, D.shape[0] - 1, self._excl_zone, np.inf)
 
-        update_idx = np.argwhere(D < self._P).flatten()
-        self._I[update_idx] = D.shape[0] + self._n_appended - 1  # D.shape[0] is base-1
-        self._P[update_idx] = D[update_idx]
-
-        I_last = np.argmin(D)
-
-        if np.isinf(D[I_last]):
-            self._I[-1] = -1
-            self._P[-1] = np.inf
-        else:
-            self._I[-1] = I_last + self._n_appended
-            self._P[-1] = D[I_last]
-
-        self._left_I[-1] = I_last + self._n_appended
-        self._left_P[-1] = D[I_last]
+        update_idx = np.argwhere(D < self._P[:, -1]).flatten()
+        for i in update_idx:
+            idx = np.searchsorted(self._P[i], D[i], side="right")
+            core._shift_insert_at_index(self._P[i], idx, D[i])
+            core._shift_insert_at_index(
+                self._I[i], idx, D.shape[0] + self._n_appended - 1
+            )
+            # D.shape[0] is base-1
+
+        # Calculate the (top-k) matrix profile values/indices for the last subsequence
+        # by using its corresponding distance profile `D`
+        self._P[-1] = np.inf
+        self._I[-1] = -1
+        for i, d in enumerate(D):
+            if d < self._P[-1, -1]:
+                idx = np.searchsorted(self._P[-1], d, side="right")
+                core._shift_insert_at_index(self._P[-1], idx, d)
+                core._shift_insert_at_index(self._I[-1], idx, i + self._n_appended)
+
+        # All neighbors of the last subsequence are on its left. So, its (top-1)
+        # matrix profile value/index and its left matrix profile value/index must
+        # be equal.
+        self._left_P[-1] = self._P[-1, 0]
+        self._left_I[-1] = self._I[-1, 0]
 
         self._QT[:] = self._QT_new
 
     def _update(self, t):
         """
-        Ingress a new data point and update the matrix profile and matrix profile
-        indices without egressing the oldest data point
+        Ingress a new data point and update the (top-k) matrix profile and matrix
+        profile indices without egressing the oldest data point
         """
         n = self._T.shape[0]
         l = n - self._m + 1
@@ -287,25 +314,30 @@ def _update(self, t):
 
         core.apply_exclusion_zone(D, D.shape[0] - 1, self._excl_zone, np.inf)
 
-        update_idx = np.argwhere(D[:l] < self._P[:l]).flatten()
-        self._I[update_idx] = l
-        self._P[update_idx] = D[update_idx]
-
-        I_last = np.argmin(D)
-        if np.isinf(D[I_last]):
-            I_new = np.append(self._I, -1)
-            P_new = np.append(self._P, np.inf)
-        else:
-            I_new = np.append(self._I, I_last)
-            P_new = np.append(self._P, D[I_last])
-        left_I_new = np.append(self._left_I, I_last)
-        left_P_new = np.append(self._left_P, D[I_last])
+        update_idx = np.argwhere(D[:l] < self._P[:l, -1]).flatten()
+        for i in update_idx:
+            idx = np.searchsorted(self._P[i], D[i], side="right")
+            core._shift_insert_at_index(self._P[i], idx, D[i])
+            core._shift_insert_at_index(self._I[i], idx, l)
+
+        # Calculate the top-k matrix profile and the (top-1) left matrix profile
+        # (and their corresponding indices) for the new subsequence whose distance
+        # profile is `D`
+        P_new = np.full(self._k, np.inf, dtype=np.float64)
+        I_new = np.full(self._k, -1, dtype=np.int64)
+        for i, d in enumerate(D):
+            if d < P_new[-1]:  # maximum value in sorted array P_new
+                idx = np.searchsorted(P_new, d, side="right")
+                core._shift_insert_at_index(P_new, idx, d)
+                core._shift_insert_at_index(I_new, idx, i)
+
+        left_I_new = I_new[0]
+        left_P_new = P_new[0]
 
         self._T = T_new
-        self._P = P_new
-        self._I = I_new
-        self._left_I = left_I_new
-        self._left_P = left_P_new
+        self._P = np.append(self._P, P_new.reshape(1, -1), axis=0)
+        self._I = np.append(self._I, I_new.reshape(1, -1), axis=0)
+        self._left_P = np.append(self._left_P, left_P_new)
+        self._left_I = np.append(self._left_I, left_I_new)
         self._QT = QT_new
         self._M_T = M_T_new
         self._Σ_T = Σ_T_new
@@ -313,28 +345,40 @@ def _update(self, t):
     @property
     def P_(self):
         """
-        Get the matrix profile
+        Get the (top-k) matrix profile. When `k=1` (default), the output is
+        a 1D array consisting of the matrix profile. When `k > 1`, the
+        output is a 2D array that has exactly `k` columns consisting of the
+        top-k matrix profile.
         """
-        return self._P.astype(np.float64)
+        if self._k == 1:
+            return self._P.flatten().astype(np.float64)
+        else:
+            return self._P.astype(np.float64)
 
     @property
     def I_(self):
         """
-        Get the matrix profile indices
+        Get the (top-k) matrix profile indices. When `k=1` (default), the output
+        is a 1D array consisting of the matrix profile indices. When `k > 1`, the
+        output is a 2D array that has exactly `k` columns consisting of the
+        top-k matrix profile indices.
         """
-        return self._I.astype(np.int64)
+        if self._k == 1:
+            return self._I.flatten().astype(np.int64)
+        else:
+            return self._I.astype(np.int64)
 
     @property
     def left_P_(self):
         """
-        Get the left matrix profile
+        Get the (top-1) left matrix profile
         """
         return self._left_P.astype(np.float64)
 
     @property
     def left_I_(self):
         """
-        Get the left matrix profile indices
+        Get the (top-1) left matrix profile indices
         """
         return self._left_I.astype(np.int64)
diff --git a/test.sh b/test.sh
index bf7b4c263..78f0d8f84 100755
--- a/test.sh
+++ b/test.sh
@@ -34,7 +34,7 @@ done
 
 check_errs()
 {
   # Function.
Parameter 1 is the return code - if [[ $1 -ne "0" ]]; then + if [[ $1 -ne "0" && $1 -ne "5" ]]; then echo "Error: pytest encountered exit code $1" # as a bonus, make our script exit with the right error code. exit $1 @@ -121,6 +121,7 @@ test_unit() { echo "Testing Numba JIT Compiled Functions" pytest -rsx -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_gpu_stump.py + check_errs $? pytest -x -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_core.py check_errs $? pytest -x -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_config.py @@ -136,6 +137,7 @@ test_unit() pytest -x -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_ostinato.py check_errs $? pytest -x -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_gpu_ostinato.py + check_errs $? pytest -x -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_mpdist.py check_errs $? pytest -x -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_motifs.py @@ -143,13 +145,16 @@ test_unit() pytest -x -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_mmotifs.py check_errs $? pytest -x -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_gpu_mpdist.py + check_errs $? pytest -x -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_snippets.py check_errs $? pytest -rsx -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_gpu_stimp.py + check_errs $? pytest -x -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_stimp.py check_errs $? # aamp pytest -rsx -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_gpu_aamp.py + check_errs $? pytest -x -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_aamp.py tests/test_maamp.py tests/test_scraamp.py tests/test_aampi.py check_errs $? pytest -x -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_scraamp.py @@ -161,6 +166,7 @@ test_unit() pytest -x -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_aamp_ostinato.py check_errs $? pytest -x -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_gpu_aamp_ostinato.py + check_errs $? pytest -x -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_aampdist.py check_errs $? pytest -x -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_aamp_motifs.py @@ -168,9 +174,11 @@ test_unit() pytest -x -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_aamp_mmotifs.py check_errs $? pytest -x -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_gpu_aampdist.py + check_errs $? pytest -x -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_aampdist_snippets.py check_errs $? pytest -rsx -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_gpu_aamp_stimp.py + check_errs $? pytest -x -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_aamp_stimp.py check_errs $? 
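The `check_errs` change above is needed because the GPU test files collect zero tests on CUDA-less CI machines, and pytest signals "no tests collected" with exit code 5 rather than 0. A one-line sanity check of that convention (illustrative only):

import pytest

# pytest.ExitCode.NO_TESTS_COLLECTED is an IntEnum member equal to 5,
# which is why test.sh now treats both 0 and 5 as success
assert pytest.ExitCode.NO_TESTS_COLLECTED == 5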
pytest -x -W ignore::DeprecationWarning tests/test_non_normalized_decorator.py diff --git a/tests/naive.py b/tests/naive.py index cc4b0cc80..a58069bba 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -6,7 +6,7 @@ from stumpy import core, config -def z_norm(a, axis=0, threshold=1e-7): +def z_norm(a, axis=0, threshold=config.STUMPY_STDDEV_THRESHOLD): std = np.std(a, axis, keepdims=True) std[np.less(std, threshold, where=~np.isnan(std))] = 1.0 @@ -156,11 +156,22 @@ def stamp(T_A, m, T_B=None, exclusion_zone=None): # pragma: no cover return result -def stump(T_A, m, T_B=None, exclusion_zone=None, row_wise=False): +def searchsorted_right(a, v): """ - Traverse distance matrix diagonally and update the matrix profile and - matrix profile indices if the parameter `row_wise` is set to `False`. - If the parameter `row_wise` is set to `True`, it is a row-wise traversal. + Naive version of numpy.searchsorted(..., side='right') + """ + indices = np.flatnonzero(v < a) + if len(indices): + return indices.min() + else: # pragma: no cover + return len(a) + + +def stump(T_A, m, T_B=None, exclusion_zone=None, row_wise=False, k=1): + """ + Traverse distance matrix diagonally and update the top-k matrix profile and + matrix profile indices if the parameter `row_wise` is set to `False`. If the + parameter `row_wise` is set to `True`, it is a row-wise traversal. """ if T_B is None: # self-join: ignore_trivial = True @@ -182,35 +193,35 @@ def stump(T_A, m, T_B=None, exclusion_zone=None, row_wise=False): if exclusion_zone is None: exclusion_zone = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM)) - P = np.full((l, 3), np.inf) - I = np.full((l, 3), -1, dtype=np.int64) + P = np.full((l, k + 2), np.inf, dtype=np.float64) + I = np.full((l, k + 2), -1, dtype=np.int64) # two more columns are to store + # ... 
left and right top-1 matrix profile indices
 
     if row_wise:  # row-wise traversal in distance matrix
         if ignore_trivial:  # self-join
             for i in range(l):
                 apply_exclusion_zone(distance_matrix[i], i, exclusion_zone, np.inf)
 
-        for i, D in enumerate(distance_matrix):
+        for i, D in enumerate(distance_matrix):  # D: distance profile
             # self-join / AB-join: matrix profile and indices
-            idx = np.argmin(D)
-            P[i, 0] = D[idx]
-            if P[i, 0] == np.inf:
-                idx = -1
-            I[i, 0] = idx
+            indices = np.argsort(D)[:k]
+            P[i, :k] = D[indices]
+            indices[P[i, :k] == np.inf] = -1
+            I[i, :k] = indices
 
-            # self-join: left matrix profile
+            # self-join: left matrix profile index (top-1)
             if ignore_trivial and i > 0:
                 IL = np.argmin(D[:i])
                 if D[IL] == np.inf:
                     IL = -1
-                I[i, 1] = IL
+                I[i, k] = IL
 
-            # self-join: right matrix profile
+            # self-join: right matrix profile index (top-1)
             if ignore_trivial and i < D.shape[0]:
-                IR = i + np.argmin(D[i:])  # shift argmin by `i` to get true index
+                IR = i + np.argmin(D[i:])  # offset by `i` to get true index
                 if D[IR] == np.inf:
                     IR = -1
-                I[i, 2] = IR
+                I[i, k + 1] = IR
     else:
         # diagonal traversal
         if ignore_trivial:
@@ -218,37 +229,40 @@
         else:
             diags = np.arange(-(n_A - m + 1) + 1, n_B - m + 1)
 
-        for k in diags:
-            if k >= 0:
-                iter_range = range(0, min(n_A - m + 1, n_B - m + 1 - k))
+        for g in diags:
+            if g >= 0:
+                iter_range = range(0, min(n_A - m + 1, n_B - m + 1 - g))
             else:
-                iter_range = range(-k, min(n_A - m + 1, n_B - m + 1 - k))
+                iter_range = range(-g, min(n_A - m + 1, n_B - m + 1 - g))
 
             for i in iter_range:
-                D = distance_matrix[i, i + k]
-                if D < P[i, 0]:
-                    P[i, 0] = D
-                    I[i, 0] = i + k
+                d = distance_matrix[i, i + g]
+                if d < P[i, k - 1]:
+                    idx = searchsorted_right(P[i], d)
+                    # To keep the top-k, we must discard the last element.
+ P[i, :k] = np.insert(P[i, :k], idx, d)[:-1] + I[i, :k] = np.insert(I[i, :k], idx, i + g)[:-1] if ignore_trivial: # Self-joins only - if D < P[i + k, 0]: - P[i + k, 0] = D - I[i + k, 0] = i + if d < P[i + g, k - 1]: + idx = searchsorted_right(P[i + g], d) + P[i + g, :k] = np.insert(P[i + g, :k], idx, d)[:-1] + I[i + g, :k] = np.insert(I[i + g, :k], idx, i)[:-1] - if i < i + k: + if i < i + g: # Left matrix profile and left matrix profile index - if D < P[i + k, 1]: - P[i + k, 1] = D - I[i + k, 1] = i + if d < P[i + g, k]: + P[i + g, k] = d + I[i + g, k] = i - if D < P[i, 2]: + if d < P[i, k + 1]: # right matrix profile and right matrix profile index - P[i, 2] = D - I[i, 2] = i + k + P[i, k + 1] = d + I[i, k + 1] = i + g - result = np.empty((l, 4), dtype=object) - result[:, 0] = P[:, 0] - result[:, 1:4] = I[:, :] + result = np.empty((l, 2 * k + 2), dtype=object) + result[:, :k] = P[:, :k] + result[:, k:] = I[:, :] return result @@ -705,11 +719,13 @@ def __init__(self, T, m, excl_zone=None, p=2.0): self._T_isfinite = np.isfinite(self._T) self._m = m self._p = p - if excl_zone is None: + + self._excl_zone = excl_zone + if self._excl_zone is None: self._excl_zone = int(np.ceil(self._m / config.STUMPY_EXCL_ZONE_DENOM)) self._l = self._T.shape[0] - m + 1 - mp = aamp(T, m, p=p) + mp = aamp(T, m, exclusion_zone=self._excl_zone, p=p) self.P_ = mp[:, 0] self.I_ = mp[:, 1].astype(np.int64) self.left_P_ = np.full(self.P_.shape, np.inf) @@ -772,24 +788,31 @@ def update(self, t): class stumpi_egress(object): - def __init__(self, T, m, excl_zone=None): + def __init__(self, T, m, excl_zone=None, k=1): self._T = np.asarray(T) self._T = self._T.copy() self._T_isfinite = np.isfinite(self._T) self._m = m - if excl_zone is None: + self._k = k + + self._excl_zone = excl_zone + if self._excl_zone is None: self._excl_zone = int(np.ceil(self._m / config.STUMPY_EXCL_ZONE_DENOM)) self._l = self._T.shape[0] - m + 1 - mp = stump(T, m) - self.P_ = mp[:, 0] - self.I_ = mp[:, 1].astype(np.int64) - self.left_P_ = np.full(self.P_.shape, np.inf) - self.left_I_ = mp[:, 2].astype(np.int64) - for i, j in enumerate(self.left_I_): - if j >= 0: - D = core.mass(self._T[i : i + self._m], self._T[j : j + self._m]) - self.left_P_[i] = D[0] + mp = stump(T, m, exclusion_zone=self._excl_zone, k=self._k) + self._P = mp[:, :k].astype(np.float64) + self._I = mp[:, k : 2 * k].astype(np.int64) + + self._left_I = mp[:, 2 * k].astype(np.int64) + self._left_P = np.full_like(self._left_I, np.inf, dtype=np.float64) + + for idx, nn_idx in enumerate(self._left_I): + if nn_idx >= 0: + D = distance_profile( + self._T[idx : idx + self._m], self._T[nn_idx : nn_idx + self._m], m + ) + self._left_P[idx] = D[0] self._n_appended = 0 @@ -804,10 +827,10 @@ def update(self, t): self._T[-1] = 0 self._n_appended += 1 - self.P_[:] = np.roll(self.P_, -1) - self.I_[:] = np.roll(self.I_, -1) - self.left_P_[:] = np.roll(self.left_P_, -1) - self.left_I_[:] = np.roll(self.left_I_, -1) + self._P = np.roll(self._P, -1, axis=0) + self._I = np.roll(self._I, -1, axis=0) + self._left_P[:] = np.roll(self._left_P, -1) + self._left_I[:] = np.roll(self._left_I, -1) D = core.mass(self._T[-self._m :], self._T) T_subseq_isfinite = np.all( @@ -818,22 +841,47 @@ def update(self, t): D[:] = np.inf apply_exclusion_zone(D, D.shape[0] - 1, self._excl_zone, np.inf) + # update top-k matrix profile using newly calculated distance profile `D` for j in range(D.shape[0]): - if D[j] < self.P_[j]: - self.I_[j] = D.shape[0] - 1 + self._n_appended - self.P_[j] = D[j] - - I_last = np.argmin(D) + 
if D[j] < self._P[j, -1]: + pos = np.searchsorted(self._P[j], D[j], side="right") + self._P[j] = np.insert(self._P[j], pos, D[j])[:-1] + self._I[j] = np.insert( + self._I[j], pos, D.shape[0] - 1 + self._n_appended + )[:-1] + + # update top-k for the last, newly-updated index + I_last_topk = np.argsort(D, kind="mergesort")[: self._k] + self._P[-1] = D[I_last_topk] + self._I[-1] = I_last_topk + self._n_appended + self._I[-1][self._P[-1] == np.inf] = -1 + + # for the last index, the left matrix profile value is self.P_[-1, 0] + # and the same goes for the left matrix profile index + self._left_P[-1] = self._P[-1, 0] + self._left_I[-1] = self._I[-1, 0] + + @property + def P_(self): + if self._k == 1: + return self._P.flatten().astype(np.float64) + else: + return self._P.astype(np.float64) - if np.isinf(D[I_last]): - self.I_[-1] = -1 - self.P_[-1] = np.inf + @property + def I_(self): + if self._k == 1: + return self._I.flatten().astype(np.int64) else: - self.I_[-1] = I_last + self._n_appended - self.P_[-1] = D[I_last] + return self._I.astype(np.int64) - self.left_I_[-1] = I_last + self._n_appended - self.left_P_[-1] = D[I_last] + @property + def left_P_(self): + return self._left_P.astype(np.float64) + + @property + def left_I_(self): + return self._left_I.astype(np.int64) def across_series_nearest_neighbors(Ts, Ts_idx, subseq_idx, m): @@ -1388,55 +1436,81 @@ def aampdist_snippets( ) -def prescrump(T_A, m, T_B, s, exclusion_zone=None): +def prescrump(T_A, m, T_B, s, exclusion_zone=None, k=1): dist_matrix = distance_matrix(T_A, T_B, m) l = T_A.shape[0] - m + 1 # matrix profile length w = T_B.shape[0] - m + 1 # distance profile length - P = np.empty(l) - I = np.empty(l, dtype=np.int64) - P[:] = np.inf - I[:] = -1 + P = np.full((l, k), np.inf, dtype=np.float64) + I = np.full((l, k), -1, dtype=np.int64) for i in np.random.permutation(range(0, l, s)): distance_profile = dist_matrix[i] if exclusion_zone is not None: apply_exclusion_zone(distance_profile, i, exclusion_zone, np.inf) - # only for self-join - mask = distance_profile < P - P[mask] = distance_profile[mask] - I[mask] = i - - I[i] = np.argmin(distance_profile) - P[i] = distance_profile[I[i]] - if P[i] == np.inf: - I[i] = -1 - else: - j = I[i] - for k in range(1, min(s, l - i, w - j)): - d = dist_matrix[i + k, j + k] - if d < P[i + k]: - P[i + k] = d - I[i + k] = j + k - if exclusion_zone is not None and d < P[j + k]: - P[j + k] = d - I[j + k] = i + k + nn_idx = np.argmin(distance_profile) + if distance_profile[nn_idx] < P[i, -1] and nn_idx not in I[i]: + pos = np.searchsorted(P[i], distance_profile[nn_idx], side="right") + P[i] = np.insert(P[i], pos, distance_profile[nn_idx])[:-1] + I[i] = np.insert(I[i], pos, nn_idx)[:-1] + + if P[i, 0] == np.inf: + I[i, 0] = -1 + continue + + j = nn_idx + for g in range(1, min(s, l - i, w - j)): + d = dist_matrix[i + g, j + g] + # Do NOT optimize the `condition` in the following if statement + # and similar ones in this naive function. This is to ensure + # we are avoiding duplicates in each row of I. 
+ if d < P[i + g, -1] and (j + g) not in I[i + g]: + pos = np.searchsorted(P[i + g], d, side="right") + P[i + g] = np.insert(P[i + g], pos, d)[:-1] + I[i + g] = np.insert(I[i + g], pos, j + g)[:-1] + if ( + exclusion_zone is not None + and d < P[j + g, -1] + and (i + g) not in I[j + g] + ): + pos = np.searchsorted(P[j + g], d, side="right") + P[j + g] = np.insert(P[j + g], pos, d)[:-1] + I[j + g] = np.insert(I[j + g], pos, i + g)[:-1] + + for g in range(1, min(s, i + 1, j + 1)): + d = dist_matrix[i - g, j - g] + if d < P[i - g, -1] and (j - g) not in I[i - g]: + pos = np.searchsorted(P[i - g], d, side="right") + P[i - g] = np.insert(P[i - g], pos, d)[:-1] + I[i - g] = np.insert(I[i - g], pos, j - g)[:-1] + if ( + exclusion_zone is not None + and d < P[j - g, -1] + and (i - g) not in I[j - g] + ): + pos = np.searchsorted(P[j - g], d, side="right") + P[j - g] = np.insert(P[j - g], pos, d)[:-1] + I[j - g] = np.insert(I[j - g], pos, i - g)[:-1] + + # In the case of a self-join, the calculated distance profile can also be + # used to refine the top-k for all non-trivial subsequences + if exclusion_zone is not None: + for idx in np.flatnonzero(distance_profile < P[:, -1]): + if i not in I[idx]: + pos = np.searchsorted(P[idx], distance_profile[idx], side="right") + P[idx] = np.insert(P[idx], pos, distance_profile[idx])[:-1] + I[idx] = np.insert(I[idx], pos, i)[:-1] - for k in range(1, min(s, i + 1, j + 1)): - d = dist_matrix[i - k, j - k] - if d < P[i - k]: - P[i - k] = d - I[i - k] = j - k - if exclusion_zone is not None and d < P[j - k]: - P[j - k] = d - I[j - k] = i - k + if k == 1: + P = P.flatten() + I = I.flatten() return P, I -def scrump(T_A, m, T_B, percentage, exclusion_zone, pre_scrump, s): +def scrump(T_A, m, T_B, percentage, exclusion_zone, pre_scrump, s, k=1): dist_matrix = distance_matrix(T_A, T_B, m) n_A = T_A.shape[0] @@ -1458,44 +1532,48 @@ def scrump(T_A, m, T_B, percentage, exclusion_zone, pre_scrump, s): diags_ranges_start = diags_ranges[0, 0] diags_ranges_stop = diags_ranges[0, 1] - out = np.full((l, 4), np.inf, dtype=object) - out[:, 1:] = -1 - left_P = np.full(l, np.inf, dtype=np.float64) - right_P = np.full(l, np.inf, dtype=np.float64) + P = np.full((l, k), np.inf, dtype=np.float64) # Topk + PL = np.full(l, np.inf, dtype=np.float64) + PR = np.full(l, np.inf, dtype=np.float64) + + I = np.full((l, k), -1, dtype=np.int64) + IL = np.full(l, -1, dtype=np.int64) + IR = np.full(l, -1, dtype=np.int64) for diag_idx in range(diags_ranges_start, diags_ranges_stop): - k = diags[diag_idx] + g = diags[diag_idx] for i in range(n_A - m + 1): for j in range(n_B - m + 1): - if j - i == k: - if dist_matrix[i, j] < out[i, 0]: - out[i, 0] = dist_matrix[i, j] - out[i, 1] = i + k - - if exclusion_zone is not None and dist_matrix[i, j] < out[i + k, 0]: - out[i + k, 0] = dist_matrix[i, j] - out[i + k, 1] = i + if j - i == g: + d = dist_matrix[i, j] + if d < P[i, -1]: # update TopK of P[i] + idx = searchsorted_right(P[i], d) + if (i + g) not in I[i]: + P[i] = np.insert(P[i], idx, d)[:-1] + I[i] = np.insert(I[i], idx, i + g)[:-1] + + if exclusion_zone is not None and d < P[i + g, -1]: + idx = searchsorted_right(P[i + g], d) + if i not in I[i + g]: + P[i + g] = np.insert(P[i + g], idx, d)[:-1] + I[i + g] = np.insert(I[i + g], idx, i)[:-1] # left matrix profile and left matrix profile indices - if ( - exclusion_zone is not None - and i < i + k - and dist_matrix[i, j] < left_P[i + k] - ): - left_P[i + k] = dist_matrix[i, j] - out[i + k, 2] = i + if exclusion_zone is not None and i < i + g and d < PL[i 
+ g]:
+                        PL[i + g] = d
+                        IL[i + g] = i
 
                     # right matrix profile and right matrix profile indices
-                    if (
-                        exclusion_zone is not None
-                        and i + k > i
-                        and dist_matrix[i, j] < right_P[i]
-                    ):
-                        right_P[i] = dist_matrix[i, j]
-                        out[i, 3] = i + k
+                    if exclusion_zone is not None and i + g > i and d < PR[i]:
+                        PR[i] = d
+                        IR[i] = i + g
 
-    return out
+    if k == 1:
+        P = P.flatten()
+        I = I.flatten()
+
+    return P, I, IL, IR
 
 
 def prescraamp(T_A, m, T_B, s, exclusion_zone=None, p=2.0):
@@ -1759,6 +1837,83 @@ def _total_diagonal_ndists(tile_lower_diag, tile_upper_diag, tile_height, tile_w
     return total_ndists
 
 
+def merge_topk_PI(PA, PB, IA, IB):
+    if PA.ndim == 1:
+        for i in range(PA.shape[0]):
+            if PB[i] < PA[i] and IB[i] != IA[i]:
+                PA[i] = PB[i]
+                IA[i] = IB[i]
+        return
+
+    else:
+        k = PA.shape[1]
+        for i in range(PA.shape[0]):
+            _, _, overlap_idx_B = np.intersect1d(IA[i], IB[i], return_indices=True)
+            PB[i, overlap_idx_B] = np.inf
+            IB[i, overlap_idx_B] = -1
+
+        profile = np.column_stack((PA, PB))
+        indices = np.column_stack((IA, IB))
+        IDX = np.argsort(profile, axis=1, kind="mergesort")
+        profile[:, :] = np.take_along_axis(profile, IDX, axis=1)
+        indices[:, :] = np.take_along_axis(indices, IDX, axis=1)
+
+        PA[:, :] = profile[:, :k]
+        IA[:, :] = indices[:, :k]
+
+        return
+
+
+def merge_topk_ρI(ρA, ρB, IA, IB):
+    # This function merges two Pearson profiles `ρA` and `ρB`, and updates `ρA`
+    # and `IA` accordingly. When the inputs are 1D, `ρA[i]` is updated if
+    # `ρA[i] < ρB[i]` and IA[i] != IB[i]. When the inputs are 2D, each row in
+    # `ρA` and `ρB` is sorted ascendingly. We want to keep the top-k largest
+    # values when merging rows `ρA[i]` and `ρB[i]`.
+
+    # In case of ties between `ρA` and `ρB`, the priority is with `ρA`. In case
+    # of ties within `ρA`, the priority is with the element with the greater index.
+    # Example
+    # note: the prime symbol is to distinguish two elements with the same value
+    # ρA = [0, 0', 1], and ρB = [0, 1, 1'].
+    # merging outcome: [1_B, 1'_B, 1_A]
+
+    # Naive Implementation:
+    # keeping the top-k largest values with the aforementioned priority rules is
+    # the same as `merge_topk_PI` but with `ρA` and `ρB` swapped
+
+    # For the same example:
+    # merging `ρB` and `ρA` ascendingly while choosing `ρB` over `ρA` in case of
+    # ties: [0_B, 0_A, 0'_A, 1_B, 1'_B, 1_A], and the second half of this array
+    # is the desirable outcome.
+ if ρA.ndim == 1: + for i in range(ρA.shape[0]): + if ρB[i] > ρA[i] and IB[i] != IA[i]: + ρA[i] = ρB[i] + IA[i] = IB[i] + return + + else: + k = ρA.shape[1] + for i in range(ρA.shape[0]): + _, _, overlap_idx_B = np.intersect1d(IA[i], IB[i], return_indices=True) + ρB[i, overlap_idx_B] = np.NINF + IB[i, overlap_idx_B] = -1 + + profile = np.column_stack((ρB, ρA)) + indices = np.column_stack((IB, IA)) + + idx = np.argsort(profile, axis=1, kind="mergesort") + profile[:, :] = np.take_along_axis(profile, idx, axis=1) + indices[:, :] = np.take_along_axis(indices, idx, axis=1) + + # keep the last k elements (top-k largest values) + ρA[:, :] = profile[:, k:] + IA[:, :] = indices[:, k:] + + return + + def find_matches(D, excl_zone, max_distance, max_matches=None): if max_matches is None: max_matches = len(D) diff --git a/tests/test_core.py b/tests/test_core.py index 675cd9e27..16d2d0fd2 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1061,6 +1061,242 @@ def test_select_P_ABBA_val_inf(): npt.assert_almost_equal(ref, comp) +def test_merge_topk_PI_without_overlap(): + # This is to test function `core._merge_topk_PI(PA, PB, IA, IB)` when there + # is no overlap between row IA[i] and row IB[i]. + n = 50 + for k in range(1, 6): + PA = np.random.rand(n * k).reshape(n, k) + PA[:, :] = np.sort(PA, axis=1) # sorting each row separately + + PB = np.random.rand(n * k).reshape(n, k) + col_idx = np.random.randint(0, k, size=n) + for i in range(n): # creating ties between values of PA and PB + PB[i, col_idx[i]] = np.random.choice(PA[i], size=1, replace=False) + PB[:, :] = np.sort(PB, axis=1) # sorting each row separately + + IA = np.arange(n * k).reshape(n, k) + IB = IA + n * k + + ref_P = PA.copy() + ref_I = IA.copy() + + comp_P = PA.copy() + comp_I = IA.copy() + + naive.merge_topk_PI(ref_P, PB.copy(), ref_I, IB.copy()) + core._merge_topk_PI(comp_P, PB.copy(), comp_I, IB.copy()) + + npt.assert_almost_equal(ref_P, comp_P) + npt.assert_almost_equal(ref_I, comp_I) + + +def test_merge_topk_PI_with_overlap(): + # This is to test function `core._merge_topk_PI(PA, PB, IA, IB)` when there + # is overlap between row IA[i] and row IB[i]. + n = 50 + for k in range(1, 6): + # note: we do not have overlap issue when k is 1. The `k=1` is considered + # for the sake of consistency with the `without-overlap` test function. 
+ PA = np.random.rand(n * k).reshape(n, k) + PB = np.random.rand(n * k).reshape(n, k) + + IA = np.arange(n * k).reshape(n, k) + IB = IA + n * k + + num_overlaps = np.random.randint(1, k + 1, size=n) + for i in range(n): + # create overlaps + col_IDX = np.random.choice(np.arange(k), num_overlaps[i], replace=False) + imprecision = np.random.uniform(low=-1e-06, high=1e-06, size=len(col_IDX)) + PB[i, col_IDX] = PA[i, col_IDX] + imprecision + IB[i, col_IDX] = IA[i, col_IDX] + + # sort each row of PA/PB (and update IA/IB accordingly) + IDX = np.argsort(PA, axis=1) + PA[:, :] = np.take_along_axis(PA, IDX, axis=1) + IA[:, :] = np.take_along_axis(IA, IDX, axis=1) + + IDX = np.argsort(PB, axis=1) + PB[:, :] = np.take_along_axis(PB, IDX, axis=1) + IB[:, :] = np.take_along_axis(IB, IDX, axis=1) + + ref_P = PA.copy() + ref_I = IA.copy() + + comp_P = PA.copy() + comp_I = IA.copy() + + naive.merge_topk_PI(ref_P, PB.copy(), ref_I, IB.copy()) + core._merge_topk_PI(comp_P, PB.copy(), comp_I, IB.copy()) + + npt.assert_almost_equal(ref_P, comp_P) + npt.assert_almost_equal(ref_I, comp_I) + + +def test_merge_topk_PI_with_1D_input(): + # including some overlaps randomly + n = 50 + PA = np.random.rand(n) + PB = np.random.rand(n) + + IA = np.arange(n) + IB = IA + n + + n_overlaps = np.random.randint(1, n + 1) + IDX_rows_with_overlaps = np.random.choice(np.arange(n), n_overlaps, replace=False) + imprecision = np.random.uniform(low=-1e-06, high=1e-06, size=n_overlaps) + PB[IDX_rows_with_overlaps] = PA[IDX_rows_with_overlaps] + imprecision + IB[IDX_rows_with_overlaps] = IA[IDX_rows_with_overlaps] + + ref_P = PA.copy() + ref_I = IA.copy() + + comp_P = PA.copy() + comp_I = IA.copy() + + naive.merge_topk_PI(ref_P, PB.copy(), ref_I, IB.copy()) + core._merge_topk_PI(comp_P, PB.copy(), comp_I, IB.copy()) + + npt.assert_almost_equal(ref_P, comp_P) + npt.assert_almost_equal(ref_I, comp_I) + + +def test_merge_topk_ρI_without_overlap(): + # This is to test function `core._merge_topk_ρI(ρA, ρB, IA, IB)` when there + # is no overlap between row IA[i] and row IB[i]. + n = 50 + for k in range(1, 6): + ρA = np.random.rand(n * k).reshape(n, k) + ρA[:, :] = np.sort(ρA, axis=1) # sorting each row separately + + ρB = np.random.rand(n * k).reshape(n, k) + col_idx = np.random.randint(0, k, size=n) + for i in range(n): # creating ties between values of PA and PB + ρB[i, col_idx[i]] = np.random.choice(ρA[i], size=1, replace=False) + ρB[:, :] = np.sort(ρB, axis=1) # sorting each row separately + + IA = np.arange(n * k).reshape(n, k) + IB = IA + n * k + + ref_ρ = ρA.copy() + ref_I = IA.copy() + + comp_ρ = ρA.copy() + comp_I = IA.copy() + + naive.merge_topk_ρI(ref_ρ, ρB.copy(), ref_I, IB.copy()) + core._merge_topk_ρI(comp_ρ, ρB.copy(), comp_I, IB.copy()) + + npt.assert_almost_equal(ref_ρ, comp_ρ) + npt.assert_almost_equal(ref_I, comp_I) + + +def test_merge_topk_ρI_with_overlap(): + # This is to test function `core._merge_topk_ρI(ρA, ρB, IA, IB)` when there + # is overlap between row IA[i] and row IB[i]. + n = 50 + for k in range(1, 6): + # note: we do not have overlap issue when k is 1. The `k=1` is considered + # for the sake of consistency with the `without-overlap` test function. 
+ ρA = np.random.rand(n * k).reshape(n, k) + ρB = np.random.rand(n * k).reshape(n, k) + + IA = np.arange(n * k).reshape(n, k) + IB = IA + n * k + + num_overlaps = np.random.randint(1, k + 1, size=n) + for i in range(n): + # create overlaps + col_IDX = np.random.choice(np.arange(k), num_overlaps[i], replace=False) + imprecision = np.random.uniform(low=-1e-06, high=1e-06, size=len(col_IDX)) + ρB[i, col_IDX] = ρA[i, col_IDX] + imprecision + IB[i, col_IDX] = IA[i, col_IDX] + + # sort each row of ρA/ρB (and update IA/IB accordingly) + IDX = np.argsort(ρA, axis=1) + ρA[:, :] = np.take_along_axis(ρA, IDX, axis=1) + IA[:, :] = np.take_along_axis(IA, IDX, axis=1) + + IDX = np.argsort(ρB, axis=1) + ρB[:, :] = np.take_along_axis(ρB, IDX, axis=1) + IB[:, :] = np.take_along_axis(IB, IDX, axis=1) + + ref_ρ = ρA.copy() + ref_I = IA.copy() + + comp_ρ = ρA.copy() + comp_I = IA.copy() + + naive.merge_topk_ρI(ref_ρ, ρB.copy(), ref_I, IB.copy()) + core._merge_topk_ρI(comp_ρ, ρB.copy(), comp_I, IB.copy()) + + npt.assert_almost_equal(ref_ρ, comp_ρ) + npt.assert_almost_equal(ref_I, comp_I) + + +def test_merge_topk_ρI_with_1D_input(): + # including some overlaps randomly + n = 50 + ρA = np.random.rand(n) + ρB = np.random.rand(n) + + IA = np.arange(n) + IB = IA + n + + ref_ρ = ρA.copy() + ref_I = IA.copy() + + comp_ρ = ρA.copy() + comp_I = IA.copy() + + n_overlaps = np.random.randint(1, n + 1) + IDX_rows_with_overlaps = np.random.choice(np.arange(n), n_overlaps, replace=False) + imprecision = np.random.uniform(low=-1e-06, high=1e-06, size=n_overlaps) + ρB[IDX_rows_with_overlaps] = ρA[IDX_rows_with_overlaps] + imprecision + IB[IDX_rows_with_overlaps] = IA[IDX_rows_with_overlaps] + + naive.merge_topk_ρI(ref_ρ, ρB.copy(), ref_I, IB.copy()) + core._merge_topk_ρI(comp_ρ, ρB.copy(), comp_I, IB.copy()) + + npt.assert_almost_equal(ref_ρ, comp_ρ) + npt.assert_almost_equal(ref_I, comp_I) + + +def test_shift_insert_at_index(): + for k in range(1, 6): + a = np.random.rand(k) + ref = np.empty(k, dtype=np.float64) + comp = np.empty(k, dtype=np.float64) + + indices = np.arange(k + 1) + values = np.random.rand(k + 1) + + # test shift = "right" + for (idx, v) in zip(indices, values): + ref[:] = a + comp[:] = a + + ref = np.insert(ref, idx, v)[:-1] + core._shift_insert_at_index( + comp, idx, v, shift="right" + ) # update comp in place + + npt.assert_almost_equal(ref, comp) + + # test shift = "left" + for (idx, v) in zip(indices, values): + ref[:] = a + comp[:] = a + + ref = np.insert(ref, idx, v)[1:] + core._shift_insert_at_index( + comp, idx, v, shift="left" + ) # update comp in place + + npt.assert_almost_equal(ref, comp) + + def test_check_P(): with pytest.raises(ValueError): core._check_P(np.random.rand(10).reshape(2, 5)) diff --git a/tests/test_gpu_stump.py b/tests/test_gpu_stump.py index 508b02a56..a6d4e6953 100644 --- a/tests/test_gpu_stump.py +++ b/tests/test_gpu_stump.py @@ -1,10 +1,21 @@ +import math import numpy as np import numpy.testing as npt import pandas as pd -from stumpy import gpu_stump +from stumpy import core, gpu_stump from stumpy import config from numba import cuda +if cuda.is_available(): + from stumpy.gpu_stump import _gpu_searchsorted_left, _gpu_searchsorted_right +else: # pragma: no cover + from stumpy.core import ( + _gpu_searchsorted_left_driver_not_found as _gpu_searchsorted_left, + ) + from stumpy.core import ( + _gpu_searchsorted_right_driver_not_found as _gpu_searchsorted_right, + ) + try: from numba.errors import NumbaPerformanceWarning except ModuleNotFoundError: @@ -39,6 +50,62 @@ def 
test_gpu_stump_int_input(): gpu_stump(np.arange(10), 5, ignore_trivial=True) +@cuda.jit("(f8[:, :], f8[:], i8[:], i8, b1, i8[:])") +def _gpu_searchsorted_kernel(a, v, bfs, nlevel, is_left, idx): + # A wrapper kernel for calling device function _gpu_searchsorted_left/right. + i = cuda.grid(1) + if i < a.shape[0]: + if is_left: + idx[i] = _gpu_searchsorted_left(a[i], v[i], bfs, nlevel) + else: + idx[i] = _gpu_searchsorted_right(a[i], v[i], bfs, nlevel) + + +@pytest.mark.filterwarnings("ignore", category=NumbaPerformanceWarning) +def test_gpu_searchsorted(): + n = 3 * config.STUMPY_THREADS_PER_BLOCK + 1 + V = np.empty(n, dtype=np.float64) + + threads_per_block = config.STUMPY_THREADS_PER_BLOCK + blocks_per_grid = math.ceil(n / threads_per_block) + + for k in range(1, 32): + device_bfs = cuda.to_device(core._bfs_indices(k, fill_value=-1)) + nlevel = np.floor(np.log2(k) + 1).astype(np.int64) + + A = np.sort(np.random.rand(n, k), axis=1) + device_A = cuda.to_device(A) + + V[:] = np.random.rand(n) + for i, idx in enumerate(np.random.choice(np.arange(n), size=k, replace=False)): + V[idx] = A[idx, i] # create ties + device_V = cuda.to_device(V) + + is_left = True # test case + ref_IDX = [np.searchsorted(A[i], V[i], side="left") for i in range(n)] + ref_IDX = np.asarray(ref_IDX, dtype=np.int64) + + comp_IDX = np.full(n, -1, dtype=np.int64) + device_comp_IDX = cuda.to_device(comp_IDX) + _gpu_searchsorted_kernel[blocks_per_grid, threads_per_block]( + device_A, device_V, device_bfs, nlevel, is_left, device_comp_IDX + ) + comp_IDX = device_comp_IDX.copy_to_host() + npt.assert_array_equal(ref_IDX, comp_IDX) + + is_left = False # test case + ref_IDX = [np.searchsorted(A[i], V[i], side="right") for i in range(n)] + ref_IDX = np.asarray(ref_IDX, dtype=np.int64) + + comp_IDX = np.full(n, -1, dtype=np.int64) + device_comp_IDX = cuda.to_device(comp_IDX) + _gpu_searchsorted_kernel[blocks_per_grid, threads_per_block]( + device_A, device_V, device_bfs, nlevel, is_left, device_comp_IDX + ) + comp_IDX = device_comp_IDX.copy_to_host() + npt.assert_array_equal(ref_IDX, comp_IDX) + + @pytest.mark.filterwarnings("ignore", category=NumbaPerformanceWarning) @pytest.mark.parametrize("T_A, T_B", test_data) def test_gpu_stump_self_join(T_A, T_B): @@ -350,3 +417,32 @@ def test_gpu_stump_nan_zero_mean_self_join(): naive.replace_inf(ref_mp) naive.replace_inf(comp_mp) npt.assert_almost_equal(ref_mp, comp_mp) + + +@pytest.mark.filterwarnings("ignore", category=NumbaPerformanceWarning) +@pytest.mark.parametrize("T_A, T_B", test_data) +def test_gpu_stump_self_join_KNN(T_A, T_B): + m = 3 + zone = int(np.ceil(m / 4)) + for k in range(2, 4): + ref_mp = naive.stump(T_B, m, exclusion_zone=zone, row_wise=True, k=k) + comp_mp = gpu_stump(T_B, m, ignore_trivial=True, k=k) + naive.replace_inf(ref_mp) + naive.replace_inf(comp_mp) + npt.assert_almost_equal(ref_mp, comp_mp) + + comp_mp = gpu_stump(pd.Series(T_B), m, ignore_trivial=True, k=k) + naive.replace_inf(comp_mp) + npt.assert_almost_equal(ref_mp, comp_mp) + + +@pytest.mark.filterwarnings("ignore", category=NumbaPerformanceWarning) +@pytest.mark.parametrize("T_A, T_B", test_data) +def test_gpu_stump_A_B_join_KNN(T_A, T_B): + m = 3 + for k in range(2, 4): + ref_mp = naive.stump(T_A, m, T_B=T_B, row_wise=True, k=k) + comp_mp = gpu_stump(T_A, m, T_B, ignore_trivial=False, k=k) + naive.replace_inf(ref_mp) + naive.replace_inf(comp_mp) + npt.assert_almost_equal(ref_mp, comp_mp) diff --git a/tests/test_non_normalized_decorator.py b/tests/test_non_normalized_decorator.py index 
2c6447d09..8da9354c8 100644 --- a/tests/test_non_normalized_decorator.py +++ b/tests/test_non_normalized_decorator.py @@ -340,6 +340,7 @@ def test_mmotifs(T, m): npt.assert_almost_equal(ref_distances, cmp_distances) +@pytest.mark.filterwarnings("ignore:All-NaN slice encountered") def test_snippets(): T = np.random.rand(64) m = 10 diff --git a/tests/test_scrump.py b/tests/test_scrump.py index 0e56a4c62..de978c856 100644 --- a/tests/test_scrump.py +++ b/tests/test_scrump.py @@ -107,11 +107,9 @@ def test_scrump_self_join(T_A, T_B, percentages): seed = np.random.randint(100000) np.random.seed(seed) - ref_mp = naive.scrump(T_B, m, T_B, percentage, zone, False, None) - ref_P = ref_mp[:, 0] - ref_I = ref_mp[:, 1] - ref_left_I = ref_mp[:, 2] - ref_right_I = ref_mp[:, 3] + ref_P, ref_I, ref_left_I, ref_right_I = naive.scrump( + T_B, m, T_B, percentage, zone, False, None + ) np.random.seed(seed) approx = scrump( @@ -140,11 +138,9 @@ def test_scrump_A_B_join(T_A, T_B, percentages): seed = np.random.randint(100000) np.random.seed(seed) - ref_mp = naive.scrump(T_A, m, T_B, percentage, None, False, None) - ref_P = ref_mp[:, 0] - ref_I = ref_mp[:, 1] - ref_left_I = ref_mp[:, 2] - ref_right_I = ref_mp[:, 3] + ref_P, ref_I, ref_left_I, ref_right_I = naive.scrump( + T_A, m, T_B, percentage, None, False, None + ) np.random.seed(seed) approx = scrump( @@ -174,11 +170,9 @@ def test_scrump_A_B_join_swap(T_A, T_B, percentages): seed = np.random.randint(100000) np.random.seed(seed) - ref_mp = naive.scrump(T_B, m, T_A, percentage, None, False, None) - ref_P = ref_mp[:, 0] - # ref_I = ref_mp[:, 1] - ref_left_I = ref_mp[:, 2] - ref_right_I = ref_mp[:, 3] + ref_P, _, ref_left_I, ref_right_I = naive.scrump( + T_B, m, T_A, percentage, None, False, None + ) np.random.seed(seed) approx = scrump( @@ -210,11 +204,9 @@ def test_scrump_self_join_larger_window(T_A, T_B, m, percentages): seed = np.random.randint(100000) np.random.seed(seed) - ref_mp = naive.scrump(T_B, m, T_B, percentage, zone, False, None) - ref_P = ref_mp[:, 0] - ref_I = ref_mp[:, 1] - ref_left_I = ref_mp[:, 2] - ref_right_I = ref_mp[:, 3] + ref_P, ref_I, ref_left_I, ref_right_I = naive.scrump( + T_B, m, T_B, percentage, zone, False, None + ) np.random.seed(seed) approx = scrump( @@ -377,16 +369,12 @@ def test_scrump_plus_plus_self_join(T_A, T_B, percentages): seed = np.random.randint(100000) np.random.seed(seed) - ref_P, ref_I = naive.prescrump(T_B, m, T_B, s=s, exclusion_zone=zone) - ref_mp = naive.scrump(T_B, m, T_B, percentage, zone, True, s) - for i in range(ref_mp.shape[0]): - if ref_P[i] < ref_mp[i, 0]: - ref_mp[i, 0] = ref_P[i] - ref_mp[i, 1] = ref_I[i] - ref_P = ref_mp[:, 0] - ref_I = ref_mp[:, 1] - # ref_left_I = ref_mp[:, 2] - # ref_right_I = ref_mp[:, 3] + ref_P, ref_I = naive.prescrump(T_B, m, T_B, s=s, exclusion_zone=zone, k=1) + ref_P_aux, ref_I_aux, _, _ = naive.scrump( + T_B, m, T_B, percentage, zone, True, s, k=1 + ) + + naive.merge_topk_PI(ref_P, ref_P_aux, ref_I, ref_I_aux) np.random.seed(seed) approx = scrump( @@ -395,16 +383,14 @@ def test_scrump_plus_plus_self_join(T_A, T_B, percentages): approx.update() comp_P = approx.P_ comp_I = approx.I_ - # comp_left_I = approx.left_I_ - # comp_right_I = approx.right_I_ naive.replace_inf(ref_P) - naive.replace_inf(comp_I) + naive.replace_inf(comp_P) + ref_P = ref_P + ref_I = ref_I npt.assert_almost_equal(ref_P, comp_P) npt.assert_almost_equal(ref_I, comp_I) - # npt.assert_almost_equal(ref_left_I, comp_left_I) - # npt.assert_almost_equal(ref_right_I, comp_right_I) @pytest.mark.parametrize("T_A, 
T_B", test_data) @@ -418,16 +404,15 @@ def test_scrump_plus_plus_A_B_join(T_A, T_B, percentages): seed = np.random.randint(100000) np.random.seed(seed) - ref_P, ref_I = naive.prescrump(T_A, m, T_B, s=s) - ref_mp = naive.scrump(T_A, m, T_B, percentage, None, False, None) - for i in range(ref_mp.shape[0]): - if ref_P[i] < ref_mp[i, 0]: - ref_mp[i, 0] = ref_P[i] - ref_mp[i, 1] = ref_I[i] - ref_P = ref_mp[:, 0] - ref_I = ref_mp[:, 1] - ref_left_I = ref_mp[:, 2] - ref_right_I = ref_mp[:, 3] + ref_P, ref_I = naive.prescrump(T_A, m, T_B, s=s, k=1) + + ref_P_aux, ref_I_aux, ref_left_I_aux, ref_right_I_aux = naive.scrump( + T_A, m, T_B, percentage, None, False, None, k=1 + ) + + naive.merge_topk_PI(ref_P, ref_P_aux, ref_I, ref_I_aux) + ref_left_I = ref_left_I_aux + ref_right_I = ref_right_I_aux approx = scrump( T_A, @@ -447,6 +432,8 @@ def test_scrump_plus_plus_A_B_join(T_A, T_B, percentages): naive.replace_inf(ref_P) naive.replace_inf(comp_P) + ref_P = ref_P + ref_I = ref_I npt.assert_almost_equal(ref_P, comp_P) npt.assert_almost_equal(ref_I, comp_I) npt.assert_almost_equal(ref_left_I, comp_left_I) @@ -551,11 +538,9 @@ def test_scrump_constant_subsequence_self_join(percentages): seed = np.random.randint(100000) np.random.seed(seed) - ref_mp = naive.scrump(T, m, T, percentage, zone, False, None) - ref_P = ref_mp[:, 0] - ref_I = ref_mp[:, 1] - ref_left_I = ref_mp[:, 2] - ref_right_I = ref_mp[:, 3] + ref_P, ref_I, ref_left_I, ref_right_I = naive.scrump( + T, m, T, percentage, zone, False, None + ) np.random.seed(seed) approx = scrump( @@ -589,11 +574,7 @@ def test_scrump_identical_subsequence_self_join(percentages): seed = np.random.randint(100000) np.random.seed(seed) - ref_mp = naive.scrump(T, m, T, percentage, zone, False, None) - ref_P = ref_mp[:, 0] - # ref_I = ref_mp[:, 1] - # ref_left_I = ref_mp[:, 2] - # ref_right_I = ref_mp[:, 3] + ref_P, _, _, _ = naive.scrump(T, m, T, percentage, zone, False, None) np.random.seed(seed) approx = scrump( @@ -635,11 +616,9 @@ def test_scrump_nan_inf_self_join( seed = np.random.randint(100000) np.random.seed(seed) - ref_mp = naive.scrump(T_B_sub, m, T_B_sub, percentage, zone, False, None) - ref_P = ref_mp[:, 0] - ref_I = ref_mp[:, 1] - ref_left_I = ref_mp[:, 2] - ref_right_I = ref_mp[:, 3] + ref_P, ref_I, ref_left_I, ref_right_I = naive.scrump( + T_B_sub, m, T_B_sub, percentage, zone, False, None + ) np.random.seed(seed) approx = scrump(T_B_sub, m, percentage=percentage, pre_scrump=False) @@ -669,11 +648,9 @@ def test_scrump_nan_zero_mean_self_join(percentages): seed = np.random.randint(100000) np.random.seed(seed) - ref_mp = naive.scrump(T, m, T, percentage, zone, False, None) - ref_P = ref_mp[:, 0] - ref_I = ref_mp[:, 1] - ref_left_I = ref_mp[:, 2] - ref_right_I = ref_mp[:, 3] + ref_P, ref_I, ref_left_I, ref_right_I = naive.scrump( + T, m, T, percentage, zone, False, None + ) np.random.seed(seed) approx = scrump(T, m, percentage=percentage, pre_scrump=False) @@ -708,3 +685,300 @@ def test_prescrump_A_B_join_larger_window(T_A, T_B): npt.assert_almost_equal(ref_P, comp_P) npt.assert_almost_equal(ref_I, comp_I) + + +@pytest.mark.parametrize("T_A, T_B", test_data) +def test_prescrump_self_join_KNN(T_A, T_B): + m = 3 + zone = int(np.ceil(m / 4)) + for k in range(2, 4): + for s in range(1, zone + 1): + seed = np.random.randint(100000) + + np.random.seed(seed) + ref_P, ref_I = naive.prescrump(T_B, m, T_B, s=s, exclusion_zone=zone, k=k) + + np.random.seed(seed) + comp_P, comp_I = prescrump(T_B, m, s=s, k=k) + + npt.assert_almost_equal(ref_I, comp_I) + 
+
+            npt.assert_almost_equal(ref_I, comp_I)
+            npt.assert_almost_equal(ref_P, comp_P)
+
+
+@pytest.mark.parametrize("T_A, T_B", test_data)
+def test_prescrump_A_B_join_KNN(T_A, T_B):
+    m = 3
+    zone = int(np.ceil(m / 4))
+    for k in range(2, 4):
+        for s in range(1, zone + 1):
+            seed = np.random.randint(100000)
+
+            np.random.seed(seed)
+            ref_P, ref_I = naive.prescrump(T_A, m, T_B, s=s, k=k)
+
+            np.random.seed(seed)
+            comp_P, comp_I = prescrump(T_A, m, T_B=T_B, s=s, k=k)
+
+            npt.assert_almost_equal(ref_P, comp_P)
+            npt.assert_almost_equal(ref_I, comp_I)
+
+
+@pytest.mark.parametrize("T_A, T_B", test_data)
+@pytest.mark.parametrize("percentages", percentages)
+def test_scrump_self_join_KNN(T_A, T_B, percentages):
+    m = 3
+    zone = int(np.ceil(m / 4))
+
+    for k in range(2, 4):
+        for percentage in percentages:
+            seed = np.random.randint(100000)
+
+            np.random.seed(seed)
+            ref_P, ref_I, ref_left_I, ref_right_I = naive.scrump(
+                T_B, m, T_B, percentage, zone, False, None, k=k
+            )
+
+            np.random.seed(seed)
+            approx = scrump(
+                T_B,
+                m,
+                ignore_trivial=True,
+                percentage=percentage,
+                pre_scrump=False,
+                k=k,
+            )
+            approx.update()
+            comp_P = approx.P_
+            comp_I = approx.I_
+            comp_left_I = approx.left_I_
+            comp_right_I = approx.right_I_
+
+            naive.replace_inf(ref_P)
+            naive.replace_inf(comp_P)
+            npt.assert_almost_equal(ref_P, comp_P)
+            npt.assert_almost_equal(ref_I, comp_I)
+            npt.assert_almost_equal(ref_left_I, comp_left_I)
+            npt.assert_almost_equal(ref_right_I, comp_right_I)
+
+
+@pytest.mark.parametrize("T_A, T_B", test_data)
+@pytest.mark.parametrize("percentages", percentages)
+def test_scrump_A_B_join_KNN(T_A, T_B, percentages):
+    m = 3
+    for k in range(2, 4):
+        for percentage in percentages:
+            seed = np.random.randint(100000)
+
+            np.random.seed(seed)
+            ref_P, ref_I, ref_left_I, ref_right_I = naive.scrump(
+                T_A, m, T_B, percentage, None, False, None, k=k
+            )
+
+            np.random.seed(seed)
+            approx = scrump(
+                T_A,
+                m,
+                T_B,
+                ignore_trivial=False,
+                percentage=percentage,
+                pre_scrump=False,
+                k=k,
+            )
+            approx.update()
+            comp_P = approx.P_
+            comp_I = approx.I_
+            comp_left_I = approx.left_I_
+            comp_right_I = approx.right_I_
+
+            naive.replace_inf(ref_P)
+            naive.replace_inf(comp_P)
+
+            npt.assert_almost_equal(ref_P, comp_P)
+            npt.assert_almost_equal(ref_I, comp_I)
+            npt.assert_almost_equal(ref_left_I, comp_left_I)
+            npt.assert_almost_equal(ref_right_I, comp_right_I)
+
+
+@pytest.mark.parametrize("T_A, T_B", test_data)
+@pytest.mark.parametrize("percentages", percentages)
+def test_scrump_plus_plus_self_join_KNN(T_A, T_B, percentages):
+    m = 3
+    zone = int(np.ceil(m / 4))
+
+    for k in range(2, 4):
+        for s in range(1, zone + 1):
+            for percentage in percentages:
+                seed = np.random.randint(100000)
+
+                np.random.seed(seed)
+                ref_P, ref_I = naive.prescrump(
+                    T_B, m, T_B, s=s, exclusion_zone=zone, k=k
+                )
+                ref_P_aux, ref_I_aux, _, _ = naive.scrump(
+                    T_B, m, T_B, percentage, zone, True, s, k=k
+                )
+                naive.merge_topk_PI(ref_P, ref_P_aux, ref_I, ref_I_aux)
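+                # A naive mental model for this top-k merge (a sketch only; it
+                # ignores the duplicate-index bookkeeping the real merge does):
+                # per row, keep the k smallest values drawn from both arrays,
+                # with a stable sort so entries of `ref_P` win ties.
+                #
+                #     for i in range(ref_P.shape[0]):
+                #         PC = np.concatenate([ref_P[i], ref_P_aux[i]])
+                #         IC = np.concatenate([ref_I[i], ref_I_aux[i]])
+                #         order = np.argsort(PC, kind="stable")[:k]
+                #         ref_P[i], ref_I[i] = PC[order], IC[order]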
+
+                np.random.seed(seed)
+                approx = scrump(
+                    T_B,
+                    m,
+                    ignore_trivial=True,
+                    percentage=percentage,
+                    pre_scrump=True,
+                    s=s,
+                    k=k,
+                )
+                approx.update()
+                comp_P = approx.P_
+                comp_I = approx.I_
+
+                naive.replace_inf(ref_P)
+                naive.replace_inf(comp_P)
+
+                npt.assert_almost_equal(ref_P, comp_P)
+                npt.assert_almost_equal(ref_I, comp_I)
+
+
+@pytest.mark.parametrize("T_A, T_B", test_data)
+def test_prescrump_self_join_larger_window_m_5_k_5(T_A, T_B):
+    m = 5
+    k = 5
+    zone = int(np.ceil(m / 4))
+
+    if len(T_B) > m:
+        for s in range(1, zone + 1):
+            seed = np.random.randint(100000)
+
+            np.random.seed(seed)
+            ref_P, ref_I = naive.prescrump(T_B, m, T_B, s=s, exclusion_zone=zone, k=k)
+
+            np.random.seed(seed)
+            comp_P, comp_I = prescrump(T_B, m, s=s, k=k)
+
+            npt.assert_almost_equal(ref_P, comp_P)
+            npt.assert_almost_equal(ref_I, comp_I)
+
+
+@pytest.mark.parametrize("T_A, T_B", test_data)
+def test_prescrump_A_B_join_larger_window_m_5_k_5(T_A, T_B):
+    m = 5
+    k = 5
+    zone = int(np.ceil(m / 4))
+    if len(T_A) > m and len(T_B) > m:
+        for s in range(1, zone + 1):
+            seed = np.random.randint(100000)
+
+            np.random.seed(seed)
+            ref_P, ref_I = naive.prescrump(T_A, m, T_B, s=s, k=k)
+
+            np.random.seed(seed)
+            comp_P, comp_I = prescrump(T_A, m, T_B, s=s, k=k)
+
+            npt.assert_almost_equal(ref_P, comp_P)
+            npt.assert_almost_equal(ref_I, comp_I)
+
+
+def test_prescrump_self_join_KNN_no_overlap():
+    # This test is specifically designed to raise an error in a rare case
+    # described as follows: let `I[i]` denote the array of length `k` holding
+    # the start indices of the best-so-far top-k nearest neighbors of
+    # subsequence `i` (`S_i`), and let `P[i]` denote their corresponding
+    # distances, sorted in ascending order. Let `d` be the distance between
+    # `S_i` and `S_j`. `P[i]` and `I[i]` must be updated if (1) `j` is not in
+    # `I[i]` and (2) `d < P[i, -1]`. Regarding the former condition, one needs
+    # to check the whole array `I[i]`. Checking only `I[i, :idx]`, where
+    # `idx = np.searchsorted(P[i], d, side='right')`, is not completely
+    # correct, due to imprecision in numerical calculations. It may happen
+    # that `j` is not in `I[i, :idx]` but is in fact at `I[i, idx]` (or at any
+    # other position in `I[i]`), with a corresponding distance `P[i, idx]` of,
+    # say, `d + 1e-5`. In theory, this should be exactly `d`; however, due to
+    # imprecision, a slightly different value may have been computed for this
+    # distance in one of the earlier iterations of `prescrump`. This test
+    # therefore fails if someone changes the performant `prescrump` code to
+    # check only `I[i, :idx]` rather than the full array `I[i]`.
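+    # A minimal sketch of the update this comment describes, for a single
+    # subsequence with ascending distances `P` (length k), indices `I`,
+    # candidate neighbor `j`, and distance `d` (illustrative names only):
+    #
+    #     idx = np.searchsorted(P, d, side="right")
+    #     in_I = np.any(I == j)          # correct: scan the full array I
+    #     # in_I = np.any(I[:idx] == j)  # fragile: may miss j beyond idx
+    #     if d < P[-1] and not in_I:
+    #         P[idx + 1 :] = P[idx:-1]   # shift larger entries to the right
+    #         I[idx + 1 :] = I[idx:-1]
+    #         P[idx] = d
+    #         I[idx] = j
+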
+    T = np.array(
+        [
+            -916.64703784,
+            -327.42056679,
+            379.19386284,
+            -281.80427628,
+            -189.85401773,
+            -38.69610569,
+            187.89889345,
+            578.65862523,
+            528.09687811,
+            -667.42973795,
+            -285.27749324,
+            -211.28930925,
+            -703.93802657,
+            -820.53780562,
+            -955.91174663,
+            383.65471851,
+            932.08809422,
+            -563.57569746,
+            784.0546579,
+            -343.14886064,
+            -612.72329848,
+            -270.09273091,
+            -448.39346549,
+            578.03202014,
+            867.15436674,
+            -783.55167049,
+            -494.78062922,
+            -311.18567747,
+            522.70052256,
+            933.45474094,
+            192.34822368,
+            -162.11374908,
+            -612.95359279,
+            -449.62297051,
+            -351.79138459,
+            -77.70189101,
+            -439.46519487,
+            -660.48431174,
+            548.69362177,
+            485.36004744,
+            -535.3566627,
+            -568.0955257,
+            755.26647273,
+            736.1079588,
+            -597.65672557,
+            379.3299783,
+            731.38211912,
+            247.34827447,
+            545.41888454,
+            644.94300763,
+            20.99042666,
+            788.19859515,
+            -898.24325898,
+            -929.47841134,
+            -738.45875181,
+            66.01030291,
+            512.945841,
+            -44.07720164,
+            302.97141464,
+            -696.95271302,
+            662.98385163,
+            -712.3807531,
+            -43.62688539,
+            74.16927482,
+        ]
+    )
+
+    # test_cases: a dict whose keys are `(m, k)` pairs and whose values are
+    # lists of random seeds
+    test_cases = {
+        (3, 2): [4279, 9133, 8190],
+        (3, 5): [1267, 4016, 4046],
+        (5, 2): [6327, 4926, 3712],
+        (5, 5): [3032, 3032, 8117],
+    }
+    for (m, k), specified_seeds in test_cases.items():
+        zone = int(np.ceil(m / 4))
+        for seed in specified_seeds:
+            np.random.seed(seed)
+            ref_P, ref_I = naive.prescrump(T, m, T, s=1, exclusion_zone=zone, k=k)
+            comp_P, comp_I = prescrump(T, m, s=1, k=k)
+
+            npt.assert_almost_equal(ref_P, comp_P)
+            npt.assert_almost_equal(ref_I, comp_I)
diff --git a/tests/test_stimp.py b/tests/test_stimp.py
index 089c1f1f9..a56bec695 100644
--- a/tests/test_stimp.py
+++ b/tests/test_stimp.py
@@ -50,12 +50,9 @@ def test_stimp_1_percent(T):
         zone = int(np.ceil(m / 4))
         s = zone
         tmp_P, tmp_I = naive.prescrump(T, m, T, s=s, exclusion_zone=zone)
-        ref_mp = naive.scrump(T, m, T, percentage, zone, True, s)
-        for i in range(ref_mp.shape[0]):
-            if tmp_P[i] < ref_mp[i, 0]:
-                ref_mp[i, 0] = tmp_P[i]
-                ref_mp[i, 1] = tmp_I[i]
-        ref_PAN[pan._bfs_indices[idx], : ref_mp.shape[0]] = ref_mp[:, 0]
+        ref_P, ref_I, _, _ = naive.scrump(T, m, T, percentage, zone, True, s)
+        naive.merge_topk_PI(ref_P, tmp_P, ref_I, tmp_I)
+        ref_PAN[pan._bfs_indices[idx], : ref_P.shape[0]] = ref_P
 
     # Compare raw pan
     cmp_PAN = pan._PAN
@@ -108,12 +105,9 @@ def test_stimp_max_m(T):
         zone = int(np.ceil(m / 4))
         s = zone
         tmp_P, tmp_I = naive.prescrump(T, m, T, s=s, exclusion_zone=zone)
-        ref_mp = naive.scrump(T, m, T, percentage, zone, True, s)
-        for i in range(ref_mp.shape[0]):
-            if tmp_P[i] < ref_mp[i, 0]:
-                ref_mp[i, 0] = tmp_P[i]
-                ref_mp[i, 1] = tmp_I[i]
-        ref_PAN[pan._bfs_indices[idx], : ref_mp.shape[0]] = ref_mp[:, 0]
+        ref_P, ref_I, _, _ = naive.scrump(T, m, T, percentage, zone, True, s)
+        naive.merge_topk_PI(ref_P, tmp_P, ref_I, tmp_I)
+        ref_PAN[pan._bfs_indices[idx], : ref_P.shape[0]] = ref_P
 
     # Compare raw pan
     cmp_PAN = pan._PAN
diff --git a/tests/test_stump.py b/tests/test_stump.py
index 6feaf1598..e08746758 100644
--- a/tests/test_stump.py
+++ b/tests/test_stump.py
@@ -240,3 +240,34 @@ def test_stump_nan_zero_mean_self_join():
     naive.replace_inf(ref_mp)
     naive.replace_inf(comp_mp)
     npt.assert_almost_equal(ref_mp, comp_mp)
+
+
+@pytest.mark.parametrize("T_A, T_B", test_data)
+def test_stump_self_join_KNN(T_A, T_B):
+    m = 3
+    zone = int(np.ceil(m / 4))
+    for k in range(2, 4):
+        ref_mp = naive.stump(T_B, m, exclusion_zone=zone, k=k)
+        comp_mp = stump(T_B, m, ignore_trivial=True, k=k)
+        naive.replace_inf(ref_mp)
+        naive.replace_inf(comp_mp)
+        npt.assert_almost_equal(ref_mp, comp_mp)
+
+        comp_mp = stump(pd.Series(T_B), m, ignore_trivial=True, k=k)
+        naive.replace_inf(comp_mp)
+        npt.assert_almost_equal(ref_mp, comp_mp)
+
+
+@pytest.mark.parametrize("T_A, T_B", test_data)
+def test_stump_A_B_join_KNN(T_A, T_B):
+    m = 3
+    for k in range(2, 4):
+        ref_mp = naive.stump(T_A, m, T_B=T_B, k=k)
+        comp_mp = stump(T_A, m, T_B, ignore_trivial=False, k=k)
+        naive.replace_inf(ref_mp)
+        naive.replace_inf(comp_mp)
+        npt.assert_almost_equal(ref_mp, comp_mp)
+
+        comp_mp = stump(pd.Series(T_A), m, pd.Series(T_B), ignore_trivial=False, k=k)
+        naive.replace_inf(comp_mp)
+        npt.assert_almost_equal(ref_mp, comp_mp)
diff --git a/tests/test_stumped.py b/tests/test_stumped.py
index ca53829fc..9181d81c8 100644
--- a/tests/test_stumped.py
+++ b/tests/test_stumped.py
@@ -608,3 +608,36 @@ def test_stumped_two_subsequences_nan_inf_A_B_join_swap(
         naive.replace_inf(ref_mp)
         naive.replace_inf(comp_mp)
         npt.assert_almost_equal(ref_mp, comp_mp)
+
+
+@pytest.mark.filterwarnings("ignore:numpy.dtype size changed")
+@pytest.mark.filterwarnings("ignore:numpy.ufunc size changed")
+@pytest.mark.filterwarnings("ignore:numpy.ndarray size changed")
+@pytest.mark.filterwarnings("ignore:\\s+Port 8787 is already in use:UserWarning")
+@pytest.mark.parametrize("T_A, T_B", test_data)
+def test_stumped_self_join_KNN(T_A, T_B, dask_cluster):
+    with Client(dask_cluster) as dask_client:
+        m = 3
+        zone = int(np.ceil(m / 4))
+        for k in range(2, 4):
+            ref_mp = naive.stump(T_B, m, exclusion_zone=zone, k=k)
+            comp_mp = stumped(dask_client, T_B, m, ignore_trivial=True, k=k)
+            naive.replace_inf(ref_mp)
+            naive.replace_inf(comp_mp)
+            npt.assert_almost_equal(ref_mp, comp_mp)
+
+
+@pytest.mark.filterwarnings("ignore:numpy.dtype size changed")
+@pytest.mark.filterwarnings("ignore:numpy.ufunc size changed")
+@pytest.mark.filterwarnings("ignore:numpy.ndarray size changed")
+@pytest.mark.filterwarnings("ignore:\\s+Port 8787 is already in use:UserWarning")
+@pytest.mark.parametrize("T_A, T_B", test_data)
+def test_stumped_A_B_join_KNN(T_A, T_B, dask_cluster):
+    with Client(dask_cluster) as dask_client:
+        m = 3
+        for k in range(2, 4):
+            ref_mp = naive.stump(T_A, m, T_B=T_B, k=k)
+            comp_mp = stumped(dask_client, T_A, m, T_B, ignore_trivial=False, k=k)
+            naive.replace_inf(ref_mp)
+            naive.replace_inf(comp_mp)
+            npt.assert_almost_equal(ref_mp, comp_mp)
diff --git a/tests/test_stumpi.py b/tests/test_stumpi.py
index 0fa2c3066..5ab2023a7 100644
--- a/tests/test_stumpi.py
+++ b/tests/test_stumpi.py
@@ -35,9 +35,8 @@ def test_stumpi_self_join():
     ref_mp = naive.stump(stream.T_, m, exclusion_zone=zone, row_wise=True)
     ref_P = ref_mp[:, 0]
     ref_I = ref_mp[:, 1]
-    ref_left_P = np.empty(ref_P.shape)
-    ref_left_P[:] = np.inf
-    ref_left_I = ref_mp[:, 2]
+    ref_left_I = ref_mp[:, 2].astype(np.int64)
+    ref_left_P = np.full_like(ref_left_I, np.inf, dtype=np.float64)
     for i, j in enumerate(ref_left_I):
         if j >= 0:
             D = core.mass(stream.T_[i : i + m], stream.T_[j : j + m])
@@ -861,8 +860,8 @@ def test_stumpi_profile_index_match():
     T_stream = T_full[:warm_start].copy()
     stream = stumpi(T_stream, m, egress=True)
 
-    P = np.full(stream.P_.shape, np.inf)
-    left_P = np.full(stream.left_P_.shape, np.inf)
+    P = np.full_like(stream.P_, np.inf, dtype=np.float64)
+    left_P = np.full_like(stream.left_P_, np.inf, dtype=np.float64)
 
     n = 0
     for i in range(len(T_stream), len(T_full)):
@@ -870,18 +869,18 @@
         stream.update(t)
 
         P[:] = np.inf
-        idx = np.argwhere(stream.I_ >= 0).flatten()
-        P[idx] = naive.distance(
-            naive.z_norm(T_full_subseq[idx + n + 1], axis=1),
-            naive.z_norm(T_full_subseq[stream.I_[idx]], axis=1),
+        indices = np.argwhere(stream.I_ >= 0).flatten()
+        P[indices] = naive.distance(
+            naive.z_norm(T_full_subseq[indices + n + 1], axis=1),
+            naive.z_norm(T_full_subseq[stream.I_[indices]], axis=1),
             axis=1,
         )
 
         left_P[:] = np.inf
-        idx = np.argwhere(stream.left_I_ >= 0).flatten()
-        left_P[idx] = naive.distance(
-            naive.z_norm(T_full_subseq[idx + n + 1], axis=1),
-            naive.z_norm(T_full_subseq[stream.left_I_[idx]], axis=1),
+        indices = np.argwhere(stream.left_I_ >= 0).flatten()
+        left_P[indices] = naive.distance(
+            naive.z_norm(T_full_subseq[indices + n + 1], axis=1),
+            naive.z_norm(T_full_subseq[stream.left_I_[indices]], axis=1),
             axis=1,
         )
 
@@ -889,3 +888,175 @@ def test_stumpi_profile_index_match():
         npt.assert_almost_equal(stream.left_P_, left_P)
 
         n += 1
+
+
+def test_stumpi_self_join_KNN():
+    m = 3
+    zone = int(np.ceil(m / 4))
+
+    for k in range(2, 4):
+        seed = np.random.randint(100000)
+        np.random.seed(seed)
+
+        T = np.random.rand(30)
+        stream = stumpi(T, m, egress=False, k=k)
+        for i in range(34):
+            t = np.random.rand()
+            stream.update(t)
+
+        comp_P = stream.P_
+        comp_I = stream.I_
+        comp_left_P = stream.left_P_
+        comp_left_I = stream.left_I_
+
+        ref_mp = naive.stump(stream.T_, m, exclusion_zone=zone, row_wise=True, k=k)
+        ref_P = ref_mp[:, :k]
+        ref_I = ref_mp[:, k : 2 * k]
+        ref_left_I = ref_mp[:, 2 * k].astype(np.int64)
+        ref_left_P = np.full_like(ref_left_I, np.inf, dtype=np.float64)
+        for i, j in enumerate(ref_left_I):
+            if j >= 0:
+                D = core.mass(stream.T_[i : i + m], stream.T_[j : j + m])
+                ref_left_P[i] = D[0]
+
+        naive.replace_inf(ref_P)
+        naive.replace_inf(ref_left_P)
+        naive.replace_inf(comp_P)
+        naive.replace_inf(comp_left_P)
+
+        npt.assert_almost_equal(ref_P, comp_P)
+        npt.assert_almost_equal(ref_I, comp_I)
+        npt.assert_almost_equal(ref_left_P, comp_left_P)
+        npt.assert_almost_equal(ref_left_I, comp_left_I)
+
+        np.random.seed(seed)
+        T = np.random.rand(30)
+        T = pd.Series(T)
+        stream = stumpi(T, m, egress=False, k=k)
+        for i in range(34):
+            t = np.random.rand()
+            stream.update(t)
+
+        comp_P = stream.P_
+        comp_I = stream.I_
+        comp_left_P = stream.left_P_
+        comp_left_I = stream.left_I_
+
+        naive.replace_inf(comp_P)
+        naive.replace_inf(comp_left_P)
+
+        npt.assert_almost_equal(ref_P, comp_P)
+        npt.assert_almost_equal(ref_I, comp_I)
+        npt.assert_almost_equal(ref_left_P, comp_left_P)
+        npt.assert_almost_equal(ref_left_I, comp_left_I)
+
+
+def test_stumpi_self_join_egress_KNN():
+    m = 3
+
+    for k in range(2, 4):
+        seed = np.random.randint(100000)
+        np.random.seed(seed)
+        n = 30
+        T = np.random.rand(n)
+
+        ref_mp = naive.stumpi_egress(T, m, k=k)
+        ref_P = ref_mp.P_.copy()
+        ref_I = ref_mp.I_
+        ref_left_P = ref_mp.left_P_.copy()
+        ref_left_I = ref_mp.left_I_
+
+        stream = stumpi(T, m, egress=True, k=k)
+
+        comp_P = stream.P_.copy()
+        comp_I = stream.I_
+        comp_left_P = stream.left_P_.copy()
+        comp_left_I = stream.left_I_
+
+        naive.replace_inf(ref_P)
+        naive.replace_inf(ref_left_P)
+        naive.replace_inf(comp_P)
+        naive.replace_inf(comp_left_P)
+
+        npt.assert_almost_equal(ref_P, comp_P)
+        npt.assert_almost_equal(ref_I, comp_I)
+        npt.assert_almost_equal(ref_left_P, comp_left_P)
+        npt.assert_almost_equal(ref_left_I, comp_left_I)
+
+        for i in range(34):
+            t = np.random.rand()
+            ref_mp.update(t)
+            stream.update(t)
+
+            comp_P = stream.P_.copy()
+            comp_I = stream.I_
+            comp_left_P = stream.left_P_.copy()
+            comp_left_I = stream.left_I_
+
+            ref_P = ref_mp.P_.copy()
+            ref_I = ref_mp.I_
+            ref_left_P = ref_mp.left_P_.copy()
+            ref_left_I = ref_mp.left_I_
+
+            naive.replace_inf(ref_P)
+            naive.replace_inf(ref_left_P)
+            naive.replace_inf(comp_P)
+            naive.replace_inf(comp_left_P)
+
+            npt.assert_almost_equal(ref_P, comp_P)
+            npt.assert_almost_equal(ref_I, comp_I)
+            npt.assert_almost_equal(ref_left_P, comp_left_P)
+            npt.assert_almost_equal(ref_left_I, comp_left_I)
+
+        np.random.seed(seed)
+        T = np.random.rand(n)
+        T = pd.Series(T)
+
+        ref_mp = naive.stumpi_egress(T, m, k=k)
+        ref_P = ref_mp.P_.copy()
+        ref_I = ref_mp.I_
+        ref_left_P = ref_mp.left_P_.copy()
+        ref_left_I = ref_mp.left_I_
+
+        stream = stumpi(T, m, egress=True, k=k)
+
+        comp_P = stream.P_.copy()
+        comp_I = stream.I_
+        comp_left_P = stream.left_P_.copy()
+        comp_left_I = stream.left_I_
+
+        naive.replace_inf(ref_P)
+        naive.replace_inf(ref_left_P)
+        naive.replace_inf(comp_P)
+        naive.replace_inf(comp_left_P)
+
+        npt.assert_almost_equal(ref_P, comp_P)
+        npt.assert_almost_equal(ref_I, comp_I)
+        npt.assert_almost_equal(ref_left_P, comp_left_P)
+        npt.assert_almost_equal(ref_left_I, comp_left_I)
+
+        for i in range(34):
+            t = np.random.rand()
+            ref_mp.update(t)
+            stream.update(t)
+
+            comp_P = stream.P_.copy()
+            comp_I = stream.I_
+            comp_left_P = stream.left_P_.copy()
+            comp_left_I = stream.left_I_
+
+            ref_P = ref_mp.P_.copy()
+            ref_I = ref_mp.I_
+            ref_left_P = ref_mp.left_P_.copy()
+            ref_left_I = ref_mp.left_I_
+
+            naive.replace_inf(ref_P)
+            naive.replace_inf(ref_left_P)
+            naive.replace_inf(comp_P)
+            naive.replace_inf(comp_left_P)
+
+            npt.assert_almost_equal(ref_P, comp_P)
+            npt.assert_almost_equal(ref_I, comp_I)
+            npt.assert_almost_equal(ref_left_P, comp_left_P)
+            npt.assert_almost_equal(ref_left_I, comp_left_I)
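+
+
+# Note: with k > 1, the streaming attributes compared above are 2D: `stream.P_`
+# holds one ascending row of k distances per subsequence and `stream.I_` the
+# matching indices, while `left_P_` and `left_I_` remain 1D.  A usage sketch
+# (illustrative values only, not executed by this test suite):
+#
+#     stream = stumpi(np.random.rand(30), m=3, egress=True, k=3)
+#     stream.update(np.random.rand())
+#     assert stream.P_.shape[1] == 3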