Skip to content

Commit

Permalink
Fixed #890 Corrected the number of contiguous windows in snippet (#894)
Browse files Browse the repository at this point in the history
* fix n windows in naive snippet

* fix n windows in performant snippet and minor bugs

* improve reproducibility and update notebook

* set k=10 and add some explanation

* minor changes

* Avoid numpy divide-by-zero warning to avoid unnecessary context
  • Loading branch information
NimaSarajpoor authored Aug 12, 2023
1 parent d4c6a21 commit 6663f5f
Show file tree
Hide file tree
Showing 3 changed files with 74 additions and 82 deletions.
134 changes: 66 additions & 68 deletions docs/Tutorial_Time_Series_Snippets.ipynb

Large diffs are not rendered by default.

11 changes: 4 additions & 7 deletions stumpy/snippets.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ def _get_all_profiles(

right_pad = 0
T_subseq_isconstant = core.process_isconstant(T, s, mpdist_T_subseq_isconstant)
n_contiguous_windows = int(T.shape[0] // m)
if T.shape[0] % m != 0:
right_pad = int(m * np.ceil(T.shape[0] / m) - T.shape[0])
pad_width = (0, right_pad)
Expand All @@ -111,12 +112,12 @@ def _get_all_profiles(
)

n_padded = T.shape[0]
D = np.empty(((n_padded // m) - 1, n_padded - m + 1), dtype=np.float64)
D = np.empty((n_contiguous_windows, n_padded - m + 1), dtype=np.float64)

M_T, Σ_T = core.compute_mean_std(T, s)

# Iterate over non-overlapping subsequences, see Definition 3
for i in range((n_padded // m) - 1):
for i in range(n_contiguous_windows):
start = i * m
stop = (i + 1) * m
S_i = T[start:stop]
Expand Down Expand Up @@ -290,17 +291,13 @@ def snippets(
mpdist_T_subseq_isconstant=mpdist_T_subseq_isconstant,
)

pad_width = (0, int(m * np.ceil(T.shape[0] / m) - T.shape[0]))
T_padded = np.pad(T, pad_width, mode="constant", constant_values=np.nan)
n_padded = T_padded.shape[0]

snippets = np.empty((k, m), dtype=np.float64)
snippets_indices = np.empty(k, dtype=np.int64)
snippets_profiles = np.empty((k, D.shape[-1]), dtype=np.float64)
snippets_fractions = np.empty(k, dtype=np.float64)
snippets_areas = np.empty(k, dtype=np.float64)
Q = np.full(D.shape[-1], np.inf, dtype=np.float64)
indices = np.arange(0, n_padded - m, m, dtype=np.int64)
indices = np.arange(D.shape[0], dtype=np.int64) * m
snippets_regimes_list = []

for i in range(k):
Expand Down
11 changes: 4 additions & 7 deletions tests/naive.py
Original file line number Diff line number Diff line change
Expand Up @@ -1548,6 +1548,7 @@ def get_all_mpdist_profiles(

T_subseq_isconstant = rolling_isconstant(T, s, mpdist_T_subseq_isconstant)
right_pad = 0
n_contiguous_windows = int(T.shape[0] // m)
if T.shape[0] % m != 0:
right_pad = int(m * np.ceil(T.shape[0] / m) - T.shape[0])
pad_width = (0, right_pad)
Expand All @@ -1557,10 +1558,10 @@ def get_all_mpdist_profiles(
)

n_padded = T.shape[0]
D = np.empty(((n_padded // m) - 1, n_padded - m + 1))
D = np.empty((n_contiguous_windows, n_padded - m + 1))

# Iterate over non-overlapping subsequences, see Definition 3
for i in range((n_padded // m) - 1):
for i in range(n_contiguous_windows):
start = i * m
stop = (i + 1) * m
S_i = T[start:stop]
Expand Down Expand Up @@ -1601,17 +1602,13 @@ def mpdist_snippets(
mpdist_T_subseq_isconstant=mpdist_T_subseq_isconstant,
)

pad_width = (0, int(m * np.ceil(T.shape[0] / m) - T.shape[0]))
T_padded = np.pad(T, pad_width, mode="constant", constant_values=np.nan)
n_padded = T_padded.shape[0]

snippets = np.empty((k, m))
snippets_indices = np.empty(k, dtype=np.int64)
snippets_profiles = np.empty((k, D.shape[-1]))
snippets_fractions = np.empty(k)
snippets_areas = np.empty(k)
Q = np.inf
indices = np.arange(0, n_padded - m, m)
indices = np.arange(0, D.shape[0] * m, m)
snippets_regimes_list = []

for snippet_idx in range(k):
Expand Down

0 comments on commit 6663f5f

Please sign in to comment.