[MRG] Fix cdist_soft_dtw_normalized when two different datasets are provided (#476)
tslearn-team · Aug 21, 2023 · 338cf2b · 338cf2b
1 parent b6688a8
commit 338cf2b
Show file tree

Hide file tree

Showing 3 changed files with 30 additions and 7 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -10,8 +10,15 @@ Changelogs for this project are recorded in this file since v0.2.0.
 
 
 
+## [Towards v0.7]
+
 ## [Towards v0.6.2]
 
+### Fixed
+
+* Fixed an incorrect calculation of the normalization term for `cdist_soft_dtw_normalized` when `dataset2` is provided.
+* Fixed the download links for the UCR/UEA datasets.
+
 ## [v0.6.1]
 
 ### Fixed

diff --git a/tslearn/datasets/ucr_uea.py b/tslearn/datasets/ucr_uea.py
@@ -50,14 +50,13 @@ def __init__(self, use_cache=True):
         os.makedirs(self._data_dir, exist_ok=True)
 
         try:
-            url_multivariate = ("https://www.timeseriesclassification.com/"
-                                "ClassificationDownloads/Archives/summaryMultivariate.csv")
+            url_multivariate = ("http://www.timeseriesclassification.com/aeon-toolkit/Archives/"
+                                "summaryMultivariate.csv")
             self._list_multivariate_filename = os.path.join(
                 self._data_dir, os.path.basename(url_multivariate)
             )
             urlretrieve(url_multivariate, self._list_multivariate_filename)
-            url_baseline = ("https://www.timeseriesclassification.com/"
-                            "singleTrainTest.csv")
+            url_baseline = ("http://www.timeseriesclassification.com/singleTrainTest.csv")
             self._baseline_scores_filename = os.path.join(
                 self._data_dir, os.path.basename(url_baseline))
             urlretrieve(url_baseline, self._baseline_scores_filename)
@@ -267,7 +266,7 @@ def load_dataset(self, dataset_name):
                 shutil.rmtree(full_path, ignore_errors=True)
             # else, actually raise the error!
 
-            url = ("https://www.timeseriesclassification.com/ClassificationDownloads/%s.zip"
+            url = ("https://www.timeseriesclassification.com/aeon-toolkit/%s.zip"
                    % dataset_name)
             success = extract_from_zip_url(url, target_dir=full_path)
             if not success:

diff --git a/tslearn/metrics/softdtw_variants.py b/tslearn/metrics/softdtw_variants.py
@@ -658,6 +658,9 @@ def cdist_soft_dtw_normalized(dataset1, dataset2=None, gamma=1.0, be=None):
     >>> time_series = np.random.randn(10, 15, 1)
     >>> np.alltrue(cdist_soft_dtw_normalized(time_series) >= 0.)
     True
+    >>> time_series2 = np.random.randn(4, 15, 1)
+    >>> np.alltrue(cdist_soft_dtw_normalized(time_series, time_series2) >= 0.)
+    True
 
     See Also
     --------
@@ -672,8 +675,22 @@ def cdist_soft_dtw_normalized(dataset1, dataset2=None, gamma=1.0, be=None):
     """
     be = instantiate_backend(be, dataset1)
     dists = cdist_soft_dtw(dataset1, dataset2=dataset2, gamma=gamma, be=be)
-    d_ii = be.diag(dists)
-    dists -= 0.5 * (be.reshape(d_ii, (-1, 1)) + be.reshape(d_ii, (1, -1)))
+    if dataset2 is None:
+        d_ii = be.diag(dists)
+        normalizer = -0.5 * (be.reshape(d_ii, (-1, 1)) + be.reshape(d_ii, (1, -1)))
+    else:
+        self_dists1 = be.empty((dataset1.shape[0], 1))
+        for i, ts1 in enumerate(dataset1):
+            ts1_short = ts1[:ts_size(ts1)]
+            self_dists1[i, 0] = soft_dtw(ts1_short, ts1_short, gamma=gamma, be=be)
+
+        self_dists2 = be.empty((1, dataset2.shape[0]))
+        for j, ts2 in enumerate(dataset2):
+            ts2_short = ts2[:ts_size(ts2)]
+            self_dists2[0, j] = soft_dtw(ts2_short, ts2_short, gamma=gamma, be=be)
+
+        normalizer = -0.5 * (self_dists1 + self_dists2)
+    dists += normalizer
     return dists