[ENH] Speed up the regression distances tests (#2052)
* remove unnecessary ExponentTransform import

* speed up regression distance tests

* speed up regression distance tests

* remove loaders
TonyBagnall authored Sep 14, 2024
1 parent 36accda commit 043a3e8
Showing 2 changed files with 67 additions and 78 deletions.
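
The crux of the speed-up is swapping archive dataset loaders for small, fixed-seed synthetic collections. A minimal standalone sketch of that substitution (not part of the diff below; the printed shapes depend on the generator's defaults):

from aeon.testing.data_generation import make_example_3d_numpy

# Small fixed-seed train/test collections replace load_covid_3month & co.
X_train, y_train = make_example_3d_numpy(n_cases=6, random_state=0, regression_target=True)
X_test, y_test = make_example_3d_numpy(n_cases=6, random_state=2, regression_target=True)

print(X_train.shape, y_train.shape)  # (n_cases, n_channels, n_timepoints) and (n_cases,)
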
101 changes: 49 additions & 52 deletions aeon/distances/tests/test_sklearn_compatibility.py
@@ -9,12 +9,6 @@
 from sklearn.svm import SVR
 
 from aeon.classification.distance_based import KNeighborsTimeSeriesClassifier
-from aeon.datasets import (
-    load_basic_motions,
-    load_cardano_sentiment,
-    load_covid_3month,
-    load_unit_test,
-)
 from aeon.distances._distance import DISTANCES
 from aeon.regression.distance_based import KNeighborsTimeSeriesRegressor
 from aeon.testing.data_generation import make_example_3d_numpy
@@ -41,10 +35,10 @@ def test_function_transformer(dist):
 def test_distance_based(dist):
     """Test all distances work with KNN in a pipeline."""
     X, y = make_example_3d_numpy(
-        n_cases=5, n_channels=1, n_timepoints=10, regression_target=True
+        n_cases=6, n_channels=1, n_timepoints=10, regression_target=True
     )
     X2, y2 = make_example_3d_numpy(
-        n_cases=10, n_channels=1, n_timepoints=10, regression_target=True
+        n_cases=6, n_channels=1, n_timepoints=10, regression_target=True
     )
     pairwise = dist["pairwise_distance"]
     Xt = pairwise(X)
@@ -73,8 +67,8 @@ def test_clusterer(dist):
 @pytest.mark.parametrize(
     "task",
     [
-        ["cls", load_unit_test, KNeighborsClassifier, KNeighborsTimeSeriesClassifier],
-        ["reg", load_covid_3month, KNeighborsRegressor, KNeighborsTimeSeriesRegressor],
+        ["cls", KNeighborsClassifier, KNeighborsTimeSeriesClassifier],
+        ["reg", KNeighborsRegressor, KNeighborsTimeSeriesRegressor],
     ],
 )
 def test_univariate(dist, k, task):
@@ -85,36 +79,41 @@ def test_univariate(dist, k, task):
         return
 
     # Test univariate with 2D format (compatible with sklearn)
-    problem_type, problem_loader, knn_sk_func, knn_aeon_func = task
+    problem_type, knn_sk_func, knn_aeon_func = task
 
-    # Load the unit test dataset as a 2D numpy array
-    X_train, y_train = problem_loader(split="train", return_type="numpy2D")
-    X_test, y_test = problem_loader(split="test", return_type="numpy2D")
-    indices = np.random.RandomState(0).choice(
-        min(len(y_test), len(y_train)), 6, replace=False
+    # Create a collection as a 2D numpy array
+    if problem_type == "cls":
+        reg = False
+    else:
+        reg = True
+    X_train, y_train = make_example_3d_numpy(
+        random_state=0, n_cases=6, regression_target=reg
     )
+    X_test, y_test = make_example_3d_numpy(
+        random_state=2, n_cases=6, regression_target=reg
+    )
+    X_train = np.squeeze(X_train)
+    X_test = np.squeeze(X_test)
     # Compute the pairwise distance matrix for working with sklearn knn with
     # precomputed distances.
-    X_train_precomp_distance = dist["pairwise_distance"](X_train[indices])
-    X_test_precomp_distance = dist["pairwise_distance"](
-        X_test[indices], X_train[indices]
-    )
+    X_train_precomp_distance = dist["pairwise_distance"](X_train)
+    X_test_precomp_distance = dist["pairwise_distance"](X_test, X_train)
 
     knn_aeon = knn_aeon_func(distance=dist["name"], n_neighbors=k)  # aeon
     knn_sk = knn_sk_func(metric=dist["distance"], n_neighbors=k)  # sklearn
     knn_sk_precomp = knn_sk_func(metric="precomputed", n_neighbors=k)  # sklearn pre
 
-    knn_aeon.fit(X_train[indices], y_train[indices])
-    knn_sk.fit(X_train[indices], y_train[indices])
-    knn_sk_precomp.fit(X_train_precomp_distance, y_train[indices])
+    knn_aeon.fit(X_train, y_train)
+    knn_sk.fit(X_train, y_train)
+    knn_sk_precomp.fit(X_train_precomp_distance, y_train)
 
     if problem_type == "cls":
-        knn_aeon_output = knn_aeon.predict_proba(X_test[indices])
-        knn_sk_output = knn_sk.predict_proba(X_test[indices])
+        knn_aeon_output = knn_aeon.predict_proba(X_test)
+        knn_sk_output = knn_sk.predict_proba(X_test)
         knn_sk_precomp_output = knn_sk_precomp.predict_proba(X_test_precomp_distance)
     elif problem_type == "reg":
-        knn_aeon_output = knn_aeon.predict(X_test[indices])
-        knn_sk_output = knn_sk.predict(X_test[indices])
+        knn_aeon_output = knn_aeon.predict(X_test)
+        knn_sk_output = knn_sk.predict(X_test)
         knn_sk_precomp_output = knn_sk_precomp.predict(X_test_precomp_distance)
 
     assert_allclose(knn_aeon_output, knn_sk_output)
Expand All @@ -128,13 +127,11 @@ def test_univariate(dist, k, task):
[
[
"cls",
load_basic_motions,
KNeighborsClassifier,
KNeighborsTimeSeriesClassifier,
],
[
"reg",
load_cardano_sentiment,
KNeighborsRegressor,
KNeighborsTimeSeriesRegressor,
],
@@ -150,34 +147,36 @@ def test_multivariate(dist, k, task):
     # Test multivariate dataset in two ways: A) concatenating channels to be compatible
     # with sklearn, and B) precomputing distances.
 
-    problem_type, problem_loader, knn_sk_func, knn_aeon_func = task
-
-    # Load the basic motions dataset as a 3D numpy array
-    X_train, y_train = problem_loader(split="train", return_type="numpy3D")
-    X_test, y_test = problem_loader(split="test", return_type="numpy3D")
+    problem_type, knn_sk_func, knn_aeon_func = task
+    if problem_type == "cls":
+        reg = False
+    else:
+        reg = True
+    X_train, y_train = make_example_3d_numpy(
+        random_state=0, n_cases=6, regression_target=reg
+    )
+    X_test, y_test = make_example_3d_numpy(
+        random_state=2, n_cases=6, regression_target=reg
+    )
 
     # Transform to 2D format concatenating channels.
     X_train_concat = X_train.reshape(X_train.shape[0], -1)
     X_test_concat = X_test.reshape(X_test.shape[0], -1)
 
-    indices = np.random.RandomState(0).choice(
-        min(len(y_test), len(y_train)), 6, replace=False
-    )
-
     # A) Test multivariate with 2D format (concatenates channels to be compatible with
     # sklearn)
     knn_aeon = knn_aeon_func(distance=dist["name"], n_neighbors=k)  # aeon concat
     knn_sk = knn_sk_func(metric=dist["distance"], n_neighbors=k)  # sklearn concat
 
-    knn_aeon.fit(X_train_concat[indices], y_train[indices])
-    knn_sk.fit(X_train_concat[indices], y_train[indices])
+    knn_aeon.fit(X_train_concat, y_train)
+    knn_sk.fit(X_train_concat, y_train)
 
     if problem_type == "cls":
-        knn_aeon_output = knn_aeon.predict_proba(X_test_concat[indices])
-        knn_sk_output = knn_sk.predict_proba(X_test_concat[indices])
+        knn_aeon_output = knn_aeon.predict_proba(X_test_concat)
+        knn_sk_output = knn_sk.predict_proba(X_test_concat)
     elif problem_type == "reg":
-        knn_aeon_output = knn_aeon.predict(X_test_concat[indices])
-        knn_sk_output = knn_sk.predict(X_test_concat[indices])
+        knn_aeon_output = knn_aeon.predict(X_test_concat)
+        knn_sk_output = knn_sk.predict(X_test_concat)
 
     assert_allclose(knn_aeon_output, knn_sk_output)
 
@@ -186,22 +185,20 @@ def test_multivariate(dist, k, task):
     # distances)
 
     # Compute the pairwise distance matrix
-    X_train_precomp_distance = dist["pairwise_distance"](X_train[indices])
-    X_test_precomp_distance = dist["pairwise_distance"](
-        X_test[indices], X_train[indices]
-    )
+    X_train_precomp_distance = dist["pairwise_distance"](X_train)
+    X_test_precomp_distance = dist["pairwise_distance"](X_test, X_train)
 
     knn_aeon_3D = knn_aeon_func(distance=dist["name"], n_neighbors=k)  # aeon
     knn_sk_precomp = knn_sk_func(metric="precomputed", n_neighbors=k)  # sklearn precomp
 
-    knn_aeon_3D.fit(X_train[indices], y_train[indices])
-    knn_sk_precomp.fit(X_train_precomp_distance, y_train[indices])
+    knn_aeon_3D.fit(X_train, y_train)
+    knn_sk_precomp.fit(X_train_precomp_distance, y_train)
 
     if problem_type == "cls":
-        knn_aeon_3D_output = knn_aeon_3D.predict_proba(X_test[indices])
+        knn_aeon_3D_output = knn_aeon_3D.predict_proba(X_test)
         knn_sk_precomp_output = knn_sk_precomp.predict_proba(X_test_precomp_distance)
     elif problem_type == "reg":
-        knn_aeon_3D_output = knn_aeon_3D.predict(X_test[indices])
+        knn_aeon_3D_output = knn_aeon_3D.predict(X_test)
         knn_sk_precomp_output = knn_sk_precomp.predict(X_test_precomp_distance)
 
     assert_allclose(knn_aeon_3D_output, knn_sk_precomp_output)
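
Both test_univariate and test_multivariate check the same property in several variants: a KNN estimator driven by an aeon distance should agree with sklearn's KNN given the same distances as a precomputed matrix. A self-contained sketch of that pattern for the regression task, using dtw as one illustrative distance (the seeds and sizes mirror the test, but this is a sketch, not the test code itself):

from numpy.testing import assert_allclose
from sklearn.neighbors import KNeighborsRegressor

from aeon.distances import dtw_pairwise_distance
from aeon.regression.distance_based import KNeighborsTimeSeriesRegressor
from aeon.testing.data_generation import make_example_3d_numpy

X_train, y_train = make_example_3d_numpy(n_cases=6, random_state=0, regression_target=True)
X_test, _ = make_example_3d_numpy(n_cases=6, random_state=2, regression_target=True)

# aeon's KNN works directly on the 3D collection and computes dtw internally.
knn_aeon = KNeighborsTimeSeriesRegressor(distance="dtw", n_neighbors=1)
knn_aeon.fit(X_train, y_train)

# sklearn's KNN receives the same information as precomputed distance matrices.
train_dists = dtw_pairwise_distance(X_train)         # shape (n_train, n_train)
test_dists = dtw_pairwise_distance(X_test, X_train)  # shape (n_test, n_train)
knn_sk = KNeighborsRegressor(metric="precomputed", n_neighbors=1)
knn_sk.fit(train_dists, y_train)

# The two pipelines should agree, which is what the tests above assert.
assert_allclose(knn_aeon.predict(X_test), knn_sk.predict(test_dists))
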
44 changes: 18 additions & 26 deletions aeon/regression/distance_based/tests/test_time_series_neighbors.py
@@ -4,40 +4,40 @@
 from numpy.testing import assert_almost_equal
 from sklearn.metrics import mean_squared_error
 
-from aeon.datasets import load_covid_3month
 from aeon.distances import get_distance_function
 from aeon.regression.distance_based import KNeighborsTimeSeriesRegressor
+from aeon.testing.data_generation import make_example_3d_numpy
 
 distance_functions = ["euclidean", "dtw", "wdtw", "msm", "erp", "adtw", "twe"]
 
 # expected mse on test set using default parameters.
 expected_mse = {
-    "euclidean": 0.002815386587822589,
-    "dtw": 0.002921957478363366,
-    "wdtw": 0.0025029139202436303,
-    "msm": 0.002427566155284863,
-    "erp": 0.002247674986547397,
-    "adtw": 0.00265555172857104,
-    "twe": 0.0028423024613138774,
+    "euclidean": 0.5958635513183005,
+    "dtw": 0.13862493928236033,
+    "wdtw": 0.13862493928236033,
+    "msm": 0.10700935790251886,
+    "erp": 0.2707789569252858,
+    "adtw": 0.1125922971718583,
+    "twe": 0.1668928688769102,
 }
 
 # expected mse on test set using window params.
 expected_mse_window = {
-    "dtw": 0.0027199984296669712,
-    "wdtw": 0.0026043512829531305,
-    "msm": 0.002413148537646331,
-    "erp": 0.0021331320891357546,
-    "adtw": 0.0027602314681382163,
-    "twe": 0.0030244991099088346,
+    "dtw": 0.19829606291787538,
+    "wdtw": 0.19829606291787538,
+    "msm": 0.10700935790251886,
+    "erp": 0.24372655531245097,
+    "adtw": 0.12166501682071837,
+    "twe": 0.15932454084282624,
 }
 
+X_train, y_train = make_example_3d_numpy(regression_target=True, random_state=0)
+X_test, y_test = make_example_3d_numpy(regression_target=True, random_state=2)
+
 
 @pytest.mark.parametrize("distance_key", distance_functions)
 def test_knn_neighbors(distance_key):
-    """Tests kneighbors method."""
-    X_train, y_train = load_covid_3month(split="train")
-    X_test, y_test = load_covid_3month(split="test")
-
+    """Tests kneighbours method."""
     model = KNeighborsTimeSeriesRegressor(
         n_neighbors=1, weights="distance", distance=distance_key
     )
@@ -53,8 +53,6 @@ def test_knn_bounding_matrix(distance_key):
     """Test knn with custom bounding parameters, and using callables."""
     if distance_key == "euclidean" or distance_key == "squared":
         return
-    X_train, y_train = load_covid_3month(split="train")
-    X_test, y_test = load_covid_3month(split="test")
    distance_callable = get_distance_function(distance_key)
 
    knn = KNeighborsTimeSeriesRegressor(
@@ -65,9 +63,3 @@ def test_knn_bounding_matrix(distance_key):
 
     mse = mean_squared_error(y_test, y_pred)
     assert_almost_equal(mse, expected_mse_window[distance_key])
-
-
-if __name__ == "__main__":
-    for distance_key in distance_functions:
-        test_knn_neighbors(distance_key)
-        test_knn_bounding_matrix(distance_key)
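
The expected_mse and expected_mse_window dictionaries above are hard-coded oracle values for the new synthetic data; presumably they were produced by running the estimator once on the fixed-seed collections and pasting in the results. A hedged sketch of how one such value could be regenerated (msm chosen arbitrarily; treat the printed number as indicative, not authoritative):

from sklearn.metrics import mean_squared_error

from aeon.regression.distance_based import KNeighborsTimeSeriesRegressor
from aeon.testing.data_generation import make_example_3d_numpy

# Same fixed-seed collections as the updated test module.
X_train, y_train = make_example_3d_numpy(regression_target=True, random_state=0)
X_test, y_test = make_example_3d_numpy(regression_target=True, random_state=2)

# Same estimator configuration as test_knn_neighbors.
model = KNeighborsTimeSeriesRegressor(
    n_neighbors=1, weights="distance", distance="msm"
)
model.fit(X_train, y_train)
mse = mean_squared_error(y_test, model.predict(X_test))
print(mse)  # compare against expected_mse["msm"] above
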
