From 1b0e1d30756674bfc06bb2b7e5095ec369635752 Mon Sep 17 00:00:00 2001
From: Ryuji Watanabe
Date: Wed, 4 Dec 2019 18:15:09 +0900
Subject: [PATCH 1/5] first commit


From 683d3cd3b9be5b1da6aa21a1786fc75ff9afe533 Mon Sep 17 00:00:00 2001
From: Ryuji Watanabe
Date: Wed, 4 Dec 2019 18:40:21 +0900
Subject: [PATCH 2/5] Add a transform method that estimates the latent
 variables for X

---
 libs/models/som.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/libs/models/som.py b/libs/models/som.py
index b7ebcf8..4ef5685 100644
--- a/libs/models/som.py
+++ b/libs/models/som.py
@@ -110,3 +110,11 @@ def fit(self, nb_epoch=100, verbose=True):
             self.history['z'][epoch] = self.Z
             self.history['y'][epoch] = self.Y
             self.history['sigma'][epoch] = sigma
+
+    def transform(self,X):
+        if self.metric == "sqeuclidean":
+            distance = dist.cdist(X,self.Y,self.metric)
+            return self.Zeta[distance.argmin(axis=1)]
+        elif self.metric == "KLdivergence":
+            divergence = -np.sum(X[:,np.newaxis,:] * np.log(self.Y[np.newaxis,:,:]),axis=2) # NxK
+            return self.Zeta[divergence.argmin(axis=1)]
\ No newline at end of file
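Note that transform is a pure winner lookup: it maps each sample to the latent coordinate of its best-matching unit (BMU) without updating Y or Z, so it can project held-out data after fit. A minimal standalone sketch of the squared-Euclidean branch (bmu_projection is a hypothetical helper, not part of som.py; Y and Zeta stand in for the fitted reference vectors and latent grid):

import numpy as np
from scipy.spatial import distance as dist

def bmu_projection(X, Y, Zeta):
    # NxK squared-Euclidean distances from each sample to each reference vector;
    # each sample then inherits the latent coordinate of its nearest node.
    D = dist.cdist(X, Y, "sqeuclidean")
    return Zeta[D.argmin(axis=1)]
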
From dfa153ced5cc35cec0329a37c9fbca9dd1d8ab81 Mon Sep 17 00:00:00 2001
From: Ryuji Watanabe
Date: Wed, 4 Dec 2019 18:41:20 +0900
Subject: [PATCH 3/5] Reformat

---
 libs/models/som.py | 37 +++++++++++++++++++------------------
 1 file changed, 19 insertions(+), 18 deletions(-)

diff --git a/libs/models/som.py b/libs/models/som.py
index 4ef5685..9bbd211 100644
--- a/libs/models/som.py
+++ b/libs/models/som.py
@@ -6,7 +6,7 @@
 
 class SOM:
-    def __init__(self, X, latent_dim, resolution, sigma_max, sigma_min, tau, init='random',metric="sqeuclidean"):
+    def __init__(self, X, latent_dim, resolution, sigma_max, sigma_min, tau, init='random', metric="sqeuclidean"):
         self.X = X
         self.N = self.X.shape[0]
@@ -28,22 +28,23 @@ def __init__(self, X, latent_dim, resolution, sigma_max, sigma_min, tau, init='r
         elif latent_dim == 2:
             if isinstance(init, str) and init == 'PCA':
                 comp1, comp2 = pca.singular_values_[0], pca.singular_values_[1]
-                zeta = np.meshgrid(np.linspace(-1, 1, resolution), np.linspace(-comp2/comp1, comp2/comp1, resolution))
+                zeta = np.meshgrid(np.linspace(-1, 1, resolution),
+                                   np.linspace(-comp2 / comp1, comp2 / comp1, resolution))
             else:
                 zeta = np.meshgrid(np.linspace(-1, 1, resolution), np.linspace(-1, 1, resolution))
-            self.Zeta = np.dstack(zeta).reshape(resolution**2, latent_dim)
+            self.Zeta = np.dstack(zeta).reshape(resolution ** 2, latent_dim)
         else:
             raise ValueError("invalid latent dimension: {}".format(latent_dim))
 
-        self.K = resolution**self.L
+        self.K = resolution ** self.L
 
         if isinstance(init, str) and init == 'random':
            self.Z = np.random.rand(self.N, latent_dim) * 2.0 - 1.0
         elif isinstance(init, str) and init == 'random_bmu':
             init_bmus = np.random.randint(0, self.Zeta.shape[0] - 1, self.N)
-            self.Z = self.Zeta[init_bmus,:]
+            self.Z = self.Zeta[init_bmus, :]
         elif isinstance(init, str) and init == 'PCA':
-            self.Z = pca.transform(X)/comp1
+            self.Z = pca.transform(X) / comp1
         elif isinstance(init, np.ndarray) and init.dtype == int:
             init_bmus = init.copy()
             self.Z = self.Zeta[init_bmus, :]
@@ -52,9 +53,9 @@ def __init__(self, X, latent_dim, resolution, sigma_max, sigma_min, tau, init='r
         else:
             raise ValueError("invalid init: {}".format(init))
 
-        #handle the metric option
+        # handle the metric option
         if metric == "sqeuclidean":
-            self.metric="sqeuclidean"
+            self.metric = "sqeuclidean"
         elif metric == "KLdivergence":
             self.metric = "KLdivergence"
@@ -78,18 +79,18 @@ def fit(self, nb_epoch=100, verbose=True):
             # cooperation step
             # compute the learning amounts
             # sigma = self.sigma_min + (self.sigma_max - self.sigma_min) * np.exp(-epoch / self.tau) # set the neighborhood radius
-            sigma = max(self.sigma_min, self.sigma_max * ( 1 - (epoch / self.tau) ) )# set the neighborhood radius
+            sigma = max(self.sigma_min, self.sigma_max * (1 - (epoch / self.tau)))  # set the neighborhood radius
             Dist = dist.cdist(self.Zeta, self.Z, 'sqeuclidean') # compute the KxN distance matrix
             # distances between every node and every winner node
-            H = np.exp(-Dist / (2 * sigma * sigma)) # compute the KxN learning-amount matrix
+            H = np.exp(-Dist / (2 * sigma * sigma))  # compute the KxN learning-amount matrix
 
             # adaptation step
             # update the reference vectors
-            G = np.sum(H, axis=1)[:, np.newaxis] # Kx1 column vector of each node's total learning amount
-            Ginv = np.reciprocal(G) # reciprocal of each element of G
-            R = H * Ginv # normalize so each node's learning amounts sum to 1
-            self.Y = R @ self.X # reference vectors as the learning-amount-weighted means of the data
+            G = np.sum(H, axis=1)[:, np.newaxis]  # Kx1 column vector of each node's total learning amount
+            Ginv = np.reciprocal(G)  # reciprocal of each element of G
+            R = H * Ginv  # normalize so each node's learning amounts sum to 1
+            self.Y = R @ self.X  # reference vectors as the learning-amount-weighted means of the data
 
             # competition step
             if self.metric is "sqeuclidean": # winner determination by Euclidean distance
@@ -111,10 +112,10 @@ def fit(self, nb_epoch=100, verbose=True):
             self.history['y'][epoch] = self.Y
             self.history['sigma'][epoch] = sigma
 
-    def transform(self,X):
+    def transform(self, X):
         if self.metric == "sqeuclidean":
-            distance = dist.cdist(X,self.Y,self.metric)
+            distance = dist.cdist(X, self.Y, self.metric)
             return self.Zeta[distance.argmin(axis=1)]
         elif self.metric == "KLdivergence":
-            divergence = -np.sum(X[:,np.newaxis,:] * np.log(self.Y[np.newaxis,:,:]),axis=2) # NxK
-            return self.Zeta[divergence.argmin(axis=1)]
\ No newline at end of file
+            divergence = -np.sum(X[:, np.newaxis, :] * np.log(self.Y[np.newaxis, :, :]), axis=2)  # NxK
+            return self.Zeta[divergence.argmin(axis=1)]

From 52c6e8396648c5de6266a6447ee779474700b86c Mon Sep 17 00:00:00 2001
From: Ryuji Watanabe
Date: Wed, 4 Dec 2019 18:43:24 +0900
Subject: [PATCH 4/5] Change is to == in the metric conditionals for the
 latent variable estimation inside fit

---
 libs/models/som.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libs/models/som.py b/libs/models/som.py
index 9bbd211..68a8ed5 100644
--- a/libs/models/som.py
+++ b/libs/models/som.py
@@ -93,14 +93,14 @@ def fit(self, nb_epoch=100, verbose=True):
             self.Y = R @ self.X  # reference vectors as the learning-amount-weighted means of the data
 
             # competition step
-            if self.metric is "sqeuclidean": # winner determination by Euclidean distance
+            if self.metric == "sqeuclidean": # winner determination by Euclidean distance
                 # compute the winner nodes
                 Dist = dist.cdist(self.X, self.Y) # compute the NxK distance matrix
                 bmus = Dist.argmin(axis=1) # Nx1 column vector of winner node indices
                 # argmin(axis=1) finds each row's minimum and returns its index
                 self.Z = self.Zeta[bmus, :] # look up the winner nodes from their indices
-            elif self.metric is "KLdivergence": # winner determination by KL divergence
+            elif self.metric == "KLdivergence": # winner determination by KL divergence
                 Dist = np.sum(self.X[:, np.newaxis, :] * np.log(self.Y)[np.newaxis, :, :], axis=2) # NxK matrix
                 # determine the winner indices
                 bmus = np.argmax(Dist, axis=1)
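One step worth spelling out, since fit takes an argmax while transform takes an argmin of the negated sum: for rows of X that are categorical distributions, argmax_k sum_j x_j log y_kj is exactly argmin_k KL(x || y_k), because KL(x || y_k) = sum_j x_j log x_j - sum_j x_j log y_kj and the entropy term does not depend on k. A small sketch under that assumption (kl_bmus is a hypothetical helper, not part of som.py):

import numpy as np

def kl_bmus(X, Y):
    # X: NxD, rows are categorical distributions; Y: KxD reference distributions.
    # cross[n, k] = sum_j X[n, j] * log(Y[k, j]); maximizing it over k is the
    # same as minimizing KL(x_n || y_k), since the entropy of x_n is constant in k.
    cross = np.einsum('nd,kd->nk', X, np.log(Y))
    return cross.argmax(axis=1)
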
From a316e87feefb91be05d51a378556250f7a0dec70 Mon Sep 17 00:00:00 2001
From: Ryuji Watanabe
Date: Wed, 4 Dec 2019 19:37:28 +0900
Subject: [PATCH 5/5] Add a test method confirming that the latent variable
 estimates from fit and those from transform agree

---
 tests/som/test_som.py | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

diff --git a/tests/som/test_som.py b/tests/som/test_som.py
index 6c08da7..180bbdb 100644
--- a/tests/som/test_som.py
+++ b/tests/som/test_som.py
@@ -96,6 +96,37 @@ def test_init_pca(self):
 
         np.testing.assert_allclose(SOMResult, EVDResult/np.sqrt(Lambda.real.max()), rtol=1e-06)
 
+    def test_transform(self):
+        n_distribution = 100
+        n_category = 20
+
+        # create categorical distributions
+        X_categorical = np.random.rand(n_distribution, n_category)
+        X_categorical = X_categorical / X_categorical.sum(axis=1)[:, None]
+
+        np.testing.assert_allclose(X_categorical.sum(axis=1), np.ones(X_categorical.shape[0]))
+
+        # fit
+        som_categorical = SOM(X_categorical, latent_dim=2, resolution=50, sigma_max=2.0, sigma_min=0.3, tau=50, metric="KLdivergence")
+        som_categorical.fit(50)
+        Z_fit = som_categorical.Z
+        Z_transformed = som_categorical.transform(X_categorical)
+
+        np.testing.assert_allclose(Z_transformed, Z_fit)
+
+        # confirm on a multivariate dataset as well
+        n_samples = 100
+        n_features = 20
+
+        X_multi_variate = np.random.normal(0.0, 1.0, (n_samples, n_features))
+
+        # fit
+        som_multi_variate = SOM(X_multi_variate, latent_dim=2, resolution=50, sigma_max=2.0, sigma_min=0.2, tau=50, metric="sqeuclidean")
+        som_multi_variate.fit(10)
+        Z_fit = som_multi_variate.Z
+        Z_transformed = som_multi_variate.transform(X_multi_variate)
+
+        np.testing.assert_allclose(Z_fit, Z_transformed)
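For reference, a hedged usage sketch of the round trip this test pins down: transform applied to the training data should reproduce the latent variables stored by fit, since both run the same winner lookup against the final reference vectors. Module path and constructor arguments mirror the test; nothing here is new API.

import numpy as np
from libs.models.som import SOM

X = np.random.rand(100, 20)
X = X / X.sum(axis=1, keepdims=True)  # rows as categorical distributions

som = SOM(X, latent_dim=2, resolution=50, sigma_max=2.0,
          sigma_min=0.3, tau=50, metric="KLdivergence")
som.fit(50)

# After fit, transform on the training data reproduces the stored winners:
assert np.allclose(som.transform(X), som.Z)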