From cc6e323040198624a9b03d3ee1698a2147af40fc Mon Sep 17 00:00:00 2001 From: Ryuji Watanabe Date: Tue, 12 Nov 2019 16:13:16 +0900 Subject: [PATCH 01/25] =?UTF-8?q?=E3=82=B0=E3=83=AB=E3=83=BC=E3=83=97?= =?UTF-8?q?=E3=81=AE=E6=A7=8B=E6=88=90=E3=83=A1=E3=83=B3=E3=83=90=E3=83=BC?= =?UTF-8?q?=E3=82=92bag=20of=20words=E7=9A=84=E3=81=AA=E8=A1=A8=E7=8F=BE?= =?UTF-8?q?=E3=81=A7=E4=B8=8E=E3=81=88=E3=82=89=E3=82=8C=E3=81=9F=E6=99=82?= =?UTF-8?q?=E3=81=AE=E5=87=A6=E7=90=86=E3=82=92=E4=BD=9C=E6=88=90=E3=81=99?= =?UTF-8?q?=E3=82=8B=E3=81=9F=E3=82=81=E3=81=AB=5Ffit=5FKDE=E5=86=85?= =?UTF-8?q?=E3=81=A7if=E6=96=87=E3=82=92=E8=BF=BD=E5=8A=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- libs/models/tsom_plus_som.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/libs/models/tsom_plus_som.py b/libs/models/tsom_plus_som.py index 10e0609..0540805 100644 --- a/libs/models/tsom_plus_som.py +++ b/libs/models/tsom_plus_som.py @@ -25,14 +25,17 @@ def _fit_1st_TSOM(self, tsom_epoch_num): def _fit_KDE(self, kernel_width): # 学習した後の潜在空間からKDEで確率分布を作る prob_data = np.zeros((self.group_num, self.tsom.K1)) # group数*ノード数 # グループごとにKDEを適用 - for i in range(self.group_num): - Dist = dist.cdist(self.tsom.Zeta1, self.tsom.Z1[self.index_members_of_group[i], :], - 'sqeuclidean') # KxNの距離行列を計算 - H = np.exp(-Dist / (2 * kernel_width * kernel_width)) # KxNの学習量行列を計算 - prob = np.sum(H, axis=1) - prob_sum = np.sum(prob) - prob = prob / prob_sum - prob_data[i, :] = prob + if isinstance(self.index_members_of_group, np.ndarray) and self.index_members_of_group.ndim == 2: + pass + else: + for i in range(self.group_num): + Dist = dist.cdist(self.tsom.Zeta1, self.tsom.Z1[self.index_members_of_group[i], :], + 'sqeuclidean') # KxNの距離行列を計算 + H = np.exp(-Dist / (2 * kernel_width * kernel_width)) # KxNの学習量行列を計算 + prob = np.sum(H, axis=1) + prob_sum = np.sum(prob) + prob = prob / prob_sum + prob_data[i, :] = prob self.params_som['X'] = prob_data self.params_som['metric'] = "KLdivergence" From e311e08d635e17d5eb5e5419160109c632d33eca Mon Sep 17 00:00:00 2001 From: Ryuji Watanabe Date: Tue, 12 Nov 2019 16:21:15 +0900 Subject: [PATCH 02/25] =?UTF-8?q?bag=20of=20words=E7=9A=84=E3=81=AA?= =?UTF-8?q?=E8=A1=A8=E7=8F=BE=E3=81=A7=E3=82=B0=E3=83=AB=E3=83=BC=E3=83=97?= =?UTF-8?q?=E3=81=AE=E6=A7=8B=E6=88=90=E3=83=A1=E3=83=B3=E3=83=90=E3=83=BC?= =?UTF-8?q?=E3=82=92=E8=A1=A8=E7=8F=BE=E3=81=97=E3=81=9F=E6=99=82=E3=81=AE?= =?UTF-8?q?=E5=87=A6=E7=90=86=E3=82=92=E8=BF=BD=E5=8A=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- libs/models/tsom_plus_som.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/libs/models/tsom_plus_som.py b/libs/models/tsom_plus_som.py index 0540805..549a8eb 100644 --- a/libs/models/tsom_plus_som.py +++ b/libs/models/tsom_plus_som.py @@ -26,7 +26,10 @@ def _fit_KDE(self, kernel_width): # 学習した後の潜在空間からKDEで prob_data = np.zeros((self.group_num, self.tsom.K1)) # group数*ノード数 # グループごとにKDEを適用 if isinstance(self.index_members_of_group, np.ndarray) and self.index_members_of_group.ndim == 2: - pass + distance = dist.cdist(self.tsom.Zeta1, self.tsom.Z1,'sqeuclidean') #K1 x num_members + H = np.exp(-0.5*distance/(kernel_width*kernel_width))#KxN + prob_data = self.index_members_of_group @ H.T #num_group x K1 + prob_data = prob_data / prob_data.sum(axis=1)[:,None] else: for i in range(self.group_num): Dist = dist.cdist(self.tsom.Zeta1, self.tsom.Z1[self.index_members_of_group[i], :], From e6863b36aede3ab016b00a8b8cf744b9887c371f Mon Sep 17 00:00:00 2001 From: Ryuji Watanabe Date: Sat, 16 Nov 2019 17:58:41 +0900 Subject: [PATCH 03/25] Reformat --- libs/models/tsom_plus_som.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/libs/models/tsom_plus_som.py b/libs/models/tsom_plus_som.py index 549a8eb..6eaad38 100644 --- a/libs/models/tsom_plus_som.py +++ b/libs/models/tsom_plus_som.py @@ -26,13 +26,14 @@ def _fit_KDE(self, kernel_width): # 学習した後の潜在空間からKDEで prob_data = np.zeros((self.group_num, self.tsom.K1)) # group数*ノード数 # グループごとにKDEを適用 if isinstance(self.index_members_of_group, np.ndarray) and self.index_members_of_group.ndim == 2: - distance = dist.cdist(self.tsom.Zeta1, self.tsom.Z1,'sqeuclidean') #K1 x num_members - H = np.exp(-0.5*distance/(kernel_width*kernel_width))#KxN - prob_data = self.index_members_of_group @ H.T #num_group x K1 - prob_data = prob_data / prob_data.sum(axis=1)[:,None] + distance = dist.cdist(self.tsom.Zeta1, self.tsom.Z1, 'sqeuclidean') # K1 x num_members + H = np.exp(-0.5 * distance / (kernel_width * kernel_width)) # KxN + prob_data = self.index_members_of_group @ H.T # num_group x K1 + prob_data = prob_data / prob_data.sum(axis=1)[:, None] else: for i in range(self.group_num): - Dist = dist.cdist(self.tsom.Zeta1, self.tsom.Z1[self.index_members_of_group[i], :], + Dist = dist.cdist(self.tsom.Zeta1, + self.tsom.Z1[self.index_members_of_group[i], :], 'sqeuclidean') # KxNの距離行列を計算 H = np.exp(-Dist / (2 * kernel_width * kernel_width)) # KxNの学習量行列を計算 prob = np.sum(H, axis=1) From 7805eddf6e92df8961bcc38b811ce33412bff28e Mon Sep 17 00:00:00 2001 From: Ryuji Watanabe Date: Sat, 16 Nov 2019 18:20:23 +0900 Subject: [PATCH 04/25] =?UTF-8?q?=E3=83=86=E3=82=B9=E3=83=88=E3=83=A1?= =?UTF-8?q?=E3=82=BD=E3=83=83=E3=83=89=E3=82=92=E4=BD=9C=E6=88=90=E4=B8=AD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/plus_TSOM/allclose_plusTSOM.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/tests/plus_TSOM/allclose_plusTSOM.py b/tests/plus_TSOM/allclose_plusTSOM.py index 9a8ee1d..b15dd31 100644 --- a/tests/plus_TSOM/allclose_plusTSOM.py +++ b/tests/plus_TSOM/allclose_plusTSOM.py @@ -74,5 +74,27 @@ def test_plusTSOM_ishida_vs_test_plusTSOM_watanabe(self): np.testing.assert_allclose(htsom_ishida.som.history['y'], htsom_watanabe.som.history['y']) np.testing.assert_allclose(htsom_ishida.som.history['z'], htsom_watanabe.som.history['z']) + def _transform_list_to_bag(self,list_of_indexes): + for indexes in list_of_indexes: + + def test_matching_index_member_as_list_or_bag(self): + seed = 100 + np.random.seed(seed) + n_samples = 1000 + n_groups = 10 # group数 + n_features = 3 # 各メンバーの特徴数 + n_samples_per_group = 30 # 各グループにメンバーに何人いるのか + member_features,index_members_of_group = self.create_artficial_data(n_samples, + n_features, + n_groups, + n_samples_per_group) + + + Z1 = np.random.rand(n_samples, 2) * 2.0 - 1.0 + Z2 = np.random.rand(n_features, 2) * 2.0 - 1.0 + init_TSOM = [Z1, Z2] + init_SOM = np.random.rand(n_groups, 2) * 2.0 - 1.0 + + if __name__ == "__main__": unittest.main() From e83648d14b702ae096ec29f9afbd7706d512f01b Mon Sep 17 00:00:00 2001 From: Ryuji Watanabe Date: Mon, 18 Nov 2019 14:50:04 +0900 Subject: [PATCH 05/25] =?UTF-8?q?=E3=83=86=E3=82=B9=E3=83=88=E3=82=92?= =?UTF-8?q?=E6=9B=B8=E3=81=84=E3=81=A6=E5=AE=9F=E8=A1=8C=E3=81=97=E3=81=A6?= =?UTF-8?q?=E3=81=BF=E3=81=9F=E3=81=8C=E3=82=A8=E3=83=A9=E3=83=BC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/plus_TSOM/allclose_plusTSOM.py | 53 ++++++++++++++++++++++++---- 1 file changed, 47 insertions(+), 6 deletions(-) diff --git a/tests/plus_TSOM/allclose_plusTSOM.py b/tests/plus_TSOM/allclose_plusTSOM.py index b15dd31..2ecb221 100644 --- a/tests/plus_TSOM/allclose_plusTSOM.py +++ b/tests/plus_TSOM/allclose_plusTSOM.py @@ -74,27 +74,68 @@ def test_plusTSOM_ishida_vs_test_plusTSOM_watanabe(self): np.testing.assert_allclose(htsom_ishida.som.history['y'], htsom_watanabe.som.history['y']) np.testing.assert_allclose(htsom_ishida.som.history['z'], htsom_watanabe.som.history['z']) - def _transform_list_to_bag(self,list_of_indexes): + def _transform_list_to_bag(self,list_of_indexes,num_members): + bag_of_members = np.empty((0,num_members)) for indexes in list_of_indexes: - + one_hot_vectors = np.eye(num_members)[indexes] + one_bag = one_hot_vectors.sum(axis=0)[None,:] + bag_of_members=np.append(bag_of_members,one_bag,axis=0) + return bag_of_members def test_matching_index_member_as_list_or_bag(self): seed = 100 np.random.seed(seed) - n_samples = 1000 + n_members = 100 n_groups = 10 # group数 n_features = 3 # 各メンバーの特徴数 n_samples_per_group = 30 # 各グループにメンバーに何人いるのか - member_features,index_members_of_group = self.create_artficial_data(n_samples, + member_features,index_members_of_group = self.create_artficial_data(n_members, n_features, n_groups, n_samples_per_group) + bag_of_members = self._transform_list_to_bag(index_members_of_group, n_members) - - Z1 = np.random.rand(n_samples, 2) * 2.0 - 1.0 + Z1 = np.random.rand(n_members, 2) * 2.0 - 1.0 Z2 = np.random.rand(n_features, 2) * 2.0 - 1.0 init_TSOM = [Z1, Z2] init_SOM = np.random.rand(n_groups, 2) * 2.0 - 1.0 + params_tsom = {'latent_dim': [2, 2], + 'resolution': [10, 10], + 'SIGMA_MAX': [1.0, 1.0], + 'SIGMA_MIN': [0.1, 0.1], + 'TAU': [50, 50], + 'init': init_TSOM} + params_som = {'latent_dim': 2, + 'resolution': 10, + 'sigma_max': 2.0, + 'sigma_min': 0.5, + 'tau': 50, + 'init': init_SOM} + tsom_epoch_num = 50 + som_epoch_num = 50 + kernel_width = 0.3 + + tsom_plus_som_input_list = TSOMPlusSOM(member_features=member_features, + index_members_of_group=index_members_of_group, + params_tsom=params_tsom, + params_som=params_som) + tsom_plus_som_input_bag = TSOMPlusSOM(member_features=member_features, + index_members_of_group=bag_of_members, + params_tsom=params_tsom, + params_som=params_som) + + tsom_plus_som_input_list.fit(tsom_epoch_num=tsom_epoch_num, + kernel_width=kernel_width, + som_epoch_num=som_epoch_num) + tsom_plus_som_input_bag.fit(tsom_epoch_num=tsom_epoch_num, + kernel_width=kernel_width, + som_epoch_num=som_epoch_num) + np.testing.assert_allclose(tsom_plus_som_input_list.tsom.history['y'], tsom_plus_som_input_bag.tsom.history['y']) + np.testing.assert_allclose(tsom_plus_som_input_list.tsom.history['z1'], tsom_plus_som_input_bag.tsom.history['z1']) + np.testing.assert_allclose(tsom_plus_som_input_list.tsom.history['z2'], tsom_plus_som_input_bag.tsom.history['z2']) + np.testing.assert_allclose(tsom_plus_som_input_list.params_som['X'], tsom_plus_som_input_bag.params_som['X']) + np.testing.assert_allclose(tsom_plus_som_input_list.som.history['y'], tsom_plus_som_input_bag.som.history['y']) + np.testing.assert_allclose(tsom_plus_som_input_list.som.history['z'], tsom_plus_som_input_bag.som.history['z']) if __name__ == "__main__": unittest.main() From b82e7fec059baeda246e072dc7265da0e0a1c059 Mon Sep 17 00:00:00 2001 From: Ryuji Watanabe Date: Tue, 19 Nov 2019 15:46:51 +0900 Subject: [PATCH 06/25] first commit From 7c1fc04e3bd18a78b92baa1a89f26ecf4069bc98 Mon Sep 17 00:00:00 2001 From: Ryuji Watanabe Date: Tue, 19 Nov 2019 15:48:48 +0900 Subject: [PATCH 07/25] =?UTF-8?q?=E8=87=AA=E5=88=86=E3=81=A7=E4=BB=A5?= =?UTF-8?q?=E5=89=8D=E3=81=8B=E3=82=89=E4=BD=9C=E6=88=90=E3=81=97=E3=81=A6?= =?UTF-8?q?=E3=81=84=E3=81=9FUKR=E3=81=A8=E7=AD=89=E4=BE=A1=E3=81=AA?= =?UTF-8?q?=E3=82=B3=E3=83=BC=E3=83=89=E3=82=92=E3=81=93=E3=81=A1=E3=82=89?= =?UTF-8?q?=E3=81=AB=E7=A7=BB=E6=A4=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- libs/models/unsupervised_kernel_regression.py | 212 ++++++++++++++++++ 1 file changed, 212 insertions(+) create mode 100644 libs/models/unsupervised_kernel_regression.py diff --git a/libs/models/unsupervised_kernel_regression.py b/libs/models/unsupervised_kernel_regression.py new file mode 100644 index 0000000..51d0b02 --- /dev/null +++ b/libs/models/unsupervised_kernel_regression.py @@ -0,0 +1,212 @@ +import numpy as np +import scipy.spatial.distance as dist +from tqdm import tqdm + + +class UnsupervisedKernelRegression(object): + def __init__(self, X, latent_dim,sigma=0.2, + isGEM=False, + isCompact=True,alpha=0.0, + init='random',isLOOCV=False): + self.X = X.copy() + self.N = X.shape[0] + self.D = X.shape[1] + self.L = latent_dim + self.sigma = sigma + self.gamma = 1.0 / (sigma*sigma) + self.isCompact = isCompact + self.isGEM = isGEM + self.isLOOCV = isLOOCV + + self.Z = None + if isinstance(init, str) and init in 'random': + self.Z = np.random.normal(0, 0.1, (self.N, self.L)) + elif isinstance(init, np.ndarray) and init.shape == (self.N, self.L): + self.Z = init.copy() + else: + raise ValueError("invalid init: {}".format(init)) + + self.Alpha = alpha + + self.history = {} + + self.donefit = False + + + + def fit(self, nb_epoch=100, verbose=True, eta=0.5, expand_epoch=None): + + K = self.X @ self.X.T + X2 = np.diag(K)[:, None] + # Xn = np.sum(np.square(self.X[:, None, :] - self.X[None, :, :]), axis=2) + # DistX = Xn.reshape(self.N**2, 1) + + self.nb_epoch = nb_epoch + + self.history['z'] = np.zeros((nb_epoch, self.N, self.L)) + self.history['y'] = np.zeros((nb_epoch, self.N, self.D)) + self.history['zvar'] = np.zeros((nb_epoch, self.L)) + self.history['obj_func'] = np.zeros(nb_epoch) + + + if verbose: + bar = tqdm(range(nb_epoch)) + else: + bar = range(nb_epoch) + + + for epoch in bar: + Delta = self.Z[:, None, :] - self.Z[None, :, :] + DistZ = np.sum(np.square(Delta), axis=2) + H = np.exp(-0.5 * self.gamma * DistZ) + if self.isLOOCV: + H -= np.identity(H.shape[0]) + + # Hprime = H + G = np.sum(H, axis=1)[:, None] + GInv = 1 / G + R = H * GInv + # Rprime = Hprime * GInv + + Y = R @ self.X + # Y2 = np.sum(np.square(Y), axis=1)[:, None] + # beta0 = np.sum(G) / np.sum(G * (X2 - Y2)) + DeltaYX = Y[:,None,:] - self.X[None, :, :] + Error = Y - self.X + obj_func = -0.5 * np.sum(np.square(Error)) - 0.5*self.Alpha*np.sum(np.square(self.Z)) + + + A = self.gamma * R * np.einsum('nd,nid->ni', Y - self.X, DeltaYX) + #dFdZ = -beta0 * np.sum((A + A.T)[:, :, None] * Delta, axis=1) + if self.isGEM: + dFdZ = -np.sum(A[:, :, None] * Delta, axis=1) + else: + dFdZ = -np.sum((A + A.T)[:, :, None] * Delta, axis=1) + + dFdZ -= self.Alpha * self.Z + + # self.Z += (eta / self.D) * dFdZ + self.Z += eta * dFdZ + if self.isCompact: + self.Z = np.clip(self.Z,-1.0,1.0) + else: + self.Z -= self.Z.mean(axis=0) + + + self.history['z'][epoch] = self.Z + self.history['y'][epoch] = Y + self.history['zvar'][epoch] = np.mean(np.square(self.Z - self.Z.mean(axis=0)),axis=0) + self.history['obj_func'][epoch] = obj_func + + + + self.donefit = True + return self.history + + def calcF(self, resolution, size='auto'): + """ + :param resolution: + :param size: + :return: + """ + if not self.donefit: + raise ValueError("fit is not done") + + self.resolution = resolution + Zeta = create_zeta(-1, 1, self.L, resolution) + M = Zeta.shape[0] + + self.history['f'] = np.zeros((self.nb_epoch, M, self.D)) + + for epoch in range(self.nb_epoch): + Z = self.history['z'][epoch] + if size == 'auto': + Zeta = create_zeta(Z.min(), Z.max(), self.L, resolution) + else: + Zeta = create_zeta(size.min(), size.max(), self.L, resolution) + + Dist = dist.cdist(Zeta, Z, 'sqeuclidean') + + H = np.exp(-0.5 *self.gamma* Dist) + G = np.sum(H, axis=1)[:, None] + GInv = np.reciprocal(G) + R = H * GInv + + Y = np.dot(R, self.X) + + self.history['f'][epoch] = Y + + def transform(self, Xnew, nb_epoch_trans=100, eta_trans=0.5, verbose=True, constrained=True): + # calculate latent variables of new data using gradient descent + # objective function is square error E = ||f(z)-x||^2 + + if not self.donefit: + raise ValueError("fit is not done") + + Nnew = Xnew.shape[0] + + # initialize Znew, using latent variables of observed data + Dist_Xnew_X = dist.cdist(Xnew, self.X) + BMS = np.argmin(Dist_Xnew_X, axis=1) # calculate Best Matching Sample + Znew = self.Z[BMS,:] # initialize Znew + + if verbose: + bar = tqdm(range(nb_epoch_trans)) + else: + bar = range(nb_epoch_trans) + + for epoch in bar: + # calculate gradient + Delta = self.Z[None,:,:] - Znew[:,None,:] # shape = (Nnew,N,L) + Dist_Znew_Z = dist.cdist(Znew,self.Z,"sqeuclidean") # shape = (Nnew,N) + H = np.exp(-0.5 *self.gamma* Dist_Znew_Z) # shape = (Nnew,N) + G = np.sum(H,axis=1)[:,None] # shape = (Nnew,1) + Ginv = np.reciprocal(G) # shape = (Nnew,1) + R = H * Ginv # shape = (Nnew,N) + F = R @ self.X # shape = (Nnew,D) + + Delta_bar = np.einsum("kn,knl->kl",R,Delta) # (Nnew,N)times(Nnew,N,L)=(Nnew,L) + # Delta_bar = np.sum(R[:,:,None] * Delta, axis=1) # same calculate + dRdZ = self.gamma * R[:,:,None] * (Delta - Delta_bar[:,None,:]) # shape = (Nnew,N,L) + + dFdZ = np.einsum("nd,knl->kdl",self.X,dRdZ) # shape = (Nnew,D,L) + # dFdZ = np.sum(self.X[None,:,:,None]*dRdZ[:,:,None,:],axis=1) # same calculate + dEdZ = 2.0 * np.einsum("kd,kdl->kl",F-Xnew,dFdZ) # shape (Nnew, L) + # update latent variables + Znew -= eta_trans * dEdZ + if self.isCompact: + Znew = np.clip(Znew,-1.0,1.0) + if constrained: + Znew = np.clip(Znew, self.Z.min(axis=0), self.Z.max(axis=0)) + + return Znew + + def inverse_transform(self, Znew): + if not self.donefit: + raise ValueError("fit is not done") + if Znew.shape[1]!=self.L: + raise ValueError("Znew dimension must be {}".format(self.L)) + + Dist_Znew_Z = dist.cdist(Znew,self.Z,"sqeuclidean") # shape = (Nnew,N) + H = np.exp(-0.5 * self.gamma *Dist_Znew_Z) # shape = (Nnew,N) + G = np.sum(H,axis=1)[:,None] # shape = (Nnew,1) + Ginv = np.reciprocal(G) # shape = (Nnew,1) + R = H * Ginv # shape = (Nnew,N) + F = R @ self.X # shape = (Nnew,D) + + return F + + + + +def create_zeta(zeta_min, zeta_max, latent_dim, resolution): + mesh1d, step = np.linspace(zeta_min, zeta_max, resolution, endpoint=False, retstep=True) + mesh1d += step / 2.0 + if latent_dim == 1: + Zeta = mesh1d + elif latent_dim == 2: + Zeta = np.meshgrid(mesh1d, mesh1d) + else: + raise ValueError("invalid latent dim {}".format(latent_dim)) + Zeta = np.dstack(Zeta).reshape(-1, latent_dim) + return Zeta From e3dcca5551809187a5b47df7cde3b1279ab599a6 Mon Sep 17 00:00:00 2001 From: Ryuji Watanabe Date: Tue, 19 Nov 2019 16:05:04 +0900 Subject: [PATCH 08/25] =?UTF-8?q?=E4=BD=99=E5=88=86=E3=81=AA=E3=82=B3?= =?UTF-8?q?=E3=83=A1=E3=83=B3=E3=83=88=E3=82=A2=E3=82=A6=E3=83=88=E6=96=87?= =?UTF-8?q?=E3=81=AE=E5=89=8A=E9=99=A4=E3=80=81=E5=A4=89=E6=95=B0=E5=90=8D?= =?UTF-8?q?=E3=81=AErefactor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- libs/models/unsupervised_kernel_regression.py | 91 ++++++++----------- 1 file changed, 39 insertions(+), 52 deletions(-) diff --git a/libs/models/unsupervised_kernel_regression.py b/libs/models/unsupervised_kernel_regression.py index 51d0b02..76f220e 100644 --- a/libs/models/unsupervised_kernel_regression.py +++ b/libs/models/unsupervised_kernel_regression.py @@ -4,48 +4,43 @@ class UnsupervisedKernelRegression(object): - def __init__(self, X, latent_dim,sigma=0.2, - isGEM=False, - isCompact=True,alpha=0.0, - init='random',isLOOCV=False): + def __init__(self, X, n_components, bandwidth_gaussian_kernel=0.2, + is_compact=True, lambda_=0.0, + init='random', is_loocv=False): self.X = X.copy() - self.N = X.shape[0] - self.D = X.shape[1] - self.L = latent_dim - self.sigma = sigma - self.gamma = 1.0 / (sigma*sigma) - self.isCompact = isCompact - self.isGEM = isGEM - self.isLOOCV = isLOOCV + self.n_samples = X.shape[0] + self.n_dimensions = X.shape[1] + self.n_components = n_components + self.bandwidth_gaussian_kernel = bandwidth_gaussian_kernel + self.precision = 1.0 / (bandwidth_gaussian_kernel * bandwidth_gaussian_kernel) + self.is_compact = is_compact + self.is_loocv = is_loocv self.Z = None if isinstance(init, str) and init in 'random': - self.Z = np.random.normal(0, 0.1, (self.N, self.L)) - elif isinstance(init, np.ndarray) and init.shape == (self.N, self.L): + self.Z = np.random.normal(0, 1.0, (self.n_samples, self.n_components)) * bandwidth_gaussian_kernel * 0.5 + elif isinstance(init, np.ndarray) and init.shape == (self.n_samples, self.n_components): self.Z = init.copy() else: raise ValueError("invalid init: {}".format(init)) - self.Alpha = alpha + self.lambda_ = lambda_ self.history = {} - self.donefit = False + self._done_fit = False def fit(self, nb_epoch=100, verbose=True, eta=0.5, expand_epoch=None): K = self.X @ self.X.T - X2 = np.diag(K)[:, None] - # Xn = np.sum(np.square(self.X[:, None, :] - self.X[None, :, :]), axis=2) - # DistX = Xn.reshape(self.N**2, 1) self.nb_epoch = nb_epoch - self.history['z'] = np.zeros((nb_epoch, self.N, self.L)) - self.history['y'] = np.zeros((nb_epoch, self.N, self.D)) - self.history['zvar'] = np.zeros((nb_epoch, self.L)) + self.history['z'] = np.zeros((nb_epoch, self.n_samples, self.n_components)) + self.history['y'] = np.zeros((nb_epoch, self.n_samples, self.n_dimensions)) + self.history['zvar'] = np.zeros((nb_epoch, self.n_components)) self.history['obj_func'] = np.zeros(nb_epoch) @@ -58,36 +53,28 @@ def fit(self, nb_epoch=100, verbose=True, eta=0.5, expand_epoch=None): for epoch in bar: Delta = self.Z[:, None, :] - self.Z[None, :, :] DistZ = np.sum(np.square(Delta), axis=2) - H = np.exp(-0.5 * self.gamma * DistZ) - if self.isLOOCV: + H = np.exp(-0.5 * self.precision * DistZ) + if self.is_loocv: H -= np.identity(H.shape[0]) # Hprime = H G = np.sum(H, axis=1)[:, None] GInv = 1 / G R = H * GInv - # Rprime = Hprime * GInv Y = R @ self.X - # Y2 = np.sum(np.square(Y), axis=1)[:, None] - # beta0 = np.sum(G) / np.sum(G * (X2 - Y2)) DeltaYX = Y[:,None,:] - self.X[None, :, :] Error = Y - self.X - obj_func = -0.5 * np.sum(np.square(Error)) - 0.5*self.Alpha*np.sum(np.square(self.Z)) + obj_func = -0.5 * np.sum(np.square(Error)) - self.lambda_ * np.sum(np.square(self.Z)) + A = self.precision * R * np.einsum('nd,nid->ni', Y - self.X, DeltaYX) + dFdZ = -np.sum((A + A.T)[:, :, None] * Delta, axis=1) - A = self.gamma * R * np.einsum('nd,nid->ni', Y - self.X, DeltaYX) - #dFdZ = -beta0 * np.sum((A + A.T)[:, :, None] * Delta, axis=1) - if self.isGEM: - dFdZ = -np.sum(A[:, :, None] * Delta, axis=1) - else: - dFdZ = -np.sum((A + A.T)[:, :, None] * Delta, axis=1) - - dFdZ -= self.Alpha * self.Z + dFdZ -= self.lambda_ * self.Z # self.Z += (eta / self.D) * dFdZ self.Z += eta * dFdZ - if self.isCompact: + if self.is_compact: self.Z = np.clip(self.Z,-1.0,1.0) else: self.Z -= self.Z.mean(axis=0) @@ -100,7 +87,7 @@ def fit(self, nb_epoch=100, verbose=True, eta=0.5, expand_epoch=None): - self.donefit = True + self._done_fit = True return self.history def calcF(self, resolution, size='auto'): @@ -109,25 +96,25 @@ def calcF(self, resolution, size='auto'): :param size: :return: """ - if not self.donefit: + if not self._done_fit: raise ValueError("fit is not done") self.resolution = resolution - Zeta = create_zeta(-1, 1, self.L, resolution) + Zeta = create_zeta(-1, 1, self.n_components, resolution) M = Zeta.shape[0] - self.history['f'] = np.zeros((self.nb_epoch, M, self.D)) + self.history['f'] = np.zeros((self.nb_epoch, M, self.n_dimensions)) for epoch in range(self.nb_epoch): Z = self.history['z'][epoch] if size == 'auto': - Zeta = create_zeta(Z.min(), Z.max(), self.L, resolution) + Zeta = create_zeta(Z.min(), Z.max(), self.n_components, resolution) else: - Zeta = create_zeta(size.min(), size.max(), self.L, resolution) + Zeta = create_zeta(size.min(), size.max(), self.n_components, resolution) Dist = dist.cdist(Zeta, Z, 'sqeuclidean') - H = np.exp(-0.5 *self.gamma* Dist) + H = np.exp(-0.5 * self.precision * Dist) G = np.sum(H, axis=1)[:, None] GInv = np.reciprocal(G) R = H * GInv @@ -140,7 +127,7 @@ def transform(self, Xnew, nb_epoch_trans=100, eta_trans=0.5, verbose=True, const # calculate latent variables of new data using gradient descent # objective function is square error E = ||f(z)-x||^2 - if not self.donefit: + if not self._done_fit: raise ValueError("fit is not done") Nnew = Xnew.shape[0] @@ -159,7 +146,7 @@ def transform(self, Xnew, nb_epoch_trans=100, eta_trans=0.5, verbose=True, const # calculate gradient Delta = self.Z[None,:,:] - Znew[:,None,:] # shape = (Nnew,N,L) Dist_Znew_Z = dist.cdist(Znew,self.Z,"sqeuclidean") # shape = (Nnew,N) - H = np.exp(-0.5 *self.gamma* Dist_Znew_Z) # shape = (Nnew,N) + H = np.exp(-0.5 * self.precision * Dist_Znew_Z) # shape = (Nnew,N) G = np.sum(H,axis=1)[:,None] # shape = (Nnew,1) Ginv = np.reciprocal(G) # shape = (Nnew,1) R = H * Ginv # shape = (Nnew,N) @@ -167,14 +154,14 @@ def transform(self, Xnew, nb_epoch_trans=100, eta_trans=0.5, verbose=True, const Delta_bar = np.einsum("kn,knl->kl",R,Delta) # (Nnew,N)times(Nnew,N,L)=(Nnew,L) # Delta_bar = np.sum(R[:,:,None] * Delta, axis=1) # same calculate - dRdZ = self.gamma * R[:,:,None] * (Delta - Delta_bar[:,None,:]) # shape = (Nnew,N,L) + dRdZ = self.precision * R[:, :, None] * (Delta - Delta_bar[:, None, :]) # shape = (Nnew,N,L) dFdZ = np.einsum("nd,knl->kdl",self.X,dRdZ) # shape = (Nnew,D,L) # dFdZ = np.sum(self.X[None,:,:,None]*dRdZ[:,:,None,:],axis=1) # same calculate dEdZ = 2.0 * np.einsum("kd,kdl->kl",F-Xnew,dFdZ) # shape (Nnew, L) # update latent variables Znew -= eta_trans * dEdZ - if self.isCompact: + if self.is_compact: Znew = np.clip(Znew,-1.0,1.0) if constrained: Znew = np.clip(Znew, self.Z.min(axis=0), self.Z.max(axis=0)) @@ -182,13 +169,13 @@ def transform(self, Xnew, nb_epoch_trans=100, eta_trans=0.5, verbose=True, const return Znew def inverse_transform(self, Znew): - if not self.donefit: + if not self._done_fit: raise ValueError("fit is not done") - if Znew.shape[1]!=self.L: - raise ValueError("Znew dimension must be {}".format(self.L)) + if Znew.shape[1]!=self.n_components: + raise ValueError("Znew dimension must be {}".format(self.n_components)) Dist_Znew_Z = dist.cdist(Znew,self.Z,"sqeuclidean") # shape = (Nnew,N) - H = np.exp(-0.5 * self.gamma *Dist_Znew_Z) # shape = (Nnew,N) + H = np.exp(-0.5 * self.precision * Dist_Znew_Z) # shape = (Nnew,N) G = np.sum(H,axis=1)[:,None] # shape = (Nnew,1) Ginv = np.reciprocal(G) # shape = (Nnew,1) R = H * Ginv # shape = (Nnew,N) From 9853f7be6fc39dfd1b9c87e404941864c47fcab1 Mon Sep 17 00:00:00 2001 From: Ryuji Watanabe Date: Tue, 19 Nov 2019 17:01:49 +0900 Subject: [PATCH 09/25] =?UTF-8?q?history=E3=82=92=E3=83=87=E3=83=95?= =?UTF-8?q?=E3=82=A9=E3=83=AB=E3=83=88=E3=81=A7=E3=81=AF=E6=AE=8B=E3=81=95?= =?UTF-8?q?=E3=81=9A=E3=80=81=E3=82=B3=E3=83=B3=E3=82=B9=E3=83=88=E3=83=A9?= =?UTF-8?q?=E3=82=AF=E3=82=BF=E3=81=A7=E6=AE=8B=E3=81=99=E3=81=8B=E6=8C=87?= =?UTF-8?q?=E5=AE=9A=E3=81=A7=E3=81=8D=E3=82=8B=E3=82=88=E3=81=86=E3=81=AB?= =?UTF-8?q?=E5=A4=89=E6=9B=B4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- libs/models/unsupervised_kernel_regression.py | 30 ++++++++++--------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/libs/models/unsupervised_kernel_regression.py b/libs/models/unsupervised_kernel_regression.py index 76f220e..6f82449 100644 --- a/libs/models/unsupervised_kernel_regression.py +++ b/libs/models/unsupervised_kernel_regression.py @@ -4,9 +4,9 @@ class UnsupervisedKernelRegression(object): - def __init__(self, X, n_components, bandwidth_gaussian_kernel=0.2, - is_compact=True, lambda_=0.0, - init='random', is_loocv=False): + def __init__(self, X, n_components, bandwidth_gaussian_kernel=1.0, + is_compact=False, lambda_=1.0, + init='random', is_loocv=False, is_save_history=False): self.X = X.copy() self.n_samples = X.shape[0] self.n_dimensions = X.shape[1] @@ -15,6 +15,7 @@ def __init__(self, X, n_components, bandwidth_gaussian_kernel=0.2, self.precision = 1.0 / (bandwidth_gaussian_kernel * bandwidth_gaussian_kernel) self.is_compact = is_compact self.is_loocv = is_loocv + self.is_save_hisotry = is_save_history self.Z = None if isinstance(init, str) and init in 'random': @@ -26,7 +27,7 @@ def __init__(self, X, n_components, bandwidth_gaussian_kernel=0.2, self.lambda_ = lambda_ - self.history = {} + self._done_fit = False @@ -38,10 +39,12 @@ def fit(self, nb_epoch=100, verbose=True, eta=0.5, expand_epoch=None): self.nb_epoch = nb_epoch - self.history['z'] = np.zeros((nb_epoch, self.n_samples, self.n_components)) - self.history['y'] = np.zeros((nb_epoch, self.n_samples, self.n_dimensions)) - self.history['zvar'] = np.zeros((nb_epoch, self.n_components)) - self.history['obj_func'] = np.zeros(nb_epoch) + if self.is_save_hisotry: + self.history = {} + self.history['z'] = np.zeros((nb_epoch, self.n_samples, self.n_components)) + self.history['y'] = np.zeros((nb_epoch, self.n_samples, self.n_dimensions)) + self.history['zvar'] = np.zeros((nb_epoch, self.n_components)) + self.history['obj_func'] = np.zeros(nb_epoch) if verbose: @@ -57,7 +60,6 @@ def fit(self, nb_epoch=100, verbose=True, eta=0.5, expand_epoch=None): if self.is_loocv: H -= np.identity(H.shape[0]) - # Hprime = H G = np.sum(H, axis=1)[:, None] GInv = 1 / G R = H * GInv @@ -72,7 +74,6 @@ def fit(self, nb_epoch=100, verbose=True, eta=0.5, expand_epoch=None): dFdZ -= self.lambda_ * self.Z - # self.Z += (eta / self.D) * dFdZ self.Z += eta * dFdZ if self.is_compact: self.Z = np.clip(self.Z,-1.0,1.0) @@ -80,10 +81,11 @@ def fit(self, nb_epoch=100, verbose=True, eta=0.5, expand_epoch=None): self.Z -= self.Z.mean(axis=0) - self.history['z'][epoch] = self.Z - self.history['y'][epoch] = Y - self.history['zvar'][epoch] = np.mean(np.square(self.Z - self.Z.mean(axis=0)),axis=0) - self.history['obj_func'][epoch] = obj_func + if self.is_save_hisotry: + self.history['z'][epoch] = self.Z + self.history['y'][epoch] = Y + self.history['zvar'][epoch] = np.mean(np.square(self.Z - self.Z.mean(axis=0)),axis=0) + self.history['obj_func'][epoch] = obj_func From 2e9efbd0192bf0c524ecb99788bc0f5955736723 Mon Sep 17 00:00:00 2001 From: Ryuji Watanabe Date: Wed, 20 Nov 2019 14:22:29 +0900 Subject: [PATCH 10/25] =?UTF-8?q?=E5=86=99=E5=83=8F=E3=81=AEhistory?= =?UTF-8?q?=E3=82=92=E8=A8=88=E7=AE=97=E3=81=99=E3=82=8B=E3=83=A1=E3=82=BD?= =?UTF-8?q?=E3=83=83=E3=83=89=E3=81=AE=E5=90=8D=E7=A7=B0=E3=82=92=E5=A4=89?= =?UTF-8?q?=E6=9B=B4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- libs/models/unsupervised_kernel_regression.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/models/unsupervised_kernel_regression.py b/libs/models/unsupervised_kernel_regression.py index 6f82449..84f46ce 100644 --- a/libs/models/unsupervised_kernel_regression.py +++ b/libs/models/unsupervised_kernel_regression.py @@ -92,7 +92,7 @@ def fit(self, nb_epoch=100, verbose=True, eta=0.5, expand_epoch=None): self._done_fit = True return self.history - def calcF(self, resolution, size='auto'): + def calculation_history_of_mapping(self, resolution, size='auto'): """ :param resolution: :param size: From dba3fe96a1e98d54e2fd657758874c6c05482a4d Mon Sep 17 00:00:00 2001 From: Ryuji Watanabe Date: Wed, 20 Nov 2019 15:31:56 +0900 Subject: [PATCH 11/25] =?UTF-8?q?ukr=E3=81=AE=E8=A6=B3=E6=B8=AC=E7=A9=BA?= =?UTF-8?q?=E9=96=93=E3=81=AB=E3=81=8A=E3=81=91=E3=82=8B=E5=86=99=E5=83=8F?= =?UTF-8?q?=E3=81=AE=E5=AD=A6=E7=BF=92=E9=81=8E=E7=A8=8B=E3=82=92=E5=8F=AF?= =?UTF-8?q?=E8=A6=96=E5=8C=96=E3=81=99=E3=82=8Btutorial=20code=E3=82=92?= =?UTF-8?q?=E4=BD=9C=E6=88=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tutorials/ukr/fitting_saddle_shape.py | 43 +++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 tutorials/ukr/fitting_saddle_shape.py diff --git a/tutorials/ukr/fitting_saddle_shape.py b/tutorials/ukr/fitting_saddle_shape.py new file mode 100644 index 0000000..ef1fc55 --- /dev/null +++ b/tutorials/ukr/fitting_saddle_shape.py @@ -0,0 +1,43 @@ +import numpy as np +from libs.models.unsupervised_kernel_regression import UnsupervisedKernelRegression as UKR +from libs.models.som import SOM +from libs.visualization.som.animation_learning_process_3d import anime_learning_process_3d +from libs.datasets.artificial.kura import create_data +import matplotlib + +if __name__ == '__main__': + + # create artiricial dataset + nb_samples = 500 + seed = 1 + np.random.seed(seed) + X = create_data(nb_samples) + x_sigma = 0.1 + X += np.random.normal(0,x_sigma,X.shape) + + # common parameter + n_components = 2 + bandwidth_gaussian_kernel = 0.2 + nb_epoch = 100 + + # ukr parameter + is_compact = True + is_save_history = True + lambda_ = 0.0 + eta = 0.02 + + # som parameter + tau = nb_epoch + init_bandwidth = 2.0 + resolution = 10 + + + som = SOM(X, latent_dim=n_components, resolution=resolution, + sigma_max=init_bandwidth, sigma_min=bandwidth_gaussian_kernel, tau=tau) + ukr = UKR(X, n_components=n_components,bandwidth_gaussian_kernel=bandwidth_gaussian_kernel, + is_compact=is_compact,is_save_history=is_save_history,lambda_=lambda_) + som.fit(nb_epoch=nb_epoch) + ukr.fit(nb_epoch=nb_epoch,eta=eta) + ukr.calculation_history_of_mapping(resolution=30) + + anime_learning_process_3d(X=ukr.X, Y_allepoch=ukr.history['f']) From ea5ed3e70c61ddd27ef58677328bc78be3756b22 Mon Sep 17 00:00:00 2001 From: Ryuji Watanabe Date: Wed, 20 Nov 2019 23:35:39 +0900 Subject: [PATCH 12/25] =?UTF-8?q?tutorial=E3=82=B3=E3=83=BC=E3=83=89?= =?UTF-8?q?=E3=82=92SOM=E3=81=A8=E6=AF=94=E8=BC=83=E3=81=97=E3=81=A6?= =?UTF-8?q?=E8=A1=A8=E7=A4=BA=E3=81=99=E3=82=8B=E3=82=88=E3=81=86=E3=81=AB?= =?UTF-8?q?=E4=BF=AE=E6=AD=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tutorials/ukr/fitting_saddle_shape.py | 38 +++++++++++++++++++++++---- 1 file changed, 33 insertions(+), 5 deletions(-) diff --git a/tutorials/ukr/fitting_saddle_shape.py b/tutorials/ukr/fitting_saddle_shape.py index ef1fc55..b816446 100644 --- a/tutorials/ukr/fitting_saddle_shape.py +++ b/tutorials/ukr/fitting_saddle_shape.py @@ -1,9 +1,10 @@ import numpy as np from libs.models.unsupervised_kernel_regression import UnsupervisedKernelRegression as UKR from libs.models.som import SOM -from libs.visualization.som.animation_learning_process_3d import anime_learning_process_3d from libs.datasets.artificial.kura import create_data -import matplotlib +import matplotlib.pyplot as plt +from mpl_toolkits.mplot3d import Axes3D +import matplotlib.animation as animation if __name__ == '__main__': @@ -18,13 +19,13 @@ # common parameter n_components = 2 bandwidth_gaussian_kernel = 0.2 - nb_epoch = 100 + nb_epoch = 50 # ukr parameter is_compact = True is_save_history = True lambda_ = 0.0 - eta = 0.02 + eta = 0.020 # som parameter tau = nb_epoch @@ -32,6 +33,7 @@ resolution = 10 + # learn ukr and som som = SOM(X, latent_dim=n_components, resolution=resolution, sigma_max=init_bandwidth, sigma_min=bandwidth_gaussian_kernel, tau=tau) ukr = UKR(X, n_components=n_components,bandwidth_gaussian_kernel=bandwidth_gaussian_kernel, @@ -40,4 +42,30 @@ ukr.fit(nb_epoch=nb_epoch,eta=eta) ukr.calculation_history_of_mapping(resolution=30) - anime_learning_process_3d(X=ukr.X, Y_allepoch=ukr.history['f']) + fig = plt.figure() + ax_som = fig.add_subplot(1,2,1,aspect='equal',projection='3d') + ax_ukr = fig.add_subplot(1,2,2,aspect='equal',projection='3d') + + def plot(i): + ax_som.cla() + ax_ukr.cla() + ax_som.scatter(X[:,0], X[:,1], X[:,2], s=3, c=X[:,0], alpha=0.5) + ax_ukr.scatter(X[:,0], X[:,1], X[:,2], s=3, c=X[:,0], alpha=0.5) + mapping_2d_som = som.history['y'][i].reshape(resolution,resolution,X.shape[1]) + ax_som.plot_wireframe(mapping_2d_som[:,:,0], + mapping_2d_som[:,:,1], + mapping_2d_som[:,:,2]) + mapping_2d_ukr = ukr.history['f'][i].reshape(30,30,X.shape[1]) + ax_ukr.plot_surface(mapping_2d_ukr[:,:,0], + mapping_2d_ukr[:,:,1], + mapping_2d_ukr[:,:,2], + antialiased=False) + fig.suptitle("epoch {}".format(i)) + ax_som.set_title('som') + ax_ukr.set_title('ukr') + + ani = animation.FuncAnimation(fig, plot,frames=nb_epoch,interval=100, repeat=False) + plt.show() + + # anime_learning_process_3d(X=ukr.X, Y_allepoch=ukr.history['f']) +# \ No newline at end of file From 1069ccb1010d69e992cef3f0c9f726c6c4f1704f Mon Sep 17 00:00:00 2001 From: Ryuji Watanabe Date: Thu, 21 Nov 2019 16:24:06 +0900 Subject: [PATCH 13/25] =?UTF-8?q?=E7=9B=AE=E7=9A=84=E9=96=A2=E6=95=B0?= =?UTF-8?q?=E3=81=AE=E8=AA=A4=E5=B7=AE=E9=A0=85=E3=82=92=E3=82=B5=E3=83=B3?= =?UTF-8?q?=E3=83=97=E3=83=AB=E6=95=B0=E3=81=A7=E5=89=B2=E3=82=8B=E3=82=88?= =?UTF-8?q?=E3=81=86=E3=81=AB=E5=A4=89=E6=9B=B4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- libs/models/unsupervised_kernel_regression.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libs/models/unsupervised_kernel_regression.py b/libs/models/unsupervised_kernel_regression.py index 84f46ce..e238df5 100644 --- a/libs/models/unsupervised_kernel_regression.py +++ b/libs/models/unsupervised_kernel_regression.py @@ -67,10 +67,10 @@ def fit(self, nb_epoch=100, verbose=True, eta=0.5, expand_epoch=None): Y = R @ self.X DeltaYX = Y[:,None,:] - self.X[None, :, :] Error = Y - self.X - obj_func = -0.5 * np.sum(np.square(Error)) - self.lambda_ * np.sum(np.square(self.Z)) + obj_func = np.sum(np.square(Error)) / self.n_samples + self.lambda_ * np.sum(np.square(self.Z)) A = self.precision * R * np.einsum('nd,nid->ni', Y - self.X, DeltaYX) - dFdZ = -np.sum((A + A.T)[:, :, None] * Delta, axis=1) + dFdZ = -2.0 * np.sum((A + A.T)[:, :, None] * Delta, axis=1) / self.n_samples dFdZ -= self.lambda_ * self.Z From 27ad199ba3509d978397f1c5629e752cc404fac5 Mon Sep 17 00:00:00 2001 From: Ryuji Watanabe Date: Sat, 23 Nov 2019 20:03:54 +0900 Subject: [PATCH 14/25] =?UTF-8?q?tutorial=E3=82=B3=E3=83=BC=E3=83=89?= =?UTF-8?q?=E3=82=92=E4=B8=80=E9=83=A8=E4=BF=AE=E6=AD=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tutorials/ukr/fitting_saddle_shape.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tutorials/ukr/fitting_saddle_shape.py b/tutorials/ukr/fitting_saddle_shape.py index b816446..222b2f3 100644 --- a/tutorials/ukr/fitting_saddle_shape.py +++ b/tutorials/ukr/fitting_saddle_shape.py @@ -19,13 +19,13 @@ # common parameter n_components = 2 bandwidth_gaussian_kernel = 0.2 - nb_epoch = 50 + nb_epoch = 100 # ukr parameter is_compact = True is_save_history = True lambda_ = 0.0 - eta = 0.020 + eta = 3.0 # som parameter tau = nb_epoch @@ -64,7 +64,7 @@ def plot(i): ax_som.set_title('som') ax_ukr.set_title('ukr') - ani = animation.FuncAnimation(fig, plot,frames=nb_epoch,interval=100, repeat=False) + ani = animation.FuncAnimation(fig, plot,frames=nb_epoch,interval=50, repeat=False) plt.show() # anime_learning_process_3d(X=ukr.X, Y_allepoch=ukr.history['f']) From fa22858c85fc377ceb3ce5a928804e1eff26110e Mon Sep 17 00:00:00 2001 From: Ryuji Watanabe Date: Sat, 23 Nov 2019 20:33:24 +0900 Subject: [PATCH 15/25] =?UTF-8?q?=E3=83=81=E3=83=A5=E3=83=BC=E3=83=88?= =?UTF-8?q?=E3=83=AA=E3=82=A2=E3=83=AB=E3=82=B3=E3=83=BC=E3=83=89=E3=81=A7?= =?UTF-8?q?=E6=BD=9C=E5=9C=A8=E7=A9=BA=E9=96=93=E3=82=82=E8=A1=A8=E7=A4=BA?= =?UTF-8?q?=E3=81=99=E3=82=8B=E3=82=88=E3=81=86=E3=81=AB=E5=A4=89=E6=9B=B4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tutorials/ukr/fitting_saddle_shape.py | 70 ++++++++++++++++----------- 1 file changed, 43 insertions(+), 27 deletions(-) diff --git a/tutorials/ukr/fitting_saddle_shape.py b/tutorials/ukr/fitting_saddle_shape.py index 222b2f3..aac82d3 100644 --- a/tutorials/ukr/fitting_saddle_shape.py +++ b/tutorials/ukr/fitting_saddle_shape.py @@ -7,14 +7,13 @@ import matplotlib.animation as animation if __name__ == '__main__': - # create artiricial dataset nb_samples = 500 seed = 1 np.random.seed(seed) X = create_data(nb_samples) x_sigma = 0.1 - X += np.random.normal(0,x_sigma,X.shape) + X += np.random.normal(0, x_sigma, X.shape) # common parameter n_components = 2 @@ -25,47 +24,64 @@ is_compact = True is_save_history = True lambda_ = 0.0 - eta = 3.0 + eta = 8.0 # som parameter tau = nb_epoch init_bandwidth = 2.0 resolution = 10 - # learn ukr and som som = SOM(X, latent_dim=n_components, resolution=resolution, sigma_max=init_bandwidth, sigma_min=bandwidth_gaussian_kernel, tau=tau) - ukr = UKR(X, n_components=n_components,bandwidth_gaussian_kernel=bandwidth_gaussian_kernel, - is_compact=is_compact,is_save_history=is_save_history,lambda_=lambda_) + ukr = UKR(X, n_components=n_components, bandwidth_gaussian_kernel=bandwidth_gaussian_kernel, + is_compact=is_compact, is_save_history=is_save_history, lambda_=lambda_) som.fit(nb_epoch=nb_epoch) - ukr.fit(nb_epoch=nb_epoch,eta=eta) + ukr.fit(nb_epoch=nb_epoch, eta=eta) ukr.calculation_history_of_mapping(resolution=30) - fig = plt.figure() - ax_som = fig.add_subplot(1,2,1,aspect='equal',projection='3d') - ax_ukr = fig.add_subplot(1,2,2,aspect='equal',projection='3d') + fig = plt.figure(figsize=[7, 8]) + ax_latent_space_som = fig.add_subplot(2, 2, 1, aspect='equal') + ax_data_space_som = fig.add_subplot(2, 2, 2, aspect='equal', projection='3d') + ax_latent_space_ukr = fig.add_subplot(2, 2, 3, aspect='equal') + ax_data_space_ukr = fig.add_subplot(2, 2, 4, aspect='equal', projection='3d') + def plot(i): - ax_som.cla() - ax_ukr.cla() - ax_som.scatter(X[:,0], X[:,1], X[:,2], s=3, c=X[:,0], alpha=0.5) - ax_ukr.scatter(X[:,0], X[:,1], X[:,2], s=3, c=X[:,0], alpha=0.5) - mapping_2d_som = som.history['y'][i].reshape(resolution,resolution,X.shape[1]) - ax_som.plot_wireframe(mapping_2d_som[:,:,0], - mapping_2d_som[:,:,1], - mapping_2d_som[:,:,2]) - mapping_2d_ukr = ukr.history['f'][i].reshape(30,30,X.shape[1]) - ax_ukr.plot_surface(mapping_2d_ukr[:,:,0], - mapping_2d_ukr[:,:,1], - mapping_2d_ukr[:,:,2], - antialiased=False) + ax_latent_space_som.cla() + ax_data_space_som.cla() + ax_data_space_ukr.cla() + ax_latent_space_ukr.cla() + ax_data_space_som.scatter(X[:, 0], X[:, 1], X[:, 2], s=3, c=X[:, 0], alpha=0.5) + ax_data_space_ukr.scatter(X[:, 0], X[:, 1], X[:, 2], s=3, c=X[:, 0], alpha=0.5) + mapping_2d_som = som.history['y'][i].reshape(resolution, resolution, X.shape[1]) + ax_data_space_som.plot_wireframe(mapping_2d_som[:, :, 0], + mapping_2d_som[:, :, 1], + mapping_2d_som[:, :, 2]) + mapping_2d_ukr = ukr.history['f'][i].reshape(30, 30, X.shape[1]) + ax_data_space_ukr.plot_surface(mapping_2d_ukr[:, :, 0], + mapping_2d_ukr[:, :, 1], + mapping_2d_ukr[:, :, 2], + antialiased=False) + ith_z_som = som.history['z'][i] + ith_z_ukr = ukr.history['z'][i] + ax_latent_space_som.scatter(ith_z_som[:, 0], ith_z_som[:, 1], s=3, c=X[:, 0]) + ax_latent_space_ukr.scatter(ith_z_ukr[:, 0], ith_z_ukr[:, 1], s=3, c=X[:, 0]) fig.suptitle("epoch {}".format(i)) - ax_som.set_title('som') - ax_ukr.set_title('ukr') - ani = animation.FuncAnimation(fig, plot,frames=nb_epoch,interval=50, repeat=False) + ax_latent_space_som.set_xlim(-1.0, 1.0) + ax_latent_space_som.set_ylim(-1.0, 1.0) + ax_latent_space_ukr.set_xlim(-1.0, 1.0) + ax_latent_space_ukr.set_ylim(-1.0, 1.0) + + ax_latent_space_som.set_title('som latent space') + ax_latent_space_ukr.set_title('ukr latent space') + ax_data_space_som.set_title('som data space') + ax_data_space_ukr.set_title('ukr data space') + + + ani = animation.FuncAnimation(fig, plot, frames=nb_epoch, interval=20, repeat=False) plt.show() # anime_learning_process_3d(X=ukr.X, Y_allepoch=ukr.history['f']) -# \ No newline at end of file +# From 89109e2096abc3bb5f9bd663aac76731ebe09f97 Mon Sep 17 00:00:00 2001 From: Ryuji Watanabe Date: Tue, 3 Dec 2019 15:36:15 +0900 Subject: [PATCH 16/25] =?UTF-8?q?=E5=A4=89=E6=95=B0=E3=81=AErefactor?= =?UTF-8?q?=E3=81=A8=E3=82=B3=E3=83=A1=E3=83=B3=E3=83=88=E3=81=AE=E8=BF=BD?= =?UTF-8?q?=E5=8A=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- libs/models/tsom_plus_som.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/libs/models/tsom_plus_som.py b/libs/models/tsom_plus_som.py index 6eaad38..bd3276a 100644 --- a/libs/models/tsom_plus_som.py +++ b/libs/models/tsom_plus_som.py @@ -5,13 +5,13 @@ class TSOMPlusSOM: - def __init__(self, member_features, index_members_of_group, params_tsom, params_som): + def __init__(self, member_features, group_features, params_tsom, params_som): self.params_tsom = params_tsom self.params_som = params_som self.params_tsom['X'] = member_features - self.index_members_of_group = index_members_of_group # グループ数の確認 - self.group_num = len(self.index_members_of_group) + self.group_features = group_features # グループ数の確認 + self.group_num = len(self.group_features) def fit(self, tsom_epoch_num, kernel_width, som_epoch_num): self._fit_1st_TSOM(tsom_epoch_num) @@ -25,15 +25,17 @@ def _fit_1st_TSOM(self, tsom_epoch_num): def _fit_KDE(self, kernel_width): # 学習した後の潜在空間からKDEで確率分布を作る prob_data = np.zeros((self.group_num, self.tsom.K1)) # group数*ノード数 # グループごとにKDEを適用 - if isinstance(self.index_members_of_group, np.ndarray) and self.index_members_of_group.ndim == 2: + if isinstance(self.group_features, np.ndarray) and self.group_features.ndim == 2: + # group_featuresがbag of membersで与えられた時の処理 distance = dist.cdist(self.tsom.Zeta1, self.tsom.Z1, 'sqeuclidean') # K1 x num_members H = np.exp(-0.5 * distance / (kernel_width * kernel_width)) # KxN - prob_data = self.index_members_of_group @ H.T # num_group x K1 + prob_data = self.group_features @ H.T # num_group x K1 prob_data = prob_data / prob_data.sum(axis=1)[:, None] else: + # group_featuresがlist of listsもしくはlist of arraysで与えられた時の処理 for i in range(self.group_num): Dist = dist.cdist(self.tsom.Zeta1, - self.tsom.Z1[self.index_members_of_group[i], :], + self.tsom.Z1[self.group_features[i], :], 'sqeuclidean') # KxNの距離行列を計算 H = np.exp(-Dist / (2 * kernel_width * kernel_width)) # KxNの学習量行列を計算 prob = np.sum(H, axis=1) From 9f0f30f54aa343b12a3b47e40c5b407064521a07 Mon Sep 17 00:00:00 2001 From: Ryuji Watanabe Date: Tue, 3 Dec 2019 16:01:03 +0900 Subject: [PATCH 17/25] =?UTF-8?q?=E3=83=86=E3=82=B9=E3=83=88=E3=82=B3?= =?UTF-8?q?=E3=83=BC=E3=83=89=E3=82=92=E4=BF=AE=E6=AD=A3=E3=81=97=E5=AE=9F?= =?UTF-8?q?=E8=A1=8C=E3=80=81pass=E3=82=92=E7=A2=BA=E8=AA=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/plus_TSOM/allclose_plusTSOM.py | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/tests/plus_TSOM/allclose_plusTSOM.py b/tests/plus_TSOM/allclose_plusTSOM.py index 2ecb221..d18c58a 100644 --- a/tests/plus_TSOM/allclose_plusTSOM.py +++ b/tests/plus_TSOM/allclose_plusTSOM.py @@ -10,11 +10,10 @@ class TestTSOMPlusSOM(unittest.TestCase): def create_artficial_data(self,n_samples,n_features,n_groups,n_samples_per_group): x = np.random.normal(0.0,1.0,(n_samples,n_features)) if isinstance(n_samples_per_group,int): - index_members_of_group = np.random.randint(0,n_samples,(n_groups,n_samples_per_group)) - elif isinstance(n_samples_per_group,np.ndarray): - index_members_of_group = [] - for n_samples_in_the_group in n_samples_per_group: - index_members_of_group.append(np.random.randint(0,n_samples,n_samples_in_the_group)) + n_samples_per_group = np.ones(n_groups,int) * n_samples_per_group + index_members_of_group = [] + for n_samples_in_the_group in n_samples_per_group: + index_members_of_group.append(np.random.randint(0,n_samples,n_samples_in_the_group)) return x, index_members_of_group def test_plusTSOM_ishida_vs_test_plusTSOM_watanabe(self): @@ -23,7 +22,7 @@ def test_plusTSOM_ishida_vs_test_plusTSOM_watanabe(self): n_samples = 1000 n_groups = 10 # group数 n_features = 3 # 各メンバーの特徴数 - n_samples_per_group = 30 # 各グループにメンバーに何人いるのか + n_samples_per_group = np.random.randint(1,30,n_groups) # 各グループにメンバーに何人いるのか member_features,index_members_of_group = self.create_artficial_data(n_samples, n_features, n_groups, @@ -52,7 +51,7 @@ def test_plusTSOM_ishida_vs_test_plusTSOM_watanabe(self): kernel_width = 0.3 htsom_ishida = TSOMPlusSOM(member_features=member_features, - index_members_of_group=index_members_of_group, + group_features=index_members_of_group, params_tsom=params_tsom, params_som=params_som) htsom_watanabe = TSOMPlusSOMWatanabe(member_features=member_features, @@ -87,7 +86,7 @@ def test_matching_index_member_as_list_or_bag(self): n_members = 100 n_groups = 10 # group数 n_features = 3 # 各メンバーの特徴数 - n_samples_per_group = 30 # 各グループにメンバーに何人いるのか + n_samples_per_group = np.random.randint(1,50,n_groups) # 各グループにメンバーに何人いるのか member_features,index_members_of_group = self.create_artficial_data(n_members, n_features, n_groups, @@ -115,13 +114,13 @@ def test_matching_index_member_as_list_or_bag(self): kernel_width = 0.3 tsom_plus_som_input_list = TSOMPlusSOM(member_features=member_features, - index_members_of_group=index_members_of_group, + group_features=index_members_of_group, params_tsom=params_tsom, params_som=params_som) tsom_plus_som_input_bag = TSOMPlusSOM(member_features=member_features, - index_members_of_group=bag_of_members, - params_tsom=params_tsom, - params_som=params_som) + group_features=bag_of_members, + params_tsom=params_tsom, + params_som=params_som) tsom_plus_som_input_list.fit(tsom_epoch_num=tsom_epoch_num, kernel_width=kernel_width, From a70776fb9e92ec89508a9f33a7758b5e24922d15 Mon Sep 17 00:00:00 2001 From: Ryuji Watanabe Date: Wed, 4 Dec 2019 17:49:57 +0900 Subject: [PATCH 18/25] =?UTF-8?q?=E3=82=AB=E3=83=BC=E3=83=8D=E3=83=AB?= =?UTF-8?q?=E5=AF=86=E5=BA=A6=E6=8E=A8=E5=AE=9A=E3=81=AE=E8=A8=88=E7=AE=97?= =?UTF-8?q?=E3=82=92fit=E4=BB=A5=E5=A4=96=E3=81=A7=E3=82=82=E5=88=A9?= =?UTF-8?q?=E7=94=A8=E3=81=A7=E3=81=8D=E3=82=8B=E3=82=88=E3=81=86=E3=81=AB?= =?UTF-8?q?=E4=B8=80=E8=88=AC=E5=8C=96=E3=80=81=E3=83=86=E3=82=B9=E3=83=88?= =?UTF-8?q?=E3=82=92=E5=AE=9F=E8=A1=8C=E3=81=97pass=E3=82=92=E7=A2=BA?= =?UTF-8?q?=E8=AA=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- libs/models/tsom_plus_som.py | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/libs/models/tsom_plus_som.py b/libs/models/tsom_plus_som.py index bd3276a..86c7e0a 100644 --- a/libs/models/tsom_plus_som.py +++ b/libs/models/tsom_plus_som.py @@ -23,28 +23,36 @@ def _fit_1st_TSOM(self, tsom_epoch_num): self.tsom.fit(tsom_epoch_num) def _fit_KDE(self, kernel_width): # 学習した後の潜在空間からKDEで確率分布を作る - prob_data = np.zeros((self.group_num, self.tsom.K1)) # group数*ノード数 + prob_data = self._calculate_kde(group_features=self.group_features,kernel_width=kernel_width) + self.params_som['X'] = prob_data + self.params_som['metric'] = "KLdivergence" + + def _calculate_kde(self, group_features, kernel_width): # グループごとにKDEを適用 - if isinstance(self.group_features, np.ndarray) and self.group_features.ndim == 2: + if isinstance(group_features, np.ndarray) and group_features.ndim == 2: # group_featuresがbag of membersで与えられた時の処理 distance = dist.cdist(self.tsom.Zeta1, self.tsom.Z1, 'sqeuclidean') # K1 x num_members H = np.exp(-0.5 * distance / (kernel_width * kernel_width)) # KxN - prob_data = self.group_features @ H.T # num_group x K1 + prob_data = group_features @ H.T # num_group x K1 prob_data = prob_data / prob_data.sum(axis=1)[:, None] else: # group_featuresがlist of listsもしくはlist of arraysで与えられた時の処理 - for i in range(self.group_num): + prob_data = np.zeros((self.group_num, self.tsom.K1)) # group数*ノード数 + for i,one_group_features in enumerate(group_features): Dist = dist.cdist(self.tsom.Zeta1, - self.tsom.Z1[self.group_features[i], :], + self.tsom.Z1[one_group_features, :], 'sqeuclidean') # KxNの距離行列を計算 - H = np.exp(-Dist / (2 * kernel_width * kernel_width)) # KxNの学習量行列を計算 + H = np.exp(-Dist / (2 * kernel_width * kernel_width)) # KxNのカーネルの値を計算 prob = np.sum(H, axis=1) prob_sum = np.sum(prob) prob = prob / prob_sum prob_data[i, :] = prob - self.params_som['X'] = prob_data - self.params_som['metric'] = "KLdivergence" + return prob_data + def _fit_2nd_SOM(self, som_epoch_num): # 上位のSOMを self.som = SOM(**self.params_som) self.som.fit(som_epoch_num) + + def transform(self, group_features): + pass \ No newline at end of file From 60d563062ae8fb055abc08a045b831a4570c84cf Mon Sep 17 00:00:00 2001 From: Ryuji Watanabe Date: Wed, 4 Dec 2019 18:13:37 +0900 Subject: [PATCH 19/25] =?UTF-8?q?Reformat=E3=81=A8transform=E3=83=A1?= =?UTF-8?q?=E3=82=BD=E3=83=83=E3=83=89=E3=81=AE=E4=BD=9C=E6=88=90=E3=80=81?= =?UTF-8?q?som=E5=81=B4=E3=81=A7transform=E3=83=A1=E3=82=BD=E3=83=83?= =?UTF-8?q?=E3=83=89=E3=81=8C=E3=81=82=E3=82=8C=E3=81=B0=E5=8B=95=E4=BD=9C?= =?UTF-8?q?=E3=81=99=E3=82=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- libs/models/tsom_plus_som.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/libs/models/tsom_plus_som.py b/libs/models/tsom_plus_som.py index 86c7e0a..7956dcb 100644 --- a/libs/models/tsom_plus_som.py +++ b/libs/models/tsom_plus_som.py @@ -23,7 +23,7 @@ def _fit_1st_TSOM(self, tsom_epoch_num): self.tsom.fit(tsom_epoch_num) def _fit_KDE(self, kernel_width): # 学習した後の潜在空間からKDEで確率分布を作る - prob_data = self._calculate_kde(group_features=self.group_features,kernel_width=kernel_width) + prob_data = self._calculate_kde(group_features=self.group_features, kernel_width=kernel_width) self.params_som['X'] = prob_data self.params_som['metric'] = "KLdivergence" @@ -38,7 +38,7 @@ def _calculate_kde(self, group_features, kernel_width): else: # group_featuresがlist of listsもしくはlist of arraysで与えられた時の処理 prob_data = np.zeros((self.group_num, self.tsom.K1)) # group数*ノード数 - for i,one_group_features in enumerate(group_features): + for i, one_group_features in enumerate(group_features): Dist = dist.cdist(self.tsom.Zeta1, self.tsom.Z1[one_group_features, :], 'sqeuclidean') # KxNの距離行列を計算 @@ -49,10 +49,11 @@ def _calculate_kde(self, group_features, kernel_width): prob_data[i, :] = prob return prob_data - def _fit_2nd_SOM(self, som_epoch_num): # 上位のSOMを self.som = SOM(**self.params_som) self.som.fit(som_epoch_num) - def transform(self, group_features): - pass \ No newline at end of file + def transform(self, group_features, kernel_width): + group_density = self._calculate_kde(group_features=group_features, + kernel_width=kernel_width) + return self.som.transform(X=group_density) From 1b0e1d30756674bfc06bb2b7e5095ec369635752 Mon Sep 17 00:00:00 2001 From: Ryuji Watanabe Date: Wed, 4 Dec 2019 18:15:09 +0900 Subject: [PATCH 20/25] first commit From 683d3cd3b9be5b1da6aa21a1786fc75ff9afe533 Mon Sep 17 00:00:00 2001 From: Ryuji Watanabe Date: Wed, 4 Dec 2019 18:40:21 +0900 Subject: [PATCH 21/25] =?UTF-8?q?X=E3=81=AB=E5=AF=BE=E3=81=99=E3=82=8B?= =?UTF-8?q?=E6=BD=9C=E5=9C=A8=E5=A4=89=E6=95=B0=E3=82=92=E6=B1=82=E3=82=81?= =?UTF-8?q?=E3=82=8Btransform=E3=83=A1=E3=82=BD=E3=83=83=E3=83=89=E3=82=92?= =?UTF-8?q?=E4=BD=9C=E6=88=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- libs/models/som.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/libs/models/som.py b/libs/models/som.py index b7ebcf8..4ef5685 100644 --- a/libs/models/som.py +++ b/libs/models/som.py @@ -110,3 +110,11 @@ def fit(self, nb_epoch=100, verbose=True): self.history['z'][epoch] = self.Z self.history['y'][epoch] = self.Y self.history['sigma'][epoch] = sigma + + def transform(self,X): + if self.metric == "sqeuclidean": + distance = dist.cdist(X,self.Y,self.metric) + return self.Zeta[distance.argmin(axis=1)] + elif self.metric == "KLdivergence": + divergence = -np.sum(self.X[:,np.newaxis,:] * np.log(self.Y[np.newaxis,:,:]),axis=2) # NxK + return self.Zeta[divergence.argmin(axis=1)] \ No newline at end of file From dfa153ced5cc35cec0329a37c9fbca9dd1d8ab81 Mon Sep 17 00:00:00 2001 From: Ryuji Watanabe Date: Wed, 4 Dec 2019 18:41:20 +0900 Subject: [PATCH 22/25] Reformat --- libs/models/som.py | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/libs/models/som.py b/libs/models/som.py index 4ef5685..9bbd211 100644 --- a/libs/models/som.py +++ b/libs/models/som.py @@ -6,7 +6,7 @@ class SOM: - def __init__(self, X, latent_dim, resolution, sigma_max, sigma_min, tau, init='random',metric="sqeuclidean"): + def __init__(self, X, latent_dim, resolution, sigma_max, sigma_min, tau, init='random', metric="sqeuclidean"): self.X = X self.N = self.X.shape[0] @@ -28,22 +28,23 @@ def __init__(self, X, latent_dim, resolution, sigma_max, sigma_min, tau, init='r elif latent_dim == 2: if isinstance(init, str) and init == 'PCA': comp1, comp2 = pca.singular_values_[0], pca.singular_values_[1] - zeta = np.meshgrid(np.linspace(-1, 1, resolution), np.linspace(-comp2/comp1, comp2/comp1, resolution)) + zeta = np.meshgrid(np.linspace(-1, 1, resolution), + np.linspace(-comp2 / comp1, comp2 / comp1, resolution)) else: zeta = np.meshgrid(np.linspace(-1, 1, resolution), np.linspace(-1, 1, resolution)) - self.Zeta = np.dstack(zeta).reshape(resolution**2, latent_dim) + self.Zeta = np.dstack(zeta).reshape(resolution ** 2, latent_dim) else: raise ValueError("invalid latent dimension: {}".format(latent_dim)) - self.K = resolution**self.L + self.K = resolution ** self.L if isinstance(init, str) and init == 'random': self.Z = np.random.rand(self.N, latent_dim) * 2.0 - 1.0 elif isinstance(init, str) and init == 'random_bmu': init_bmus = np.random.randint(0, self.Zeta.shape[0] - 1, self.N) - self.Z = self.Zeta[init_bmus,:] + self.Z = self.Zeta[init_bmus, :] elif isinstance(init, str) and init == 'PCA': - self.Z = pca.transform(X)/comp1 + self.Z = pca.transform(X) / comp1 elif isinstance(init, np.ndarray) and init.dtype == int: init_bmus = init.copy() self.Z = self.Zeta[init_bmus, :] @@ -52,9 +53,9 @@ def __init__(self, X, latent_dim, resolution, sigma_max, sigma_min, tau, init='r else: raise ValueError("invalid init: {}".format(init)) - #metricに関する処理 + # metricに関する処理 if metric == "sqeuclidean": - self.metric="sqeuclidean" + self.metric = "sqeuclidean" elif metric == "KLdivergence": self.metric = "KLdivergence" @@ -78,18 +79,18 @@ def fit(self, nb_epoch=100, verbose=True): # 協調過程 # 学習量を計算 # sigma = self.sigma_min + (self.sigma_max - self.sigma_min) * np.exp(-epoch / self.tau) # 近傍半径を設定 - sigma = max(self.sigma_min, self.sigma_max * ( 1 - (epoch / self.tau) ) )# 近傍半径を設定 + sigma = max(self.sigma_min, self.sigma_max * (1 - (epoch / self.tau))) # 近傍半径を設定 Dist = dist.cdist(self.Zeta, self.Z, 'sqeuclidean') # KxNの距離行列を計算 # ノードと勝者ノードの全ての組み合わせにおける距離を網羅した行列 - H = np.exp(-Dist / (2 * sigma * sigma)) # KxNの学習量行列を計算 + H = np.exp(-Dist / (2 * sigma * sigma)) # KxNの学習量行列を計算 # 適合過程 # 参照ベクトルの更新 - G = np.sum(H, axis=1)[:, np.newaxis] # 各ノードが受ける学習量の総和を保持するKx1の列ベクトルを計算 - Ginv = np.reciprocal(G) # Gのそれぞれの要素の逆数を取る - R = H * Ginv # 学習量の総和が1になるように規格化 - self.Y = R @ self.X # 学習量を重みとして観測データの平均を取り参照ベクトルとする + G = np.sum(H, axis=1)[:, np.newaxis] # 各ノードが受ける学習量の総和を保持するKx1の列ベクトルを計算 + Ginv = np.reciprocal(G) # Gのそれぞれの要素の逆数を取る + R = H * Ginv # 学習量の総和が1になるように規格化 + self.Y = R @ self.X # 学習量を重みとして観測データの平均を取り参照ベクトルとする # 競合過程 if self.metric is "sqeuclidean": # ユークリッド距離を使った勝者決定 @@ -111,10 +112,10 @@ def fit(self, nb_epoch=100, verbose=True): self.history['y'][epoch] = self.Y self.history['sigma'][epoch] = sigma - def transform(self,X): + def transform(self, X): if self.metric == "sqeuclidean": - distance = dist.cdist(X,self.Y,self.metric) + distance = dist.cdist(X, self.Y, self.metric) return self.Zeta[distance.argmin(axis=1)] elif self.metric == "KLdivergence": - divergence = -np.sum(self.X[:,np.newaxis,:] * np.log(self.Y[np.newaxis,:,:]),axis=2) # NxK - return self.Zeta[divergence.argmin(axis=1)] \ No newline at end of file + divergence = -np.sum(self.X[:, np.newaxis, :] * np.log(self.Y[np.newaxis, :, :]), axis=2) # NxK + return self.Zeta[divergence.argmin(axis=1)] From 52c6e8396648c5de6266a6447ee779474700b86c Mon Sep 17 00:00:00 2001 From: Ryuji Watanabe Date: Wed, 4 Dec 2019 18:43:24 +0900 Subject: [PATCH 23/25] =?UTF-8?q?fit=E5=86=85=E3=81=A7=E6=BD=9C=E5=9C=A8?= =?UTF-8?q?=E5=A4=89=E6=95=B0=E6=8E=A8=E5=AE=9A=E3=81=AE=E3=83=A1=E3=83=88?= =?UTF-8?q?=E3=83=AA=E3=83=83=E3=82=AF=E6=8C=87=E5=AE=9A=E3=81=AE=E6=9D=A1?= =?UTF-8?q?=E4=BB=B6=E6=96=87=E3=81=AEis=E3=82=92=3D=3D=E3=81=AB=E4=BF=AE?= =?UTF-8?q?=E6=AD=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- libs/models/som.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libs/models/som.py b/libs/models/som.py index 9bbd211..68a8ed5 100644 --- a/libs/models/som.py +++ b/libs/models/som.py @@ -93,14 +93,14 @@ def fit(self, nb_epoch=100, verbose=True): self.Y = R @ self.X # 学習量を重みとして観測データの平均を取り参照ベクトルとする # 競合過程 - if self.metric is "sqeuclidean": # ユークリッド距離を使った勝者決定 + if self.metric == "sqeuclidean": # ユークリッド距離を使った勝者決定 # 勝者ノードの計算 Dist = dist.cdist(self.X, self.Y) # NxKの距離行列を計算 bmus = Dist.argmin(axis=1) # Nx1の勝者ノード番号をまとめた列ベクトルを計算 # argmin(axis=1)を用いて各行で最小値を探しそのインデックスを返す self.Z = self.Zeta[bmus, :] # 勝者ノード番号から勝者ノードを求める - elif self.metric is "KLdivergence": # KL情報量を使った勝者決定 + elif self.metric == "KLdivergence": # KL情報量を使った勝者決定 Dist = np.sum(self.X[:, np.newaxis, :] * np.log(self.Y)[np.newaxis, :, :], axis=2) # N*K行列 # 勝者番号の決定 bmus = np.argmax(Dist, axis=1) From a316e87feefb91be05d51a378556250f7a0dec70 Mon Sep 17 00:00:00 2001 From: Ryuji Watanabe Date: Wed, 4 Dec 2019 19:37:28 +0900 Subject: [PATCH 24/25] =?UTF-8?q?=E3=83=86=E3=82=B9=E3=83=88=E3=83=A1?= =?UTF-8?q?=E3=82=BD=E3=83=83=E3=83=89=E3=82=92=E4=BD=9C=E6=88=90=E3=81=97?= =?UTF-8?q?fit=E5=86=85=E3=81=AE=E6=BD=9C=E5=9C=A8=E5=A4=89=E6=95=B0?= =?UTF-8?q?=E6=8E=A8=E5=AE=9A=E3=81=A8transform=E5=86=85=E3=81=AE=E3=81=9D?= =?UTF-8?q?=E3=82=8C=E3=81=AE=E7=B5=90=E6=9E=9C=E3=81=8C=E4=B8=80=E8=87=B4?= =?UTF-8?q?=E3=81=99=E3=82=8B=E3=81=93=E3=81=A8=E3=82=92=E7=A2=BA=E8=AA=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/som/test_som.py | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/tests/som/test_som.py b/tests/som/test_som.py index 6c08da7..180bbdb 100644 --- a/tests/som/test_som.py +++ b/tests/som/test_som.py @@ -96,6 +96,37 @@ def test_init_pca(self): np.testing.assert_allclose(SOMResult, EVDResult/np.sqrt(Lambda.real.max()), rtol=1e-06) + def test_transform(self): + n_distributon = 100 + n_category = 20 + + # create categorical distribution + X_categorical = np.random.rand(n_distributon,n_category) + X_categorical = X_categorical / X_categorical.sum(axis=1)[:,None] + + np.testing.assert_allclose(X_categorical.sum(axis=1),np.ones(X_categorical.shape[0])) + + # fit + som_categorical = SOM(X_categorical,latent_dim=2,resolution=50,sigma_max=2.0,sigma_min=0.3,tau=50,metric="KLdivergence") + som_categorical.fit(50) + Z_fit = som_categorical.Z + Z_transformed = som_categorical.transform(X_categorical) + + np.testing.assert_allclose(Z_transformed,Z_fit) + + # confirm to multi variable dataset + n_samples = 100 + n_features = 20 + + X_multi_variate = np.random.normal(0.0,1.0,(n_samples,n_features)) + + # fit + som_multi_variate = SOM(X_multi_variate,latent_dim=2,resolution=50,sigma_max=2.0,sigma_min=0.2,tau=50,metric="sqeuclidean") + som_multi_variate.fit(10) + Z_fit = som_multi_variate.Z + Z_transformed = som_multi_variate.transform(X_multi_variate) + + np.testing.assert_allclose(Z_fit,Z_transformed) From 2cb5bf56ada34db4ef2306992f15e999b9e92a87 Mon Sep 17 00:00:00 2001 From: Ryuji Watanabe Date: Wed, 4 Dec 2019 20:19:22 +0900 Subject: [PATCH 25/25] =?UTF-8?q?transform=E3=81=AE=E3=83=86=E3=82=B9?= =?UTF-8?q?=E3=83=88=E3=83=A1=E3=82=BD=E3=83=83=E3=83=89=E3=82=92=E4=BD=9C?= =?UTF-8?q?=E6=88=90=E3=80=81pass=E3=82=92=E7=A2=BA=E8=AA=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/plus_TSOM/allclose_plusTSOM.py | 57 ++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/tests/plus_TSOM/allclose_plusTSOM.py b/tests/plus_TSOM/allclose_plusTSOM.py index d18c58a..c61b9d5 100644 --- a/tests/plus_TSOM/allclose_plusTSOM.py +++ b/tests/plus_TSOM/allclose_plusTSOM.py @@ -136,5 +136,62 @@ def test_matching_index_member_as_list_or_bag(self): np.testing.assert_allclose(tsom_plus_som_input_list.som.history['y'], tsom_plus_som_input_bag.som.history['y']) np.testing.assert_allclose(tsom_plus_som_input_list.som.history['z'], tsom_plus_som_input_bag.som.history['z']) + def test_transform(self): + # prepare dataset + seed = 100 + np.random.seed(seed) + n_members = 1000 + n_groups = 10 # group数 + n_features = 3 # 各メンバーの特徴数 + n_members_per_group = np.random.randint(1,30,n_groups) # 各グループにメンバーに何人いるのか + member_features,index_members_of_group = self.create_artficial_data(n_members, + n_features, + n_groups, + n_members_per_group) + bag_of_members = self._transform_list_to_bag(index_members_of_group,num_members=n_members) + + # prepare parameters + Z1 = np.random.rand(n_members, 2) * 2.0 - 1.0 + Z2 = np.random.rand(n_features, 2) * 2.0 - 1.0 + init_TSOM = [Z1, Z2] + init_SOM = np.random.rand(n_groups, 2) * 2.0 - 1.0 + params_tsom = {'latent_dim': [2, 2], + 'resolution': [10, 10], + 'SIGMA_MAX': [1.0, 1.0], + 'SIGMA_MIN': [0.1, 0.1], + 'TAU': [50, 50], + 'init': init_TSOM} + params_som = {'latent_dim': 2, + 'resolution': 10, + 'sigma_max': 2.0, + 'sigma_min': 0.5, + 'tau': 50, + 'init': init_SOM} + tsom_epoch_num = 50 + kernel_width = 0.3 + som_epoch_num = 50 + + # fit + htsom_bag = TSOMPlusSOM(member_features=member_features, + group_features=bag_of_members, + params_tsom=params_tsom, + params_som=params_som) + htsom_bag.fit(tsom_epoch_num,kernel_width,som_epoch_num) + Z_fit_bag = htsom_bag.som.Z + Z_transformed_bag = htsom_bag.transform(group_features=bag_of_members,kernel_width=kernel_width) + + htsom_list = TSOMPlusSOM(member_features=member_features, + group_features=index_members_of_group, + params_tsom=params_tsom, + params_som=params_som) + htsom_list.fit(tsom_epoch_num,kernel_width,som_epoch_num) + Z_fit_list = htsom_list.som.Z + Z_transformed_list = htsom_list.transform(group_features=index_members_of_group,kernel_width=kernel_width) + + # compare estimated latent variables in fit and one in transform + np.testing.assert_allclose(Z_fit_bag,Z_transformed_bag) + np.testing.assert_allclose(Z_fit_list,Z_transformed_list) + + if __name__ == "__main__": unittest.main()