Add a transform function to TSOMPlusSOM #108

Open
wants to merge 29 commits into base: master

Commits (29)
cc6e323
Add an if branch inside _fit_KDE to handle group members given in a bag-of-words-style representation
ae14watanabe Nov 12, 2019
e311e08
Add handling for group members expressed in a bag-of-words-style representation
ae14watanabe Nov 12, 2019
dce2667
Merge branch 'make_TSOM_plus_SOM' into adapt_bag_of_members
ae14watanabe Nov 16, 2019
e6863b3
Reformat
ae14watanabe Nov 16, 2019
7805edd
Test method in progress
ae14watanabe Nov 16, 2019
e83648d
Wrote a test and ran it, but got an error
ae14watanabe Nov 18, 2019
5324df1
Merge branch 'make_TSOM_plus_SOM' into adapt_bag_of_members
ae14watanabe Nov 18, 2019
b82e7fe
first commit
ae14watanabe Nov 19, 2019
7c1fc04
Port over code equivalent to the UKR implementation I had written previously
ae14watanabe Nov 19, 2019
e3dcca5
Remove leftover commented-out lines; refactor variable names
ae14watanabe Nov 19, 2019
9853f7b
Do not keep history by default; make it selectable in the constructor
ae14watanabe Nov 19, 2019
2e9efbd
Rename the method that computes the history of the mapping
ae14watanabe Nov 20, 2019
dba3fe9
Add tutorial code visualizing the learning process of the UKR mapping in observation space
ae14watanabe Nov 20, 2019
ea5ed3e
Update the tutorial code to display a comparison with SOM
ae14watanabe Nov 20, 2019
1069ccb
Divide the error term of the objective function by the number of samples
ae14watanabe Nov 21, 2019
27ad199
Minor fixes to the tutorial code
ae14watanabe Nov 23, 2019
fa22858
Also display the latent space in the tutorial code
ae14watanabe Nov 23, 2019
9ea5a2f
Merge remote-tracking branch 'origin/74_add_ukr' into adapt_bag_of_me…
ae14watanabe Dec 3, 2019
89109e2
Refactor variables and add comments
ae14watanabe Dec 3, 2019
9f0f30f
Fix the test code, run it, and confirm it passes
ae14watanabe Dec 3, 2019
a70776f
Generalize the kernel density estimation so it can be used outside fit; run the tests and confirm they pass
ae14watanabe Dec 4, 2019
60d5630
Reformat and create the transform method; it works provided the SOM side has a transform method
ae14watanabe Dec 4, 2019
1b0e1d3
first commit
ae14watanabe Dec 4, 2019
683d3cd
Create a transform method that estimates latent variables for X
ae14watanabe Dec 4, 2019
dfa153c
Reformat
ae14watanabe Dec 4, 2019
52c6e83
Fix the metric-selection conditional in fit's latent-variable estimation, replacing 'is' with '=='
ae14watanabe Dec 4, 2019
a316e87
Create a test method and confirm that the latent-variable estimates inside fit match those from transform
ae14watanabe Dec 4, 2019
b51ed54
Merge remote-tracking branch 'origin/109_add_transform_in_som' into 1…
ae14watanabe Dec 4, 2019
2cb5bf5
Add a test method for transform and confirm it passes
ae14watanabe Dec 4, 2019
41 changes: 25 additions & 16 deletions libs/models/som.py
@@ -6,7 +6,7 @@


class SOM:
def __init__(self, X, latent_dim, resolution, sigma_max, sigma_min, tau, init='random',metric="sqeuclidean"):
def __init__(self, X, latent_dim, resolution, sigma_max, sigma_min, tau, init='random', metric="sqeuclidean"):
self.X = X
self.N = self.X.shape[0]

@@ -28,22 +28,23 @@ def __init__(self, X, latent_dim, resolution, sigma_max, sigma_min, tau, init='r
elif latent_dim == 2:
if isinstance(init, str) and init == 'PCA':
comp1, comp2 = pca.singular_values_[0], pca.singular_values_[1]
zeta = np.meshgrid(np.linspace(-1, 1, resolution), np.linspace(-comp2/comp1, comp2/comp1, resolution))
zeta = np.meshgrid(np.linspace(-1, 1, resolution),
np.linspace(-comp2 / comp1, comp2 / comp1, resolution))
else:
zeta = np.meshgrid(np.linspace(-1, 1, resolution), np.linspace(-1, 1, resolution))
self.Zeta = np.dstack(zeta).reshape(resolution**2, latent_dim)
self.Zeta = np.dstack(zeta).reshape(resolution ** 2, latent_dim)
else:
raise ValueError("invalid latent dimension: {}".format(latent_dim))

self.K = resolution**self.L
self.K = resolution ** self.L

if isinstance(init, str) and init == 'random':
self.Z = np.random.rand(self.N, latent_dim) * 2.0 - 1.0
elif isinstance(init, str) and init == 'random_bmu':
init_bmus = np.random.randint(0, self.Zeta.shape[0] - 1, self.N)
self.Z = self.Zeta[init_bmus,:]
self.Z = self.Zeta[init_bmus, :]
elif isinstance(init, str) and init == 'PCA':
self.Z = pca.transform(X)/comp1
self.Z = pca.transform(X) / comp1
elif isinstance(init, np.ndarray) and init.dtype == int:
init_bmus = init.copy()
self.Z = self.Zeta[init_bmus, :]
@@ -52,9 +53,9 @@ def __init__(self, X, latent_dim, resolution, sigma_max, sigma_min, tau, init='r
else:
raise ValueError("invalid init: {}".format(init))

#metric handling
# metric handling
if metric == "sqeuclidean":
self.metric="sqeuclidean"
self.metric = "sqeuclidean"

elif metric == "KLdivergence":
self.metric = "KLdivergence"
@@ -78,28 +79,28 @@ def fit(self, nb_epoch=100, verbose=True):
# cooperative process
# compute the learning amounts
# sigma = self.sigma_min + (self.sigma_max - self.sigma_min) * np.exp(-epoch / self.tau) # set the neighborhood radius
sigma = max(self.sigma_min, self.sigma_max * ( 1 - (epoch / self.tau) ) )# set the neighborhood radius
sigma = max(self.sigma_min, self.sigma_max * (1 - (epoch / self.tau)))  # set the neighborhood radius
Dist = dist.cdist(self.Zeta, self.Z, 'sqeuclidean')
# compute the KxN distance matrix
# matrix of the distances between every node and every winner node
H = np.exp(-Dist / (2 * sigma * sigma)) # compute the KxN learning-amount matrix
H = np.exp(-Dist / (2 * sigma * sigma))  # compute the KxN learning-amount matrix

# adaptation process
# update the reference vectors
G = np.sum(H, axis=1)[:, np.newaxis] # Kx1 column vector holding the total learning amount received by each node
Ginv = np.reciprocal(G) # take the reciprocal of every element of G
R = H * Ginv # normalize so that each node's learning amounts sum to 1
self.Y = R @ self.X # reference vectors are the learning-amount-weighted means of the observed data
G = np.sum(H, axis=1)[:, np.newaxis]  # Kx1 column vector holding the total learning amount received by each node
Ginv = np.reciprocal(G)  # take the reciprocal of every element of G
R = H * Ginv  # normalize so that each node's learning amounts sum to 1
self.Y = R @ self.X  # reference vectors are the learning-amount-weighted means of the observed data

# competitive process
if self.metric is "sqeuclidean": # winner selection by Euclidean distance
if self.metric == "sqeuclidean":  # winner selection by Euclidean distance
# compute the winner nodes
Dist = dist.cdist(self.X, self.Y) # compute the NxK distance matrix
bmus = Dist.argmin(axis=1)
# Nx1 column vector of winner node indices
# argmin(axis=1) finds the minimum of each row and returns its index
self.Z = self.Zeta[bmus, :] # look up the winner nodes from their indices
elif self.metric is "KLdivergence": # winner selection by KL divergence
elif self.metric == "KLdivergence":  # winner selection by KL divergence
Dist = np.sum(self.X[:, np.newaxis, :] * np.log(self.Y)[np.newaxis, :, :], axis=2) # NxK matrix
# determine the winner indices
bmus = np.argmax(Dist, axis=1)
@@ -110,3 +111,11 @@
self.history['z'][epoch] = self.Z
self.history['y'][epoch] = self.Y
self.history['sigma'][epoch] = sigma

def transform(self, X):
if self.metric == "sqeuclidean":
distance = dist.cdist(X, self.Y, self.metric)
return self.Zeta[distance.argmin(axis=1)]
elif self.metric == "KLdivergence":
divergence = -np.sum(X[:, np.newaxis, :] * np.log(self.Y[np.newaxis, :, :]), axis=2)  # NxK
return self.Zeta[divergence.argmin(axis=1)]
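
For reference, a minimal usage sketch of the new SOM.transform; the toy data and hyperparameter values below are assumptions for illustration, not values taken from this repository:

import numpy as np
from libs.models.som import SOM

# Hypothetical toy data: 100 training samples and 10 held-out samples in 3-D.
np.random.seed(0)
X_train = np.random.rand(100, 3)
X_new = np.random.rand(10, 3)

som = SOM(X_train, latent_dim=2, resolution=10,
          sigma_max=2.0, sigma_min=0.2, tau=50, init='random')
som.fit(nb_epoch=100, verbose=False)

# Each held-out sample is assigned the latent coordinate of its best-matching unit.
Z_new = som.transform(X_new)
print(Z_new.shape)  # (10, 2)

With metric="KLdivergence", the same call instead selects winners by the cross-entropy term against the reference vectors, matching the winner rule used inside fit.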
44 changes: 31 additions & 13 deletions libs/models/tsom_plus_som.py
@@ -5,13 +5,13 @@


class TSOMPlusSOM:
def __init__(self, member_features, index_members_of_group, params_tsom, params_som):
def __init__(self, member_features, group_features, params_tsom, params_som):
self.params_tsom = params_tsom
self.params_som = params_som

self.params_tsom['X'] = member_features
self.index_members_of_group = index_members_of_group # used to determine the number of groups
self.group_num = len(self.index_members_of_group)
self.group_features = group_features # used to determine the number of groups
self.group_num = len(self.group_features)

def fit(self, tsom_epoch_num, kernel_width, som_epoch_num):
self._fit_1st_TSOM(tsom_epoch_num)
@@ -23,19 +23,37 @@ def _fit_1st_TSOM(self, tsom_epoch_num):
self.tsom.fit(tsom_epoch_num)

def _fit_KDE(self, kernel_width): # build probability distributions by KDE from the learned latent space
prob_data = np.zeros((self.group_num, self.tsom.K1)) # num_groups x num_nodes
# apply KDE per group
for i in range(self.group_num):
Dist = dist.cdist(self.tsom.Zeta1, self.tsom.Z1[self.index_members_of_group[i], :],
'sqeuclidean') # compute the KxN distance matrix
H = np.exp(-Dist / (2 * kernel_width * kernel_width)) # compute the KxN learning-amount matrix
prob = np.sum(H, axis=1)
prob_sum = np.sum(prob)
prob = prob / prob_sum
prob_data[i, :] = prob
prob_data = self._calculate_kde(group_features=self.group_features, kernel_width=kernel_width)
self.params_som['X'] = prob_data
self.params_som['metric'] = "KLdivergence"

def _calculate_kde(self, group_features, kernel_width):
# apply KDE per group
if isinstance(group_features, np.ndarray) and group_features.ndim == 2:
# handling when group_features is given as bag of members
distance = dist.cdist(self.tsom.Zeta1, self.tsom.Z1, 'sqeuclidean') # K1 x num_members
H = np.exp(-0.5 * distance / (kernel_width * kernel_width)) # KxN
prob_data = group_features @ H.T # num_group x K1
prob_data = prob_data / prob_data.sum(axis=1)[:, None]
else:
# handling when group_features is given as a list of lists or a list of arrays
prob_data = np.zeros((self.group_num, self.tsom.K1)) # num_groups x num_nodes
for i, one_group_features in enumerate(group_features):
Dist = dist.cdist(self.tsom.Zeta1,
self.tsom.Z1[one_group_features, :],
'sqeuclidean') # compute the KxN distance matrix
H = np.exp(-Dist / (2 * kernel_width * kernel_width)) # compute the KxN kernel values
prob = np.sum(H, axis=1)
prob_sum = np.sum(prob)
prob = prob / prob_sum
prob_data[i, :] = prob
return prob_data

def _fit_2nd_SOM(self, som_epoch_num): # fit the upper-level SOM
self.som = SOM(**self.params_som)
self.som.fit(som_epoch_num)

def transform(self, group_features, kernel_width):
group_density = self._calculate_kde(group_features=group_features,
kernel_width=kernel_width)
return self.som.transform(X=group_density)
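
A standalone sketch of why the two branches in _calculate_kde agree: the bag-of-members matrix product computes the same per-group kernel sums as looping over index lists. Zeta1 and Z1 below are random stand-ins for self.tsom.Zeta1 and self.tsom.Z1, and all sizes are hypothetical:

import numpy as np
import scipy.spatial.distance as dist

np.random.seed(0)
K1, n_members, L = 25, 8, 2        # nodes, members, latent dim
Zeta1 = np.random.rand(K1, L)      # stand-in for self.tsom.Zeta1
Z1 = np.random.rand(n_members, L)  # stand-in for self.tsom.Z1
kernel_width = 0.5

groups_as_indices = [[0, 1, 2], [3, 4, 5, 6, 7]]
# Equivalent bag-of-members matrix: entry (g, n) counts member n in group g.
bag = np.zeros((2, n_members))
for g, members in enumerate(groups_as_indices):
    bag[g, members] = 1

H = np.exp(-0.5 * dist.cdist(Zeta1, Z1, 'sqeuclidean') / kernel_width ** 2)  # K1 x N

# Index-list path: sum the kernels of each group's members, then normalize.
p_list = np.stack([H[:, m].sum(axis=1) / H[:, m].sum() for m in groups_as_indices])
# Bag-of-members path: one matrix product, then per-row normalization.
p_bag = bag @ H.T
p_bag /= p_bag.sum(axis=1, keepdims=True)

print(np.allclose(p_list, p_bag))  # True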
201 changes: 201 additions & 0 deletions libs/models/unsupervised_kernel_regression.py
@@ -0,0 +1,201 @@
import numpy as np
import scipy.spatial.distance as dist
from tqdm import tqdm


class UnsupervisedKernelRegression(object):
def __init__(self, X, n_components, bandwidth_gaussian_kernel=1.0,
is_compact=False, lambda_=1.0,
init='random', is_loocv=False, is_save_history=False):
self.X = X.copy()
self.n_samples = X.shape[0]
self.n_dimensions = X.shape[1]
self.n_components = n_components
self.bandwidth_gaussian_kernel = bandwidth_gaussian_kernel
self.precision = 1.0 / (bandwidth_gaussian_kernel * bandwidth_gaussian_kernel)
self.is_compact = is_compact
self.is_loocv = is_loocv
self.is_save_history = is_save_history

self.Z = None
if isinstance(init, str) and init == 'random':
self.Z = np.random.normal(0, 1.0, (self.n_samples, self.n_components)) * bandwidth_gaussian_kernel * 0.5
elif isinstance(init, np.ndarray) and init.shape == (self.n_samples, self.n_components):
self.Z = init.copy()
else:
raise ValueError("invalid init: {}".format(init))

self.lambda_ = lambda_



self._done_fit = False



def fit(self, nb_epoch=100, verbose=True, eta=0.5, expand_epoch=None):

K = self.X @ self.X.T

self.nb_epoch = nb_epoch

if self.is_save_history:
self.history = {}
self.history['z'] = np.zeros((nb_epoch, self.n_samples, self.n_components))
self.history['y'] = np.zeros((nb_epoch, self.n_samples, self.n_dimensions))
self.history['zvar'] = np.zeros((nb_epoch, self.n_components))
self.history['obj_func'] = np.zeros(nb_epoch)


if verbose:
bar = tqdm(range(nb_epoch))
else:
bar = range(nb_epoch)


for epoch in bar:
Delta = self.Z[:, None, :] - self.Z[None, :, :]
DistZ = np.sum(np.square(Delta), axis=2)
H = np.exp(-0.5 * self.precision * DistZ)
if self.is_loocv:
H -= np.identity(H.shape[0])

G = np.sum(H, axis=1)[:, None]
GInv = 1 / G
R = H * GInv

Y = R @ self.X
DeltaYX = Y[:,None,:] - self.X[None, :, :]
Error = Y - self.X
obj_func = np.sum(np.square(Error)) / self.n_samples + self.lambda_ * np.sum(np.square(self.Z))

A = self.precision * R * np.einsum('nd,nid->ni', Y - self.X, DeltaYX)
dFdZ = -2.0 * np.sum((A + A.T)[:, :, None] * Delta, axis=1) / self.n_samples

dFdZ -= self.lambda_ * self.Z

self.Z += eta * dFdZ
if self.is_compact:
self.Z = np.clip(self.Z,-1.0,1.0)
else:
self.Z -= self.Z.mean(axis=0)


if self.is_save_history:
self.history['z'][epoch] = self.Z
self.history['y'][epoch] = Y
self.history['zvar'][epoch] = np.mean(np.square(self.Z - self.Z.mean(axis=0)),axis=0)
self.history['obj_func'][epoch] = obj_func



self._done_fit = True
if self.is_save_history:
    return self.history

def calculation_history_of_mapping(self, resolution, size='auto'):
"""
:param resolution:
:param size:
:return:
"""
if not self._done_fit:
raise ValueError("fit is not done")

self.resolution = resolution
Zeta = create_zeta(-1, 1, self.n_components, resolution)
M = Zeta.shape[0]

self.history['f'] = np.zeros((self.nb_epoch, M, self.n_dimensions))

for epoch in range(self.nb_epoch):
Z = self.history['z'][epoch]
if size == 'auto':
Zeta = create_zeta(Z.min(), Z.max(), self.n_components, resolution)
else:
Zeta = create_zeta(size.min(), size.max(), self.n_components, resolution)

Dist = dist.cdist(Zeta, Z, 'sqeuclidean')

H = np.exp(-0.5 * self.precision * Dist)
G = np.sum(H, axis=1)[:, None]
GInv = np.reciprocal(G)
R = H * GInv

Y = np.dot(R, self.X)

self.history['f'][epoch] = Y

def transform(self, Xnew, nb_epoch_trans=100, eta_trans=0.5, verbose=True, constrained=True):
# calculate latent variables of new data using gradient descent
# objective function is square error E = ||f(z)-x||^2

if not self._done_fit:
raise ValueError("fit is not done")

Nnew = Xnew.shape[0]

# initialize Znew, using latent variables of observed data
Dist_Xnew_X = dist.cdist(Xnew, self.X)
BMS = np.argmin(Dist_Xnew_X, axis=1) # calculate Best Matching Sample
Znew = self.Z[BMS,:] # initialize Znew

if verbose:
bar = tqdm(range(nb_epoch_trans))
else:
bar = range(nb_epoch_trans)

for epoch in bar:
# calculate gradient
Delta = self.Z[None,:,:] - Znew[:,None,:] # shape = (Nnew,N,L)
Dist_Znew_Z = dist.cdist(Znew,self.Z,"sqeuclidean") # shape = (Nnew,N)
H = np.exp(-0.5 * self.precision * Dist_Znew_Z) # shape = (Nnew,N)
G = np.sum(H,axis=1)[:,None] # shape = (Nnew,1)
Ginv = np.reciprocal(G) # shape = (Nnew,1)
R = H * Ginv # shape = (Nnew,N)
F = R @ self.X # shape = (Nnew,D)

Delta_bar = np.einsum("kn,knl->kl",R,Delta) # (Nnew,N)times(Nnew,N,L)=(Nnew,L)
# Delta_bar = np.sum(R[:,:,None] * Delta, axis=1) # same calculate
dRdZ = self.precision * R[:, :, None] * (Delta - Delta_bar[:, None, :]) # shape = (Nnew,N,L)

dFdZ = np.einsum("nd,knl->kdl",self.X,dRdZ) # shape = (Nnew,D,L)
# dFdZ = np.sum(self.X[None,:,:,None]*dRdZ[:,:,None,:],axis=1) # same calculate
dEdZ = 2.0 * np.einsum("kd,kdl->kl",F-Xnew,dFdZ) # shape (Nnew, L)
# update latent variables
Znew -= eta_trans * dEdZ
if self.is_compact:
Znew = np.clip(Znew,-1.0,1.0)
if constrained:
Znew = np.clip(Znew, self.Z.min(axis=0), self.Z.max(axis=0))

return Znew

def inverse_transform(self, Znew):
if not self._done_fit:
raise ValueError("fit is not done")
if Znew.shape[1] != self.n_components:
raise ValueError("Znew dimension must be {}".format(self.n_components))

Dist_Znew_Z = dist.cdist(Znew,self.Z,"sqeuclidean") # shape = (Nnew,N)
H = np.exp(-0.5 * self.precision * Dist_Znew_Z) # shape = (Nnew,N)
G = np.sum(H,axis=1)[:,None] # shape = (Nnew,1)
Ginv = np.reciprocal(G) # shape = (Nnew,1)
R = H * Ginv # shape = (Nnew,N)
F = R @ self.X # shape = (Nnew,D)

return F




def create_zeta(zeta_min, zeta_max, latent_dim, resolution):
mesh1d, step = np.linspace(zeta_min, zeta_max, resolution, endpoint=False, retstep=True)
mesh1d += step / 2.0
if latent_dim == 1:
Zeta = mesh1d
elif latent_dim == 2:
Zeta = np.meshgrid(mesh1d, mesh1d)
else:
raise ValueError("invalid latent dim {}".format(latent_dim))
Zeta = np.dstack(Zeta).reshape(-1, latent_dim)
return Zeta
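
Finally, a minimal end-to-end sketch of the new UKR class covering fit, transform, and inverse_transform; the toy manifold and hyperparameters are assumptions for illustration:

import numpy as np
from libs.models.unsupervised_kernel_regression import UnsupervisedKernelRegression

# Hypothetical toy data: a noisy 1-D curve embedded in 3-D.
np.random.seed(0)
t = np.random.uniform(-1, 1, 150)
X = np.stack([t, t ** 2, t ** 3], axis=1) + 0.05 * np.random.randn(150, 3)

ukr = UnsupervisedKernelRegression(X, n_components=1,
                                   bandwidth_gaussian_kernel=0.2,
                                   lambda_=1e-4, is_save_history=True)
history = ukr.fit(nb_epoch=300, eta=0.5, verbose=False)

# Map held-out points into the learned latent space and back out again.
X_new = np.stack([t[:10], t[:10] ** 2, t[:10] ** 3], axis=1)
Z_new = ukr.transform(X_new, nb_epoch_trans=100, verbose=False)
X_rec = ukr.inverse_transform(Z_new)
print(Z_new.shape, X_rec.shape)  # (10, 1) (10, 3)

Note that transform initializes each new point at the latent position of its best-matching training sample and then runs gradient descent on the squared reconstruction error, so nb_epoch_trans and eta_trans play the same roles as nb_epoch and eta in fit.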