Skip to content

Commit

Permalink
Merge pull request #101 from furukawa-laboratory/#6_make_TSOM_with_Missing_value
Browse files Browse the repository at this point in the history

#6 make tsom with missing value
  • Loading branch information
takuro-Ishida authored Jun 6, 2020
2 parents 1abb81a + 2ee4ada commit 2aaa356
Show file tree
Hide file tree
Showing 5 changed files with 456 additions and 18 deletions.
64 changes: 58 additions & 6 deletions libs/datasets/artificial/kura_tsom.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import numpy as np
import random
import itertools


def load_kura_tsom(xsamples, ysamples, retz=False):
def load_kura_tsom(xsamples, ysamples, missing_rate=None,retz=False):
z1 = np.linspace(-1, 1, xsamples)
z2 = np.linspace(-1, 1, ysamples)

Expand All @@ -13,10 +14,56 @@ def load_kura_tsom(xsamples, ysamples, retz=False):
x = np.concatenate((x1[:, :, np.newaxis], x2[:, :, np.newaxis], x3[:, :, np.newaxis]), axis=2)
truez = np.concatenate((z1_repeated[:, :, np.newaxis], z2_repeated[:, :, np.newaxis]), axis=2)

if retz:
return x, truez
#欠損値を入れない場合(missing_rateが0か特に指定していない場合はそのまま返す)
if missing_rate == 0 or missing_rate == None:
if retz:
return x, truez
else:
return x

#欠損値を入れる場合
else:
return x
# データの欠損数を決定する
if 0 < missing_rate < 1: #全体のサンプル数から率から欠損数を計算する
missing_num = missing_rate
all_samples = xsamples * ysamples
missing_rate = int(all_samples * missing_num)
else: #missing_rateが0~1でなかったら場合はerror文を返す
raise ValueError("invalid missing_rate: {}\nmissing_rate is 0<=missing_rate<1.".format(missing_rate))

#どのデータを欠損させるかを決定する

# list1とlist2の全組み合わせの配列を作成して、それをシャッフルして0番目からmissing_num個だけ欠損させる
missing_list1 = np.arange(xsamples)
missing_list2 = np.arange(ysamples)
p = list(itertools.product(missing_list1, missing_list2)) # List数はN1*N2
random.shuffle(p) # listをshuffle

Gamma = np.ones((xsamples, ysamples))#Gammaはどのデータが欠損かを表す

for n in np.arange(missing_rate): # 欠損させたいデータ数分、Gammaの要素を欠損させる
if Gamma[p[n]] == 1:
Gamma[p[n]] = 0
elif Gamma[p[n]] == 0:#同じ場所を欠損させようとしたらエラーを吐く
raise ValueError("invalid Gamma: {}\n".format(Gamma))

#true_zを欠損させる
for n in np.arange(missing_rate):
truez[p[n][0],p[n][1],:]=np.nan

#Gammaに基づいてデータ行列を欠損させる
# 欠損値をNan埋めする
for i in np.arange(xsamples):
for j in np.arange(ysamples):
if Gamma[i, j] == 0:
x[i, j, :] = np.nan

if retz:
return x,truez,Gamma
else:
return x,Gamma




if __name__ == '__main__':
Expand All @@ -26,7 +73,11 @@ def load_kura_tsom(xsamples, ysamples, retz=False):
xsamples = 10
ysamples = 10

x, truez = load_kura_tsom(10, 10, retz=True)
#欠損なしver
# x, truez = load_kura_tsom(xsamples, ysamples, retz=True)

# 欠損ありver
x, truez, Gamma = load_kura_tsom(xsamples, ysamples, retz=True,missing_rate=0.7)

fig = plt.figure(figsize=[10, 5])
ax_x = fig.add_subplot(1, 2, 1, projection='3d')
Expand All @@ -36,3 +87,4 @@ def load_kura_tsom(xsamples, ysamples, retz=False):
ax_x.set_title('Generated three-dimensional data')
ax_truez.set_title('True two-dimensional latent variable')
plt.show()

103 changes: 91 additions & 12 deletions libs/models/tsom.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
import numpy as np
from scipy.spatial import distance
from tqdm import tqdm

from ..tools.create_zeta import create_zeta

class TSOM2():
def __init__(self, X, latent_dim, resolution, SIGMA_MAX, SIGMA_MIN, TAU, init='random'):
def __init__(self, X, latent_dim, resolution, SIGMA_MAX, SIGMA_MIN, TAU, model=None, gamma=None, init='random'):

# 入力データXについて
if X.ndim == 2:
Expand All @@ -21,6 +22,42 @@ def __init__(self, X, latent_dim, resolution, SIGMA_MAX, SIGMA_MIN, TAU, init='r
else:
raise ValueError("invalid X: {}\nX must be 2d or 3d ndarray".format(X))

if gamma is not None: # gammaが指定されている時
# 欠損値アルゴリズム処理
if X.shape != gamma.shape:
raise ValueError("invalid gamma: {}\ndata size and gamma size is not match. ".format(gamma))

elif X.shape == gamma.shape:
if np.any(np.isnan(self.X)) == 1: # gamma指定してデータに欠損がある場合
temp_gamma = np.where(np.isnan(self.X) == 1, 0, 1) # データに基づいてgammaを作る
temp_is_missing = np.allclose(temp_gamma, gamma)
self.X[np.isnan(self.X)] = 0 # 欠損値の部分を0で置換
if temp_is_missing is True: # データの欠損しているところとgammaの0の値が一致する時
self.gamma = gamma
self.is_missing = 1
else:
raise ValueError("invalid gamma: {}\ndata size and gamma size is not match. ".format(gamma))
elif np.any(np.isnan(self.X)) == 0: # 観測データの一部を無視したい時
self.gamma = gamma
self.is_missing = 1
elif gamma is None:#データXに欠損がある場合はそれに基づいてgammaを作成する
self.is_missing=np.any(np.isnan(self.X))# 欠損値があるかを判定.欠損があれば1,欠損がなければ0
# 欠損値がある場合
if self.is_missing == 1:
gamma = np.where(np.isnan(self.X) == 1, 0, 1)#nan格納されているindexを返す
self.gamma = gamma
self.X[np.isnan(self.X)] = 0#欠損値の部分を0で置換
elif self.is_missing==0:#欠損値がない場合はgammaは作らない
pass

# 1次モデル型と直接型を選択する引数
if model=="direct":
self.model = "direct"
elif model==None or model=="indirect":
self.model="indirect"
else:
raise ValueError("invalid model: {}\nmodel is only direct or indirect. ".format(model))

# 最大近傍半径(SIGMAX)の設定
if type(SIGMA_MAX) is float:
self.SIGMA1_MAX = SIGMA_MAX
Expand Down Expand Up @@ -62,7 +99,7 @@ def __init__(self, X, latent_dim, resolution, SIGMA_MAX, SIGMA_MIN, TAU, init='r
raise ValueError("invalid resolution: {}".format(resolution))

# 潜在空間の設定
if type(latent_dim) is int: # latent_dimがintであればどちらのモードも潜在空間の次元は同じ
if type(latent_dim) is int:
self.latent_dim1 = latent_dim
self.latent_dim2 = latent_dim

Expand All @@ -75,7 +112,6 @@ def __init__(self, X, latent_dim, resolution, SIGMA_MAX, SIGMA_MIN, TAU, init='r
self.Zeta1 = create_zeta(-1.0, 1.0, latent_dim=self.latent_dim1, resolution=resolution1, include_min_max=True)
self.Zeta2 = create_zeta(-1.0, 1.0, latent_dim=self.latent_dim2, resolution=resolution2, include_min_max=True)

# K1とK2は潜在空間の設定が終わった後がいいよね
self.K1 = self.Zeta1.shape[0]
self.K2 = self.Zeta2.shape[0]

Expand Down Expand Up @@ -121,19 +157,62 @@ def fit(self, nb_epoch=200):
H2 = np.exp(-distance2 / (2 * pow(sigma2, 2))) # かっこに気を付ける
G2 = np.sum(H2, axis=1) # Gは行ごとの和をとったベクトル
R2 = (H2.T / G2).T # 行列の計算なので.Tで転置を行う
# 1次モデル,2次モデルの決定
self.U = np.einsum('lj,ijd->ild', R2, self.X)
self.V = np.einsum('ki,ijd->kjd', R1, self.X)
self.Y = np.einsum('ki,lj,ijd->kld', R1, R2, self.X)
# 勝者決定
self.k_star1 = np.argmin(np.sum(np.square(self.U[:, None, :, :] - self.Y[None, :, :, :]), axis=(2, 3)), axis=1)
self.k_star2 = np.argmin(np.sum(np.square(self.V[:, :, None, :] - self.Y[:, None, :, :]), axis=(0, 3)), axis=1)

if self.is_missing == 1: # 欠損値有り
# 2次モデルの決定

G = np.einsum("ik,jl,ijd->kld", H1.T, H2.T, self.gamma)#K1*K2*D

self.Y = np.einsum('ik,jl,ijd,ijd->kld', H1.T, H2.T, self.gamma, self.X) / G
if self.model == "indirect": # 1次モデル型
# 1次モデル,2次モデルの決定
self.U = np.einsum('jl,ijd,ijd->ild', H2.T, self.gamma, self.X)/np.einsum('ijd,jl->ild', self.gamma, H2.T)
self.V = np.einsum('ik,ijd,ijd->kjd', H1.T, self.gamma, self.X)/np.einsum('ijd,ik->kjd', self.gamma, H1.T)
# 勝者決定
self.k_star1 = np.argmin(
np.sum(np.square(self.U[:, None, :, :] - self.Y[None, :, :, :]), axis=(2, 3)), axis=1)
self.k_star2 = np.argmin(
np.sum(np.square(self.V[:, :, None, :] - self.Y[:, None, :, :]), axis=(0, 3)), axis=1)

elif self.model == "direct": # 直接型
# 勝者決定
Dist = self.gamma[:, :, None, None, :] * np.square(
self.X[:, :, None, None, :] - self.Y[None, None, :, :, :])
self.k_star1 = np.argmin(np.einsum("jl,ijklm->ik", H2.T, Dist), axis=1)
self.k_star2 = np.argmin(np.einsum("ik,ijklm->jl", H1.T, Dist), axis=1)

else:
raise ValueError("invalid model: {}\nmodel must be None or direct".format(self.model))


else: # 欠損値無し
#2次モデルの決定
self.Y = np.einsum('ki,lj,ijd->kld', R1, R2, self.X)
if self.model == "indirect": # 1次モデル型
# 1次モデル,2次モデルの決定
self.U = np.einsum('lj,ijd->ild', R2, self.X)
self.V = np.einsum('ki,ijd->kjd', R1, self.X)

# 勝者決定
self.k_star1 = np.argmin(
np.sum(np.square(self.U[:, None, :, :] - self.Y[None, :, :, :]), axis=(2, 3)), axis=1)
self.k_star2 = np.argmin(
np.sum(np.square(self.V[:, :, None, :] - self.Y[:, None, :, :]), axis=(0, 3)), axis=1)

elif self.model == "direct": # 直接型
# 勝者決定
Dist = np.square(
self.X[:, :, None, None, :] - self.Y[None, None, :, :, :])
self.k_star1 = np.argmin(np.einsum("jl,ijklm->ik", H2.T, Dist), axis=1)
self.k_star2 = np.argmin(np.einsum("ik,ijklm->jl", H1.T, Dist), axis=1)

else:
raise ValueError("invalid model: {}\nmodel must be None or direct".format(self.model))
self.Z1 = self.Zeta1[self.k_star1, :] # k_starのZの座標N*L(L=2
self.Z2 = self.Zeta2[self.k_star2, :] # k_starのZの座標N*L(L=2

self.history['y'][epoch, :, :] = self.Y
self.history['z1'][epoch, :] = self.Z1
self.history['z2'][epoch, :] = self.Z2
self.history['sigma1'][epoch] = sigma1
self.history['sigma2'][epoch] = sigma2

self.history['sigma2'][epoch] = sigma2
Loading

0 comments on commit 2aaa356

Please sign in to comment.