This repository has been archived by the owner on May 30, 2022. It is now read-only.

UKR implementation by Ando #22

Open
wants to merge 14 commits into base: main
Binary file modified .DS_Store
Binary file not shown.
3 changes: 3 additions & 0 deletions .vscode/settings.json
@@ -0,0 +1,3 @@
{
"python.pythonPath": "/opt/homebrew/bin/python3"
}
Binary file added change_ver1.gif
Binary file added test .gif
Binary file added ukr/.DS_Store
Binary file not shown.
88 changes: 88 additions & 0 deletions ukr/ando/TUKRsample.py
@@ -0,0 +1,88 @@
import numpy as np
import tensorflow as tf
tf.keras.backend.set_floatx("float64")


def E(Z: tf.Variable, X: np.ndarray) -> tf.Variable:
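    """UKR objective: smooth X by latent-space proximity (kernel bandwidth fixed at 1) and return 0.5 * squared reconstruction error."""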
Dzz = Z[:, None, :] - Z[None, :, :]
D = tf.reduce_sum(tf.square(Dzz), axis=2)
H = tf.exp(-0.5 * D)
G = tf.reduce_sum(H, axis=1, keepdims=True)
R = H / G
Y = R @ X
result = 0.5 * tf.reduce_sum((Y - X)**2)
return result


def fit(Z1: np.ndarray, Z2: np.ndarray, X: np.ndarray, index1: np.ndarray, index2: np.ndarray, n_epoch: int, eta: float):
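    """Optimise the latent variables by SGD on E.

    Each observed pair (index1[k], index2[k]) carries its own copy of the
    latent vectors; the gradients of the copies are averaged per unique index.
    Assumes every index in range(N1) / range(N2) occurs at least once,
    otherwise count1 / count2 contains zeros.
    """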
N1, L1 = Z1.shape
N2, L2 = Z2.shape
tZ = tf.Variable(np.concatenate((Z1[index1, :], Z2[index2, :]), axis=1))
count1 = np.bincount(index1, minlength=N1)
count2 = np.bincount(index2, minlength=N2)

optimizer = tf.keras.optimizers.SGD(learning_rate=eta)
for epoch in range(n_epoch):
with tf.GradientTape() as tape:
result = E(tZ, X)
grad = tape.gradient(result, tZ)
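        # tf.math.bincount with weights sums the gradient rows that share an
        # index; dividing by the count averages over the copies, and
        # tf.gather scatters the averaged gradient back to every copy.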
grad1 = tf.concat([tf.math.bincount(index1, grad[:, i], minlength=N1)[:, None] for i in range(L1)], axis=1) / count1[:, None]
grad2 = tf.concat([tf.math.bincount(index2, grad[:, i+L1], minlength=N2)[:, None] for i in range(L2)], axis=1) / count2[:, None]
grad = tf.concat([tf.gather(grad1, index1), tf.gather(grad2, index2)], axis=1)
optimizer.apply_gradients([(grad, tZ)])

    # np.unique's return_index gives the first occurrence of each index,
    # i.e. one representative copy per latent point
    _, first1 = np.unique(index1, return_index=True)
    _, first2 = np.unique(index2, return_index=True)
    Z1 = tZ.numpy()[first1, :L1]
    Z2 = tZ.numpy()[first2, L1:]
return Z1, Z2


if __name__ == '__main__':
import itertools
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

N1 = 20
N2 = 30
D = 3
    L = 1  # latent dimensionality per domain; the code also works for L >= 2
seed = 100
observation_rate = 0.5
n_epoch = 100
eta = 0.1
np.random.seed(seed)

# create true latent variable
oZ1 = np.random.uniform(-1, 1, (N1, L))
oZ2 = np.random.uniform(-1, 1, (N2, L))
oZ = np.array(list(itertools.product(oZ1, oZ2))).reshape((N1, N2, -1))

# create full data
X_full = np.zeros((N1, N2, D))
X_full[:, :, 0] = oZ[:, :, 0]
X_full[:, :, 1] = oZ[:, :, L]
X_full[:, :, 2] = oZ[:, :, 0] ** 2 - oZ[:, :, L] ** 2
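    # X_full is a saddle surface over the two latent coordinates: (z1, z2, z1^2 - z2^2)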

# create observed data
Gamma = np.random.binomial(1, observation_rate, (N1, N2))
index1, index2 = Gamma.nonzero()
X = X_full[index1, index2, :]

# init latent variable
eZ1 = np.random.normal(0.0, 0.1, (N1, L))
eZ2 = np.random.normal(0.0, 0.1, (N2, L))

# learn
eZ1, eZ2 = fit(eZ1, eZ2, X, index1, index2, n_epoch=n_epoch, eta=eta)
eZ = np.array(list(itertools.product(eZ1, eZ2)))

# plot
fig = plt.figure()
ax1 = fig.add_subplot(121, projection='3d')
# ax1.scatter(X_full[:, :, 0], X_full[:, :, 1], X_full[:, :, 2])
ax1.scatter(X[:, 0], X[:, 1], X[:, 2])
ax2 = fig.add_subplot(122, aspect='equal')
ax2.scatter(eZ[:, 0], eZ[:, L])
plt.show()
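
Note: E above is a Nadaraya-Watson smoother followed by a squared-error loss. A minimal NumPy sketch of the same computation, for reference (the function name and the toy inputs are illustrative, not part of this PR):

import numpy as np

def smoother(Z, X):
    # Pairwise squared distances between latent points
    D = np.sum((Z[:, None, :] - Z[None, :, :]) ** 2, axis=2)
    H = np.exp(-0.5 * D)                   # Gaussian kernel, bandwidth fixed at 1
    R = H / H.sum(axis=1, keepdims=True)   # responsibilities; each row sums to 1
    return R @ X                           # smoothed estimate of each x_n

Z = np.random.normal(0.0, 0.1, (5, 1))
X = np.random.normal(0.0, 1.0, (5, 3))
Y = smoother(Z, X)
loss = 0.5 * np.sum((Y - X) ** 2)          # the value E returns for these inputs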

97 changes: 97 additions & 0 deletions ukr/ando/ando_TUKR.py
@@ -0,0 +1,97 @@
import numpy as np
import matplotlib.animation as animation
import matplotlib.pyplot as plt
import tensorflow as tf
tf.keras.backend.set_floatx("float64")

# TUKR smoother: Y[a, b] is the kernel-weighted average of all X[n, m],
# weighted by latent proximity in both domains
def estimate_Y(Z1, Z2, X, sigma):
    # Pairwise squared distances within domain 1
    dist_N1 = tf.reduce_sum(tf.square(Z1[:, None, :] - Z1[None, :, :]), axis=2)
    # Pairwise squared distances within domain 2
    dist_N2 = tf.reduce_sum(tf.square(Z2[:, None, :] - Z2[None, :, :]), axis=2)
    # Gaussian kernel per domain
    k_N1 = tf.exp(-1 / (2 * (sigma ** 2)) * dist_N1)
    k_N2 = tf.exp(-1 / (2 * (sigma ** 2)) * dist_N2)
    h1 = k_N1[:, :, None, None] * k_N2[None, None, :, :]
    K = tf.reduce_sum(h1, axis=(1, 3))                # normaliser, (N1, N2)
    h2 = k_N1[:, :, None, None, None] * k_N2[None, None, :, :, None] * X[None, :, None, :, :]
    k = tf.reduce_sum(h2, axis=(1, 3))                # weighted sum, (N1, N2, D)
    return k / K[:, :, None]                          # (N1, N2, D)


# Objective function: squared reconstruction error of the smoother
def E(Z1: tf.Variable, Z2: tf.Variable, X: np.ndarray, sigma: float) -> tf.Variable:
    Y = estimate_Y(Z1, Z2, X, sigma)
    return 0.5 * tf.reduce_sum((Y - X) ** 2)

# Optimise both latent sets by automatic differentiation of the objective
def fit(Z1: np.ndarray, Z2: np.ndarray, X: np.ndarray, sigma: float, T: int, eta: float):
    tZ1 = tf.Variable(Z1)
    tZ2 = tf.Variable(Z2)
    history = dict(
        Z1=np.zeros((T,) + Z1.shape),
        Z2=np.zeros((T,) + Z2.shape),
        f=np.zeros((T,) + X.shape),
    )

    optimizer = tf.keras.optimizers.SGD(learning_rate=eta)
    for t in range(T):
        with tf.GradientTape() as tape:
            result = E(tZ1, tZ2, X, sigma)
        # A non-persistent tape can be queried only once, so take both
        # gradients in a single call
        grad1, grad2 = tape.gradient(result, [tZ1, tZ2])
        optimizer.apply_gradients([(grad1, tZ1), (grad2, tZ2)])
        history['Z1'][t] = tZ1.numpy()
        history['Z2'][t] = tZ2.numpy()
        history['f'][t] = estimate_Y(tZ1, tZ2, X, sigma).numpy()

    return tZ1.numpy(), tZ2.numpy(), history
# Training function (original outline):
# def fit(Z1: np.ndarray, Z2: np.ndarray, X: np.ndarray, ...):
#     prepare the map estimation
#     compute the error function
#     automatic differentiation
#     update the latent variables




if __name__ == '__main__':
    import dataTUKR

    N1 = 30   # number of data points in domain 1
    N2 = 20   # number of data points in domain 2
    D = 3     # dimensionality of one data point
    L = 2     # dimensionality of each latent space
    seed = 0
    T = 100   # number of training epochs
    eta = 0.1    # step size for gradient descent
    sigma = 0.1  # bandwidth of the kernel function
    np.random.seed(seed)

    # Synthetic data, shape (N1, N2, D)
    X = dataTUKR.load_kura_tsom(N1, N2, retz=False)

    # Initialise the latent variables and train
    Z1 = np.random.normal(0.0, 0.1, (N1, L))
    Z2 = np.random.normal(0.0, 0.1, (N2, L))
    Z1, Z2, history = fit(Z1, Z2, X, sigma=sigma, T=T, eta=eta)

    fig = plt.figure(figsize=(10, 5))
    ax_observable = fig.add_subplot(122, projection='3d')
    ax_latent = fig.add_subplot(121)

    # Drawing function for the animation
    def update(i, history, x):
        ax_latent.cla()
        ax_observable.cla()

        fig.suptitle(f"epoch: {i}")
        Z = history['Z1'][i]
        f = history['f'][i]

        # Latent variables of domain 1, coloured by the first observed coordinate
        ax_latent.scatter(Z[:, 0], Z[:, 1], s=50, edgecolors="k", c=x[:, 0, 0])
        ax_latent.set_xlim(-1.1, 1.1)
        ax_latent.set_ylim(-1.1, 1.1)

        ax_observable.scatter(x[:, :, 0], x[:, :, 1], x[:, :, 2], c=x[:, :, 0].flatten(), s=50, marker='x')
        ax_observable.plot_wireframe(f[:, :, 0], f[:, :, 1], f[:, :, 2], color='black')
        ax_observable.set_xlim(x[:, :, 0].min(), x[:, :, 0].max())
        ax_observable.set_ylim(x[:, :, 1].min(), x[:, :, 1].max())

    ani = animation.FuncAnimation(fig, update, fargs=(history, X), interval=50, frames=T)
    ani.save("change_ver1.gif", writer="pillow")
    plt.show()
    # HTML(ani.to_jshtml())
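
Side note: the learned map can also be evaluated at latent points other than the training ones, which is what a wireframe over a regular grid would need. A minimal NumPy sketch under that assumption (the helper name predict and its grid inputs are illustrative, not part of this PR):

import numpy as np

def predict(Zg1, Zg2, Z1, Z2, X, sigma):
    # Gaussian kernels between grid points and trained latent points
    k1 = np.exp(-np.sum((Zg1[:, None] - Z1[None]) ** 2, axis=2) / (2 * sigma ** 2))  # (M1, N1)
    k2 = np.exp(-np.sum((Zg2[:, None] - Z2[None]) ** 2, axis=2) / (2 * sigma ** 2))  # (M2, N2)
    num = np.einsum("an,bm,nmd->abd", k1, k2, X)      # kernel-weighted sum of X
    den = np.einsum("an,bm->ab", k1, k2)[:, :, None]  # normaliser
    return num / den                                  # (M1, M2, D)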



151,258 changes: 151,258 additions & 0 deletions ukr/ando/ando_ukr.ipynb

Large diffs are not rendered by default.

132 changes: 132 additions & 0 deletions ukr/ando/ando_ukr_training.py
@@ -0,0 +1,132 @@
import numpy as np
import matplotlib.animation as animation
import matplotlib.pyplot as plt
from utils import make_grid

#seed = 0
#import scipy.spatial.distance as dist

class UKR(object):
    # Store the hyperparameters as instance variables
    def __init__(self, N, D, L, eta, sigma, lam, scale, clipping=(-1, +1)):
self.N = N
self.D = D
self.L = L
self.eta = eta
self.sigma = sigma
self.scale = scale
self.clipping = clipping
        self.lam = lam

    # Pairwise squared distances between two sets of latent variables
    def distance_function(self, Z1, Z2):
        distance = np.sum((Z1[:, None, :] - Z2[None, :, :]) ** 2, axis=2)
        return distance

    # Estimate the map: kernel smoothing of X by latent proximity
    def estimate_Y(self, X, Z1, Z2):
        k = np.exp((-1 / (2 * (self.sigma ** 2))) * self.distance_function(Z1, Z2))
        K = np.sum(k, axis=1, keepdims=True)
        Y = (k @ X) / K
        return Y

    # Estimate (update) the latent variables
    def estimate_Z(self, Z1, Z2, X, Y):
        k = np.exp((-1 / (2 * self.sigma ** 2)) * self.distance_function(Z1, Z2))
        K = np.sum(k, axis=1, keepdims=True)
        r = k / K                          # N x N
        d_ij = Y[:, None] - X[None, :]     # N x N x D
        d_nn = Y - X                       # N x D
        delta = Z1[:, None] - Z2[None, :]  # N x N x L
        # Gradient of the error function, eq. (8) in the notes:
        # dE = (2 / (N * sigma**2)) * C, with B = A + A^T and C = sum_i [B ∘ delta]
        A = np.einsum("ni,nd,nid->ni", r, d_nn, d_ij)  # N x N
        B = A + A.T                                    # N x N
        C = np.einsum("ni,nil->nl", B, delta)
        dE = (2 / (self.N * self.sigma ** 2)) * C
        dE += 2 * self.lam * Z2  # gradient of the L2 regulariser (Z1 == Z2 here)
        # Update the latent variables by gradient descent
        Z_new = Z1 - self.eta * dE
        return Z_new


    # Training loop
    def fit(self, X, T, f_reso, seed):
        np.random.seed(seed)
        Z = np.random.normal(scale=self.scale, size=(self.N, self.L))

        history = dict(
            E=np.zeros((T,)),
            Z=np.zeros((T, self.N, self.L)),
            Y=np.zeros((T, self.N, self.D)),
            f=np.zeros((T, f_reso, f_reso, self.D)),
        )

        for t in range(T):
            Y = self.estimate_Y(X, Z, Z)
            Z = self.estimate_Z(Z, Z, X, Y)

            history['Y'][t] = Y
            history['Z'][t] = Z
            history['E'][t] = np.sum((Y - X) ** 2) / self.N + self.lam * np.sum(Z ** 2)

            # Regular grid of latent evaluation points (zeta_k in the variable table)
            M = make_grid(f_reso,
                          bounds=(Z.min(), Z.max()),
                          dim=self.L)
            f = self.estimate_Y(X, M, Z)
            history['f'][t] = f.reshape(f_reso, f_reso, self.D)

        return history

if __name__ == '__main__':
import data
seed = 0
#from visualizer import visualize_history
N = 100
D = 3
L = 2
T = 200
eta = 2.0
sigma = 0.5
f_reso = 10

X = data.gen_saddle_shape(num_samples=N, random_seed=1, noise_scale=0.05)
    ukr = UKR(N, D, L, eta, sigma, lam=3e-3, scale=1e-2, clipping=(-1, 1))
    history = ukr.fit(X, T, f_reso=f_reso, seed=seed)
fig = plt.figure(figsize=(10, 5))
ax_observable = fig.add_subplot(122, projection='3d')
ax_latent = fig.add_subplot(121)
#%matplotlib nbagg

    # Drawing function for the animation
def update(i, history, x):
#plt.cla()
ax_latent.cla()
ax_observable.cla()

fig.suptitle(f"epoch: {i}")
Z = history['Z'][i]
f = history['f'][i]

ax_latent.scatter(Z[:, 0], Z[:, 1], s=50, edgecolors="k", c=x[:, 0])

ax_latent.set_xlim(-1.1, 1.1)
ax_latent.set_ylim(-1.1, 1.1)

        ax_observable.scatter(x[:, 0], x[:, 1], x[:, 2], c=x[:, 0], s=50, marker='x')
        ax_observable.plot_wireframe(f[:, :, 0], f[:, :, 1], f[:, :, 2], color='black')


ax_observable.set_xlim(x[:, 0].min(), x[:, 0].max())
ax_observable.set_ylim(x[:, 1].min(), x[:, 1].max())

ani = animation.FuncAnimation(fig, update, fargs=(history, X), interval=50, frames=T)
    ani.save("change_ver1.gif", writer="pillow")
plt.show()
#HTML(ani.to_jshtml())
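
For reference, the latent-variable update implemented by estimate_Z above, transcribed from the einsum calls into LaTeX (a reconstruction of the "eq. (8)" cited in the comments, which refers to the authors' notes rather than this document):

A_{ni} = r_{ni}\,(\mathbf{y}_n - \mathbf{x}_n)^{\top}(\mathbf{y}_n - \mathbf{x}_i), \qquad B = A + A^{\top},

\frac{\partial E}{\partial \mathbf{z}_n} = \frac{2}{N\sigma^{2}} \sum_{i} B_{ni}\,(\mathbf{z}_n - \mathbf{z}_i) + 2\lambda\,\mathbf{z}_n, \qquad \mathbf{z}_n \leftarrow \mathbf{z}_n - \eta\,\frac{\partial E}{\partial \mathbf{z}_n}.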

Binary file added ukr/ando/change_ver1.gif
Binary file added ukr/ando/change_ver2.gif
2 changes: 1 addition & 1 deletion ukr/ando/data.py
@@ -32,7 +32,7 @@ def gen_2d_sin_curve(num_samples, random_seed=None, noise_scale=0.01):
from matplotlib import pyplot as plt


-    X = gen_saddle_shape(200, random_seed=0, noise_scale=0.05)
+    X = gen_saddle_shape(1000, random_seed=0, noise_scale=0.05)
# X = gen_2d_sin_curve(100, random_seed=0, noise_scale=0.01)

_, D = X.shape
46 changes: 46 additions & 0 deletions ukr/ando/dataTUKR.py
@@ -0,0 +1,46 @@
import numpy as np

def load_kura_tsom(xsamples, ysamples, missing_rate=None, retz=False):
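    """Generate saddle-shaped (kura) data on an (xsamples, ysamples) grid.

    Returns x with shape (xsamples, ysamples, 3); if retz is True, also
    returns the true latent grid truez with shape (xsamples, ysamples, 2).
    """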
z1 = np.linspace(-1, 1, xsamples)
z2 = np.linspace(-1, 1, ysamples)

z1_repeated, z2_repeated = np.meshgrid(z1, z2, indexing='ij')
x1 = z1_repeated
x2 = z2_repeated
    x3 = (z1_repeated ** 2.0 - z2_repeated ** 2.0) + np.random.normal(loc=0.0, scale=0.1, size=(xsamples, ysamples))
    # Adjust here to change the noise: loc is the mean of the Gaussian,
    # scale its standard deviation, and size the number of noise samples.
    # Adding this noise moves the data points up and down in 3-D space.

x = np.concatenate((x1[:, :, np.newaxis], x2[:, :, np.newaxis], x3[:, :, np.newaxis]), axis=2)
truez = np.concatenate((z1_repeated[:, :, np.newaxis], z2_repeated[:, :, np.newaxis]), axis=2)

    # No missing values: if missing_rate is 0 or unspecified, return the data as-is
    # (the missing-value branch is not implemented in this version)
    if missing_rate == 0 or missing_rate is None:
if retz:
return x, truez
else:
return x

if __name__ == '__main__':
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

    xsamples = 20  # number of data points in domain 1
    ysamples = 30  # number of data points in domain 2

    # Version without missing values
    x, truez = load_kura_tsom(xsamples, ysamples, retz=True)  # x: observed data; truez: true latent variables
    # x is needed for the left panel, truez for the right panel of the figure
    # Version with missing values (branch not implemented above):
    # x, truez, Gamma = load_kura_tsom(xsamples, ysamples, retz=True, missing_rate=0.7)

fig = plt.figure(figsize=[10, 5])
ax_x = fig.add_subplot(1, 2, 1, projection='3d')
ax_truez = fig.add_subplot(1, 2, 2)
ax_x.scatter(x[:, :, 0].flatten(), x[:, :, 1].flatten(), x[:, :, 2].flatten(), c=x[:, :, 0].flatten())
ax_truez.scatter(truez[:, :, 0].flatten(), truez[:, :, 1].flatten(), c=x[:, :, 0].flatten())
ax_x.set_title('Generated three-dimensional data')
ax_truez.set_title('True two-dimensional latent variable')
plt.show()
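
Note: the commented-out call in __main__ expects a third return value Gamma when missing_rate is set, but load_kura_tsom has no missing-data branch yet. One possible completion, mirroring the Bernoulli observation mask used in TUKRsample.py (the helper name and seed handling are illustrative, not part of this PR):

import numpy as np

def make_observation_mask(xsamples, ysamples, missing_rate, seed=None):
    rng = np.random.default_rng(seed)
    # Gamma[i, j] == 1 where the pair (i, j) is observed
    Gamma = rng.binomial(1, 1.0 - missing_rate, (xsamples, ysamples))
    return Gamma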