This repository has been archived by the owner on May 30, 2022. It is now read-only.

UKR implementation by Ando #22

Open
wants to merge 14 commits into base: main
Binary file modified .DS_Store
Binary file not shown.
3 changes: 3 additions & 0 deletions .vscode/settings.json
@@ -0,0 +1,3 @@
{
"python.pythonPath": "/opt/homebrew/bin/python3"
}
Binary file added change_ver1.gif
Binary file added test .gif
Binary file added ukr/.DS_Store
Binary file not shown.
88 changes: 88 additions & 0 deletions ukr/ando/TUKRsample.py
@@ -0,0 +1,88 @@
import numpy as np
import tensorflow as tf
tf.keras.backend.set_floatx("float64")


def E(Z: tf.Variable, X: np.ndarray) -> tf.Variable:
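    """UKR objective: smooth X by latent-space proximity (kernel bandwidth fixed at 1) and return 0.5 * squared reconstruction error."""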
Dzz = Z[:, None, :] - Z[None, :, :]
D = tf.reduce_sum(tf.square(Dzz), axis=2)
H = tf.exp(-0.5 * D)
G = tf.reduce_sum(H, axis=1, keepdims=True)
R = H / G
Y = R @ X
result = 0.5 * tf.reduce_sum((Y - X)**2)
return result


def fit(Z1: np.ndarray, Z2: np.ndarray, X: np.ndarray, index1: np.ndarray, index2: np.ndarray, n_epoch: int, eta: float):
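    """Optimise the latent variables by SGD on E.

    Each observed pair (index1[k], index2[k]) carries its own copy of the
    latent vectors; the gradients of the copies are averaged per unique index.
    Assumes every index in range(N1) / range(N2) occurs at least once,
    otherwise count1 / count2 contains zeros.
    """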
N1, L1 = Z1.shape
N2, L2 = Z2.shape
tZ = tf.Variable(np.concatenate((Z1[index1, :], Z2[index2, :]), axis=1))
count1 = np.bincount(index1, minlength=N1)
count2 = np.bincount(index2, minlength=N2)

optimizer = tf.keras.optimizers.SGD(learning_rate=eta)
for epoch in range(n_epoch):
with tf.GradientTape() as tape:
result = E(tZ, X)
grad = tape.gradient(result, tZ)
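        # tf.math.bincount with weights sums the gradient rows that share an
        # index; dividing by the count averages over the copies, and
        # tf.gather scatters the averaged gradient back to every copy.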
grad1 = tf.concat([tf.math.bincount(index1, grad[:, i], minlength=N1)[:, None] for i in range(L1)], axis=1) / count1[:, None]
grad2 = tf.concat([tf.math.bincount(index2, grad[:, i+L1], minlength=N2)[:, None] for i in range(L2)], axis=1) / count2[:, None]
grad = tf.concat([tf.gather(grad1, index1), tf.gather(grad2, index2)], axis=1)
optimizer.apply_gradients([(grad, tZ)])

    # np.unique's return_index gives the first occurrence of each index,
    # i.e. one representative copy per latent point
    _, first1 = np.unique(index1, return_index=True)
    _, first2 = np.unique(index2, return_index=True)
    Z1 = tZ.numpy()[first1, :L1]
    Z2 = tZ.numpy()[first2, L1:]
return Z1, Z2


if __name__ == '__main__':
import itertools
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

N1 = 20
N2 = 30
D = 3
    L = 1  # latent dimensionality per domain; the code also works for L >= 2
seed = 100
observation_rate = 0.5
n_epoch = 100
eta = 0.1
np.random.seed(seed)

# create true latent variable
oZ1 = np.random.uniform(-1, 1, (N1, L))
oZ2 = np.random.uniform(-1, 1, (N2, L))
oZ = np.array(list(itertools.product(oZ1, oZ2))).reshape((N1, N2, -1))

# create full data
X_full = np.zeros((N1, N2, D))
X_full[:, :, 0] = oZ[:, :, 0]
X_full[:, :, 1] = oZ[:, :, L]
X_full[:, :, 2] = oZ[:, :, 0] ** 2 - oZ[:, :, L] ** 2
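    # X_full is a saddle surface over the two latent coordinates: (z1, z2, z1^2 - z2^2)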

# create observed data
Gamma = np.random.binomial(1, observation_rate, (N1, N2))
index1, index2 = Gamma.nonzero()
X = X_full[index1, index2, :]

# init latent variable
eZ1 = np.random.normal(0.0, 0.1, (N1, L))
eZ2 = np.random.normal(0.0, 0.1, (N2, L))

# learn
eZ1, eZ2 = fit(eZ1, eZ2, X, index1, index2, n_epoch=n_epoch, eta=eta)
eZ = np.array(list(itertools.product(eZ1, eZ2)))

# plot
fig = plt.figure()
ax1 = fig.add_subplot(121, projection='3d')
# ax1.scatter(X_full[:, :, 0], X_full[:, :, 1], X_full[:, :, 2])
ax1.scatter(X[:, 0], X[:, 1], X[:, 2])
ax2 = fig.add_subplot(122, aspect='equal')
ax2.scatter(eZ[:, 0], eZ[:, L])
plt.show()
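
Note: E above is a Nadaraya-Watson smoother followed by a squared-error loss. A minimal NumPy sketch of the same computation, for reference (the function name and the toy inputs are illustrative, not part of this PR):

import numpy as np

def smoother(Z, X):
    # Pairwise squared distances between latent points
    D = np.sum((Z[:, None, :] - Z[None, :, :]) ** 2, axis=2)
    H = np.exp(-0.5 * D)                   # Gaussian kernel, bandwidth fixed at 1
    R = H / H.sum(axis=1, keepdims=True)   # responsibilities; each row sums to 1
    return R @ X                           # smoothed estimate of each x_n

Z = np.random.normal(0.0, 0.1, (5, 1))
X = np.random.normal(0.0, 1.0, (5, 3))
Y = smoother(Z, X)
loss = 0.5 * np.sum((Y - X) ** 2)          # the value E returns for these inputs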

97 changes: 97 additions & 0 deletions ukr/ando/ando_TUKR.py
@@ -0,0 +1,97 @@
import numpy as np
import matplotlib.animation as animation
import matplotlib.pyplot as plt
import tensorflow as tf
tf.keras.backend.set_floatx("float64")

# TUKR smoother: Y[a, b] is the kernel-weighted average of all X[n, m],
# weighted by latent proximity in both domains
def estimate_Y(Z1, Z2, X, sigma):
    # Pairwise squared distances within domain 1
    dist_N1 = tf.reduce_sum(tf.square(Z1[:, None, :] - Z1[None, :, :]), axis=2)
    # Pairwise squared distances within domain 2
    dist_N2 = tf.reduce_sum(tf.square(Z2[:, None, :] - Z2[None, :, :]), axis=2)
    # Gaussian kernel per domain
    k_N1 = tf.exp(-1 / (2 * (sigma ** 2)) * dist_N1)
    k_N2 = tf.exp(-1 / (2 * (sigma ** 2)) * dist_N2)
    h1 = k_N1[:, :, None, None] * k_N2[None, None, :, :]
    K = tf.reduce_sum(h1, axis=(1, 3))                # normaliser, (N1, N2)
    h2 = k_N1[:, :, None, None, None] * k_N2[None, None, :, :, None] * X[None, :, None, :, :]
    k = tf.reduce_sum(h2, axis=(1, 3))                # weighted sum, (N1, N2, D)
    return k / K[:, :, None]                          # (N1, N2, D)


# Objective function: squared reconstruction error of the smoother
def E(Z1: tf.Variable, Z2: tf.Variable, X: np.ndarray, sigma: float) -> tf.Variable:
    Y = estimate_Y(Z1, Z2, X, sigma)
    return 0.5 * tf.reduce_sum((Y - X) ** 2)

# Optimise both latent sets by automatic differentiation of the objective
def fit(Z1: np.ndarray, Z2: np.ndarray, X: np.ndarray, sigma: float, T: int, eta: float):
    tZ1 = tf.Variable(Z1)
    tZ2 = tf.Variable(Z2)
    history = dict(
        Z1=np.zeros((T,) + Z1.shape),
        Z2=np.zeros((T,) + Z2.shape),
        f=np.zeros((T,) + X.shape),
    )

    optimizer = tf.keras.optimizers.SGD(learning_rate=eta)
    for t in range(T):
        with tf.GradientTape() as tape:
            result = E(tZ1, tZ2, X, sigma)
        # A non-persistent tape can be queried only once, so take both
        # gradients in a single call
        grad1, grad2 = tape.gradient(result, [tZ1, tZ2])
        optimizer.apply_gradients([(grad1, tZ1), (grad2, tZ2)])
        history['Z1'][t] = tZ1.numpy()
        history['Z2'][t] = tZ2.numpy()
        history['f'][t] = estimate_Y(tZ1, tZ2, X, sigma).numpy()

    return tZ1.numpy(), tZ2.numpy(), history
# Training function (original outline):
# def fit(Z1: np.ndarray, Z2: np.ndarray, X: np.ndarray, ...):
#     prepare the map estimation
#     compute the error function
#     automatic differentiation
#     update the latent variables




if __name__ == '__main__':
    import dataTUKR

    N1 = 30   # number of data points in domain 1
    N2 = 20   # number of data points in domain 2
    D = 3     # dimensionality of one data point
    L = 2     # dimensionality of each latent space
    seed = 0
    T = 100   # number of training epochs
    eta = 0.1    # step size for gradient descent
    sigma = 0.1  # bandwidth of the kernel function
    np.random.seed(seed)

    # Synthetic data, shape (N1, N2, D)
    X = dataTUKR.load_kura_tsom(N1, N2, retz=False)

    # Initialise the latent variables and train
    Z1 = np.random.normal(0.0, 0.1, (N1, L))
    Z2 = np.random.normal(0.0, 0.1, (N2, L))
    Z1, Z2, history = fit(Z1, Z2, X, sigma=sigma, T=T, eta=eta)

    fig = plt.figure(figsize=(10, 5))
    ax_observable = fig.add_subplot(122, projection='3d')
    ax_latent = fig.add_subplot(121)

    # Drawing function for the animation
    def update(i, history, x):
        ax_latent.cla()
        ax_observable.cla()

        fig.suptitle(f"epoch: {i}")
        Z = history['Z1'][i]
        f = history['f'][i]

        # Latent variables of domain 1, coloured by the first observed coordinate
        ax_latent.scatter(Z[:, 0], Z[:, 1], s=50, edgecolors="k", c=x[:, 0, 0])
        ax_latent.set_xlim(-1.1, 1.1)
        ax_latent.set_ylim(-1.1, 1.1)

        ax_observable.scatter(x[:, :, 0], x[:, :, 1], x[:, :, 2], c=x[:, :, 0].flatten(), s=50, marker='x')
        ax_observable.plot_wireframe(f[:, :, 0], f[:, :, 1], f[:, :, 2], color='black')
        ax_observable.set_xlim(x[:, :, 0].min(), x[:, :, 0].max())
        ax_observable.set_ylim(x[:, :, 1].min(), x[:, :, 1].max())

    ani = animation.FuncAnimation(fig, update, fargs=(history, X), interval=50, frames=T)
    ani.save("change_ver1.gif", writer="pillow")
    plt.show()
    # HTML(ani.to_jshtml())
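
Side note: the learned map can also be evaluated at latent points other than the training ones, which is what a wireframe over a regular grid would need. A minimal NumPy sketch under that assumption (the helper name predict and its grid inputs are illustrative, not part of this PR):

import numpy as np

def predict(Zg1, Zg2, Z1, Z2, X, sigma):
    # Gaussian kernels between grid points and trained latent points
    k1 = np.exp(-np.sum((Zg1[:, None] - Z1[None]) ** 2, axis=2) / (2 * sigma ** 2))  # (M1, N1)
    k2 = np.exp(-np.sum((Zg2[:, None] - Z2[None]) ** 2, axis=2) / (2 * sigma ** 2))  # (M2, N2)
    num = np.einsum("an,bm,nmd->abd", k1, k2, X)      # kernel-weighted sum of X
    den = np.einsum("an,bm->ab", k1, k2)[:, :, None]  # normaliser
    return num / den                                  # (M1, M2, D)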



151,258 changes: 151,258 additions & 0 deletions ukr/ando/ando_ukr.ipynb

Large diffs are not rendered by default.

132 changes: 132 additions & 0 deletions ukr/ando/ando_ukr_training.py
@@ -0,0 +1,132 @@
import numpy as np
import matplotlib.animation as animation
import matplotlib.pyplot as plt
from utils import make_grid

#seed = 0
#import scipy.spatial.distance as dist

class UKR(object):
    # Store the hyperparameters as instance variables
    def __init__(self, N, D, L, eta, sigma, lam, scale, clipping=(-1, +1)):
self.N = N
self.D = D
self.L = L
self.eta = eta
self.sigma = sigma
self.scale = scale
self.clipping = clipping
        self.lam = lam

    # Pairwise squared distances between two sets of latent variables
    def distance_function(self, Z1, Z2):
        distance = np.sum((Z1[:, None, :] - Z2[None, :, :]) ** 2, axis=2)
        return distance

    # Estimate the map: kernel smoothing of X by latent proximity
    def estimate_Y(self, X, Z1, Z2):
        k = np.exp((-1 / (2 * (self.sigma ** 2))) * self.distance_function(Z1, Z2))
        K = np.sum(k, axis=1, keepdims=True)
        Y = (k @ X) / K
        return Y

    # Estimate (update) the latent variables
    def estimate_Z(self, Z1, Z2, X, Y):
        k = np.exp((-1 / (2 * self.sigma ** 2)) * self.distance_function(Z1, Z2))
        K = np.sum(k, axis=1, keepdims=True)
        r = k / K                          # N x N
        d_ij = Y[:, None] - X[None, :]     # N x N x D
        d_nn = Y - X                       # N x D
        delta = Z1[:, None] - Z2[None, :]  # N x N x L
        # Gradient of the error function, eq. (8) in the notes:
        # dE = (2 / (N * sigma**2)) * C, with B = A + A^T and C = sum_i [B ∘ delta]
        A = np.einsum("ni,nd,nid->ni", r, d_nn, d_ij)  # N x N
        B = A + A.T                                    # N x N
        C = np.einsum("ni,nil->nl", B, delta)
        dE = (2 / (self.N * self.sigma ** 2)) * C
        dE += 2 * self.lam * Z2  # gradient of the L2 regulariser (Z1 == Z2 here)
        # Update the latent variables by gradient descent
        Z_new = Z1 - self.eta * dE
        return Z_new


    # Training loop
    def fit(self, X, T, f_reso, seed):
        np.random.seed(seed)
        Z = np.random.normal(scale=self.scale, size=(self.N, self.L))

        history = dict(
            E=np.zeros((T,)),
            Z=np.zeros((T, self.N, self.L)),
            Y=np.zeros((T, self.N, self.D)),
            f=np.zeros((T, f_reso, f_reso, self.D)),
        )

        for t in range(T):
            Y = self.estimate_Y(X, Z, Z)
            Z = self.estimate_Z(Z, Z, X, Y)

            history['Y'][t] = Y
            history['Z'][t] = Z
            history['E'][t] = np.sum((Y - X) ** 2) / self.N + self.lam * np.sum(Z ** 2)

            # Regular grid of latent evaluation points (zeta_k in the variable table)
            M = make_grid(f_reso,
                          bounds=(Z.min(), Z.max()),
                          dim=self.L)
            f = self.estimate_Y(X, M, Z)
            history['f'][t] = f.reshape(f_reso, f_reso, self.D)

        return history

if __name__ == '__main__':
import data
seed = 0
#from visualizer import visualize_history
N = 100
D = 3
L = 2
T = 200
eta = 2.0
sigma = 0.5
f_reso = 10

X = data.gen_saddle_shape(num_samples=N, random_seed=1, noise_scale=0.05)
    ukr = UKR(N, D, L, eta, sigma, lam=3e-3, scale=1e-2, clipping=(-1, 1))
    history = ukr.fit(X, T, f_reso=f_reso, seed=seed)
fig = plt.figure(figsize=(10, 5))
ax_observable = fig.add_subplot(122, projection='3d')
ax_latent = fig.add_subplot(121)
#%matplotlib nbagg

    # Drawing function for the animation
def update(i, history, x):
#plt.cla()
ax_latent.cla()
ax_observable.cla()

fig.suptitle(f"epoch: {i}")
Z = history['Z'][i]
f = history['f'][i]

ax_latent.scatter(Z[:, 0], Z[:, 1], s=50, edgecolors="k", c=x[:, 0])

ax_latent.set_xlim(-1.1, 1.1)
ax_latent.set_ylim(-1.1, 1.1)

        ax_observable.scatter(x[:, 0], x[:, 1], x[:, 2], c=x[:, 0], s=50, marker='x')
        ax_observable.plot_wireframe(f[:, :, 0], f[:, :, 1], f[:, :, 2], color='black')


ax_observable.set_xlim(x[:, 0].min(), x[:, 0].max())
ax_observable.set_ylim(x[:, 1].min(), x[:, 1].max())

ani = animation.FuncAnimation(fig, update, fargs=(history, X), interval=50, frames=T)
    ani.save("change_ver1.gif", writer="pillow")
plt.show()
#HTML(ani.to_jshtml())
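
For reference, the latent-variable update implemented by estimate_Z above, transcribed from the einsum calls into LaTeX (a reconstruction of the "eq. (8)" cited in the comments, which refers to the authors' notes rather than this document):

A_{ni} = r_{ni}\,(\mathbf{y}_n - \mathbf{x}_n)^{\top}(\mathbf{y}_n - \mathbf{x}_i), \qquad B = A + A^{\top},

\frac{\partial E}{\partial \mathbf{z}_n} = \frac{2}{N\sigma^{2}} \sum_{i} B_{ni}\,(\mathbf{z}_n - \mathbf{z}_i) + 2\lambda\,\mathbf{z}_n, \qquad \mathbf{z}_n \leftarrow \mathbf{z}_n - \eta\,\frac{\partial E}{\partial \mathbf{z}_n}.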

Binary file added ukr/ando/change_ver1.gif
Binary file added ukr/ando/change_ver2.gif
2 changes: 1 addition & 1 deletion ukr/ando/data.py
@@ -32,7 +32,7 @@ def gen_2d_sin_curve(num_samples, random_seed=None, noise_scale=0.01):
from matplotlib import pyplot as plt


-    X = gen_saddle_shape(200, random_seed=0, noise_scale=0.05)
+    X = gen_saddle_shape(1000, random_seed=0, noise_scale=0.05)
# X = gen_2d_sin_curve(100, random_seed=0, noise_scale=0.01)

_, D = X.shape
46 changes: 46 additions & 0 deletions ukr/ando/dataTUKR.py
@@ -0,0 +1,46 @@
import numpy as np

def load_kura_tsom(xsamples, ysamples, missing_rate=None, retz=False):
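    """Generate saddle-shaped (kura) data on an (xsamples, ysamples) grid.

    Returns x with shape (xsamples, ysamples, 3); if retz is True, also
    returns the true latent grid truez with shape (xsamples, ysamples, 2).
    """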
z1 = np.linspace(-1, 1, xsamples)
z2 = np.linspace(-1, 1, ysamples)

z1_repeated, z2_repeated = np.meshgrid(z1, z2, indexing='ij')
x1 = z1_repeated
x2 = z2_repeated
    x3 = (z1_repeated ** 2.0 - z2_repeated ** 2.0) + np.random.normal(loc=0.0, scale=0.1, size=(xsamples, ysamples))
    # Adjust here to change the noise: loc is the mean of the Gaussian,
    # scale its standard deviation, and size the number of noise samples.
    # Adding this noise moves the data points up and down in 3-D space.

x = np.concatenate((x1[:, :, np.newaxis], x2[:, :, np.newaxis], x3[:, :, np.newaxis]), axis=2)
truez = np.concatenate((z1_repeated[:, :, np.newaxis], z2_repeated[:, :, np.newaxis]), axis=2)

    # No missing values: if missing_rate is 0 or unspecified, return the data as-is
    # (the missing-value branch is not implemented in this version)
    if missing_rate == 0 or missing_rate is None:
if retz:
return x, truez
else:
return x

if __name__ == '__main__':
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

    xsamples = 20  # number of data points in domain 1
    ysamples = 30  # number of data points in domain 2

    # Version without missing values
    x, truez = load_kura_tsom(xsamples, ysamples, retz=True)  # x: observed data; truez: true latent variables
    # x is needed for the left panel, truez for the right panel of the figure
    # Version with missing values (branch not implemented above):
    # x, truez, Gamma = load_kura_tsom(xsamples, ysamples, retz=True, missing_rate=0.7)

fig = plt.figure(figsize=[10, 5])
ax_x = fig.add_subplot(1, 2, 1, projection='3d')
ax_truez = fig.add_subplot(1, 2, 2)
ax_x.scatter(x[:, :, 0].flatten(), x[:, :, 1].flatten(), x[:, :, 2].flatten(), c=x[:, :, 0].flatten())
ax_truez.scatter(truez[:, :, 0].flatten(), truez[:, :, 1].flatten(), c=x[:, :, 0].flatten())
ax_x.set_title('Generated three-dimensional data')
ax_truez.set_title('True two-dimensional latent variable')
plt.show()
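
Note: the commented-out call in __main__ expects a third return value Gamma when missing_rate is set, but load_kura_tsom has no missing-data branch yet. One possible completion, mirroring the Bernoulli observation mask used in TUKRsample.py (the helper name and seed handling are illustrative, not part of this PR):

import numpy as np

def make_observation_mask(xsamples, ysamples, missing_rate, seed=None):
    rng = np.random.default_rng(seed)
    # Gamma[i, j] == 1 where the pair (i, j) is observed
    Gamma = rng.binomial(1, 1.0 - missing_rate, (xsamples, ysamples))
    return Gamma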