Commit: add Cauchy kernel
TianTian authored and TianTian committed Jan 3, 2023
1 parent bc4e69a commit 20a9834
Showing 5 changed files with 127 additions and 8 deletions.
4 changes: 3 additions & 1 deletion src/scDHMap/run_scDHMap.py
@@ -40,6 +40,8 @@
                     help='coefficient of the t-SNE loss')
 parser.add_argument('--beta', default=10., type=float,
                     help='coefficient of the KLD loss')
+parser.add_argument('--gamma', default=1., type=float,
+                    help='GAMMA coefficient of the Cauchy kernel')
 parser.add_argument('--prob', default=0., type=float,
                     help='dropout probability')
 parser.add_argument('--perplexity', nargs="+", default=[30.], type=float)
@@ -92,7 +94,7 @@
 # encoderLayer and decoderLayer set the hidden layer sizes in encoder and decoder
 # z_dim sets the dimension of the latent embedding
 model = scDHMap(input_dim=adata.n_vars, encodeLayer=[128, 64, 32, 16], decodeLayer=[16, 32, 64, 128],
-            batch_size=args.batch_size, activation="elu", z_dim=2, alpha=args.alpha, beta=args.beta,
+            batch_size=args.batch_size, activation="elu", z_dim=2, alpha=args.alpha, beta=args.beta, gamma=args.gamma,
             perplexity=args.perplexity, prob=args.prob, likelihood_type=args.likelihood_type, device=args.device).to(args.device)
 
 print(str(model))
9 changes: 6 additions & 3 deletions src/scDHMap/scDHMap.py
@@ -86,7 +86,7 @@ def save_checkpoint(self, loss, model):
 
 class scDHMap(nn.Module):
     def __init__(self, input_dim, encodeLayer=[], decodeLayer=[], batch_size=512,
-            activation="relu", z_dim=2, alpha=1., beta=1., perplexity=[30.],
+            activation="relu", z_dim=2, alpha=1., beta=1., gamma=1., perplexity=[30.],
             prob=0., likelihood_type="zinb", device="cuda"):
         super(scDHMap, self).__init__()
         self.input_dim = input_dim
@@ -95,6 +95,7 @@ def __init__(self, input_dim, encodeLayer=[], decodeLayer=[], batch_size=512,
         self.batch_size = batch_size
         self.alpha = alpha
         self.beta = beta
+        self.gamma = gamma  # If gamma = 1, the Cauchy kernel reduces to the Student's t-kernel
         self.perplexity = perplexity
         self.prob = prob
         self.likelihood_type = likelihood_type
@@ -148,8 +149,10 @@ def tsne_repel(self, z, p):
         n = z.size()[0]
 
         ### pairwise distances
-        num = lorentz_distance_mat(z, z)**2
-        num = torch.pow(1.0 + num, -1)
+        # num = lorentz_distance_mat(z, z)**2
+        # num = torch.pow(1.0 + num, -1)
+        num = (lorentz_distance_mat(z, z)/self.gamma)**2
+        num = 1/self.gamma/(1.0 + num)
         p = p / torch.unsqueeze(torch.sum(p, dim=1), 1)
 
         attraction = p * torch.log(num)
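For intuition, here is a minimal standalone sketch of the kernel the new tsne_repel lines compute: num = (1/gamma) / (1 + (d/gamma)^2) = gamma / (gamma^2 + d^2), a Cauchy kernel with scale gamma. In the diff, d comes from lorentz_distance_mat; below, a plain tensor stands in for it, and cauchy_kernel is a hypothetical helper name, not part of the repository:

    import torch

    def cauchy_kernel(d, gamma=1.0):
        # q ~ (1/gamma) / (1 + (d/gamma)^2); with gamma = 1 this is exactly
        # the Student's t-kernel 1 / (1 + d^2) that the commented-out lines used
        return (1.0 / gamma) / (1.0 + (d / gamma) ** 2)

    d = torch.linspace(0.0, 5.0, steps=6)  # stand-in for Lorentz distances
    assert torch.allclose(cauchy_kernel(d, gamma=1.0), 1.0 / (1.0 + d ** 2))
    print(cauchy_kernel(d, gamma=0.5))     # smaller gamma -> sharper, more localized kernel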
4 changes: 3 additions & 1 deletion src/scDHMap_batch/run_scDHMap_batch.py
@@ -41,6 +41,8 @@
                     help='coefficient of the t-SNE loss')
 parser.add_argument('--beta', default=10., type=float,
                     help='coefficient of the KLD loss')
+parser.add_argument('--gamma', default=1., type=float,
+                    help='GAMMA coefficient of the Cauchy kernel')
 parser.add_argument('--prob', default=0., type=float,
                     help='dropout probability')
 parser.add_argument('--perplexity', nargs="+", default=[30.], type=float)
@@ -104,7 +106,7 @@
 
 # Build the model
 model = scDHMap(input_dim=adata.n_vars, n_batch=n_batch, encodeLayer=[128, 64, 32, 16], decodeLayer=[16, 32, 64, 128],
-            batch_size=args.batch_size, activation="elu", z_dim=2, alpha=args.alpha, beta=args.beta,
+            batch_size=args.batch_size, activation="elu", z_dim=2, alpha=args.alpha, beta=args.beta, gamma=args.gamma,
             perplexity=args.perplexity, prob=args.prob, device=args.device).to(args.device)
 
 print(str(model))
9 changes: 6 additions & 3 deletions src/scDHMap_batch/scDHMap_batch.py
@@ -86,7 +86,7 @@ def save_checkpoint(self, loss, model):
 
 class scDHMap(nn.Module):
     def __init__(self, input_dim, n_batch, encodeLayer=[], decodeLayer=[], batch_size=512,
-            activation="relu", z_dim=2, alpha=1., beta=1., perplexity=[30.],
+            activation="relu", z_dim=2, alpha=1., beta=1., gamma=1., perplexity=[30.],
             prob=0., device="cuda"):
         super(scDHMap, self).__init__()
         self.input_dim = input_dim
@@ -96,6 +96,7 @@ def __init__(self, input_dim, n_batch, encodeLayer=[], decodeLayer=[], batch_size=512,
         self.batch_size = batch_size
         self.alpha = alpha
         self.beta = beta
+        self.gamma = gamma  # If gamma = 1, the Cauchy kernel reduces to the Student's t-kernel
         self.perplexity = perplexity
         self.prob = prob
         self.encoder = buildNetwork([input_dim+n_batch]+encodeLayer, type="encode", activation=activation, prob=prob)
@@ -147,8 +148,10 @@ def tsne_repel(self, z, p):
         n = z.size()[0]
 
         ### pairwise distances
-        num = lorentz_distance_mat(z, z)**2
-        num = torch.pow(1.0 + num, -1)
+        # num = lorentz_distance_mat(z, z)**2
+        # num = torch.pow(1.0 + num, -1)
+        num = (lorentz_distance_mat(z, z)/self.gamma)**2
+        num = 1/self.gamma/(1.0 + num)
         p = p / torch.unsqueeze(torch.sum(p, dim=1), 1)
 
         attraction = p * torch.log(num)
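The batch-aware model gets the identical kernel change. To see where num feeds into the objective, here is a hedged, self-contained sketch of a t-SNE-style KL loss built from this kernel; tsne_repel continues past the lines shown in the diff, so this illustrates the general shape of such a loss rather than the repository's exact implementation:

    import torch

    def tsne_style_loss(d, p, gamma=1.0):
        # unnormalized Cauchy similarities, matching the two new lines above
        num = (1.0 / gamma) / (1.0 + (d / gamma) ** 2)
        p = p / p.sum(dim=1, keepdim=True)           # row-normalize P, as in the diff
        attraction = (p * torch.log(num)).sum()      # rewards high similarity where P is large
        repulsion = p.sum() * torch.log(num.sum())   # shared normalizer term, pushes pairs apart
        return repulsion - attraction                # = -sum(p * log q), i.e. KL(P||Q) up to the entropy of P

    d = torch.rand(4, 4) * 3.0                       # stand-in for pairwise Lorentz distances
    p = torch.rand(4, 4)
    print(tsne_style_loss(d, p, gamma=1.0).item())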
109 changes: 109 additions & 0 deletions src/utilities/Poincare_kmeans.py
@@ -0,0 +1,109 @@
from random import sample
import numpy as np

## K-Means in the Poincare Disk model

class PoincareKMeans(object):
    def __init__(self, n_clusters=8, n_init=100, max_iter=300, tol=1e-8, verbose=True):
        self.n_clusters = n_clusters
        self.n_init = n_init
        self.max_iter = max_iter
        self.tol = tol
        self.verbose = verbose
        self.labels_ = None
        self.cluster_centers_ = None

    def fit(self, X):
        n_samples = X.shape[0]
        self.inertia = None

        for run_it in range(self.n_init):
            # initialize centroids from a random sample of the data
            centroids = X[sample(range(n_samples), self.n_clusters), :]
            for it in range(self.max_iter):
                distances = self._get_distances_to_clusters(X, centroids)
                labels = np.argmin(distances, axis=1)

                new_centroids = np.zeros((self.n_clusters, 2))
                for i in range(self.n_clusters):
                    indices = np.where(labels == i)[0]
                    if len(indices) > 0:
                        new_centroids[i, :] = self._hyperbolic_centroid(X[indices, :])
                    else:
                        # re-seed an empty cluster with a random point
                        new_centroids[i, :] = X[sample(range(n_samples), 1), :]
                m = np.ravel(centroids - new_centroids, order='K')
                diff = np.dot(m, m)
                centroids = new_centroids.copy()
                if diff < self.tol:
                    break

            distances = self._get_distances_to_clusters(X, centroids)
            labels = np.argmin(distances, axis=1)
            inertia = np.sum([np.sum(distances[np.where(labels == i)[0], i]**2) for i in range(self.n_clusters)])
            if (self.inertia is None) or (inertia < self.inertia):
                self.inertia = inertia
                self.labels_ = labels.copy()
                self.cluster_centers_ = centroids.copy()

            if self.verbose:
                print("Iteration: {} - Best Inertia: {}".format(run_it, self.inertia))

    def fit_predict(self, X):
        self.fit(X)
        return self.labels_

    def fit_transform(self, X):
        self.fit(X)
        return self.transform(X)

    def predict(self, X):
        distances = self.transform(X)
        return np.argmin(distances, axis=1)

    def transform(self, X):
        return self._get_distances_to_clusters(X, self.cluster_centers_)

    def _get_distances_to_clusters(self, X, clusters):
        # Poincare distance: d(x, c) = arccosh(1 + 2*||x-c||^2 / ((1-||x||^2)*(1-||c||^2)));
        # norms are clipped away from 1 to keep the formula numerically stable near the boundary
        n_samples, n_clusters = X.shape[0], clusters.shape[0]

        distances = np.zeros((n_samples, n_clusters))
        for i in range(n_clusters):
            centroid = np.tile(clusters[i, :], (n_samples, 1))
            a1 = np.linalg.norm(X, axis=1)
            a1 = np.clip(a1, a_min=0, a_max=1-1e-7)
            den1 = 1 - a1**2
            a2 = np.linalg.norm(centroid, axis=1)
            a2 = np.clip(a2, a_min=0, a_max=1-1e-7)
            den2 = 1 - a2**2
            a3 = np.linalg.norm(X - centroid, axis=1)
            a3 = np.clip(a3, a_min=0, a_max=1-1e-7)
            the_num = a3**2
            distances[:, i] = np.arccosh(1 + 2*the_num/(den1*den2))

        return distances

    def _poinc_to_minsk(self, points):
        # lift points from the Poincare disk to the hyperboloid (Minkowski) model
        minsk_points = np.zeros((points.shape[0], 3))
        minsk_points[:, 0] = np.apply_along_axis(arr=points, axis=1, func1d=lambda v: 2*v[0]/(1 - v[0]**2 - v[1]**2))
        minsk_points[:, 1] = np.apply_along_axis(arr=points, axis=1, func1d=lambda v: 2*v[1]/(1 - v[0]**2 - v[1]**2))
        minsk_points[:, 2] = np.apply_along_axis(arr=points, axis=1, func1d=lambda v: (1 + v[0]**2 + v[1]**2)/(1 - v[0]**2 - v[1]**2))
        return minsk_points

    def _minsk_to_poinc(self, points):
        # project points from the hyperboloid model back to the Poincare disk
        poinc_points = np.zeros((points.shape[0], 2))
        poinc_points[:, 0] = points[:, 0]/(1 + points[:, 2])
        poinc_points[:, 1] = points[:, 1]/(1 + points[:, 2])
        return poinc_points

    def _hyperbolic_centroid(self, points):
        # average in the Minkowski model, renormalize onto the hyperboloid, and map back
        minsk_points = self._poinc_to_minsk(points)
        minsk_centroid = np.mean(minsk_points, axis=0)
        normalizer = np.sqrt(np.abs(minsk_centroid[0]**2 + minsk_centroid[1]**2 - minsk_centroid[2]**2))
        minsk_centroid = minsk_centroid/np.clip(normalizer, a_min=1e-7, a_max=np.inf)
        return self._minsk_to_poinc(minsk_centroid.reshape((1, 3)))[0]
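A minimal usage sketch for the new class, on synthetic points inside the unit disk (the data, seed, and parameter choices are illustrative, not from the repository):

    import numpy as np
    # assumes PoincareKMeans from src/utilities/Poincare_kmeans.py is importable

    rng = np.random.default_rng(0)
    X = rng.uniform(-0.8, 0.8, size=(200, 2))   # synthetic 2D points
    X = X[np.linalg.norm(X, axis=1) < 1.0]      # keep only points strictly inside the disk

    km = PoincareKMeans(n_clusters=3, n_init=10, verbose=False)
    labels = km.fit_predict(X)                  # cluster index per point
    centers = km.cluster_centers_               # (3, 2) array of centroids in the disk

The centroid step averages in the hyperboloid model and renormalizes back onto it, a common closed-form choice of hyperbolic center of mass, so each returned centroid is itself a valid point in the disk.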
