Merge pull request #26 from KrishnaswamyLab/dev

Dev
KrishnaswamyLab · Sep 16, 2019 · commit 12ecd0d
2 parents: 6b7bb57 + 92f4fbb

Showing 8 changed files with 135 additions and 73 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -9,7 +9,8 @@ cache:
 - pip
 - apt
 install:
-- python setup.py install
+- pip install -r requirements.txt
+- pip install .
 script:
 - pip install .[test]
 - python setup.py test

diff --git a/doc/source/requirements.txt b/doc/source/requirements.txt
@@ -1,5 +1,6 @@
 numpy>=1.14.0
 scipy>=1.1.0
+pandas<0.24
 future
 graphtools>=0.1.8.1
 sphinx<=1.8.5

diff --git a/meld/__init__.py b/meld/__init__.py
@@ -2,3 +2,4 @@
 
 from .meld import MELD
 from .cluster import VertexFrequencyCluster
+from .version import __version__
diff --git a/meld/cluster.py b/meld/cluster.py
@@ -8,8 +8,9 @@
 from sklearn import preprocessing, decomposition
 import warnings
 
-from . import utils
+import scprep
 
+from . import utils
 
 class VertexFrequencyCluster(BaseEstimator):
     """Performs Vertex Frequency clustering for data given a
@@ -45,7 +46,6 @@ class VertexFrequencyCluster(BaseEstimator):
 
     def __init__(self, n_clusters=10, window_count=9, window_sizes=None,
                  sparse=False, suppress=False, random_state=None, **kwargs):
-
         self.suppress = suppress
         self.sparse = sparse
         self._basewindow = None
@@ -61,9 +61,10 @@ def __init__(self, n_clusters=10, window_count=9, window_sizes=None,
         self.N = None
         self.spec_hist = None
         self.spectrogram = None
+        self.combined_spectrogram_ees = None
         self.isfit = False
-        self.ees = None
-        self.res = None
+        self.EES = None
+        self.RES = None
         self._sklearn_params = kwargs
         self._clusterobj = KMeans(n_clusters=n_clusters,
                                   random_state=random_state, **kwargs)
@@ -108,15 +109,42 @@ def _compute_spectrogram(self, RES, window):
         """
         if len(RES.shape) == 1:
             RES = RES[:, None]
-        self.RES = RES
         if sparse.issparse(window):
             # the next computation becomes dense - better to make dense now
-            C = window.multiply(self.RES).toarray()
+            C = window.multiply(RES).toarray()
         else:
-            C = np.multiply(window, self.RES)
+            C = np.multiply(window, RES)
         C = preprocessing.normalize(self.eigenvectors.T @ C, axis=0)
         return C.T
 
+
+    def _compute_multiresolution_spectrogram(self, RES):
+        ''' Compute multiresolution spectrogram by repeatedly calling
+            _compute_spectrogram '''
+
+        #spectrogram = np.zeros((self.windows[0].shape[1],
+        #                        self.eigenvectors.shape[1]))
+
+        #for window in self.windows:
+        #    curr_spectrogram = self._compute_spectrogram(
+        #        RES, window)
+        #    curr_spectrogram = self._activate(curr_spectrogram)
+        #    spectrogram += curr_spectrogram
+
+        spectrogram = np.zeros((self.windows[0].shape[1],
+                                     self.eigenvectors.shape[1]))
+
+        for window in self.windows:
+            curr_spectrogram = self._compute_spectrogram(
+                RES=RES, window=window)
+            curr_spectrogram = self._activate(curr_spectrogram)
+            spectrogram += curr_spectrogram
+
+
+
+        return spectrogram
+
+
     def _compute_window(self, window, t=1):
         """_compute_window
         apply operation to window function
@@ -144,15 +172,15 @@ def _compute_window(self, window, t=1):
             window = np.linalg.matrix_power(window, t)
         return preprocessing.normalize(window, 'l2', axis=0).T
 
-    def _concat_EES_to_spectrogram(self, weight):
-        '''Concatenates the EES to the spectrogram for clustering'''
+    def _combine_spectrogram_EES(self, spectrogram, EES):
+        ''' Normalizes and concatenates the EES to the
+            spectrogram for clustering'''
+
+        spectrogram_n = spectrogram / np.linalg.norm(spectrogram)
 
-        data = decomposition.PCA(25).fit_transform(self.spectrogram)
+        ees_n = EES / np.linalg.norm(EES, ord=2, axis=0)
 
-        range_dim = np.max(np.max(data, axis=0) - np.min(data, axis=0))
-        range_meld = np.max(self.EES) - np.min(self.EES)
-        scale = (range_dim / range_meld) * weight
-        data_nu = np.c_[data, (self.EES * scale)]
+        data_nu = np.c_[spectrogram_n, ees_n]
         return data_nu
 
     def fit(self, G):
@@ -179,58 +207,55 @@ def fit(self, G):
         self.isfit = True
         return self
 
-    def transform(self, RES, EES=None, weight=1, center=True):
+    def transform(self, RES, EES=None, center=True):
         '''Calculates the spectrogram of the graph using the RES'''
         self.RES = RES
         self.EES = EES
         if not self.isfit:
             raise ValueError(
                 'Estimator must be `fit` before running `transform`.')
 
-        else:
-            if not isinstance(self.RES, (list, tuple, np.ndarray, pd.Series)):
-                raise TypeError('`RES` must be array-like.')
+        if not isinstance(self.RES, (list, tuple, np.ndarray, pd.Series, pd.DataFrame)):
+            raise TypeError('`RES` must be array-like.')
 
-            if EES is not None and not isinstance(self.EES, (list, tuple, np.ndarray, pd.Series)):
-                raise TypeError('`EES` must be array-like.')
-            if EES is not None:
-                self.EES = np.array(self.EES)
+        if EES is not None and not isinstance(self.EES, (list, tuple, np.ndarray, pd.Series)):
+            raise TypeError('`EES` must be array-like.')
 
-            self.RES = np.array(self.RES)
-            if not self.N in self.RES.shape:
-                raise ValueError('At least one axis of `RES` must be'
-                                 ' of length `N`.')
-            if EES is not None and self.N not in self.EES.shape:
+        # Checking shape of RES
+        self.RES = np.array(self.RES)
+        if not self.N in self.RES.shape:
+            raise ValueError('At least one axis of `RES` must be'
+                             ' of length `N`.')
+
+        # Checking shape of EES
+        if EES is not None:
+            if  self.N not in self.EES.shape:
                 raise ValueError('At least one axis of `EES` must be'
                                  ' of length `N`.')
+            if EES.shape != RES.shape:
+                raise ValueError('`RES` and `EES` must have the same shape.'
+                'Got RES: {} and EES: {}'.format(str(RES.shape), str(EES.shape)))
+            self.EES = np.array(self.EES)
 
+        # Subtract the mean from the RES
+        if center:
             self.RES = self.RES - self.RES.mean()
-            self.spectrogram = np.zeros((self.windows[0].shape[1],
-                                         self.eigenvectors.shape[1]))
-            for window in self.windows:
-                spectrogram = self._compute_spectrogram(
-                    self.RES, window)
-                # There's maybe something wrong here
-                spectrogram = self._activate(spectrogram)
-                self.spectrogram += spectrogram
-
-            """ This can be added later to support multiple signals
-            for i in range(ncols):
-                for t in range(self.window_count):
-                    temp = self._compute_spectrogram(
-                        s[:, i], self.eigenvectors, self.window[:, :, t])
-                    if self._activated:
-                        temp = self._activate(
-                            temp)
-                    if self._store_spec_hist:
-                        self.spec_hist[:, :, t, i] = temp
-                    else:
-                        self.spectrogram[:, :, i] += temp"""
+
+        # If only one RES, no need to collect
+        if len(self.RES.shape) == 1:
+            self.spectrogram = self._compute_multiresolution_spectrogram(self.RES)
+        else:
+            # Create a list of spectrograms and concatenate them
+            spectrograms = []
+            for i in range(self.RES.shape[1]):
+                curr_RES = scprep.select.select_cols(self.RES, idx=i)
+                spectrograms.append(self._compute_multiresolution_spectrogram(curr_RES))
+            self.spectrogram = np.hstack(spectrograms)
 
         # Appending the EES to the spectrogram
-        # TODO: is this a bad idea?
-        if EES is not None:
-            self.spectrogram = self._concat_EES_to_spectrogram(weight)
+        if self.EES is not None:
+            self.combined_spectrogram_ees = self._combine_spectrogram_EES(
+                spectrogram=self.spectrogram, EES=self.EES)
 
         return self.spectrogram
 
@@ -247,7 +272,13 @@ def predict(self, **kwargs):
             raise ValueError("Estimator is not transformed. "
                              "Call VertexFrequencyCluster.transform().")
 
-        self.labels_ = self._clusterobj.fit_predict(self.spectrogram)
+        if self.combined_spectrogram_ees is None:
+            data = self.spectrogram
+        else:
+            data = self.combined_spectrogram_ees
+        data = decomposition.PCA(self.n_clusters).fit_transform(data)
+        self.labels_ = self._clusterobj.fit_predict(data)
+
         self.labels_ = utils.sort_clusters_by_meld_score(
             self.labels_, self.RES)
         return self.labels_

diff --git a/requirements.txt b/requirements.txt
@@ -1,4 +1,5 @@
-numpy>=1.10.0
-scipy>=0.18.0,!=1.2.0.*
+numpy>=1.14.0
+scipy>=1.1.0
 graphtools>=1.0.0
+scprep
 pygsp
diff --git a/setup.py b/setup.py
@@ -2,12 +2,14 @@
 
 import os
 import sys
-from setuptools import setup
+from setuptools import setup, find_packages
 
 install_requires = [
     'numpy>=1.14.0',
     'scipy>=1.1.0',
     'graphtools>=0.1.8.1',
+    'pandas<0.24',
+    'scprep',
     'pygsp'
 ]
 
@@ -16,8 +18,8 @@
     'nose2',
     'coverage',
     'coveralls',
-    'pandas<0.24',
-    'scikit-learn'
+    'scikit-learn',
+    'packaging'
 ]
 
 doc_requires = [
@@ -41,7 +43,7 @@
       description='MELD',
       author='Daniel Burkhardt, Krishnaswamy Lab, Yale University',
       author_email='[email protected]',
-      packages=['meld', ],
+      packages=find_packages(),
       license='Dual License - See LICENSE file',
       install_requires=install_requires,
       extras_require={'test': test_requires,

diff --git a/test/test_meld.py b/test/test_meld.py
@@ -13,6 +13,8 @@
 
 from utils import make_batches
 
+from packaging import version
+
 
 def test_mnn():
     data, labels = make_batches(n_pts_per_cluster=250)
@@ -34,7 +36,6 @@ def test_check_pygsp_graph():
         meld.utils._check_pygsp_graph,
         G='hello world')
 
-
 def test_meld():
     # MELD operator
     # Numerical accuracy
@@ -53,12 +54,19 @@ def norm(x):
     meld_op = meld.MELD()
     B = meld_op.fit_transform(G, RES)
 
-    assert np.isclose(np.sum(B), 532.0001992193013)
+    if version.parse(np.__version__) < version.parse('1.17'):
+        np.testing.assert_allclose(np.sum(B), 532.0001992193013)
+    else:
+        np.testing.assert_allclose(np.sum(B), 519.0001572740623)
 
     meld_op = meld.MELD()
     B = meld_op.fit_transform(gt.Graph(
         D, knn=20, decay=10, use_pygsp=False), RES)
-    assert np.isclose(np.sum(B), 532.0001992193013)
+
+    if version.parse(np.__version__) < version.parse('1.17'):
+        np.testing.assert_allclose(np.sum(B), 532.0001992193013)
+    else:
+        np.testing.assert_allclose(np.sum(B), 519.0001572740623)
 
     # lap type TypeError
     lap_type = 'hello world'
@@ -121,16 +129,34 @@ def test_cluster(self):
         assert sparse_spectrogram.shape == spectrogram.shape
         assert sparse.issparse(vfc_op._basewindow)
 
-    #def test_2d(self):
-    #    RES = np.array([self.labels, self.labels]).T
-    #    vfc_op = meld.VertexFrequencyCluster(
-    #        window_sizes=self.window_sizes)
-    #    meld_op = meld.MELD()
-    #    EES = meld_op.fit_transform(G=self.G, RES=RES)
-    #    clusters = vfc_op.fit_predict(
-    #        self.G, RES=RES,
-    #        EES=EES)
-    #    assert len(clusters) == len(self.labels)
+    def test_cluster_no_EES(self):
+        vfc_op = meld.VertexFrequencyCluster(
+            window_sizes=self.window_sizes)
+        spectrogram = vfc_op.fit_predict(
+            self.G, RES=self.labels, EES=None)
+
+    def test_2d(self):
+        RES = np.array([self.labels, self.labels]).T
+        vfc_op = meld.VertexFrequencyCluster(
+            window_sizes=self.window_sizes)
+        meld_op = meld.MELD()
+        EES = meld_op.fit_transform(G=self.G, RES=RES)
+        clusters = vfc_op.fit_predict(
+            self.G, RES=RES,
+            EES=EES)
+        assert len(clusters) == len(self.labels)
+
+    def test_RES_EES_shape(self):
+        RES = np.array([self.labels, self.labels]).T
+        vfc_op = meld.VertexFrequencyCluster(
+            window_sizes=self.window_sizes)
+        meld_op = meld.MELD()
+        EES = meld_op.fit_transform(G=self.G, RES=RES)
+        assert_raise_message(ValueError,
+        '`RES` and `EES` must have the same shape.'
+        'Got RES: {} and EES: {}'.format(str(RES[:,1].shape), str(EES.shape)),
+        vfc_op.fit_predict, G=self.G, RES=RES[:,1], EES=EES)
+
 
     def test_transform_before_fit(self):
         # Transform before fit

diff --git a/test/utils/__init__.py b/test/utils/__init__.py
@@ -15,7 +15,6 @@ def ignore_numpy_warning():
         message="the matrix subclass is not the recommended way to represent "
         "matrices or deal with linear algebra ")
 
-
 reset_warnings()
 
 def make_batches(n_pts_per_cluster=5000):
diff --git a/meld/__init__.py b/meld/__init__.py
@@ -2,3 +2,4 @@
 
 from .meld import MELD
 from .cluster import VertexFrequencyCluster
+from .version import __version__