Skip to content

Commit

Permalink
Merge pull request #26 from KrishnaswamyLab/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
dburkhardt authored Sep 16, 2019
2 parents 6b7bb57 + 92f4fbb commit 12ecd0d
Show file tree
Hide file tree
Showing 8 changed files with 135 additions and 73 deletions.
3 changes: 2 additions & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ cache:
- pip
- apt
install:
- python setup.py install
- pip install -r requirements.txt
- pip install .
script:
- pip install .[test]
- python setup.py test
Expand Down
1 change: 1 addition & 0 deletions doc/source/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
numpy>=1.14.0
scipy>=1.1.0
pandas<0.24
future
graphtools>=0.1.8.1
sphinx<=1.8.5
Expand Down
1 change: 1 addition & 0 deletions meld/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@

from .meld import MELD
from .cluster import VertexFrequencyCluster
from .version import __version__
135 changes: 83 additions & 52 deletions meld/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,9 @@
from sklearn import preprocessing, decomposition
import warnings

from . import utils
import scprep

from . import utils

class VertexFrequencyCluster(BaseEstimator):
"""Performs Vertex Frequency clustering for data given a
Expand Down Expand Up @@ -45,7 +46,6 @@ class VertexFrequencyCluster(BaseEstimator):

def __init__(self, n_clusters=10, window_count=9, window_sizes=None,
sparse=False, suppress=False, random_state=None, **kwargs):

self.suppress = suppress
self.sparse = sparse
self._basewindow = None
Expand All @@ -61,9 +61,10 @@ def __init__(self, n_clusters=10, window_count=9, window_sizes=None,
self.N = None
self.spec_hist = None
self.spectrogram = None
self.combined_spectrogram_ees = None
self.isfit = False
self.ees = None
self.res = None
self.EES = None
self.RES = None
self._sklearn_params = kwargs
self._clusterobj = KMeans(n_clusters=n_clusters,
random_state=random_state, **kwargs)
Expand Down Expand Up @@ -108,15 +109,42 @@ def _compute_spectrogram(self, RES, window):
"""
if len(RES.shape) == 1:
RES = RES[:, None]
self.RES = RES
if sparse.issparse(window):
# the next computation becomes dense - better to make dense now
C = window.multiply(self.RES).toarray()
C = window.multiply(RES).toarray()
else:
C = np.multiply(window, self.RES)
C = np.multiply(window, RES)
C = preprocessing.normalize(self.eigenvectors.T @ C, axis=0)
return C.T


def _compute_multiresolution_spectrogram(self, RES):
    """Accumulate the activated spectrogram of `RES` over all windows.

    For every entry of ``self.windows`` the single-window spectrogram is
    obtained from ``_compute_spectrogram``, passed through ``_activate``
    and added into one running array, which is returned.

    The accumulator is allocated with shape
    ``(self.windows[0].shape[1], self.eigenvectors.shape[1])``.
    """
    out_shape = (self.windows[0].shape[1], self.eigenvectors.shape[1])
    total = np.zeros(out_shape)

    for win in self.windows:
        activated = self._activate(
            self._compute_spectrogram(RES=RES, window=win))
        total += activated

    return total


def _compute_window(self, window, t=1):
"""_compute_window
apply operation to window function
Expand Down Expand Up @@ -144,15 +172,15 @@ def _compute_window(self, window, t=1):
window = np.linalg.matrix_power(window, t)
return preprocessing.normalize(window, 'l2', axis=0).T

def _concat_EES_to_spectrogram(self, weight):
'''Concatenates the EES to the spectrogram for clustering'''
def _combine_spectrogram_EES(self, spectrogram, EES):
''' Normalizes and concatenates the EES to the
spectrogram for clustering'''

spectrogram_n = spectrogram / np.linalg.norm(spectrogram)

data = decomposition.PCA(25).fit_transform(self.spectrogram)
ees_n = EES / np.linalg.norm(EES, ord=2, axis=0)

range_dim = np.max(np.max(data, axis=0) - np.min(data, axis=0))
range_meld = np.max(self.EES) - np.min(self.EES)
scale = (range_dim / range_meld) * weight
data_nu = np.c_[data, (self.EES * scale)]
data_nu = np.c_[spectrogram_n, ees_n]
return data_nu

def fit(self, G):
Expand All @@ -179,58 +207,55 @@ def fit(self, G):
self.isfit = True
return self

def transform(self, RES, EES=None, weight=1, center=True):
def transform(self, RES, EES=None, center=True):
'''Calculates the spectrogram of the graph using the RES'''
self.RES = RES
self.EES = EES
if not self.isfit:
raise ValueError(
'Estimator must be `fit` before running `transform`.')

else:
if not isinstance(self.RES, (list, tuple, np.ndarray, pd.Series)):
raise TypeError('`RES` must be array-like.')
if not isinstance(self.RES, (list, tuple, np.ndarray, pd.Series, pd.DataFrame)):
raise TypeError('`RES` must be array-like.')

if EES is not None and not isinstance(self.EES, (list, tuple, np.ndarray, pd.Series)):
raise TypeError('`EES` must be array-like.')
if EES is not None:
self.EES = np.array(self.EES)
if EES is not None and not isinstance(self.EES, (list, tuple, np.ndarray, pd.Series)):
raise TypeError('`EES` must be array-like.')

self.RES = np.array(self.RES)
if not self.N in self.RES.shape:
raise ValueError('At least one axis of `RES` must be'
' of length `N`.')
if EES is not None and self.N not in self.EES.shape:
# Checking shape of RES
self.RES = np.array(self.RES)
if not self.N in self.RES.shape:
raise ValueError('At least one axis of `RES` must be'
' of length `N`.')

# Checking shape of EES
if EES is not None:
if self.N not in self.EES.shape:
raise ValueError('At least one axis of `EES` must be'
' of length `N`.')
if EES.shape != RES.shape:
raise ValueError('`RES` and `EES` must have the same shape.'
'Got RES: {} and EES: {}'.format(str(RES.shape), str(EES.shape)))
self.EES = np.array(self.EES)

# Subtract the mean from the RES
if center:
self.RES = self.RES - self.RES.mean()
self.spectrogram = np.zeros((self.windows[0].shape[1],
self.eigenvectors.shape[1]))
for window in self.windows:
spectrogram = self._compute_spectrogram(
self.RES, window)
# There's maybe something wrong here
spectrogram = self._activate(spectrogram)
self.spectrogram += spectrogram

""" This can be added later to support multiple signals
for i in range(ncols):
for t in range(self.window_count):
temp = self._compute_spectrogram(
s[:, i], self.eigenvectors, self.window[:, :, t])
if self._activated:
temp = self._activate(
temp)
if self._store_spec_hist:
self.spec_hist[:, :, t, i] = temp
else:
self.spectrogram[:, :, i] += temp"""

# If only one RES, no need to collect
if len(self.RES.shape) == 1:
self.spectrogram = self._compute_multiresolution_spectrogram(self.RES)
else:
# Create a list of spectrograms and concatenate them
spectrograms = []
for i in range(self.RES.shape[1]):
curr_RES = scprep.select.select_cols(self.RES, idx=i)
spectrograms.append(self._compute_multiresolution_spectrogram(curr_RES))
self.spectrogram = np.hstack(spectrograms)

# Appending the EES to the spectrogram
# TODO: is this a bad idea?
if EES is not None:
self.spectrogram = self._concat_EES_to_spectrogram(weight)
if self.EES is not None:
self.combined_spectrogram_ees = self._combine_spectrogram_EES(
spectrogram=self.spectrogram, EES=self.EES)

return self.spectrogram

Expand All @@ -247,7 +272,13 @@ def predict(self, **kwargs):
raise ValueError("Estimator is not transformed. "
"Call VertexFrequencyCluster.transform().")

self.labels_ = self._clusterobj.fit_predict(self.spectrogram)
if self.combined_spectrogram_ees is None:
data = self.spectrogram
else:
data = self.combined_spectrogram_ees
data = decomposition.PCA(self.n_clusters).fit_transform(data)
self.labels_ = self._clusterobj.fit_predict(data)

self.labels_ = utils.sort_clusters_by_meld_score(
self.labels_, self.RES)
return self.labels_
Expand Down
5 changes: 3 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
numpy>=1.10.0
scipy>=0.18.0,!=1.2.0.*
numpy>=1.14.0
scipy>=1.1.0
graphtools>=1.0.0
scprep
pygsp
10 changes: 6 additions & 4 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,14 @@

import os
import sys
from setuptools import setup
from setuptools import setup, find_packages

install_requires = [
'numpy>=1.14.0',
'scipy>=1.1.0',
'graphtools>=0.1.8.1',
'pandas<0.24',
'scprep',
'pygsp'
]

Expand All @@ -16,8 +18,8 @@
'nose2',
'coverage',
'coveralls',
'pandas<0.24',
'scikit-learn'
'scikit-learn',
'packaging'
]

doc_requires = [
Expand All @@ -41,7 +43,7 @@
description='MELD',
author='Daniel Burkhardt, Krishnaswamy Lab, Yale University',
author_email='[email protected]',
packages=['meld', ],
packages=find_packages(),
license='Dual License - See LICENSE file',
install_requires=install_requires,
extras_require={'test': test_requires,
Expand Down
52 changes: 39 additions & 13 deletions test/test_meld.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@

from utils import make_batches

from packaging import version


def test_mnn():
data, labels = make_batches(n_pts_per_cluster=250)
Expand All @@ -34,7 +36,6 @@ def test_check_pygsp_graph():
meld.utils._check_pygsp_graph,
G='hello world')


def test_meld():
# MELD operator
# Numerical accuracy
Expand All @@ -53,12 +54,19 @@ def norm(x):
meld_op = meld.MELD()
B = meld_op.fit_transform(G, RES)

assert np.isclose(np.sum(B), 532.0001992193013)
if version.parse(np.__version__) < version.parse('1.17'):
np.testing.assert_allclose(np.sum(B), 532.0001992193013)
else:
np.testing.assert_allclose(np.sum(B), 519.0001572740623)

meld_op = meld.MELD()
B = meld_op.fit_transform(gt.Graph(
D, knn=20, decay=10, use_pygsp=False), RES)
assert np.isclose(np.sum(B), 532.0001992193013)

if version.parse(np.__version__) < version.parse('1.17'):
np.testing.assert_allclose(np.sum(B), 532.0001992193013)
else:
np.testing.assert_allclose(np.sum(B), 519.0001572740623)

# lap type TypeError
lap_type = 'hello world'
Expand Down Expand Up @@ -121,16 +129,34 @@ def test_cluster(self):
assert sparse_spectrogram.shape == spectrogram.shape
assert sparse.issparse(vfc_op._basewindow)

#def test_2d(self):
# RES = np.array([self.labels, self.labels]).T
# vfc_op = meld.VertexFrequencyCluster(
# window_sizes=self.window_sizes)
# meld_op = meld.MELD()
# EES = meld_op.fit_transform(G=self.G, RES=RES)
# clusters = vfc_op.fit_predict(
# self.G, RES=RES,
# EES=EES)
# assert len(clusters) == len(self.labels)
def test_cluster_no_EES(self):
    """Cluster from the RES alone (``EES=None``) and check the output size.

    Uses the same length check as ``test_2d``: ``fit_predict`` should
    return one cluster label per entry of ``self.labels``.
    """
    vfc_op = meld.VertexFrequencyCluster(
        window_sizes=self.window_sizes)
    clusters = vfc_op.fit_predict(
        self.G, RES=self.labels, EES=None)
    # Without an assertion this test could only fail on an exception;
    # verify the result actually covers every input point.
    assert len(clusters) == len(self.labels)

def test_2d(self):
    """Cluster with a two-column RES and the EES computed from it.

    The same label vector is used for both RES columns; the returned
    clustering must have as many entries as ``self.labels``.
    """
    stacked_RES = np.array([self.labels, self.labels]).T
    clusterer = meld.VertexFrequencyCluster(window_sizes=self.window_sizes)
    smoother = meld.MELD()
    stacked_EES = smoother.fit_transform(G=self.G, RES=stacked_RES)

    assigned = clusterer.fit_predict(
        self.G, RES=stacked_RES, EES=stacked_EES)

    assert len(assigned) == len(self.labels)

def test_RES_EES_shape(self):
    """Mismatched RES/EES shapes must raise ``ValueError``.

    The EES is computed from a two-column RES, but only a single RES
    column is handed to ``fit_predict``; the expected message reports
    both shapes.
    """
    RES_2col = np.array([self.labels, self.labels]).T
    clusterer = meld.VertexFrequencyCluster(window_sizes=self.window_sizes)
    smoother = meld.MELD()
    EES_full = smoother.fit_transform(G=self.G, RES=RES_2col)

    # Drop to one column so RES no longer matches the EES shape.
    RES_1col = RES_2col[:, 1]
    expected_msg = ('`RES` and `EES` must have the same shape.'
                    'Got RES: {} and EES: {}'.format(
                        str(RES_1col.shape), str(EES_full.shape)))

    assert_raise_message(ValueError, expected_msg,
                         clusterer.fit_predict,
                         G=self.G, RES=RES_1col, EES=EES_full)


def test_transform_before_fit(self):
# Transform before fit
Expand Down
1 change: 0 additions & 1 deletion test/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ def ignore_numpy_warning():
message="the matrix subclass is not the recommended way to represent "
"matrices or deal with linear algebra ")


reset_warnings()

def make_batches(n_pts_per_cluster=5000):
Expand Down

0 comments on commit 12ecd0d

Please sign in to comment.