From 6e34c6aa64753226656d90b663be362a08467f6a Mon Sep 17 00:00:00 2001 From: Awni Mousa Date: Fri, 6 Mar 2020 14:29:00 -0500 Subject: [PATCH] PyPI release updates Speed up nearest neighbor computation --- README.md | 18 ++++-------- setup.py | 65 ++++++++++++++++++++++++++---------------- src/harmony/core.py | 40 +++++++++++++++++++------- src/harmony/version.py | 2 +- 4 files changed, 78 insertions(+), 47 deletions(-) diff --git a/README.md b/README.md index 92c2422..c7b8ceb 100644 --- a/README.md +++ b/README.md @@ -6,26 +6,20 @@ Harmony is a unified framework for data visualization, analysis and interpretati #### Installation and dependencies 1. Harmony has been implemented in Python3 and can be installed using: -``` - $> git clone git://github.com/dpeerlab/Harmony.git - $> cd Harmony - $> sudo -H pip3 install . - - $> cd ../ - $> git clone git://github.com/dpeerlab/Palantir.git - $> cd Palantir - $> sudo -H pip3 install . -``` + + $> pip install harmonyTS + $> pip install palantir + 2. Harmony depends on a number of `python3` packages available on pypi and these dependencies are listed in `setup.py` All the dependencies will be automatically installed using the above commands 3. To uninstall: - $> sudo -H pip3 uninstall harmony + $> pip uninstall harmonyTS 4. If you would like to determine gene expression trends, please install R programming language and the R package GAM . You will also need to install the rpy2 module using - $> sudo -H pip3 install rpy2 + $> pip install rpy2 #### Usage diff --git a/setup.py b/setup.py index 6f17d81..dc9cef1 100644 --- a/setup.py +++ b/setup.py @@ -5,35 +5,52 @@ from warnings import warn if sys.version_info.major != 3: - raise RuntimeError('Palantir requires Python 3') + raise RuntimeError("Palantir requires Python 3") if sys.version_info.minor < 6: - warn('Analysis methods were developed using Python 3.6') + warn("Analysis methods were developed using Python 3.6") # get version -with open('src/harmony/version.py') as f: +with open("src/harmony/version.py") as f: exec(f.read()) - +with open("README.md", "r") as fh: + long_description = fh.read() # install GraphDiffusion -if shutil.which('pip3'): - call(['pip3', 'install', 'git+https://github.com/dpeerlab/Palantir.git']) +if shutil.which("pip3"): + call(["pip3", "install", "git+https://github.com/dpeerlab/Palantir.git"]) -setup(name='harmony', - version=__version__, # read in from the exec of version.py; ignore error - description='Harmony is a unified framework for data visualization, analysis and interpretation of scRNA-seq data measured across discrete time points', - url='https://github.com/dpeerlab/harmony', - author='Manu Setty', - author_email='manu.talanki@gmail.com', - package_dir={'': 'src'}, - packages=['harmony'], - install_requires=[ - 'numpy>=1.14.2', - 'pandas>=0.22.0', - 'scipy>=1.0.1', - 'sklearn', - 'fa2', - 'matplotlib>=2.2.2', - 'seaborn>=0.8.1' - ], - ) +setup( + name="harmony_time_series", + version=__version__, # read in from the exec of version.py; ignore error + description=( + "Harmony is a unified framework for data visualization, analysis " + "and interpretation of scRNA-seq data measured across discrete time points" + ), + long_description=long_description, + long_description_content_type="text/markdown", + url="https://github.com/dpeerlab/harmony", + author="Manu Setty", + author_email="manu.talanki@gmail.com", + package_dir={"": "src"}, + packages=["harmony"], + install_requires=[ + "numpy>=1.14.2", + "pandas>=0.22.0", + "scipy>=1.0.1", + "sklearn", + "fa2", + "matplotlib>=2.2.2", + "seaborn>=0.8.1", + ], + classifiers=[ + "Programming Language :: Python :: 3", + "License :: OSI Approved :: MIT License", + "Operating System :: OS Independent", + "Operating System :: POSIX :: Linux", + "Development Status :: 5 - Production/Stable", + "Topic :: Scientific/Engineering :: Bio-Informatics", + "Topic :: Scientific/Engineering :: Visualization", + ], + python_requires='>=3.6', +) diff --git a/src/harmony/core.py b/src/harmony/core.py index ecda2c9..22dcc6b 100644 --- a/src/harmony/core.py +++ b/src/harmony/core.py @@ -1,5 +1,6 @@ import pandas as pd import numpy as np +import scanpy as sc from scipy.sparse import find, csr_matrix from sklearn.neighbors import NearestNeighbors @@ -41,16 +42,35 @@ def augmented_affinity_matrix(data_df, timepoints, timepoint_connections, # Nearest neighbor graph construction and affinity matrix print('Nearest neighbor computation...') - nbrs = NearestNeighbors(n_neighbors=n_neighbors, - metric='euclidean', n_jobs=-2) - nbrs.fit(pca_projections.values) - dists, _ = nbrs.kneighbors(pca_projections.values) - adj = nbrs.kneighbors_graph(pca_projections.values, mode='distance') - # Scaling factors for affinity matrix construction - ka = np.int(n_neighbors / 3) - scaling_factors = pd.Series(dists[:, ka], index=cell_order) + + # -------------------------------------------------------------------------- + # nbrs = NearestNeighbors(n_neighbors=n_neighbors, + # metric='euclidean', n_jobs=-2) + # nbrs.fit(pca_projections.values) + # dists, _ = nbrs.kneighbors(pca_projections.values) + # adj = nbrs.kneighbors_graph(pca_projections.values, mode='distance') + # # Scaling factors for affinity matrix construction + # ka = np.int(n_neighbors / 3) + # scaling_factors = pd.Series(dists[:, ka], index=cell_order) + # # Affinity matrix + # nn_aff = _convert_to_affinity(adj, scaling_factors, True) + # -------------------------------------------------------------------------- + + temp = sc.AnnData(data_df.values) + sc.pp.neighbors(temp, n_pcs=0, n_neighbors=n_neighbors) + kNN = temp.uns['neighbors']['distances'] + + # Adaptive k + adaptive_k = int(np.floor(n_neighbors / 3)) + scaling_factors = np.zeros(data_df.shape[0]) + + for i in np.arange(len(scaling_factors)): + scaling_factors[i] = np.sort(kNN.data[kNN.indptr[i]:kNN.indptr[i + 1]])[adaptive_k - 1] + + scaling_factors = pd.Series(scaling_factors, index=cell_order) + # Affinity matrix - nn_aff = _convert_to_affinity(adj, scaling_factors, True) + nn_aff = _convert_to_affinity(kNN, scaling_factors, True) # Mututally nearest neighbor affinity matrix # Initilze mnn affinity matrix @@ -96,7 +116,7 @@ def _convert_to_affinity(adj, scaling_factors, with_self_loops=False): """ N = adj.shape[0] rows, cols, dists = find(adj) - dists = dists ** 2/ (scaling_factors.values[rows] ** 2) + dists = dists ** 2/(scaling_factors.values[rows] ** 2) # Self loops if with_self_loops: diff --git a/src/harmony/version.py b/src/harmony/version.py index 485f44a..b3f4756 100644 --- a/src/harmony/version.py +++ b/src/harmony/version.py @@ -1 +1 @@ -__version__ = "0.1.1" +__version__ = "0.1.2"