diff --git a/README.md b/README.md
index 733d233..a356a4a 100644
--- a/README.md
+++ b/README.md
@@ -75,6 +75,8 @@ Dependencies
 
 ### Mandatory
 
+The latest versions of the following packages are required:
+
 - [numpy](http://www.numpy.org/)
 - [scipy](http://www.scipy.org/)
 
@@ -120,9 +122,18 @@ You may instead want to use the development version from Github, by running
 Development
 -----------
 
-https://github.com/msmexplorer/msmexplorer
+All development happens here, on
+[Github](https://github.com/msmexplorer/msmexplorer).
+
+If you're interested in contributing to MSMExplorer, please refer to our
+[Contributing](http://msmbuilder.org/msmexplorer/development/contributing.html)
+guide.
+
+Support
+-------
 
-Please [submit](https://github.com/msmexplorer/msmexplorer/issues/new) any bugs you encounter to the Github issue tracker.
+Please [submit](https://github.com/msmexplorer/msmexplorer/issues/new) any bugs
+or questions to the Github issue tracker.
 
 License
 -------
diff --git a/devtools/conda-recipe/meta.yaml b/devtools/conda-recipe/meta.yaml
index 09c2a2b..94407e9 100644
--- a/devtools/conda-recipe/meta.yaml
+++ b/devtools/conda-recipe/meta.yaml
@@ -13,8 +13,6 @@ requirements:
   build:
     - python
     - setuptools
-    - cython
-    - numpy
 
   run:
     - python
diff --git a/docs/changelog.rst b/docs/changelog.rst
index 78619db..f73a7a6 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -4,12 +4,8 @@ Changelog
 =========
 
-v0.4.0 (Development)
---------------------
-
-API Changes
-~~~~~~~~~~~
-
+v1.0.0 (March 30, 2017)
+-----------------------
 
 New Features
 ~~~~~~~~~~~~
@@ -20,8 +16,9 @@ New Features
 Improvements
 ~~~~~~~~~~~~
 
-- The ``shade`` option now works for ``plot_free_energy`` in the 1D case.
-
+- The ``shade`` option now works for ``plot_free_energy`` in the 1D case (#76).
+- Fixed an issue where adding cluster centers would break the visualization
+  if ``ndim`` > 2 (#70).
 
 v0.3.0 (October 24, 2016)
 -------------------------
diff --git a/docs/installing.rst b/docs/installing.rst
index 0ce95ba..835d1df 100644
--- a/docs/installing.rst
+++ b/docs/installing.rst
@@ -38,6 +38,8 @@ Dependencies
 
 Mandatory dependencies
 ^^^^^^^^^^^^^^^^^^^^^^
 
+The latest versions of the following packages are required:
+
 - `numpy <http://www.numpy.org/>`__
 - `scipy <http://www.scipy.org/>`__
 
diff --git a/examples/plot_chord.py b/examples/plot_chord.py
index 74a41ce..00a9d44 100644
--- a/examples/plot_chord.py
+++ b/examples/plot_chord.py
@@ -2,20 +2,29 @@
 Chord Diagram
 =============
 """
+from msmbuilder.example_datasets import FsPeptide
+
 import numpy as np
+import mdtraj as md
 
 import msmexplorer as msme
 from msmexplorer.utils import make_colormap
 
+# Load Fs Peptide Data
+trajs = FsPeptide().get().trajectories
+
+# Compute Hydrogen Bonding Residue Pairs
+baker_hubbard = md.baker_hubbard(trajs[0])
+top = trajs[0].topology
+pairs = [(top.atom(di).residue.index, top.atom(ai).residue.index)
+         for di, _, ai in baker_hubbard]
 
-# Create a random square matrix
-rs = np.random.RandomState(42)
-n, p = 12, 12
-d = rs.normal(0, 2, (n, p))
-d += np.log(np.arange(1, p + 1)) * -5 + 10
+# Create Hydrogen Bonding Network
+hbonds = np.zeros((top.n_residues, top.n_residues))
+hbonds[list(zip(*pairs))] = 1.
-# Make a colormap
+# Make a Colormap
 cmap = make_colormap(['rawdenim', 'lightgray', 'pomegranate'])
 
 # Plot Chord Diagram
-msme.plot_chord(d, cmap=cmap, threshold=.2)
+msme.plot_chord(hbonds, cmap=cmap)
diff --git a/examples/plot_free_energy_1d.py b/examples/plot_free_energy_1d.py
index d7f8c53..48daa4e 100644
--- a/examples/plot_free_energy_1d.py
+++ b/examples/plot_free_energy_1d.py
@@ -2,6 +2,7 @@
 Free Energy Plot (Univariate)
 =============================
 """
+from msmbuilder.example_datasets import FsPeptide
 from msmbuilder.featurizer import DihedralFeaturizer
 from msmbuilder.decomposition import tICA
 from msmbuilder.cluster import MiniBatchKMeans
@@ -10,7 +11,6 @@
 import numpy as np
 
 import msmexplorer as msme
-from msmexplorer.example_datasets import FsPeptide
 
 rs = np.random.RandomState(42)
diff --git a/examples/plot_free_energy_2d.py b/examples/plot_free_energy_2d.py
index a7f93fb..282bccc 100644
--- a/examples/plot_free_energy_2d.py
+++ b/examples/plot_free_energy_2d.py
@@ -2,6 +2,7 @@
 Free Energy Plot (Bivariate)
 ============================
 """
+from msmbuilder.example_datasets import FsPeptide
 from msmbuilder.featurizer import DihedralFeaturizer
 from msmbuilder.decomposition import tICA
 from msmbuilder.cluster import MiniBatchKMeans
@@ -10,7 +11,6 @@
 import numpy as np
 
 import msmexplorer as msme
-from msmexplorer.example_datasets import FsPeptide
 
 rs = np.random.RandomState(42)
diff --git a/examples/plot_histogram.py b/examples/plot_histogram.py
index f82fcb7..2879eea 100644
--- a/examples/plot_histogram.py
+++ b/examples/plot_histogram.py
@@ -2,13 +2,14 @@
 Histogram Plot
 ==============
 """
+from msmbuilder.example_datasets import FsPeptide
 from msmbuilder.featurizer import DihedralFeaturizer
 from msmbuilder.decomposition import tICA
 
 import numpy as np
 
 import msmexplorer as msme
-from msmexplorer.example_datasets import FsPeptide
+
 
 # Load Fs Peptide Data
 trajs = FsPeptide().get().trajectories
diff --git a/examples/plot_msm_network.py b/examples/plot_msm_network.py
index 72eacf0..07a5948 100644
--- a/examples/plot_msm_network.py
+++ b/examples/plot_msm_network.py
@@ -2,6 +2,7 @@
 MSM Network Plot
 ================
 """
+from msmbuilder.example_datasets import FsPeptide
 from msmbuilder.featurizer import DihedralFeaturizer
 from msmbuilder.decomposition import tICA
 from msmbuilder.cluster import MiniBatchKMeans
@@ -10,7 +11,6 @@
 import numpy as np
 
 import msmexplorer as msme
-from msmexplorer.example_datasets import FsPeptide
 
 rs = np.random.RandomState(42)
diff --git a/examples/plot_pop_resids.py b/examples/plot_pop_resids.py
index 8fc5e24..0052e0f 100644
--- a/examples/plot_pop_resids.py
+++ b/examples/plot_pop_resids.py
@@ -2,6 +2,7 @@
 Population Residuals Plot
 =========================
 """
+from msmbuilder.example_datasets import FsPeptide
 from msmbuilder.featurizer import DihedralFeaturizer
 from msmbuilder.decomposition import tICA
 from msmbuilder.cluster import MiniBatchKMeans
@@ -10,7 +11,6 @@
 import numpy as np
 
 import msmexplorer as msme
-from msmexplorer.example_datasets import FsPeptide
 
 rs = np.random.RandomState(42)
diff --git a/examples/plot_timescales.py b/examples/plot_timescales.py
index 0559fcf..dd03951 100644
--- a/examples/plot_timescales.py
+++ b/examples/plot_timescales.py
@@ -2,6 +2,7 @@
 Timescales Plot
 ===============
 """
+from msmbuilder.example_datasets import FsPeptide
 from msmbuilder.featurizer import DihedralFeaturizer
 from msmbuilder.decomposition import tICA
 from msmbuilder.cluster import MiniBatchKMeans
@@ -10,7 +11,6 @@
 import numpy as np
 
 import msmexplorer
as msme -from msmexplorer.example_datasets import FsPeptide rs = np.random.RandomState(42) diff --git a/examples/plot_tpaths.py b/examples/plot_tpaths.py index 8c3ac3f..8fe6f44 100644 --- a/examples/plot_tpaths.py +++ b/examples/plot_tpaths.py @@ -2,6 +2,7 @@ Transition Pathway Plot ======================= """ +from msmbuilder.example_datasets import FsPeptide from msmbuilder.featurizer import DihedralFeaturizer from msmbuilder.decomposition import tICA from msmbuilder.cluster import MiniBatchKMeans @@ -10,7 +11,6 @@ import numpy as np import msmexplorer as msme -from msmexplorer.example_datasets import FsPeptide rs = np.random.RandomState(42) diff --git a/examples/plot_trace.py b/examples/plot_trace.py index bfa7219..2639e99 100644 --- a/examples/plot_trace.py +++ b/examples/plot_trace.py @@ -2,10 +2,10 @@ Trace Plot ========== """ +from msmbuilder.example_datasets import FsPeptide from msmbuilder.featurizer import RMSDFeaturizer import msmexplorer as msme -from msmexplorer.example_datasets import FsPeptide # Load Fs Peptide Data traj = FsPeptide().get().trajectories[0] diff --git a/msmexplorer/example_datasets/.gitignore b/msmexplorer/example_datasets/.gitignore deleted file mode 100644 index 14dfc67..0000000 --- a/msmexplorer/example_datasets/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -_muller.c -*.so diff --git a/msmexplorer/example_datasets/__init__.py b/msmexplorer/example_datasets/__init__.py deleted file mode 100644 index 8790b10..0000000 --- a/msmexplorer/example_datasets/__init__.py +++ /dev/null @@ -1,29 +0,0 @@ -from .base import get_data_home, clear_data_home, has_msmb_data -from .brownian1d import DoubleWell, QuadWell -from .brownian1d import load_doublewell, load_quadwell -from .brownian1d import doublewell_eigs, quadwell_eigs -from .alanine_dipeptide import fetch_alanine_dipeptide, AlanineDipeptide -from .met_enkephalin import fetch_met_enkephalin, MetEnkephalin -from .fs_peptide import fetch_fs_peptide, FsPeptide, MinimalFsPeptide -from .muller import MullerPotential, load_muller - -__all__ = [ - 'get_data_home', - 'clear_data_home', - 'has_msmb_data', - 'load_doublewell', - 'load_quadwell', - 'doublewell_eigs', - 'quadwell_eigs', - 'fetch_alanine_dipeptide', - 'fetch_met_enkephalin', - 'fetch_fs_peptide', - 'AlanineDipeptide', - 'MetEnkephalin', - 'FsPeptide', - 'MinimalFsPeptide', - 'DoubleWell', - 'QuadWell', - 'Muller', - 'load_muller', -] diff --git a/msmexplorer/example_datasets/_muller.pyx b/msmexplorer/example_datasets/_muller.pyx deleted file mode 100644 index b6f01d4..0000000 --- a/msmexplorer/example_datasets/_muller.pyx +++ /dev/null @@ -1,97 +0,0 @@ -import numpy as np -from sklearn.utils import check_random_state -from libc.math cimport exp, sqrt -from numpy cimport npy_intp - -cdef double *MULLER_aa = [-1, -1, -6.5, 0.7] -cdef double *MULLER_bb = [0, 0, 11, 0.6] -cdef double *MULLER_cc = [-10, -10, -6.5, 0.7] -cdef double *MULLER_AA = [-200, -100, -170, 15] -cdef double *MULLER_XX = [1, 0, -0.5, -1] -cdef double *MULLER_YY = [0, 0.5, 1.5, 1] - - -def propagate(int n_steps=5000, x0=None, int thin=1, - double kT=1.5e4, double dt=0.1, double D=0.010, - random_state=None, double min_x=-np.inf, double max_x=np.inf, - double min_y=-np.inf, double max_y=np.inf): - random = check_random_state(random_state) - if x0 is None: - x0 = (-0.5, 0.5) - cdef int i, j - cdef int save_index = 0 - cdef double DT_SQRT_2D = dt * sqrt(2 * D) - cdef double beta = 1.0 / kT - cdef double[:, ::1] r = random.randn(n_steps, 2) - cdef double[:, ::1] saved_x = np.zeros(((n_steps)/thin, 2)) - 
cdef double x = x0[0] - cdef double y = x0[1] - cdef double[2] grad - - with nogil: - for i in range(n_steps): - _muller_grad(x, y, beta, &grad[0]) - - # Brownian update - x = x - dt * grad[0] + DT_SQRT_2D * r[i, 0] - y = y - dt * grad[1] + DT_SQRT_2D * r[i, 1] - - if x > max_x: - x = 2 * max_x - x - if x < min_x: - x = 2 * min_x - x - if y > max_y: - y = 2 * max_y - y - if y < min_y: - y = 2 * min_y - y - - if i % thin == 0: - saved_x[save_index, 0] = x - saved_x[save_index, 1] = y - save_index += 1 - - return np.asarray(saved_x) - - -def muller_potential(x, y, beta=1): - """Muller potential. - - This function can take vectors (x,y) as input and will broadcast - """ - wrapper = lambda x, y, beta: _muller_potential(x, y, beta) - return np.vectorize(wrapper)(x, y, beta) - - -cdef double _muller_potential(double x, double y, double beta=1) nogil: - cdef npy_intp j - cdef double value = 0 - - for j in range(4): - value += MULLER_AA[j] * exp( - MULLER_aa[j] * (x - MULLER_XX[j])**2 - + MULLER_bb[j] * (x - MULLER_XX[j]) * (y - MULLER_YY[j]) - + MULLER_cc[j] * (y - MULLER_YY[j])**2) - return beta * value - - -cdef void _muller_grad(double x, double y, double beta, double grad[2]) nogil: - cdef int j - cdef double value = 0 - cdef double term - grad[0] = 0 - grad[1] = 0 - - for j in range(4): - # this is the potential term - term = MULLER_AA[j] * exp( - MULLER_aa[j] * (x - MULLER_XX[j])**2 - + MULLER_bb[j] * (x - MULLER_XX[j]) * (y - MULLER_YY[j]) - + MULLER_cc[j] * (y - MULLER_YY[j])**2) - - grad[0] += (2 * MULLER_aa[j] * (x - MULLER_XX[j]) - + MULLER_bb[j] * (y - MULLER_YY[j])) * term - grad[1] += (MULLER_bb[j] * (x - MULLER_XX[j]) - + 2 * MULLER_cc[j] * (y - MULLER_YY[j])) * term - - grad[0] *= beta - grad[1] *= beta diff --git a/msmexplorer/example_datasets/alanine_dipeptide.py b/msmexplorer/example_datasets/alanine_dipeptide.py deleted file mode 100644 index ab64349..0000000 --- a/msmexplorer/example_datasets/alanine_dipeptide.py +++ /dev/null @@ -1,59 +0,0 @@ -# Author: Robert McGibbon -# Contributors: Matthew Harrigan -# Copyright (c) 2016, Stanford University and the Authors -# All rights reserved. - -# ----------------------------------------------------------------------------- -# Imports -# ----------------------------------------------------------------------------- -from glob import glob -from os.path import join - -import mdtraj as md - -from .base import Bunch, _MDDataset - -DATA_URL = "https://ndownloader.figshare.com/articles/1026131/versions/8" -TARGET_DIRECTORY = "alanine_dipeptide" - - -class AlanineDipeptide(_MDDataset): - """Alanine dipeptide dataset - - Parameters - ---------- - data_home : optional, default: None - Specify another download and cache folder for the datasets. By default - all MSMBuilder data is stored in '~/msmbuilder_data' subfolders. - - - Notes - ----- - The dataset consists of ten 10ns trajectories of of alanine dipeptide, - simulated using OpenMM 6.0.1 (CUDA platform, NVIDIA GTX660) with the - AMBER99SB-ILDN force field at 300K (langevin dynamics, friction coefficient - of 91/ps, timestep of 2fs) with GBSA implicit solvent. The coordinates are - saved every 1ps. Each trajectory contains 9,999 snapshots. 
- - The dataset, including the script used to generate the dataset - is available on figshare at - - http://dx.doi.org/10.6084/m9.figshare.1026131 - """ - target_directory = TARGET_DIRECTORY - data_url = DATA_URL - - def get_cached(self): - top = md.load(join(self.data_dir, 'ala2.pdb')) - trajectories = [] - for fn in glob(join(self.data_dir, 'trajectory*.dcd')): - trajectories.append(md.load(fn, top=top)) - - return Bunch(trajectories=trajectories, DESCR=self.description()) - - -def fetch_alanine_dipeptide(data_home=None): - return AlanineDipeptide(data_home).get() - - -fetch_alanine_dipeptide.__doc__ = AlanineDipeptide.__doc__ diff --git a/msmexplorer/example_datasets/base.py b/msmexplorer/example_datasets/base.py deleted file mode 100644 index 1789332..0000000 --- a/msmexplorer/example_datasets/base.py +++ /dev/null @@ -1,267 +0,0 @@ -import numbers -import shutil -import sys -import time -from functools import wraps -from io import BytesIO -from os import environ -from os import makedirs -from os.path import exists -from os.path import expanduser -from os.path import join -from zipfile import ZipFile - -import numpy as np -from six.moves.urllib.error import HTTPError -from six.moves.urllib.request import urlopen -from sklearn.utils import check_random_state - - -def retry(max_retries=1): - """ Retry a function `max_retries` times. """ - - def retry_func(func): - @wraps(func) - def wrapper(*args, **kwargs): - num_retries = 0 - while num_retries <= max_retries: - try: - ret = func(*args, **kwargs) - break - except HTTPError: - if num_retries == max_retries: - raise - num_retries += 1 - time.sleep(5) - return ret - - return wrapper - - return retry_func - - -class Dataset(object): - @classmethod - def description(cls): - """Get a description from the Notes section of the docstring.""" - lines = [s.strip() for s in cls.__doc__.splitlines()] - note_i = lines.index("Notes") - return "\n".join(lines[note_i + 2:]) - - def cache(self): - raise NotImplementedError - - def get(self): - raise NotImplementedError - - def get_cached(self): - raise NotImplementedError - - -class _MDDataset(Dataset): - target_directory = "" # set in subclass - data_url = "" # set in subclass - - def __init__(self, data_home=None, verbose=True): - self.data_home = get_data_home(data_home) - self.data_dir = join(self.data_home, self.target_directory) - self.cached = False - self.verbose = verbose - - def _msmbdata_cache(self): - if self.verbose: - print("Copying {} from msmb_data package to {}" - .format(self.target_directory, self.data_home)) - msmb_data = has_msmb_data() - assert msmb_data is not None - shutil.copytree("{}/{}".format(msmb_data, self.target_directory), - self.data_dir) - - @retry(3) - def _figshare_cache(self): - if self.verbose: - print('downloading {} from {} to {}' - .format(self.target_directory, self.data_url, - self.data_home)) - fhandle = urlopen(self.data_url) - buf = BytesIO(fhandle.read()) - zip_file = ZipFile(buf) - makedirs(self.data_dir) - for name in zip_file.namelist(): - zip_file.extract(name, path=self.data_dir) - - @retry(3) - def cache(self): - if not exists(self.data_home): - makedirs(self.data_home) - - if not exists(self.data_dir): - if has_msmb_data() is not None: - self._msmbdata_cache() - else: - self._figshare_cache() - elif self.verbose: - print("{} already is cached".format(self.target_directory)) - - self.cached = True - - def get_cached(self): - raise NotImplementedError - - def get(self): - if not self.cached: - self.cache() - return self.get_cached() - - -class 
_NWell(Dataset): - """Base class for brownian dynamics on a potential - - Parameters - ---------- - data_home : optional, default: None - Specify another cache folder for the datasets. By default - all MSMBuilder data is stored in '~/msmbuilder_data' subfolders. - random_state : {int, None}, default: None - Seed the psuedorandom number generator to generate trajectories. If - seed is None, the global numpy PRNG is used. If random_state is an - int, the simulations will be cached in ``data_home``, or loaded from - ``data_home`` if simulations with that seed have been performed already. - With random_state=None, new simulations will be performed and the - trajectories will not be cached. - """ - - target_name = "" # define in subclass - n_trajectories = 0 # define in subclass - version = 1 # override in subclass if parameters are updated - - def __init__(self, data_home=None, random_state=None): - self.data_home = get_data_home(data_home) - self.data_dir = join(self.data_home, self.target_name) - self.random_state = random_state - self.cache_path = self._get_cache_path(random_state) - - def _get_cache_path(self, random_state): - path = "{}/version-{}/randomstate-{}".format(self.data_dir, - self.version, - self.random_state) - return path - - def _load(self, path): - return [np.load("{}/{}.npy".format(path, i)) - for i in range(self.n_trajectories)] - - def _save(self, path, trajectories): - assert len(trajectories) == self.n_trajectories - if not exists(path): - makedirs(path) - for i, traj in enumerate(trajectories): - np.save("{}/{}.npy".format(path, i), traj) - - def cache(self): - random = check_random_state(self.random_state) - if not exists(self.data_dir): - makedirs(self.data_dir) - - if self.random_state is None: - trajectories = self.simulate_func(random) - return trajectories - - if not isinstance(self.random_state, numbers.Integral): - raise TypeError('random_state must be an int') - if exists(self.cache_path): - return self._load(self.cache_path) - - trajectories = self.simulate_func(random) - self._save(self.cache_path, trajectories) - return trajectories - - def get_cached(self): - if self.cache_path is None: - raise ValueError("You must specify a random state to get " - "cached trajectories.") - trajectories = self._load(self.cache_path) - return Bunch(trajectories=trajectories, DESCR=self.description()) - - def get(self): - trajectories = self.cache() - return Bunch(trajectories=trajectories, DESCR=self.description()) - - def simulate_func(self, random): - # Implement in subclass - raise NotImplementedError - - def potential(self, x): - # Implement in subclass - raise NotImplementedError - - -class Bunch(dict): - """Container object for datasets: dictionary-like object that - exposes its keys as attributes.""" - - def __init__(self, **kwargs): - dict.__init__(self, kwargs) - self.__dict__ = self - - -def has_msmb_data(): - """We provide a conda package containing the saved data. - - This package was introduced because the figshare downloads could - be 'iffy' at times. - - Returns - ------- - path : str or None - The path (if it exists). otherwise None - """ - msmb_data_dir = join(sys.prefix, 'share', 'msmb_data') - if exists(msmb_data_dir): - return msmb_data_dir - else: - return None - - -def _expand_and_makedir(data_home): - data_home = expanduser(data_home) - if not exists(data_home): - makedirs(data_home) - return data_home - - -def get_data_home(data_home=None): - """Return the path of the msmbuilder data dir. 
- - As of msmbuilder v3.6, this function will prefer data downloaded via - the msmb_data conda package (and located within the python installation - directory). If this package exists, we will use its data directory as - the data home. Otherwise, we use the old logic: - - This folder is used by some large dataset loaders to avoid - downloading the data several times. - - By default the data dir is set to a folder named 'msmbuilder_data' - in the user's home folder. - - Alternatively, it can be set by the 'MSMBUILDER_DATA' environment - variable or programmatically by giving an explicit folder path. The - '~' symbol is expanded to the user home folder. - - If the folder does not already exist, it is automatically created. - """ - if data_home is not None: - return _expand_and_makedir(data_home) - - msmb_data = has_msmb_data() - if msmb_data is not None: - return _expand_and_makedir(msmb_data) - - data_home = environ.get('MSMBUILDER_DATA', join('~', 'msmbuilder_data')) - return _expand_and_makedir(data_home) - - -def clear_data_home(data_home=None): - """Delete all the content of the data home cache.""" - data_home = get_data_home(data_home) - shutil.rmtree(data_home) diff --git a/msmexplorer/example_datasets/brownian1d.py b/msmexplorer/example_datasets/brownian1d.py deleted file mode 100644 index 2320ed7..0000000 --- a/msmexplorer/example_datasets/brownian1d.py +++ /dev/null @@ -1,257 +0,0 @@ -"""Very simple datasets of brownian dynamics in one dimension.""" -# Author: Robert McGibbon -# Contributors: -# Copyright (c) 2014, Stanford University -# All rights reserved. - -# ----------------------------------------------------------------------------- -# Imports -# ----------------------------------------------------------------------------- -from __future__ import absolute_import - -import time - -import numpy as np - -from .base import _NWell -from msmbuilder.msm import _solve_msm_eigensystem - -# ----------------------------------------------------------------------------- -# Globals -# ----------------------------------------------------------------------------- - -# DO NOT CHANGE THESE CONSTANTS WITHOUT UPDATING THE -# "DOUBLEWELL_DESCRIPTION" VARIABLE -DIFFUSION_CONST = 1e3 -DT = 1e-3 -DT_SQRT_2D = DT * np.sqrt(2 * DIFFUSION_CONST) - -__all__ = ['load_doublewell', 'load_quadwell', - 'doublewell_eigs', 'quadwell_eigs'] - - -# ----------------------------------------------------------------------------- -# User functions -# ----------------------------------------------------------------------------- - - - -class DoubleWell(_NWell): - r"""Brownian dynamics on a 1D double well potential - - Parameters - ---------- - data_home : optional, default: None - Specify another cache folder for the datasets. By default - all MSMBuilder data is stored in '~/msmbuilder_data' subfolders. - random_state : {int, None}, default: None - Seed the psuedorandom number generator to generate trajectories. If - seed is None, the global numpy PRNG is used. If random_state is an - int, the simulations will be cached in ``data_home``, or loaded from - ``data_home`` if simulations with that seed have been performed already. - With random_state=None, new simulations will be performed and the - trajectories will not be cached. - - Notes - ----- - This dataset consists of 10 trajectories simulated with Brownian dynamics on - the reduced potential function - - V(x) = 1 + cos(2x) - - with reflecting boundary conditions at x=-pi and x=pi. 
The simulations - are governed by the stochastic differential equation - - dx_t/dt = -\nabla V(x) + \sqrt{2D} * R(t), - - where R(t) is a standard normal white-noise process, and D=1e3. The - timsetep is 1e-3. Each trajectory is 10^5 steps long, and starts at - x_0 = 0. - """ - target_name = "doublewell" - n_trajectories = 10 - - def simulate_func(self, random): - return _simulate_doublewell(random) - - def potential(self, x): - return 1 + np.cos(2 * x) - - -def load_doublewell(data_home=None, random_state=None): - return DoubleWell(data_home, random_state).get() - - -load_doublewell.__doc__ = DoubleWell.__doc__ - - -class QuadWell(_NWell): - r"""Brownian dynamics on a 1D four well potential - - Parameters - ---------- - data_home : optional, default: None - Specify another cache folder for the datasets. By default - all MSMBuilder data is stored in '~/msmbuilder_data' subfolders. - random_state : {int, None}, default: None - Seed the psuedorandom number generator to generate trajectories. If - seed is None, the global numpy PRNG is used. If random_state is an - int, the simulations will be cached in ``data_home``, or loaded from - ``data_home`` if simulations with that seed have been performed already. - With random_state=None, new simulations will be performed and the - trajectories will not be cached. - - Notes - ----- - This dataset consists of 100 trajectories simulated with Brownian dynamics - on the reduced potential function - - V = 4(x^8 + 0.8 exp(-80 x^2) + 0.2 exp(-80 (x-0.5)^2) + 0.5 exp(-40 (x+0.5)^2)). - - The simulations are governed by the stochastic differential equation - - dx_t/dt = -\nabla V(x) + \sqrt{2D} * R(t), - - where R(t) is a standard normal white-noise process, and D=1e3. The timsetep - is 1e-3. Each trajectory is 10^3 steps long, and starts from a random - initial point sampled from the uniform distribution on [-1, 1]. 
- """ - - target_name = "quadwell" - n_trajectories = 100 - - def simulate_func(self, random): - return _simulate_quadwell(random) - - def potential(self, x): - return 4 * (x ** 8 + 0.8 * np.exp(-80 * x ** 2) + 0.2 * np.exp( - -80 * (x - 0.5) ** 2) + - 0.5 * np.exp(-40 * (x + 0.5) ** 2)) - - -def load_quadwell(data_home=None, random_state=None): - return QuadWell(data_home, random_state).get() - - -load_quadwell.__doc__ = QuadWell.__doc__ - - -def doublewell_eigs(n_grid, lag_time=1): - """Analytic eigenvalues/eigenvectors for the doublwell system - - TODO: DOCUMENT ME - """ - return _brownian_eigs(n_grid, lag_time, DOUBLEWELL_GRAD_POTENTIAL, - -np.pi, np.pi, reflect_bc=True) - - -def quadwell_eigs(n_grid, lag_time=1): - """Analytic eigenvalues/eigenvectors for the quadwell system - - TODO: DOCUMENT ME - """ - return _brownian_eigs(n_grid, lag_time, QUADWELL_GRAD_POTENTIAL, - -1.2, 1.2, reflect_bc=False) - - -# ----------------------------------------------------------------------------- -# Internal functions -# ----------------------------------------------------------------------------- - -DOUBLEWELL_GRAD_POTENTIAL = lambda x: -2 * np.sin(2 * x) -QUADWELL_GRAD_POTENTIAL = lambda x: 4 * ( - 8 * x ** 7 - 128 * x * np.exp(-80 * x ** 2) - \ - 32 * (x - 0.5) * np.exp(-80 * (x - 0.5) ** 2) - 40 * (x + 0.5) * np.exp( - -40 * (x + 0.5) ** 2)) - - -def _simulate_doublewell(random): - # DO NOT CHANGE THESE CONSTANTS WITHOUT UPDATING THE - # "DOUBLEWELL_DESCRIPTION" VARIABLE AND UPDATING THE VERSION NUMBER - # in the load_doublewell FUNCTION - x0 = 0 - n_steps = 1e5 - n_trajectories = 10 - - trajectories = [_propagate1d( - x0, n_steps, DOUBLEWELL_GRAD_POTENTIAL, random, bc_min=-np.pi, - bc_max=np.pi, verbose=True).reshape(-1, 1) - for i in range(n_trajectories)] - return trajectories - - -def _simulate_quadwell(random): - # DO NOT CHANGE THESE CONSTANTS WITHOUT UPDATING THE - # "QUADWELL_DESCRIPTION" VARIABLE AND UPDATING THE VERSION NUMBER - # in the load_quadwell FUNCTION - n_steps = 1e3 - n_trajectories = 100 - x0 = random.uniform(-1, 1, size=n_trajectories) - - trajectories = [_propagate1d( - x0[i], n_steps, QUADWELL_GRAD_POTENTIAL, - random=random, verbose=False).reshape(-1, 1) - for i in range(n_trajectories)] - return trajectories - - -def _reflect_boundary_conditions(x, min, max): - if x > max: - return 2 * max - x - if x < min: - return 2 * min - x - return x - - -def _propagate1d(x0, n_steps, grad_potential, random, bc_min=None, bc_max=None, - verbose=True): - start = time.time() - n_steps = int(n_steps) - - if bc_min is None and bc_max is None: - bc = lambda x: x - else: - bc = lambda x: _reflect_boundary_conditions(x, bc_min, bc_max) - - rand = random.randn(n_steps) - x = np.zeros(n_steps + 1) - x[0] = x0 - for i in range(n_steps): - x_i_plus_1 = x[i] - DT * grad_potential(x[i]) + DT_SQRT_2D * rand[i] - x[i + 1] = bc(x_i_plus_1) - - if verbose: - print('%d steps/s' % (n_steps / (time.time() - start))) - return x - - -def _brownian_eigs(n_grid, lag_time, grad_potential, xmin, xmax, reflect_bc): - """Analytic eigenvalues/eigenvectors for 1D Brownian dynamics - """ - - ONE_OVER_SQRT_2PI = 1.0 / (np.sqrt(2 * np.pi)) - normalpdf = lambda x: ONE_OVER_SQRT_2PI * np.exp(-0.5 * (x * x)) - - grid = np.linspace(xmin, xmax, n_grid) - width = grid[1] - grid[0] - transmat = np.zeros((n_grid, n_grid)) - for i, x_i in enumerate(grid): - if reflect_bc: - for offset in range(-(n_grid - 1), n_grid): - x_j = x_i + (offset * width) - j = _reflect_boundary_conditions(i + offset, 0, n_grid - 1) - - # What is 
the probability of going from x_i to x_j in one step? - diff = (x_j - x_i + DT * grad_potential(x_i)) / DT_SQRT_2D - transmat[i, j] += normalpdf(diff) - else: - for j, x_j in enumerate(grid): - # What is the probability of going from x_i to x_j in one step? - diff = (x_j - x_i + DT * grad_potential(x_i)) / DT_SQRT_2D - transmat[i, j] += normalpdf(diff) - - transmat[i, :] = transmat[i, :] / np.sum(transmat[i, :]) - - transmat = np.linalg.matrix_power(transmat, lag_time) - u, lv, rv = _solve_msm_eigensystem(transmat, k=len(transmat) - 1) - return u, rv diff --git a/msmexplorer/example_datasets/fs_peptide.py b/msmexplorer/example_datasets/fs_peptide.py deleted file mode 100644 index 3777bb9..0000000 --- a/msmexplorer/example_datasets/fs_peptide.py +++ /dev/null @@ -1,101 +0,0 @@ -# Author: Robert McGibbon -# Contributors: Matthew Harrigan -# Copyright (c) 2016, Stanford University and the Authors -# All rights reserved. - -# ----------------------------------------------------------------------------- -# Imports -# ----------------------------------------------------------------------------- -from glob import glob -from os.path import basename -from os.path import join - -import mdtraj as md - -from .base import Bunch, _MDDataset - -DATA_URL = "https://ndownloader.figshare.com/articles/1030363/versions/1" -TARGET_DIRECTORY = "fs_peptide" - - -class FsPeptide(_MDDataset): - """Fs peptide (implicit solvent) dataset - - Parameters - ---------- - data_home : optional, default: None - Specify another download and cache folder for the datasets. By default - all MSMBuilder data is stored in '~/msmbuilder_data' subfolders. - - Notes - ----- - This dataset consists of 28 molecular dynamics trajectories of Fs peptide - (Ace-A_5(AAARA)_3A-NME), a widely studied model system for protein folding. - Each trajectory is 500 ns in length, and saved at a 50 ps time interval (14 - us aggegrate sampling). The simulations were performed using the AMBER99SB-ILDN - force field with GBSA-OBC implicit solvent at 300K, starting from randomly - sampled conformations from an initial 400K unfolding simulation. The - simulations were performed with OpenMM 6.0.1. - - The dataset, including the script used to generate the dataset - is available on figshare at - - http://dx.doi.org/10.6084/m9.figshare.1030363 - """ - - target_directory = TARGET_DIRECTORY - data_url = DATA_URL - - def get_cached(self): - try: - top = md.load(join(self.data_dir, 'fs-peptide.pdb')) - except IOError: - top = md.load(join(self.data_dir, 'fs_peptide.pdb')) - trajectories = [] - for fn in sorted(glob(join(self.data_dir, 'trajectory*.xtc'))): - if self.verbose: - print('loading %s...' % basename(fn)) - trajectories.append(md.load(fn, top=top)) - - return Bunch(trajectories=trajectories, DESCR=self.description()) - - -class MinimalFsPeptide(_MDDataset): - """Minimal Fs peptide (implicit solvent) dataset - - Notes - ----- - This dataset consists of 28 molecular dynamics trajectories of Fs peptide - (Ace-A_5(AAARA)_3A-NME), a widely studied model system for protein folding. - Each trajectory is 500 ns in length, and saved at a 10 ns time interval (14 - us aggegrate sampling). The simulations were performed using the AMBER99SB-ILDN - force field with GBSA-OBC implicit solvent at 300K, starting from randomly - sampled conformations from an initial 400K unfolding simulation. The - simulations were performed with OpenMM 6.0.1. 
- - The dataset is a subsampling of the FsPeptide dataset described on - figshare at - - http://dx.doi.org/10.6084/m9.figshare.1030363 - """ - - target_directory = TARGET_DIRECTORY - data_url = DATA_URL - - def get_cached(self): - try: - top = md.load(join(self.data_dir, 'fs-peptide.pdb')) - except IOError: - top = md.load(join(self.data_dir, 'fs_peptide.pdb')) - trajectories = [ - md.load(fn, top=top, stride=200) - for fn in sorted(glob("{}/trajectory*.xtc".format(self.data_dir))) - ] - return Bunch(trajectories=trajectories, DESCR=self.description()) - - -def fetch_fs_peptide(data_home=None): - return FsPeptide(data_home).get() - - -fetch_fs_peptide.__doc__ = FsPeptide.__doc__ diff --git a/msmexplorer/example_datasets/met_enkephalin.py b/msmexplorer/example_datasets/met_enkephalin.py deleted file mode 100644 index 426f884..0000000 --- a/msmexplorer/example_datasets/met_enkephalin.py +++ /dev/null @@ -1,69 +0,0 @@ -# Author: Robert McGibbon -# Contributors: -# Copyright (c) 2014, Stanford University and the Authors -# All rights reserved. - -# ----------------------------------------------------------------------------- -# Imports -# ----------------------------------------------------------------------------- -from glob import glob -from os.path import join - -import mdtraj as md - -from .base import Bunch, _MDDataset - -DATA_URL = "https://ndownloader.figshare.com/articles/1026324/versions/1" -TARGET_DIRECTORY = "met_enkephalin" - - -class MetEnkephalin(_MDDataset): - """Loader for the met-enkephalin dataset - - Parameters - ---------- - data_home : optional, default: None - Specify another download and cache folder for the datasets. By default - all MSMBuilder data is stored in '~/msmbuilder_data' subfolders. - - download_if_missing: optional, True by default - If False, raise a IOError if the data is not locally available - instead of trying to download the data from the source site. - - Notes - ----- - The dataset consists of ten ~50 ns molecular dynamics (MD) simulation - trajectories of the 5 residue Met-enkaphalin peptide. The aggregate - sampling is 499.58 ns. Simulations were performed starting from the 1st - model in the 1PLX PDB file, solvated with 832 TIP3P water molecules using - OpenMM 6.0. The coordinates (protein only -- the water was stripped) - are saved every 5 picoseconds. Each of the ten trajectories is roughly - 50 ns long and contains about 10,000 snapshots. - - Forcefield: amber99sb-ildn; water: tip3p; nonbonded method: PME; cutoffs: - 1nm; bonds to hydrogen were constrained; integrator: langevin dynamics; - temperature: 300K; friction coefficient: 1.0/ps; pressure control: Monte - Carlo barostat (interval of 25 steps); timestep 2 fs. 
- - The dataset is available on figshare at - - http://dx.doi.org/10.6084/m9.figshare.1026324 - """ - - data_url = DATA_URL - target_directory = TARGET_DIRECTORY - - def get_cached(self): - top = md.load(join(self.data_dir, '1plx.pdb')) - trajectories = [] - for fn in glob(join(self.data_dir, 'trajectory*.dcd')): - trajectories.append(md.load(fn, top=top)) - - return Bunch(trajectories=trajectories, DESCR=self.description()) - - -def fetch_met_enkephalin(data_home=None): - return MetEnkephalin(data_home).get() - - -fetch_met_enkephalin.__doc__ = MetEnkephalin.__doc__ diff --git a/msmexplorer/example_datasets/muller.py b/msmexplorer/example_datasets/muller.py deleted file mode 100644 index 5492ff4..0000000 --- a/msmexplorer/example_datasets/muller.py +++ /dev/null @@ -1,118 +0,0 @@ -from multiprocessing import cpu_count -from multiprocessing.pool import ThreadPool - -import numpy as np - -from .base import _NWell -from ._muller import propagate, muller_potential - -__all__ = ['load_muller', 'MullerPotential'] - - -############################################################################### -# Constants -############################################################################### - -# DO NOT CHANGE THESE CONSTANTS WITHOUT UPDATING VERSION ATTRIBUTE -# AND THE DOCSTRING -MULLER_PARAMETERS = dict( - MIN_X=-1.5, MIN_Y=-0.2, - MAX_X=1.2, MAX_Y=2, - N_TRAJECTORIES=10, - N_STEPS=1000000, - THIN=100, - KT=1.5e4, - DT=0.1, - DIFFUSION_CONST=1e-2, - VERSION=1) - - -############################################################################### -# Code -############################################################################### - -class MullerPotential(_NWell): - """Browian dynamics on Muller potential dataset - - Parameters - ---------- - data_home : optional, default: None - Specify another cache folder for the datasets. By default - all MSMBuilder data is stored in '~/msmbuilder_data' subfolders. - random_state : {int, None}, default: None - Seed the psuedorandom number generator to generate trajectories. If - seed is None, the global numpy PRNG is used. If random_state is an - int, the simulations will be cached in ``data_home``, or loaded from - ``data_home`` if simulations with that seed have been performed already. - With random_state=None, new simulations will be performed and the - trajectories will not be cached. - - Notes - ----- - This dataset consists of 10 trajectories simulated with Brownian dynamics - on the Muller potential, a two-dimensional, three-well potential energy - surface. The potential is defined in [1]. The dynamics are governed by the - stochastic differential equation:: - - dx_t/dt = -\nabla V(x)/(kT) + \sqrt{2D} * R(t) - - where R(t) is a standard normal white-noise process, and D=1e-2. The - dynamics are discretized with an euler integrator with timsetep dt=0.1, - and kT=1.5e4 Each trajectory is simulated for 1000000 steps, and - coordinates are are saved every 100 steps. The starting points for the - trajectories are sampled from the uniform distribution over the rectangular - box between x=(-1.5, 1.2) and y=(-0.2, 2.0). - - References - ---------- - .. [1] Muller, Klaus, and Leo D. Brown. "Location of saddle points and minimum - energypaths by a constrained simplex optimization procedure." Theoretica - chimica acta 53.1 (1979): 75-93. 
-    """
-
-    target_name = "muller"
-    version = MULLER_PARAMETERS['VERSION']
-    n_trajectories = MULLER_PARAMETERS['N_TRAJECTORIES']
-
-    def simulate_func(self, random):
-        M = MULLER_PARAMETERS
-        x0s = random.uniform(
-            low=[M['MIN_X'], M['MIN_Y']],
-            high=[M['MAX_X'], M['MAX_Y']],
-            size=(M['N_TRAJECTORIES'], 2))
-
-        # propagate releases the GIL, so we can use a thread pool and
-        # get a nice speedup
-        tp = ThreadPool(cpu_count())
-        return tp.map(lambda x0:
-                      propagate(
-                          n_steps=M['N_STEPS'], x0=x0, thin=M['THIN'], kT=M['KT'],
-                          dt=M['DT'], D=M['DIFFUSION_CONST'], random_state=random,
-                          min_x=M['MIN_X'], max_x=M['MAX_X'], min_y=M['MIN_Y'],
-                          max_y=M['MAX_Y']), x0s)
-
-    def potential(self, x, y):
-        return muller_potential(x, y)
-
-    def plot(self, minx=-1.5, maxx=1.2, miny=-0.2, maxy=2, **kwargs):
-        """Helper function to plot the Muller potential
-        """
-        import matplotlib.pyplot as pp
-        grid_width = max(maxx-minx, maxy-miny) / 200.0
-
-        ax = kwargs.pop('ax', None)
-
-        xx, yy = np.mgrid[minx:maxx:grid_width, miny:maxy:grid_width]
-        V = self.potential(xx, yy)
-        # clip off any values greater than 200, since they mess up
-        # the color scheme
-        if ax is None:
-            ax = pp
-
-        ax.contourf(xx, yy, V.clip(max=200), 40, **kwargs)
-
-
-def load_muller(data_home=None, random_state=None):
-    return MullerPotential(data_home, random_state).get()
-
-load_muller.__doc__ = MullerPotential.__doc__
diff --git a/notebooks/Fs-Peptide-Example.ipynb b/notebooks/Fs-Peptide-Example.ipynb
index 817c10f..eacb88a 100644
--- a/notebooks/Fs-Peptide-Example.ipynb
+++ b/notebooks/Fs-Peptide-Example.ipynb
@@ -19,6 +19,7 @@
     "source": [
      "%matplotlib inline\n",
      "\n",
+     "from msmbuilder.example_datasets import FsPeptide\n",
      "from msmbuilder.featurizer import DihedralFeaturizer\n",
      "from msmbuilder.decomposition import tICA\n",
      "from msmbuilder.preprocessing import RobustScaler\n",
@@ -28,7 +29,7 @@
      "import numpy as np\n",
      "\n",
      "import msmexplorer as msme\n",
-     "from msmexplorer.example_datasets import FsPeptide\n",
+     "\n",
      "rs = np.random.RandomState(42)"
     ]
diff --git a/setup.py b/setup.py
index 0cf7687..c423806 100644
--- a/setup.py
+++ b/setup.py
@@ -6,9 +6,7 @@
 import sys
 import subprocess
 
-import numpy as np
-from os.path import join as pjoin
-from setuptools import setup, Extension, find_packages
+from setuptools import setup, find_packages
 from distutils.spawn import find_executable
 
 try:
@@ -18,20 +16,9 @@ finally:
     sys.dont_write_bytecode = False
 
-try:
-    import Cython
-    from Cython.Distutils import build_ext
-
-    if Cython.__version__ < '0.18':
-        raise ImportError()
-except ImportError:
-    print(
-        'Cython version 0.18 or later is required. Try "conda install cython"')
-    sys.exit(1)
-
 NAME = "msmexplorer"
-VERSION = "0.4.0.dev0"
-ISRELEASED = False
+VERSION = "1.0.0"
+ISRELEASED = True
 __version__ = VERSION
 
@@ -49,13 +36,6 @@ def readme_to_rst():
 }
 
 
-extensions = []
-extensions.append(
-    Extension('msmexplorer.example_datasets._muller',
-              sources=[pjoin('msmexplorer', 'example_datasets', '_muller.pyx')],
-              include_dirs=[np.get_include()]))
-
-
 def main(**kwargs):
     write_version_py(VERSION, ISRELEASED, 'msmexplorer/version.py')
@@ -92,8 +72,6 @@ def main(**kwargs):
                       'requirements.txt'],
         },
         zip_safe=False,
-        ext_modules=extensions,
-        cmdclass={'build_ext': build_ext},
         **kwargs
     )
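Migration note (not part of the patch above): with the bundled msmexplorer.example_datasets package removed, example data is now loaded through MSMBuilder. The sketch below illustrates the intended update for downstream scripts, assuming msmexplorer 1.0.0 and an msmbuilder installation that provides example_datasets; it only reuses names that already appear in the updated examples (FsPeptide, RMSDFeaturizer, plot_trace), so treat it as an illustration rather than new API.

# Hypothetical downstream script, mirroring examples/plot_trace.py after this patch.
# Old import (removed in 1.0.0):
#     from msmexplorer.example_datasets import FsPeptide
# New import:
from msmbuilder.example_datasets import FsPeptide
from msmbuilder.featurizer import RMSDFeaturizer

import msmexplorer as msme

# Load Fs Peptide data through msmbuilder, exactly as the updated examples do
traj = FsPeptide().get().trajectories[0]

# Featurize with RMSD to the first frame and hand the 1-D series to MSMExplorer
rmsd = RMSDFeaturizer(traj[0]).partial_transform(traj).flatten()
msme.plot_trace(rmsd)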