diff --git a/README.md b/README.md
index 733d233..a356a4a 100644
--- a/README.md
+++ b/README.md
@@ -75,6 +75,8 @@ Dependencies
### Mandatory
+The latest versions of the following packages are required:
+
- [numpy](http://www.numpy.org/)
- [scipy](http://www.scipy.org/)
@@ -120,9 +122,18 @@ You may instead want to use the development version from Github, by running
Development
-----------
-https://github.com/msmexplorer/msmexplorer
+All development happens here, on
+[Github](https://github.com/msmexplorer/msmexplorer).
+
+If you're interested in contributing to MSMExplorer, please refer to our
+[Contributing](http://msmbuilder.org/msmexplorer/development/contributing.html)
+guide.
+
+Support
+-------
-Please [submit](https://github.com/msmexplorer/msmexplorer/issues/new) any bugs you encounter to the Github issue tracker.
+Please [submit](https://github.com/msmexplorer/msmexplorer/issues/new) any bugs
+or questions to the Github issue tracker.
License
-------
diff --git a/devtools/conda-recipe/meta.yaml b/devtools/conda-recipe/meta.yaml
index 09c2a2b..94407e9 100644
--- a/devtools/conda-recipe/meta.yaml
+++ b/devtools/conda-recipe/meta.yaml
@@ -13,8 +13,6 @@ requirements:
build:
- python
- setuptools
- - cython
- - numpy
run:
- python
diff --git a/docs/changelog.rst b/docs/changelog.rst
index 78619db..f73a7a6 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -4,12 +4,8 @@ Changelog
=========
-v0.4.0 (Development)
---------------------
-
-API Changes
-~~~~~~~~~~~
-
+v1.0.0 (March 30, 2017)
+-----------------------
New Features
~~~~~~~~~~~~
@@ -20,8 +16,9 @@ New Features
Improvements
~~~~~~~~~~~~
-- The ``shade`` option now works for ``plot_free_energy`` in the 1D case.
-
+- The ``shade`` option now works for ``plot_free_energy`` in the 1D case (#76).
+- Fixed an issue where adding cluster centers would break the visualization
+  if ``ndim`` > 2 (#70).
v0.3.0 (October 24, 2016)
-------------------------
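A minimal usage sketch of the 1-D ``shade`` behavior referenced in the v1.0.0 entry above (#76); the ``obs`` keyword for selecting a single observable column and the array layout are assumptions about the ``plot_free_energy`` API, not taken from this diff:

import numpy as np
import msmexplorer as msme

rs = np.random.RandomState(42)
# A single synthetic observable column, purely for illustration.
data = rs.normal(size=(5000, 1))

# shade=True fills under the 1-D free-energy curve; obs=0 (first column)
# is an assumed keyword, so check the plot_free_energy docstring.
ax = msme.plot_free_energy(data, obs=0, shade=True)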
diff --git a/docs/installing.rst b/docs/installing.rst
index 0ce95ba..835d1df 100644
--- a/docs/installing.rst
+++ b/docs/installing.rst
@@ -38,6 +38,8 @@ Dependencies
Mandatory dependencies
^^^^^^^^^^^^^^^^^^^^^^
+The latest versions of the following packages are required:
+
- `numpy <http://www.numpy.org/>`__
- `scipy <http://www.scipy.org/>`__
diff --git a/examples/plot_chord.py b/examples/plot_chord.py
index 74a41ce..00a9d44 100644
--- a/examples/plot_chord.py
+++ b/examples/plot_chord.py
@@ -2,20 +2,29 @@
Chord Diagram
=============
"""
+from msmbuilder.example_datasets import FsPeptide
+
import numpy as np
+import mdtraj as md
import msmexplorer as msme
from msmexplorer.utils import make_colormap
+# Load Fs Peptide Data
+trajs = FsPeptide().get().trajectories
+
+# Compute Hydrogen Bonding Residue Pairs
+baker_hubbard = md.baker_hubbard(trajs[0])
+top = trajs[0].topology
+pairs = [(top.atom(di).residue.index, top.atom(ai).residue.index)
+ for di, _, ai in baker_hubbard]
-# Create a random square matrix
-rs = np.random.RandomState(42)
-n, p = 12, 12
-d = rs.normal(0, 2, (n, p))
-d += np.log(np.arange(1, p + 1)) * -5 + 10
+# Create Hydrogen Bonding Network
+hbonds = np.zeros((top.n_residues, top.n_residues))
+hbonds[list(zip(*pairs))] = 1.
-# Make a colormap
+# Make a Colormap
cmap = make_colormap(['rawdenim', 'lightgray', 'pomegranate'])
# Plot Chord Diagram
-msme.plot_chord(d, cmap=cmap, threshold=.2)
+msme.plot_chord(hbonds, cmap=cmap)
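The fancy-indexed assignment in the new example (``hbonds[list(zip(*pairs))] = 1.``) relies on NumPy treating the list of two tuples as a pair of row/column index arrays. A self-contained sketch of the same construction with made-up residue pairs, for readers unfamiliar with that idiom:

import numpy as np

n_residues = 5                       # hypothetical system size
pairs = [(0, 3), (1, 4), (2, 0)]     # hypothetical (donor, acceptor) residue pairs

hbonds = np.zeros((n_residues, n_residues))
rows, cols = zip(*pairs)             # rows = (0, 1, 2), cols = (3, 4, 0)
hbonds[list(rows), list(cols)] = 1.  # explicit form of the one-liner in the example

# Equivalent element-by-element loop, for comparison:
check = np.zeros_like(hbonds)
for donor, acceptor in pairs:
    check[donor, acceptor] = 1.
assert np.array_equal(hbonds, check)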
diff --git a/examples/plot_free_energy_1d.py b/examples/plot_free_energy_1d.py
index d7f8c53..48daa4e 100644
--- a/examples/plot_free_energy_1d.py
+++ b/examples/plot_free_energy_1d.py
@@ -2,6 +2,7 @@
Free Energy Plot (Univariate)
=============================
"""
+from msmbuilder.example_datasets import FsPeptide
from msmbuilder.featurizer import DihedralFeaturizer
from msmbuilder.decomposition import tICA
from msmbuilder.cluster import MiniBatchKMeans
@@ -10,7 +11,6 @@
import numpy as np
import msmexplorer as msme
-from msmexplorer.example_datasets import FsPeptide
rs = np.random.RandomState(42)
diff --git a/examples/plot_free_energy_2d.py b/examples/plot_free_energy_2d.py
index a7f93fb..282bccc 100644
--- a/examples/plot_free_energy_2d.py
+++ b/examples/plot_free_energy_2d.py
@@ -2,6 +2,7 @@
Free Energy Plot (Bivariate)
============================
"""
+from msmbuilder.example_datasets import FsPeptide
from msmbuilder.featurizer import DihedralFeaturizer
from msmbuilder.decomposition import tICA
from msmbuilder.cluster import MiniBatchKMeans
@@ -10,7 +11,6 @@
import numpy as np
import msmexplorer as msme
-from msmexplorer.example_datasets import FsPeptide
rs = np.random.RandomState(42)
diff --git a/examples/plot_histogram.py b/examples/plot_histogram.py
index f82fcb7..2879eea 100644
--- a/examples/plot_histogram.py
+++ b/examples/plot_histogram.py
@@ -2,13 +2,14 @@
Histogram Plot
==============
"""
+from msmbuilder.example_datasets import FsPeptide
from msmbuilder.featurizer import DihedralFeaturizer
from msmbuilder.decomposition import tICA
import numpy as np
import msmexplorer as msme
-from msmexplorer.example_datasets import FsPeptide
+
# Load Fs Peptide Data
trajs = FsPeptide().get().trajectories
diff --git a/examples/plot_msm_network.py b/examples/plot_msm_network.py
index 72eacf0..07a5948 100644
--- a/examples/plot_msm_network.py
+++ b/examples/plot_msm_network.py
@@ -2,6 +2,7 @@
MSM Network Plot
================
"""
+from msmbuilder.example_datasets import FsPeptide
from msmbuilder.featurizer import DihedralFeaturizer
from msmbuilder.decomposition import tICA
from msmbuilder.cluster import MiniBatchKMeans
@@ -10,7 +11,6 @@
import numpy as np
import msmexplorer as msme
-from msmexplorer.example_datasets import FsPeptide
rs = np.random.RandomState(42)
diff --git a/examples/plot_pop_resids.py b/examples/plot_pop_resids.py
index 8fc5e24..0052e0f 100644
--- a/examples/plot_pop_resids.py
+++ b/examples/plot_pop_resids.py
@@ -2,6 +2,7 @@
Population Residuals Plot
=========================
"""
+from msmbuilder.example_datasets import FsPeptide
from msmbuilder.featurizer import DihedralFeaturizer
from msmbuilder.decomposition import tICA
from msmbuilder.cluster import MiniBatchKMeans
@@ -10,7 +11,6 @@
import numpy as np
import msmexplorer as msme
-from msmexplorer.example_datasets import FsPeptide
rs = np.random.RandomState(42)
diff --git a/examples/plot_timescales.py b/examples/plot_timescales.py
index 0559fcf..dd03951 100644
--- a/examples/plot_timescales.py
+++ b/examples/plot_timescales.py
@@ -2,6 +2,7 @@
Timescales Plot
===============
"""
+from msmbuilder.example_datasets import FsPeptide
from msmbuilder.featurizer import DihedralFeaturizer
from msmbuilder.decomposition import tICA
from msmbuilder.cluster import MiniBatchKMeans
@@ -10,7 +11,6 @@
import numpy as np
import msmexplorer as msme
-from msmexplorer.example_datasets import FsPeptide
rs = np.random.RandomState(42)
diff --git a/examples/plot_tpaths.py b/examples/plot_tpaths.py
index 8c3ac3f..8fe6f44 100644
--- a/examples/plot_tpaths.py
+++ b/examples/plot_tpaths.py
@@ -2,6 +2,7 @@
Transition Pathway Plot
=======================
"""
+from msmbuilder.example_datasets import FsPeptide
from msmbuilder.featurizer import DihedralFeaturizer
from msmbuilder.decomposition import tICA
from msmbuilder.cluster import MiniBatchKMeans
@@ -10,7 +11,6 @@
import numpy as np
import msmexplorer as msme
-from msmexplorer.example_datasets import FsPeptide
rs = np.random.RandomState(42)
diff --git a/examples/plot_trace.py b/examples/plot_trace.py
index bfa7219..2639e99 100644
--- a/examples/plot_trace.py
+++ b/examples/plot_trace.py
@@ -2,10 +2,10 @@
Trace Plot
==========
"""
+from msmbuilder.example_datasets import FsPeptide
from msmbuilder.featurizer import RMSDFeaturizer
import msmexplorer as msme
-from msmexplorer.example_datasets import FsPeptide
# Load Fs Peptide Data
traj = FsPeptide().get().trajectories[0]
diff --git a/msmexplorer/example_datasets/.gitignore b/msmexplorer/example_datasets/.gitignore
deleted file mode 100644
index 14dfc67..0000000
--- a/msmexplorer/example_datasets/.gitignore
+++ /dev/null
@@ -1,2 +0,0 @@
-_muller.c
-*.so
diff --git a/msmexplorer/example_datasets/__init__.py b/msmexplorer/example_datasets/__init__.py
deleted file mode 100644
index 8790b10..0000000
--- a/msmexplorer/example_datasets/__init__.py
+++ /dev/null
@@ -1,29 +0,0 @@
-from .base import get_data_home, clear_data_home, has_msmb_data
-from .brownian1d import DoubleWell, QuadWell
-from .brownian1d import load_doublewell, load_quadwell
-from .brownian1d import doublewell_eigs, quadwell_eigs
-from .alanine_dipeptide import fetch_alanine_dipeptide, AlanineDipeptide
-from .met_enkephalin import fetch_met_enkephalin, MetEnkephalin
-from .fs_peptide import fetch_fs_peptide, FsPeptide, MinimalFsPeptide
-from .muller import MullerPotential, load_muller
-
-__all__ = [
- 'get_data_home',
- 'clear_data_home',
- 'has_msmb_data',
- 'load_doublewell',
- 'load_quadwell',
- 'doublewell_eigs',
- 'quadwell_eigs',
- 'fetch_alanine_dipeptide',
- 'fetch_met_enkephalin',
- 'fetch_fs_peptide',
- 'AlanineDipeptide',
- 'MetEnkephalin',
- 'FsPeptide',
- 'MinimalFsPeptide',
- 'DoubleWell',
- 'QuadWell',
- 'Muller',
- 'load_muller',
-]
diff --git a/msmexplorer/example_datasets/_muller.pyx b/msmexplorer/example_datasets/_muller.pyx
deleted file mode 100644
index b6f01d4..0000000
--- a/msmexplorer/example_datasets/_muller.pyx
+++ /dev/null
@@ -1,97 +0,0 @@
-import numpy as np
-from sklearn.utils import check_random_state
-from libc.math cimport exp, sqrt
-from numpy cimport npy_intp
-
-cdef double *MULLER_aa = [-1, -1, -6.5, 0.7]
-cdef double *MULLER_bb = [0, 0, 11, 0.6]
-cdef double *MULLER_cc = [-10, -10, -6.5, 0.7]
-cdef double *MULLER_AA = [-200, -100, -170, 15]
-cdef double *MULLER_XX = [1, 0, -0.5, -1]
-cdef double *MULLER_YY = [0, 0.5, 1.5, 1]
-
-
-def propagate(int n_steps=5000, x0=None, int thin=1,
- double kT=1.5e4, double dt=0.1, double D=0.010,
- random_state=None, double min_x=-np.inf, double max_x=np.inf,
- double min_y=-np.inf, double max_y=np.inf):
- random = check_random_state(random_state)
- if x0 is None:
- x0 = (-0.5, 0.5)
- cdef int i, j
- cdef int save_index = 0
- cdef double DT_SQRT_2D = dt * sqrt(2 * D)
- cdef double beta = 1.0 / kT
- cdef double[:, ::1] r = random.randn(n_steps, 2)
- cdef double[:, ::1] saved_x = np.zeros(((n_steps)/thin, 2))
- cdef double x = x0[0]
- cdef double y = x0[1]
- cdef double[2] grad
-
- with nogil:
- for i in range(n_steps):
- _muller_grad(x, y, beta, &grad[0])
-
- # Brownian update
- x = x - dt * grad[0] + DT_SQRT_2D * r[i, 0]
- y = y - dt * grad[1] + DT_SQRT_2D * r[i, 1]
-
- if x > max_x:
- x = 2 * max_x - x
- if x < min_x:
- x = 2 * min_x - x
- if y > max_y:
- y = 2 * max_y - y
- if y < min_y:
- y = 2 * min_y - y
-
- if i % thin == 0:
- saved_x[save_index, 0] = x
- saved_x[save_index, 1] = y
- save_index += 1
-
- return np.asarray(saved_x)
-
-
-def muller_potential(x, y, beta=1):
- """Muller potential.
-
- This function can take vectors (x,y) as input and will broadcast
- """
- wrapper = lambda x, y, beta: _muller_potential(x, y, beta)
- return np.vectorize(wrapper)(x, y, beta)
-
-
-cdef double _muller_potential(double x, double y, double beta=1) nogil:
- cdef npy_intp j
- cdef double value = 0
-
- for j in range(4):
- value += MULLER_AA[j] * exp(
- MULLER_aa[j] * (x - MULLER_XX[j])**2
- + MULLER_bb[j] * (x - MULLER_XX[j]) * (y - MULLER_YY[j])
- + MULLER_cc[j] * (y - MULLER_YY[j])**2)
- return beta * value
-
-
-cdef void _muller_grad(double x, double y, double beta, double grad[2]) nogil:
- cdef int j
- cdef double value = 0
- cdef double term
- grad[0] = 0
- grad[1] = 0
-
- for j in range(4):
- # this is the potential term
- term = MULLER_AA[j] * exp(
- MULLER_aa[j] * (x - MULLER_XX[j])**2
- + MULLER_bb[j] * (x - MULLER_XX[j]) * (y - MULLER_YY[j])
- + MULLER_cc[j] * (y - MULLER_YY[j])**2)
-
- grad[0] += (2 * MULLER_aa[j] * (x - MULLER_XX[j])
- + MULLER_bb[j] * (y - MULLER_YY[j])) * term
- grad[1] += (MULLER_bb[j] * (x - MULLER_XX[j])
- + 2 * MULLER_cc[j] * (y - MULLER_YY[j])) * term
-
- grad[0] *= beta
- grad[1] *= beta
diff --git a/msmexplorer/example_datasets/alanine_dipeptide.py b/msmexplorer/example_datasets/alanine_dipeptide.py
deleted file mode 100644
index ab64349..0000000
--- a/msmexplorer/example_datasets/alanine_dipeptide.py
+++ /dev/null
@@ -1,59 +0,0 @@
-# Author: Robert McGibbon
-# Contributors: Matthew Harrigan
-# Copyright (c) 2016, Stanford University and the Authors
-# All rights reserved.
-
-# -----------------------------------------------------------------------------
-# Imports
-# -----------------------------------------------------------------------------
-from glob import glob
-from os.path import join
-
-import mdtraj as md
-
-from .base import Bunch, _MDDataset
-
-DATA_URL = "https://ndownloader.figshare.com/articles/1026131/versions/8"
-TARGET_DIRECTORY = "alanine_dipeptide"
-
-
-class AlanineDipeptide(_MDDataset):
- """Alanine dipeptide dataset
-
- Parameters
- ----------
- data_home : optional, default: None
- Specify another download and cache folder for the datasets. By default
- all MSMBuilder data is stored in '~/msmbuilder_data' subfolders.
-
-
- Notes
- -----
- The dataset consists of ten 10ns trajectories of of alanine dipeptide,
- simulated using OpenMM 6.0.1 (CUDA platform, NVIDIA GTX660) with the
- AMBER99SB-ILDN force field at 300K (langevin dynamics, friction coefficient
- of 91/ps, timestep of 2fs) with GBSA implicit solvent. The coordinates are
- saved every 1ps. Each trajectory contains 9,999 snapshots.
-
- The dataset, including the script used to generate the dataset
- is available on figshare at
-
- http://dx.doi.org/10.6084/m9.figshare.1026131
- """
- target_directory = TARGET_DIRECTORY
- data_url = DATA_URL
-
- def get_cached(self):
- top = md.load(join(self.data_dir, 'ala2.pdb'))
- trajectories = []
- for fn in glob(join(self.data_dir, 'trajectory*.dcd')):
- trajectories.append(md.load(fn, top=top))
-
- return Bunch(trajectories=trajectories, DESCR=self.description())
-
-
-def fetch_alanine_dipeptide(data_home=None):
- return AlanineDipeptide(data_home).get()
-
-
-fetch_alanine_dipeptide.__doc__ = AlanineDipeptide.__doc__
diff --git a/msmexplorer/example_datasets/base.py b/msmexplorer/example_datasets/base.py
deleted file mode 100644
index 1789332..0000000
--- a/msmexplorer/example_datasets/base.py
+++ /dev/null
@@ -1,267 +0,0 @@
-import numbers
-import shutil
-import sys
-import time
-from functools import wraps
-from io import BytesIO
-from os import environ
-from os import makedirs
-from os.path import exists
-from os.path import expanduser
-from os.path import join
-from zipfile import ZipFile
-
-import numpy as np
-from six.moves.urllib.error import HTTPError
-from six.moves.urllib.request import urlopen
-from sklearn.utils import check_random_state
-
-
-def retry(max_retries=1):
- """ Retry a function `max_retries` times. """
-
- def retry_func(func):
- @wraps(func)
- def wrapper(*args, **kwargs):
- num_retries = 0
- while num_retries <= max_retries:
- try:
- ret = func(*args, **kwargs)
- break
- except HTTPError:
- if num_retries == max_retries:
- raise
- num_retries += 1
- time.sleep(5)
- return ret
-
- return wrapper
-
- return retry_func
-
-
-class Dataset(object):
- @classmethod
- def description(cls):
- """Get a description from the Notes section of the docstring."""
- lines = [s.strip() for s in cls.__doc__.splitlines()]
- note_i = lines.index("Notes")
- return "\n".join(lines[note_i + 2:])
-
- def cache(self):
- raise NotImplementedError
-
- def get(self):
- raise NotImplementedError
-
- def get_cached(self):
- raise NotImplementedError
-
-
-class _MDDataset(Dataset):
- target_directory = "" # set in subclass
- data_url = "" # set in subclass
-
- def __init__(self, data_home=None, verbose=True):
- self.data_home = get_data_home(data_home)
- self.data_dir = join(self.data_home, self.target_directory)
- self.cached = False
- self.verbose = verbose
-
- def _msmbdata_cache(self):
- if self.verbose:
- print("Copying {} from msmb_data package to {}"
- .format(self.target_directory, self.data_home))
- msmb_data = has_msmb_data()
- assert msmb_data is not None
- shutil.copytree("{}/{}".format(msmb_data, self.target_directory),
- self.data_dir)
-
- @retry(3)
- def _figshare_cache(self):
- if self.verbose:
- print('downloading {} from {} to {}'
- .format(self.target_directory, self.data_url,
- self.data_home))
- fhandle = urlopen(self.data_url)
- buf = BytesIO(fhandle.read())
- zip_file = ZipFile(buf)
- makedirs(self.data_dir)
- for name in zip_file.namelist():
- zip_file.extract(name, path=self.data_dir)
-
- @retry(3)
- def cache(self):
- if not exists(self.data_home):
- makedirs(self.data_home)
-
- if not exists(self.data_dir):
- if has_msmb_data() is not None:
- self._msmbdata_cache()
- else:
- self._figshare_cache()
- elif self.verbose:
- print("{} already is cached".format(self.target_directory))
-
- self.cached = True
-
- def get_cached(self):
- raise NotImplementedError
-
- def get(self):
- if not self.cached:
- self.cache()
- return self.get_cached()
-
-
-class _NWell(Dataset):
- """Base class for brownian dynamics on a potential
-
- Parameters
- ----------
- data_home : optional, default: None
- Specify another cache folder for the datasets. By default
- all MSMBuilder data is stored in '~/msmbuilder_data' subfolders.
- random_state : {int, None}, default: None
- Seed the psuedorandom number generator to generate trajectories. If
- seed is None, the global numpy PRNG is used. If random_state is an
- int, the simulations will be cached in ``data_home``, or loaded from
- ``data_home`` if simulations with that seed have been performed already.
- With random_state=None, new simulations will be performed and the
- trajectories will not be cached.
- """
-
- target_name = "" # define in subclass
- n_trajectories = 0 # define in subclass
- version = 1 # override in subclass if parameters are updated
-
- def __init__(self, data_home=None, random_state=None):
- self.data_home = get_data_home(data_home)
- self.data_dir = join(self.data_home, self.target_name)
- self.random_state = random_state
- self.cache_path = self._get_cache_path(random_state)
-
- def _get_cache_path(self, random_state):
- path = "{}/version-{}/randomstate-{}".format(self.data_dir,
- self.version,
- self.random_state)
- return path
-
- def _load(self, path):
- return [np.load("{}/{}.npy".format(path, i))
- for i in range(self.n_trajectories)]
-
- def _save(self, path, trajectories):
- assert len(trajectories) == self.n_trajectories
- if not exists(path):
- makedirs(path)
- for i, traj in enumerate(trajectories):
- np.save("{}/{}.npy".format(path, i), traj)
-
- def cache(self):
- random = check_random_state(self.random_state)
- if not exists(self.data_dir):
- makedirs(self.data_dir)
-
- if self.random_state is None:
- trajectories = self.simulate_func(random)
- return trajectories
-
- if not isinstance(self.random_state, numbers.Integral):
- raise TypeError('random_state must be an int')
- if exists(self.cache_path):
- return self._load(self.cache_path)
-
- trajectories = self.simulate_func(random)
- self._save(self.cache_path, trajectories)
- return trajectories
-
- def get_cached(self):
- if self.cache_path is None:
- raise ValueError("You must specify a random state to get "
- "cached trajectories.")
- trajectories = self._load(self.cache_path)
- return Bunch(trajectories=trajectories, DESCR=self.description())
-
- def get(self):
- trajectories = self.cache()
- return Bunch(trajectories=trajectories, DESCR=self.description())
-
- def simulate_func(self, random):
- # Implement in subclass
- raise NotImplementedError
-
- def potential(self, x):
- # Implement in subclass
- raise NotImplementedError
-
-
-class Bunch(dict):
- """Container object for datasets: dictionary-like object that
- exposes its keys as attributes."""
-
- def __init__(self, **kwargs):
- dict.__init__(self, kwargs)
- self.__dict__ = self
-
-
-def has_msmb_data():
- """We provide a conda package containing the saved data.
-
- This package was introduced because the figshare downloads could
- be 'iffy' at times.
-
- Returns
- -------
- path : str or None
- The path (if it exists). otherwise None
- """
- msmb_data_dir = join(sys.prefix, 'share', 'msmb_data')
- if exists(msmb_data_dir):
- return msmb_data_dir
- else:
- return None
-
-
-def _expand_and_makedir(data_home):
- data_home = expanduser(data_home)
- if not exists(data_home):
- makedirs(data_home)
- return data_home
-
-
-def get_data_home(data_home=None):
- """Return the path of the msmbuilder data dir.
-
- As of msmbuilder v3.6, this function will prefer data downloaded via
- the msmb_data conda package (and located within the python installation
- directory). If this package exists, we will use its data directory as
- the data home. Otherwise, we use the old logic:
-
- This folder is used by some large dataset loaders to avoid
- downloading the data several times.
-
- By default the data dir is set to a folder named 'msmbuilder_data'
- in the user's home folder.
-
- Alternatively, it can be set by the 'MSMBUILDER_DATA' environment
- variable or programmatically by giving an explicit folder path. The
- '~' symbol is expanded to the user home folder.
-
- If the folder does not already exist, it is automatically created.
- """
- if data_home is not None:
- return _expand_and_makedir(data_home)
-
- msmb_data = has_msmb_data()
- if msmb_data is not None:
- return _expand_and_makedir(msmb_data)
-
- data_home = environ.get('MSMBUILDER_DATA', join('~', 'msmbuilder_data'))
- return _expand_and_makedir(data_home)
-
-
-def clear_data_home(data_home=None):
- """Delete all the content of the data home cache."""
- data_home = get_data_home(data_home)
- shutil.rmtree(data_home)
diff --git a/msmexplorer/example_datasets/brownian1d.py b/msmexplorer/example_datasets/brownian1d.py
deleted file mode 100644
index 2320ed7..0000000
--- a/msmexplorer/example_datasets/brownian1d.py
+++ /dev/null
@@ -1,257 +0,0 @@
-"""Very simple datasets of brownian dynamics in one dimension."""
-# Author: Robert McGibbon
-# Contributors:
-# Copyright (c) 2014, Stanford University
-# All rights reserved.
-
-# -----------------------------------------------------------------------------
-# Imports
-# -----------------------------------------------------------------------------
-from __future__ import absolute_import
-
-import time
-
-import numpy as np
-
-from .base import _NWell
-from msmbuilder.msm import _solve_msm_eigensystem
-
-# -----------------------------------------------------------------------------
-# Globals
-# -----------------------------------------------------------------------------
-
-# DO NOT CHANGE THESE CONSTANTS WITHOUT UPDATING THE
-# "DOUBLEWELL_DESCRIPTION" VARIABLE
-DIFFUSION_CONST = 1e3
-DT = 1e-3
-DT_SQRT_2D = DT * np.sqrt(2 * DIFFUSION_CONST)
-
-__all__ = ['load_doublewell', 'load_quadwell',
- 'doublewell_eigs', 'quadwell_eigs']
-
-
-# -----------------------------------------------------------------------------
-# User functions
-# -----------------------------------------------------------------------------
-
-
-
-class DoubleWell(_NWell):
- r"""Brownian dynamics on a 1D double well potential
-
- Parameters
- ----------
- data_home : optional, default: None
- Specify another cache folder for the datasets. By default
- all MSMBuilder data is stored in '~/msmbuilder_data' subfolders.
- random_state : {int, None}, default: None
- Seed the psuedorandom number generator to generate trajectories. If
- seed is None, the global numpy PRNG is used. If random_state is an
- int, the simulations will be cached in ``data_home``, or loaded from
- ``data_home`` if simulations with that seed have been performed already.
- With random_state=None, new simulations will be performed and the
- trajectories will not be cached.
-
- Notes
- -----
- This dataset consists of 10 trajectories simulated with Brownian dynamics on
- the reduced potential function
-
- V(x) = 1 + cos(2x)
-
- with reflecting boundary conditions at x=-pi and x=pi. The simulations
- are governed by the stochastic differential equation
-
- dx_t/dt = -\nabla V(x) + \sqrt{2D} * R(t),
-
- where R(t) is a standard normal white-noise process, and D=1e3. The
- timsetep is 1e-3. Each trajectory is 10^5 steps long, and starts at
- x_0 = 0.
- """
- target_name = "doublewell"
- n_trajectories = 10
-
- def simulate_func(self, random):
- return _simulate_doublewell(random)
-
- def potential(self, x):
- return 1 + np.cos(2 * x)
-
-
-def load_doublewell(data_home=None, random_state=None):
- return DoubleWell(data_home, random_state).get()
-
-
-load_doublewell.__doc__ = DoubleWell.__doc__
-
-
-class QuadWell(_NWell):
- r"""Brownian dynamics on a 1D four well potential
-
- Parameters
- ----------
- data_home : optional, default: None
- Specify another cache folder for the datasets. By default
- all MSMBuilder data is stored in '~/msmbuilder_data' subfolders.
- random_state : {int, None}, default: None
- Seed the psuedorandom number generator to generate trajectories. If
- seed is None, the global numpy PRNG is used. If random_state is an
- int, the simulations will be cached in ``data_home``, or loaded from
- ``data_home`` if simulations with that seed have been performed already.
- With random_state=None, new simulations will be performed and the
- trajectories will not be cached.
-
- Notes
- -----
- This dataset consists of 100 trajectories simulated with Brownian dynamics
- on the reduced potential function
-
- V = 4(x^8 + 0.8 exp(-80 x^2) + 0.2 exp(-80 (x-0.5)^2) + 0.5 exp(-40 (x+0.5)^2)).
-
- The simulations are governed by the stochastic differential equation
-
- dx_t/dt = -\nabla V(x) + \sqrt{2D} * R(t),
-
- where R(t) is a standard normal white-noise process, and D=1e3. The timsetep
- is 1e-3. Each trajectory is 10^3 steps long, and starts from a random
- initial point sampled from the uniform distribution on [-1, 1].
- """
-
- target_name = "quadwell"
- n_trajectories = 100
-
- def simulate_func(self, random):
- return _simulate_quadwell(random)
-
- def potential(self, x):
- return 4 * (x ** 8 + 0.8 * np.exp(-80 * x ** 2) + 0.2 * np.exp(
- -80 * (x - 0.5) ** 2) +
- 0.5 * np.exp(-40 * (x + 0.5) ** 2))
-
-
-def load_quadwell(data_home=None, random_state=None):
- return QuadWell(data_home, random_state).get()
-
-
-load_quadwell.__doc__ = QuadWell.__doc__
-
-
-def doublewell_eigs(n_grid, lag_time=1):
- """Analytic eigenvalues/eigenvectors for the doublwell system
-
- TODO: DOCUMENT ME
- """
- return _brownian_eigs(n_grid, lag_time, DOUBLEWELL_GRAD_POTENTIAL,
- -np.pi, np.pi, reflect_bc=True)
-
-
-def quadwell_eigs(n_grid, lag_time=1):
- """Analytic eigenvalues/eigenvectors for the quadwell system
-
- TODO: DOCUMENT ME
- """
- return _brownian_eigs(n_grid, lag_time, QUADWELL_GRAD_POTENTIAL,
- -1.2, 1.2, reflect_bc=False)
-
-
-# -----------------------------------------------------------------------------
-# Internal functions
-# -----------------------------------------------------------------------------
-
-DOUBLEWELL_GRAD_POTENTIAL = lambda x: -2 * np.sin(2 * x)
-QUADWELL_GRAD_POTENTIAL = lambda x: 4 * (
- 8 * x ** 7 - 128 * x * np.exp(-80 * x ** 2) - \
- 32 * (x - 0.5) * np.exp(-80 * (x - 0.5) ** 2) - 40 * (x + 0.5) * np.exp(
- -40 * (x + 0.5) ** 2))
-
-
-def _simulate_doublewell(random):
- # DO NOT CHANGE THESE CONSTANTS WITHOUT UPDATING THE
- # "DOUBLEWELL_DESCRIPTION" VARIABLE AND UPDATING THE VERSION NUMBER
- # in the load_doublewell FUNCTION
- x0 = 0
- n_steps = 1e5
- n_trajectories = 10
-
- trajectories = [_propagate1d(
- x0, n_steps, DOUBLEWELL_GRAD_POTENTIAL, random, bc_min=-np.pi,
- bc_max=np.pi, verbose=True).reshape(-1, 1)
- for i in range(n_trajectories)]
- return trajectories
-
-
-def _simulate_quadwell(random):
- # DO NOT CHANGE THESE CONSTANTS WITHOUT UPDATING THE
- # "QUADWELL_DESCRIPTION" VARIABLE AND UPDATING THE VERSION NUMBER
- # in the load_quadwell FUNCTION
- n_steps = 1e3
- n_trajectories = 100
- x0 = random.uniform(-1, 1, size=n_trajectories)
-
- trajectories = [_propagate1d(
- x0[i], n_steps, QUADWELL_GRAD_POTENTIAL,
- random=random, verbose=False).reshape(-1, 1)
- for i in range(n_trajectories)]
- return trajectories
-
-
-def _reflect_boundary_conditions(x, min, max):
- if x > max:
- return 2 * max - x
- if x < min:
- return 2 * min - x
- return x
-
-
-def _propagate1d(x0, n_steps, grad_potential, random, bc_min=None, bc_max=None,
- verbose=True):
- start = time.time()
- n_steps = int(n_steps)
-
- if bc_min is None and bc_max is None:
- bc = lambda x: x
- else:
- bc = lambda x: _reflect_boundary_conditions(x, bc_min, bc_max)
-
- rand = random.randn(n_steps)
- x = np.zeros(n_steps + 1)
- x[0] = x0
- for i in range(n_steps):
- x_i_plus_1 = x[i] - DT * grad_potential(x[i]) + DT_SQRT_2D * rand[i]
- x[i + 1] = bc(x_i_plus_1)
-
- if verbose:
- print('%d steps/s' % (n_steps / (time.time() - start)))
- return x
-
-
-def _brownian_eigs(n_grid, lag_time, grad_potential, xmin, xmax, reflect_bc):
- """Analytic eigenvalues/eigenvectors for 1D Brownian dynamics
- """
-
- ONE_OVER_SQRT_2PI = 1.0 / (np.sqrt(2 * np.pi))
- normalpdf = lambda x: ONE_OVER_SQRT_2PI * np.exp(-0.5 * (x * x))
-
- grid = np.linspace(xmin, xmax, n_grid)
- width = grid[1] - grid[0]
- transmat = np.zeros((n_grid, n_grid))
- for i, x_i in enumerate(grid):
- if reflect_bc:
- for offset in range(-(n_grid - 1), n_grid):
- x_j = x_i + (offset * width)
- j = _reflect_boundary_conditions(i + offset, 0, n_grid - 1)
-
- # What is the probability of going from x_i to x_j in one step?
- diff = (x_j - x_i + DT * grad_potential(x_i)) / DT_SQRT_2D
- transmat[i, j] += normalpdf(diff)
- else:
- for j, x_j in enumerate(grid):
- # What is the probability of going from x_i to x_j in one step?
- diff = (x_j - x_i + DT * grad_potential(x_i)) / DT_SQRT_2D
- transmat[i, j] += normalpdf(diff)
-
- transmat[i, :] = transmat[i, :] / np.sum(transmat[i, :])
-
- transmat = np.linalg.matrix_power(transmat, lag_time)
- u, lv, rv = _solve_msm_eigensystem(transmat, k=len(transmat) - 1)
- return u, rv
diff --git a/msmexplorer/example_datasets/fs_peptide.py b/msmexplorer/example_datasets/fs_peptide.py
deleted file mode 100644
index 3777bb9..0000000
--- a/msmexplorer/example_datasets/fs_peptide.py
+++ /dev/null
@@ -1,101 +0,0 @@
-# Author: Robert McGibbon
-# Contributors: Matthew Harrigan
-# Copyright (c) 2016, Stanford University and the Authors
-# All rights reserved.
-
-# -----------------------------------------------------------------------------
-# Imports
-# -----------------------------------------------------------------------------
-from glob import glob
-from os.path import basename
-from os.path import join
-
-import mdtraj as md
-
-from .base import Bunch, _MDDataset
-
-DATA_URL = "https://ndownloader.figshare.com/articles/1030363/versions/1"
-TARGET_DIRECTORY = "fs_peptide"
-
-
-class FsPeptide(_MDDataset):
- """Fs peptide (implicit solvent) dataset
-
- Parameters
- ----------
- data_home : optional, default: None
- Specify another download and cache folder for the datasets. By default
- all MSMBuilder data is stored in '~/msmbuilder_data' subfolders.
-
- Notes
- -----
- This dataset consists of 28 molecular dynamics trajectories of Fs peptide
- (Ace-A_5(AAARA)_3A-NME), a widely studied model system for protein folding.
- Each trajectory is 500 ns in length, and saved at a 50 ps time interval (14
- us aggegrate sampling). The simulations were performed using the AMBER99SB-ILDN
- force field with GBSA-OBC implicit solvent at 300K, starting from randomly
- sampled conformations from an initial 400K unfolding simulation. The
- simulations were performed with OpenMM 6.0.1.
-
- The dataset, including the script used to generate the dataset
- is available on figshare at
-
- http://dx.doi.org/10.6084/m9.figshare.1030363
- """
-
- target_directory = TARGET_DIRECTORY
- data_url = DATA_URL
-
- def get_cached(self):
- try:
- top = md.load(join(self.data_dir, 'fs-peptide.pdb'))
- except IOError:
- top = md.load(join(self.data_dir, 'fs_peptide.pdb'))
- trajectories = []
- for fn in sorted(glob(join(self.data_dir, 'trajectory*.xtc'))):
- if self.verbose:
- print('loading %s...' % basename(fn))
- trajectories.append(md.load(fn, top=top))
-
- return Bunch(trajectories=trajectories, DESCR=self.description())
-
-
-class MinimalFsPeptide(_MDDataset):
- """Minimal Fs peptide (implicit solvent) dataset
-
- Notes
- -----
- This dataset consists of 28 molecular dynamics trajectories of Fs peptide
- (Ace-A_5(AAARA)_3A-NME), a widely studied model system for protein folding.
- Each trajectory is 500 ns in length, and saved at a 10 ns time interval (14
- us aggegrate sampling). The simulations were performed using the AMBER99SB-ILDN
- force field with GBSA-OBC implicit solvent at 300K, starting from randomly
- sampled conformations from an initial 400K unfolding simulation. The
- simulations were performed with OpenMM 6.0.1.
-
- The dataset is a subsampling of the FsPeptide dataset described on
- figshare at
-
- http://dx.doi.org/10.6084/m9.figshare.1030363
- """
-
- target_directory = TARGET_DIRECTORY
- data_url = DATA_URL
-
- def get_cached(self):
- try:
- top = md.load(join(self.data_dir, 'fs-peptide.pdb'))
- except IOError:
- top = md.load(join(self.data_dir, 'fs_peptide.pdb'))
- trajectories = [
- md.load(fn, top=top, stride=200)
- for fn in sorted(glob("{}/trajectory*.xtc".format(self.data_dir)))
- ]
- return Bunch(trajectories=trajectories, DESCR=self.description())
-
-
-def fetch_fs_peptide(data_home=None):
- return FsPeptide(data_home).get()
-
-
-fetch_fs_peptide.__doc__ = FsPeptide.__doc__
diff --git a/msmexplorer/example_datasets/met_enkephalin.py b/msmexplorer/example_datasets/met_enkephalin.py
deleted file mode 100644
index 426f884..0000000
--- a/msmexplorer/example_datasets/met_enkephalin.py
+++ /dev/null
@@ -1,69 +0,0 @@
-# Author: Robert McGibbon
-# Contributors:
-# Copyright (c) 2014, Stanford University and the Authors
-# All rights reserved.
-
-# -----------------------------------------------------------------------------
-# Imports
-# -----------------------------------------------------------------------------
-from glob import glob
-from os.path import join
-
-import mdtraj as md
-
-from .base import Bunch, _MDDataset
-
-DATA_URL = "https://ndownloader.figshare.com/articles/1026324/versions/1"
-TARGET_DIRECTORY = "met_enkephalin"
-
-
-class MetEnkephalin(_MDDataset):
- """Loader for the met-enkephalin dataset
-
- Parameters
- ----------
- data_home : optional, default: None
- Specify another download and cache folder for the datasets. By default
- all MSMBuilder data is stored in '~/msmbuilder_data' subfolders.
-
- download_if_missing: optional, True by default
- If False, raise a IOError if the data is not locally available
- instead of trying to download the data from the source site.
-
- Notes
- -----
- The dataset consists of ten ~50 ns molecular dynamics (MD) simulation
- trajectories of the 5 residue Met-enkaphalin peptide. The aggregate
- sampling is 499.58 ns. Simulations were performed starting from the 1st
- model in the 1PLX PDB file, solvated with 832 TIP3P water molecules using
- OpenMM 6.0. The coordinates (protein only -- the water was stripped)
- are saved every 5 picoseconds. Each of the ten trajectories is roughly
- 50 ns long and contains about 10,000 snapshots.
-
- Forcefield: amber99sb-ildn; water: tip3p; nonbonded method: PME; cutoffs:
- 1nm; bonds to hydrogen were constrained; integrator: langevin dynamics;
- temperature: 300K; friction coefficient: 1.0/ps; pressure control: Monte
- Carlo barostat (interval of 25 steps); timestep 2 fs.
-
- The dataset is available on figshare at
-
- http://dx.doi.org/10.6084/m9.figshare.1026324
- """
-
- data_url = DATA_URL
- target_directory = TARGET_DIRECTORY
-
- def get_cached(self):
- top = md.load(join(self.data_dir, '1plx.pdb'))
- trajectories = []
- for fn in glob(join(self.data_dir, 'trajectory*.dcd')):
- trajectories.append(md.load(fn, top=top))
-
- return Bunch(trajectories=trajectories, DESCR=self.description())
-
-
-def fetch_met_enkephalin(data_home=None):
- return MetEnkephalin(data_home).get()
-
-
-fetch_met_enkephalin.__doc__ = MetEnkephalin.__doc__
diff --git a/msmexplorer/example_datasets/muller.py b/msmexplorer/example_datasets/muller.py
deleted file mode 100644
index 5492ff4..0000000
--- a/msmexplorer/example_datasets/muller.py
+++ /dev/null
@@ -1,118 +0,0 @@
-from multiprocessing import cpu_count
-from multiprocessing.pool import ThreadPool
-
-import numpy as np
-
-from .base import _NWell
-from ._muller import propagate, muller_potential
-
-__all__ = ['load_muller', 'MullerPotential']
-
-
-###############################################################################
-# Constants
-###############################################################################
-
-# DO NOT CHANGE THESE CONSTANTS WITHOUT UPDATING VERSION ATTRIBUTE
-# AND THE DOCSTRING
-MULLER_PARAMETERS = dict(
- MIN_X=-1.5, MIN_Y=-0.2,
- MAX_X=1.2, MAX_Y=2,
- N_TRAJECTORIES=10,
- N_STEPS=1000000,
- THIN=100,
- KT=1.5e4,
- DT=0.1,
- DIFFUSION_CONST=1e-2,
- VERSION=1)
-
-
-###############################################################################
-# Code
-###############################################################################
-
-class MullerPotential(_NWell):
- """Browian dynamics on Muller potential dataset
-
- Parameters
- ----------
- data_home : optional, default: None
- Specify another cache folder for the datasets. By default
- all MSMBuilder data is stored in '~/msmbuilder_data' subfolders.
- random_state : {int, None}, default: None
- Seed the psuedorandom number generator to generate trajectories. If
- seed is None, the global numpy PRNG is used. If random_state is an
- int, the simulations will be cached in ``data_home``, or loaded from
- ``data_home`` if simulations with that seed have been performed already.
- With random_state=None, new simulations will be performed and the
- trajectories will not be cached.
-
- Notes
- -----
- This dataset consists of 10 trajectories simulated with Brownian dynamics
- on the Muller potential, a two-dimensional, three-well potential energy
- surface. The potential is defined in [1]. The dynamics are governed by the
- stochastic differential equation::
-
- dx_t/dt = -\nabla V(x)/(kT) + \sqrt{2D} * R(t)
-
- where R(t) is a standard normal white-noise process, and D=1e-2. The
- dynamics are discretized with an euler integrator with timsetep dt=0.1,
- and kT=1.5e4 Each trajectory is simulated for 1000000 steps, and
- coordinates are are saved every 100 steps. The starting points for the
- trajectories are sampled from the uniform distribution over the rectangular
- box between x=(-1.5, 1.2) and y=(-0.2, 2.0).
-
- References
- ----------
- .. [1] Muller, Klaus, and Leo D. Brown. "Location of saddle points and minimum
- energypaths by a constrained simplex optimization procedure." Theoretica
- chimica acta 53.1 (1979): 75-93.
- """
-
- target_name = "muller"
- version = MULLER_PARAMETERS['VERSION']
- n_trajectories = MULLER_PARAMETERS['N_TRAJECTORIES']
-
- def simulate_func(self, random):
- M = MULLER_PARAMETERS
- x0s = random.uniform(
- low=[M['MIN_X'], M['MIN_Y']],
- high=[M['MAX_X'], M['MAX_Y']],
- size=(M['N_TRAJECTORIES'], 2))
-
- # propagate releases the GIL, so we can use a thread pool and
- # get a nice speedup
- tp = ThreadPool(cpu_count())
- return tp.map(lambda x0:
- propagate(
- n_steps=M['N_STEPS'], x0=x0, thin=M['THIN'], kT=M['KT'],
- dt=M['DT'], D=M['DIFFUSION_CONST'], random_state=random,
- min_x=M['MIN_X'], max_x=M['MAX_X'], min_y=M['MIN_Y'],
- max_y=M['MAX_Y']), x0s)
-
- def potential(self, x, y):
- return muller_potential(x, y)
-
- def plot(self, minx=-1.5, maxx=1.2, miny=-0.2, maxy=2, **kwargs):
- """Helper function to plot the Muller potential
- """
- import matplotlib.pyplot as pp
- grid_width = max(maxx-minx, maxy-miny) / 200.0
-
- ax = kwargs.pop('ax', None)
-
- xx, yy = np.mgrid[minx:maxx:grid_width, miny:maxy:grid_width]
- V = self.potential(xx, yy)
- # clip off any values greater than 200, since they mess up
- # the color scheme
- if ax is None:
- ax = pp
-
- ax.contourf(xx, yy, V.clip(max=200), 40, **kwargs)
-
-
-def load_muller(data_home=None, random_state=None):
- return MullerPotential(data_home, random_state).get()
-
-load_muller.__doc__ = MullerPotential.__doc__
diff --git a/notebooks/Fs-Peptide-Example.ipynb b/notebooks/Fs-Peptide-Example.ipynb
index 817c10f..eacb88a 100644
--- a/notebooks/Fs-Peptide-Example.ipynb
+++ b/notebooks/Fs-Peptide-Example.ipynb
@@ -19,6 +19,7 @@
"source": [
"%matplotlib inline\n",
"\n",
+ "from msmbuilder.example_datasets import FsPeptide\n",
"from msmbuilder.featurizer import DihedralFeaturizer\n",
"from msmbuilder.decomposition import tICA\n",
"from msmbuilder.preprocessing import RobustScaler\n",
@@ -28,7 +29,7 @@
"import numpy as np\n",
"\n",
"import msmexplorer as msme\n",
- "from msmexplorer.example_datasets import FsPeptide\n",
+
"\n",
"rs = np.random.RandomState(42)"
]
diff --git a/setup.py b/setup.py
index 0cf7687..c423806 100644
--- a/setup.py
+++ b/setup.py
@@ -6,9 +6,7 @@
import sys
import subprocess
-import numpy as np
-from os.path import join as pjoin
-from setuptools import setup, Extension, find_packages
+from setuptools import setup, find_packages
from distutils.spawn import find_executable
try:
@@ -18,20 +16,9 @@
finally:
sys.dont_write_bytecode = False
-try:
- import Cython
- from Cython.Distutils import build_ext
-
- if Cython.__version__ < '0.18':
- raise ImportError()
-except ImportError:
- print(
- 'Cython version 0.18 or later is required. Try "conda install cython"')
- sys.exit(1)
-
NAME = "msmexplorer"
-VERSION = "0.4.0.dev0"
-ISRELEASED = False
+VERSION = "1.0.0"
+ISRELEASED = True
__version__ = VERSION
@@ -49,13 +36,6 @@ def readme_to_rst():
}
-extensions = []
-extensions.append(
- Extension('msmexplorer.example_datasets._muller',
- sources=[pjoin('msmexplorer', 'example_datasets', '_muller.pyx')],
- include_dirs=[np.get_include()]))
-
-
def main(**kwargs):
write_version_py(VERSION, ISRELEASED, 'msmexplorer/version.py')
@@ -92,8 +72,6 @@ def main(**kwargs):
'requirements.txt'],
},
zip_safe=False,
- ext_modules=extensions,
- cmdclass={'build_ext': build_ext},
**kwargs
)