Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add InterRDF_s #70

Merged
merged 20 commits into from
Nov 2, 2018
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
179 changes: 179 additions & 0 deletions pmda/rdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,3 +172,182 @@ def _reduce(res, result_single_frame):
# Add two numpy arrays
res += result_single_frame
return res


class InterRDF_s(ParallelAnalysisBase):
"""Site-specific intermolecular pair distribution function

Arguments
---------
u : Universe
a Universe that contains atoms in `ags`
ags : list
a list of pairs of :class:`~MDAnalysis.core.groups.AtomGroup`
instances
nbins : int (optional)
Number of bins in the histogram [75]
range : tuple or list (optional)
The size of the RDF [0.0, 15.0]

Example
-------

First create the :class:`InterRDF_s` object, by supplying one Universe and
one list of pairs of AtomGroups, then use the :meth:`~InterRDF_s.run`
method::

from MDAnalysisTests.datafiles import GRO_MEMPROT, XTC_MEMPROT
u = mda.Universe(GRO_MEMPROT, XTC_MEMPROT)

s1 = u.select_atoms('name ZND and resid 289')
s2 = u.select_atoms('(name OD1 or name OD2) and resid 51 and sphzone 5.0
(resid 289)')
s3 = u.select_atoms('name ZND and (resid 291 or resid 292)')
s4 = u.select_atoms('(name OD1 or name OD2) and sphzone 5.0 (resid 291)')
ags = [[s1, s2], [s3, s4]]

rdf = InterRDF_s(u, ags)
rdf.run()

Results are available through the :attr:`bins` and :attr:`rdf` attributes::

plt.plot(rdf.bins, rdf.rdf[0][0][0])

(Which plots the rdf between the first atom in ``s1`` and the first atom in
``s2``)

To generate the *cumulative distribution function* (cdf), use the
:meth:`~InterRDF_s.get_cdf` method ::

cdf = rdf.get_cdf()

Results are available through the :attr:'cdf' attribute::

plt.plot(rdf.bins, rdf.cdf[0][0][0])

(Which plots the cdf between the first atom in ``s1`` and the first atom in
``s2``)

See Also
--------
MDAnalysis.analysis.rdf.InterRDF_s


.. versionadded:: 0.2.0
"""

# pylint: disable=redefined-builtin
# continue to use 'range' as long as MDAnalysis uses it so that
# the user interface remains consistent
def __init__(self, u, ags,
nbins=75, range=(0.0, 15.0), density=True):
atomgroups = []
for pair in ags:
atomgroups.append(pair[0])
atomgroups.append(pair[1])
super(InterRDF_s, self).__init__(u, atomgroups)

# List of pairs of AtomGroups
self.ags = ags
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, the failure is due to this line. distributed cannot pass atomgroups to blocks.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems related to PR #65 .

(Also, thanks for raising #79 )

self._density = density
self.n = len(ags)
self.nf = u.trajectory.n_frames
self.rdf_settings = {'bins': nbins,
'range': range}

# pylint: enable=redefined-builtin

def _prepare(self):
# Empty list to store the RDF
count, edges = np.histogram([-1], **self.rdf_settings)
self.len = len(count)
self.edges = edges
self.bins = 0.5 * (edges[:-1] + edges[1:])

# Need to know average volume
self.volume = 0.0
self._maxrange = self.rdf_settings['range'][1]

def _single_frame(self, ts, atomgroups):
ags = [[atomgroups[2*i], atomgroups[2*i+1]] for i in range(self.n)]
count = [np.zeros((ag1.n_atoms, ag2.n_atoms, self.len),
dtype=np.float64) for ag1, ag2 in ags]
for i, (ag1, ag2) in enumerate(ags):
u = ag1.universe
pairs, dist = distances.capped_distance(ag1.positions,
ag2.positions,
self._maxrange,
box=u.dimensions)

for j, (idx1, idx2) in enumerate(pairs):
count[i][idx1, idx2, :] = np.histogram(dist[j],
**self.rdf_settings)[0]

volume = u.trajectory.ts.volume

return np.array([np.array(count), np.array(volume, dtype=np.float64)])
orbeckst marked this conversation as resolved.
Show resolved Hide resolved

def _conclude(self):
self.count = np.sum(self._results[:, 0])
self.volume = np.sum(self._results[:, 1])
# Volume in each radial shell
vol = np.power(self.edges[1:], 3) - np.power(self.edges[:-1], 3)
vol *= 4/3.0 * np.pi

# Empty lists to restore indices, RDF
indices = []
rdf = []

for i, (ag1, ag2) in enumerate(self.ags):
# Number of each selection
nA = len(ag1)
nB = len(ag2)
N = nA * nB
indices.append([ag1.indices, ag2.indices])

# Average number density
box_vol = self.volume / self.nf
density = N / box_vol

if self._density:
rdf.append(self.count[i] / (density * vol * self.nf))
else:
rdf.append(self.count[i] / (vol * self.nf))

self.rdf = rdf
self.indices = indices

def get_cdf(self):
"""Calculate the cumulative distribution functions (CDF) for all sites.
Note that this is the actual count within a given radius, i.e.,
:math:`N(r)`.
orbeckst marked this conversation as resolved.
Show resolved Hide resolved

Returns
-------
cdf : list
list of arrays with the same structure as :attr:`rdf`
"""
# Calculate cumulative distribution function
# Empty list to restore CDF
cdf = []

for count in self.count:
cdf.append(np.cumsum(count, axis=2) / self.nf)

# Results stored in self.cdf
# self.cdf is a list of cdf between pairs of AtomGroups in ags
self.cdf = cdf

return cdf

@staticmethod
def _reduce(res, result_single_frame):
""" 'add' action for an accumulator"""
if isinstance(res, list) and len(res) == 0:
# Convert res from an empty list to a numpy array
# which has the same shape as the single frame result
res = result_single_frame
else:
# Add two numpy arrays
res += result_single_frame
return res
19 changes: 3 additions & 16 deletions pmda/test/test_rdf.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,11 @@
# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*-
# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 fileencoding=utf-8
# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
#
# MDAnalysis --- https://www.mdanalysis.org
# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors
# PMDA
# Copyright (c) 2017 The MDAnalysis Development Team and contributors
# (see the file AUTHORS for the full list of names)
#
# Released under the GNU Public Licence, v2 or any higher version
#
# Please cite your use of MDAnalysis in published work:
#
# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler,
# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein.
# MDAnalysis: A Python package for the rapid analysis of molecular dynamics
# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th
# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy.
#
# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein.
# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations.
# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787
#
from __future__ import absolute_import

import pytest
Expand Down
118 changes: 118 additions & 0 deletions pmda/test/test_rdf_s.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*-
# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
#
# PMDA
# Copyright (c) 2017 The MDAnalysis Development Team and contributors
# (see the file AUTHORS for the full list of names)
#
# Released under the GNU Public Licence, v2 or any higher version
from __future__ import absolute_import, division

import pytest

from numpy.testing import assert_almost_equal

import MDAnalysis as mda
import numpy as np
from pmda.rdf import InterRDF_s
from MDAnalysis.analysis import rdf

from MDAnalysisTests.datafiles import GRO_MEMPROT, XTC_MEMPROT


@pytest.fixture(scope='module')
def u():
return mda.Universe(GRO_MEMPROT, XTC_MEMPROT)


@pytest.fixture(scope='module')
def sels(u):
s1 = u.select_atoms('name ZND and resid 289')
s2 = u.select_atoms(
'(name OD1 or name OD2) and resid 51 and sphzone 5.0 (resid 289)')
s3 = u.select_atoms('name ZND and (resid 291 or resid 292)')
s4 = u.select_atoms('(name OD1 or name OD2) and sphzone 5.0 (resid 291)')
ags = [[s1, s2], [s3, s4]]
return ags


@pytest.fixture(scope='module')
def rdf_s(u, sels):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Test with different schedulers:

def rdf_s(u, sels, scheduler)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(The scheduler fixture is defined in conftest.py and should run this calculation with all dask schedulers.)

return InterRDF_s(u, sels).run()
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you test with different n_blocks?

I don't know if this is actually testing multiple blocks or not. We need to know that the "accumulator" is working with multiple blocks.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should work with @pytest.mark.parametrize.



def test_nbins(u):
ags = sels(u)
rdf = InterRDF_s(u, ags, nbins=412).run()

assert len(rdf.bins) == 412


def test_range(u):
ags = sels(u)
rmin, rmax = 1.0, 13.0
rdf = InterRDF_s(u, ags, range=(rmin, rmax)).run()

assert rdf.edges[0] == rmin
assert rdf.edges[-1] == rmax


def test_count_size(rdf_s):
# ZND vs OD1 & OD2
# should see 2 elements in rdf.count
# 1 element in rdf.count[0]
# 2 elements in rdf.count[0][0]
# 2 elements in rdf.count[1]
# 2 elements in rdf.count[1][0]
# 2 elements in rdf.count[1][1]
assert len(rdf_s.count) == 2
assert len(rdf_s.count[0]) == 1
assert len(rdf_s.count[0][0]) == 2
assert len(rdf_s.count[1]) == 2
assert len(rdf_s.count[1][0]) == 2
assert len(rdf_s.count[1][1]) == 2


def test_count(rdf_s):
# should see one distance with 5 counts in count[0][0][1]
# should see one distance with 3 counts in count[1][1][0]
assert len(rdf_s.count[0][0][1][rdf_s.count[0][0][1] == 5]) == 1
assert len(rdf_s.count[1][1][0][rdf_s.count[1][1][0] == 3]) == 1


def test_double_run(rdf_s):
# running rdf twice should give the same result
assert len(rdf_s.count[0][0][1][rdf_s.count[0][0][1] == 5]) == 1
assert len(rdf_s.count[1][1][0][rdf_s.count[1][1][0] == 3]) == 1


def test_cdf(rdf_s):
rdf_s.get_cdf()
assert rdf_s.cdf[0][0][0][-1] == rdf_s.count[0][0][0].sum()/rdf_s.nf


def test_reduce(rdf_s):
# should see numpy.array addtion
res = []
single_frame = np.array([np.array([1, 2]), np.array([3])])
res = rdf_s._reduce(res, single_frame)
res = rdf_s._reduce(res, single_frame)
assert_almost_equal(res[0], np.array([2, 4]))
assert_almost_equal(res[1], np.array([6]))


def test_same_result(u, sels):
# should see same results from analysis.rdf.InterRDF_s
# and pmda.rdf.InterRDF_s
nrdf = rdf.InterRDF_s(u, sels).run()
prdf = InterRDF_s(u, sels).run(n_jobs=3)
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@orbeckst Here is one that is testing multiple blocks. Maybe I should add one test for "n_blocks=1" case

assert_almost_equal(nrdf.count[0][0][0], prdf.count[0][0][0])
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why compare just one element? Shouldn't

assert_almost_equal(nrdf.count, prdf.count)

work?

assert_almost_equal(nrdf.rdf[0][0][0], prdf.rdf[0][0][0])


@pytest.mark.parametrize("density, value", [
(True, 13275.775528444701),
(False, 0.021915460340071267)])
def test_density(u, sels, density, value):
rdf = InterRDF_s(u, sels, density=density).run()
assert_almost_equal(max(rdf.rdf[0][0][0]), value)