Skip to content
This repository has been archived by the owner on Jan 26, 2022. It is now read-only.

Water #360

Closed
wants to merge 33 commits into from
Closed

Water #360

Show file tree
Hide file tree
Changes from 25 commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
380cdc8
Create indices of water atoms
mpharrigan Nov 16, 2013
40c440c
Initial commit of solvent fingerprint metric
mpharrigan Nov 16, 2013
b57b2d8
Include the metric in command line
mpharrigan Nov 16, 2013
30914ad
Added outer product assignment
mpharrigan Nov 16, 2013
736deca
Added parser type=float to fix command line sigma setting
mpharrigan Nov 20, 2013
7c5e3a1
Changed to contact.atom_distance for calculation.
mpharrigan Nov 22, 2013
7144793
Fixed error with unequal length trajectories
mpharrigan Dec 12, 2013
ed6600a
Use mdtraj distance calculations
mpharrigan Jan 9, 2014
911168b
Merge branch 'msmb2.8' of github.com:SimTk/msmbuilder into water
mpharrigan Jan 10, 2014
30cc3ed
Merge branch 'msmb2.8' into water
mpharrigan Jan 10, 2014
6a1fd1a
Merge branch 'msmb2.8' into water
mpharrigan Jan 10, 2014
d0c5aa7
Fix solvent fingerprint metric to use mdtraj trajectories
mpharrigan Jan 10, 2014
fd80637
formatting
mpharrigan Jan 10, 2014
0de3f02
Bugfix
mpharrigan Jan 13, 2014
398b3e1
Formatting
mpharrigan Jan 13, 2014
d643fa7
Merge branch 'msmb2.8' of github.com:SimTk/msmbuilder into water
mpharrigan Jan 28, 2014
b23dcc2
Merge pull request #1 from mpharrigan/water_pull
mpharrigan Jan 28, 2014
a3f866e
use mdtraj.io.loadh instead of msmbuilder
mpharrigan Jan 28, 2014
f501239
Merge the SaveStructures.py script from master
lilipeng Feb 14, 2014
1c8b43d
Merge pull request #2 from lilipeng/water
mpharrigan Feb 14, 2014
2d6278a
Typo
mpharrigan Feb 15, 2014
b52aae2
Merge pull request #3 from mpharrigan/water_typofix
mpharrigan Feb 15, 2014
e43b16f
Merge branch 'master' of github.com:SimTk/msmbuilder into water
mpharrigan Feb 15, 2014
c539a48
Get rid of extraneous #####'s from merge
mpharrigan Feb 15, 2014
f3023e2
Merge pull request #4 from mpharrigan/water_mergefromsimtk
mpharrigan Feb 15, 2014
d6d0207
More descriptive, removed main() function, changed date
mpharrigan Feb 15, 2014
4421d71
Address issues
mpharrigan Feb 15, 2014
ce9956e
small typo
mpharrigan Feb 15, 2014
d2bb38e
small typo
mpharrigan Feb 15, 2014
150b656
Be clear that 'water' just means water oxygens
mpharrigan Feb 15, 2014
7d11a1f
Moved outer product assignment code to a function in assigning
mpharrigan Feb 15, 2014
4205310
Renumbering
mpharrigan Feb 15, 2014
2e0f94c
Merge branch 'water' of github.com:mpharrigan/msmbuilder into water
mpharrigan Feb 15, 2014
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 50 additions & 0 deletions scripts/AssignOuter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#!/usr/bin/env python
# This file is part of MSMBuilder.
#
# Copyright 2011 Stanford University
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe 2014?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch

#
# MSMBuilder is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA


from msmbuilder import arglib
from mdtraj import io
from msmbuilder.metrics import solvent
import logging
logger = logging.getLogger('msmbuilder.scripts.AssignOuter')

parser = arglib.ArgumentParser(description='''Assign data to the outer product
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we explain this a little more?

space of two preliminary
assignments''')
parser.add_argument('assignment1', default='./Data1/Assignments.h5',
help='First assignment file')
parser.add_argument('assignment2', default='./Data2/Assignments.h5',
help='Second assignment file')
parser.add_argument('assignment_out', default='OuterProductAssignments.h5',
help='Output file')


def main(ass1_fn, ass2_fn):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

does this function get called anywhere?

hierarchical = Hierarchical.load_from_disk(zmatrix_fn)
assignments = hierarchical.get_assignments(k=k, cutoff_distance=d)
return assignments

if __name__ == "__main__":
args = parser.parse_args()
arglib.die_if_path_exists(args.assignment_out)

opa = solvent.OuterProductAssignment(args.assignment1, args.assignment2)
new_assignments = opa.get_product_assignments()
io.saveh(args.assignment_out, new_assignments)
logger.info('Saved outer product assignments to %s', args.assignment_out)
19 changes: 15 additions & 4 deletions scripts/CreateAtomIndices.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,21 +25,25 @@
logger = logging.getLogger('msmbuilder.scripts.CreateAtomIndices')



parser = arglib.ArgumentParser(
description="Creates an atom indices file for RMSD from a PDB.")
parser.add_argument('pdb')
parser.add_argument('output', default='AtomIndices.dat')
parser.add_argument('atom_type', help='''Atoms to include in index file.

One of four options: (1) minimal (CA, CB, C, N, O, recommended), (2) heavy,
(3) alpha (carbons), or (4) all. Use "all" in cases where protein
(3) alpha (carbons), (4) water, or (5) all. Use "all" in cases where protein
nomenclature may be inapproprate, although you may want to define your own
indices in such situations. Note that "heavy" keeps all heavy atoms that
are not symmetry equivalent. By symmetry equivalent, we mean atoms
identical under an exchange of labels. For example, heavy will exclude
the two pairs of equivalent carbons (CD, CE) in a PHE ring.
Note that AtomIndices.dat should be zero-indexed--that is, a 0
the two pairs of equivalent carbons (CD, CE) in a PHE ring.
Note that AtomIndices.dat should be zero-indexed--that is, a 0
in AtomIndices.dat corresponds to the first atom in your PDB''',
choices=['minimal', 'heavy', 'alpha', 'all'], default='minimal')
choices=['minimal', 'heavy', 'alpha', 'all', 'water'],
default='minimal')



def run(PDBfn, atomtype):
Expand Down Expand Up @@ -122,6 +126,7 @@ def run(PDBfn, atomtype):
"CNLE": ["N", "CA", "CB", "C", "O", "CG", "CD", "CE"],
"NNLE": ["N", "CA", "CB", "C", "O", "CG", "CD", "CE"],
"SOL": [],
"HOH": [],
"Cl-": [],
"Na+": []
}
Expand All @@ -135,6 +140,12 @@ def run(PDBfn, atomtype):
elif atomtype == 'alpha':
for key in toKeepDict.keys():
toKeepDict[key] = ["CA"]
elif atomtype == 'water':
for key in toKeepDict.keys():
if key == "SOL" or key == 'HOH':
toKeepDict[key] = ["O"]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Make it clear in the help text for this script that only the water oxygen coordinates are used?

else:
toKeepDict[key] = []
elif atomtype == "all":
pass
else:
Expand Down
1 change: 1 addition & 0 deletions src/python/metrics/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,4 @@
from hybrid import Hybrid, HybridPNorm
from projection import RedDimPNorm
from positions import Positions
from solvent import SolventFp
64 changes: 41 additions & 23 deletions src/python/metrics/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from msmbuilder.metrics import (RMSD, Dihedral, BooleanContact,
AtomPairs, ContinuousContact,
AbstractDistanceMetric, Vectorized,
RedDimPNorm, Positions)
RedDimPNorm, Positions, SolventFp)

def add_argument(group, *args, **kwargs):
if 'default' in kwargs:
Expand All @@ -20,7 +20,6 @@ def add_argument(group, *args, **kwargs):
kwargs['help'] = d
group.add_argument(*args, **kwargs)

################################################################################

def locate_metric_plugins(name):
if not name in ['add_metric_parser', 'construct_metric', 'metric_class']:
Expand All @@ -32,7 +31,7 @@ def locate_metric_plugins(name):
def add_metric_parsers(parser):

metric_parser_list = []

metric_subparser = parser.add_subparsers(dest='metric', description ='Available metrics to use.')

#metrics_parsers = parser.add_subparsers(description='Available metrics to use.',dest='metric')
Expand All @@ -42,7 +41,7 @@ def add_metric_parsers(parser):
between two structures, first they are rotated and translated with respect
to one another to achieve maximum coincidence. This code is executed in parallel
on multiple cores (but not multiple boxes) using OMP.''')
add_argument(rmsd, '-a', dest='rmsd_atom_indices', help='Atom indices to use in RMSD calculation. Pass "all" to use all atoms.',
add_argument(rmsd, '-a', dest='rmsd_atom_indices', help='Atom indices to use in RMSD calculation. Pass "all" to use all atoms.',
default='AtomIndices.dat')
metric_parser_list.append(rmsd)

Expand All @@ -54,7 +53,7 @@ def add_metric_parsers(parser):
frames are computed by taking distances between these vectors in R^n. The
euclidean distance is recommended, but other distance metrics are available
(cityblock, etc). This code is executed in parallel on multiple cores (but
not multiple boxes) using OMP. ''')
not multiple boxes) using OMP. ''')
add_argument(dihedral, '-a', dest='dihedral_angles', default='phi/psi',
help='which dihedrals. Choose from phi, psi, chi (to choose multiple, seperate them with a slash), or user')
add_argument(dihedral, '-f', dest='dihedral_userfilename', default='DihedralIndices.dat', help='filename for dihedral indices, N lines of 4 space-separated indices (otherwise ignored)')
Expand All @@ -74,7 +73,7 @@ def add_metric_parsers(parser):
Furthermore, the sense in which the distance between two residues is computed can be
either specified as "CA", "closest", or "closest-heavy", which will respectively compute
("CA") the distance between the residues' alpha carbons, ("closest"), the closest distance between any pair of
atoms i and j such that i belongs to one residue and j to the other residue, ("closest-heavy"),
atoms i and j such that i belongs to one residue and j to the other residue, ("closest-heavy"),
or the closest distance between any pair of NON-HYDROGEN atoms i and j such that i belongs to
one residue and j to the other residue. This code is executed in parallel on multiple cores (but
not multiple boxes) using OMP.''')
Expand All @@ -89,14 +88,27 @@ def add_metric_parsers(parser):
atompairs = metric_subparser.add_parser('atompairs',description='''ATOMPAIRS: For each frame, we
represent the conformation as a vector of particular atom-atom distances. Then the distance
between frames is calculated using a specified norm on these vectors. This code is executed in
parallel (but not multiple boxes) using OMP.''')
parallel (but not multiple boxes) using OMP.''')
add_argument(atompairs, '-a', dest='atompairs_which',
help='path to file with 2D array of which atompairs to use.', default='AtomPairs.dat')
add_argument(atompairs, '-p', dest='atompairs_p', default=2, help='p used for metric=minkowski (otherwise ignored)')
add_argument(atompairs, '-m', dest='atompairs_metric', default='euclidean',
help='which distance metric', choices=AtomPairs.allowable_scipy_metrics)
metric_parser_list.append(atompairs)

solventfp = metric_subparser.add_parser('solventfp', description='''SOLVENTFP: Get
the solvent fingerprint.''')
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we put a little bit more description here. Maybe a citation to the Gu paper would be sufficient.

add_argument(solventfp, '-w', dest='solventfp_wateri', default='WaterIndices.dat',
help='Indices of the solvent (water) atoms')
add_argument(solventfp, '-v', dest='solventfp_proti', default='ProteinIndices.dat',
help='Indices of the solute (protein) atoms')
add_argument(solventfp, '-s', dest='solventfp_sigma', default=0.5,
type=float, help='std. dev. of gaussian kernel')
add_argument(solventfp, '-p', dest='solventfp_p', default=2, help='p used for metric=minkowski (otherwise ignored)')
add_argument(solventfp, '-m', dest='solventfp_metric', default='euclidean',
help='which distance metric', choices=SolventFp.allowable_scipy_metrics)
metric_parser_list.append(solventfp)

positions = metric_subparser.add_parser('positions', description="""POSITIONS: For each frame
we represent the conformation as a vector of atom positions, where the atoms have been
aligned to a target structure.""")
Expand All @@ -111,13 +123,13 @@ def add_metric_parsers(parser):
tica = metric_subparser.add_parser( 'tica', description='''
tICA: This metric is based on a variation of PCA which looks for the slowest d.o.f.
in the simulation data. See (Schwantes, C.R., Pande, V.S. JCTC 2013, 9 (4), 2000-09.)
for more details. In addition to these options, you must provide an additional
for more details. In addition to these options, you must provide an additional
metric you used to prepare the trajectories in the training step.''')

add_argument(tica,'-p', dest='p', help='p value for p-norm')
add_argument(tica,'-m', dest='projected_metric', help='metric to use in the projected space',
choices=Vectorized.allowable_scipy_metrics, default='euclidean')
add_argument(tica, '-f', dest='tica_fn',
add_argument(tica, '-f', dest='tica_fn',
help='tICA Object which was prepared by tICA_train.py')
add_argument(tica, '-n', dest='num_vecs', type=int,
help='Choose the top <-n> eigenvectors based on their eigenvalues')
Expand All @@ -130,14 +142,12 @@ def add_metric_parsers(parser):
add_argument(picklemetric, '-i', dest='picklemetric_input', required=True,
help="Path to pickle file for the metric")
metric_parser_list.append(picklemetric)

for add_parser in locate_metric_plugins('add_metric_parser'):
plugin_metric_parser = add_parser(metric_subparser, add_argument)
metric_parser_list.append(plugin_metric_parser)

return metric_parser_list

################################################################################
return metric_parser_list


def construct_metric(args):
Expand All @@ -154,27 +164,27 @@ def construct_metric(args):
metric = Dihedral(metric=args.dihedral_metric,
p=args.dihedral_p, angles=args.dihedral_angles,
userfilename=args.dihedral_userfilename)

elif metric_name == 'contact':
if args.contact_which != 'all':
contact_which = np.loadtxt(args.contact_which, np.int)
else:
contact_which = 'all'

if args.contact_cutoff_file != None:
contact_cutoff = np.loadtxt(args.contact_cutoff_file, np.float)
if args.contact_cutoff_file != None:
contact_cutoff = np.loadtxt(args.contact_cutoff_file, np.float)
elif args.contact_cutoff != None:
contact_cutoff = float( args.contact_cutoff )
else:
contact_cutoff = None

if contact_cutoff != None and contact_cutoff < 0:
metric = ContinuousContact(contacts=contact_which,
scheme=args.contact_scheme)
else:
metric = BooleanContact(contacts=contact_which,
cutoff=contact_cutoff, scheme=args.contact_scheme)

elif metric_name == 'atompairs':
if args.atompairs_which != None:
pairs = np.loadtxt(args.atompairs_which, np.int)
Expand All @@ -186,7 +196,7 @@ def construct_metric(args):

elif metric_name == 'positions':
target = md.load(args.target)

if args.pos_atom_indices != None:
atom_indices = np.loadtxt(args.pos_atom_indices, np.int)
else:
Expand All @@ -196,16 +206,24 @@ def construct_metric(args):
align_indices = np.loadtxt(args.align_indices, np.int)
else:
align_indices = None

metric = Positions(target, atom_indices=atom_indices, align_indices=align_indices,
metric=args.positions_metric, p=args.positions_p)

elif metric_name == 'solventfp':
proti = np.loadtxt(args.solventfp_proti, np.int)
wateri = np.loadtxt(args.solventfp_wateri, np.int)
metric = SolventFp(solute_indices=proti,
solvent_indices=wateri,
sigma=args.solventfp_sigma,
metric=args.solventfp_metric,
p=args.solventfp_p)

elif metric_name == "tica":
tica_obj = tICA.load(args.tica_fn)

metric = RedDimPNorm(tica_obj, num_vecs=args.num_vecs,
metric = RedDimPNorm(tica_obj, num_vecs=args.num_vecs,
metric=args.projected_metric, p=args.p)

elif metric_name == 'custom':
with open(args.picklemetric_input) as f:
metric = pickle.load(f)
Expand All @@ -223,7 +241,7 @@ def construct_metric(args):
except StopIteration:
# This means that none of the plugins acceptedthe metric
raise RuntimeError("Bad metric. Could not be constructed by any built-in or plugin metric. Perhaps you have a poorly written plugin?")

if not isinstance(metric, AbstractDistanceMetric):
return ValueError("%s is not a AbstractDistanceMetric" % metric)

Expand Down
Loading