msmbuilder · mpharrigan · Nov 16, 2013 · Nov 16, 2013 · Nov 16, 2013 · Nov 16, 2013
diff --git a/scripts/AssignOuter.py b/scripts/AssignOuter.py
@@ -0,0 +1,50 @@
+#!/usr/bin/env python
+# This file is part of MSMBuilder.
+#
+# Copyright 2011 Stanford University
+#
+# MSMBuilder is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+
+from msmbuilder import arglib
+from mdtraj import io
+from msmbuilder.metrics import solvent
+import logging
+logger = logging.getLogger('msmbuilder.scripts.AssignOuter')
+
+parser = arglib.ArgumentParser(description='''Assign data to the outer product
+                                              space of two preliminary
+                                              assignments''')
+parser.add_argument('assignment1', default='./Data1/Assignments.h5',
+                    help='First assignment file')
+parser.add_argument('assignment2', default='./Data2/Assignments.h5',
+                    help='Second assignment file')
+parser.add_argument('assignment_out', default='OuterProductAssignments.h5',
+                    help='Output file')
+
+
+def main(ass1_fn, ass2_fn):
+    hierarchical = Hierarchical.load_from_disk(zmatrix_fn)
+    assignments = hierarchical.get_assignments(k=k, cutoff_distance=d)
+    return assignments
+
+if __name__ == "__main__":
+    args = parser.parse_args()
+    arglib.die_if_path_exists(args.assignment_out)
+
+    opa = solvent.OuterProductAssignment(args.assignment1, args.assignment2)
+    new_assignments = opa.get_product_assignments()
+    io.saveh(args.assignment_out, new_assignments)
+    logger.info('Saved outer product assignments to %s', args.assignment_out)
diff --git a/scripts/CreateAtomIndices.py b/scripts/CreateAtomIndices.py
@@ -25,21 +25,25 @@
 logger = logging.getLogger('msmbuilder.scripts.CreateAtomIndices')
 
 
+
 parser = arglib.ArgumentParser(
     description="Creates an atom indices file for RMSD from a PDB.")
 parser.add_argument('pdb')
 parser.add_argument('output', default='AtomIndices.dat')
 parser.add_argument('atom_type', help='''Atoms to include in index file.
+
     One of four options: (1) minimal (CA, CB, C, N, O, recommended), (2) heavy,
-    (3) alpha (carbons), or (4) all.  Use "all" in cases where protein
+    (3) alpha (carbons), (4) water, or (5) all.  Use "all" in cases where protein
     nomenclature may be inapproprate, although you may want to define your own
     indices in such situations.  Note that "heavy" keeps all heavy atoms that
     are not symmetry equivalent.  By symmetry equivalent, we mean atoms
     identical under an exchange of labels.  For example, heavy will exclude
-    the two pairs of equivalent carbons (CD, CE) in a PHE ring.    
-    Note that AtomIndices.dat should be zero-indexed--that is, a 0 
+    the two pairs of equivalent carbons (CD, CE) in a PHE ring.
+    Note that AtomIndices.dat should be zero-indexed--that is, a 0
     in AtomIndices.dat corresponds to the first atom in your PDB''',
-                    choices=['minimal', 'heavy', 'alpha', 'all'], default='minimal')
+                    choices=['minimal', 'heavy', 'alpha', 'all', 'water'],
+                    default='minimal')
+
 
 
 def run(PDBfn, atomtype):
@@ -122,6 +126,7 @@ def run(PDBfn, atomtype):
         "CNLE": ["N", "CA", "CB", "C", "O", "CG", "CD", "CE"],
         "NNLE": ["N", "CA", "CB", "C", "O", "CG", "CD", "CE"],
         "SOL": [],
+        "HOH": [],
         "Cl-": [],
         "Na+": []
     }
@@ -135,6 +140,12 @@ def run(PDBfn, atomtype):
     elif atomtype == 'alpha':
         for key in toKeepDict.keys():
             toKeepDict[key] = ["CA"]
+    elif atomtype == 'water':
+        for key in toKeepDict.keys():
+            if key == "SOL" or key == 'HOH':
+                toKeepDict[key] = ["O"]
+            else:
+                toKeepDict[key] = []
     elif atomtype == "all":
         pass
     else:

diff --git a/src/python/metrics/__init__.py b/src/python/metrics/__init__.py
@@ -19,3 +19,4 @@
 from hybrid import Hybrid, HybridPNorm
 from projection import RedDimPNorm
 from positions import Positions
+from solvent import SolventFp
diff --git a/src/python/metrics/parsers.py b/src/python/metrics/parsers.py
@@ -9,7 +9,7 @@
 from msmbuilder.metrics import (RMSD, Dihedral, BooleanContact,
                                 AtomPairs, ContinuousContact,
                                 AbstractDistanceMetric, Vectorized,
-                                RedDimPNorm, Positions)
+                                RedDimPNorm, Positions, SolventFp)
 
 def add_argument(group, *args, **kwargs):
     if 'default' in kwargs:
@@ -20,7 +20,6 @@ def add_argument(group, *args, **kwargs):
             kwargs['help'] = d
     group.add_argument(*args, **kwargs)
 
-################################################################################
 
 def locate_metric_plugins(name):
     if not name in ['add_metric_parser', 'construct_metric', 'metric_class']:
@@ -32,7 +31,7 @@ def locate_metric_plugins(name):
 def add_metric_parsers(parser):
 
     metric_parser_list = []
-    
+
     metric_subparser = parser.add_subparsers(dest='metric', description ='Available metrics to use.')
 
     #metrics_parsers = parser.add_subparsers(description='Available metrics to use.',dest='metric')
@@ -42,7 +41,7 @@ def add_metric_parsers(parser):
         between two structures, first they are rotated and translated with respect
         to one another to achieve maximum coincidence. This code is executed in parallel
         on multiple cores (but not multiple boxes) using OMP.''')
-    add_argument(rmsd, '-a', dest='rmsd_atom_indices', help='Atom indices to use in RMSD calculation. Pass "all" to use all atoms.', 
+    add_argument(rmsd, '-a', dest='rmsd_atom_indices', help='Atom indices to use in RMSD calculation. Pass "all" to use all atoms.',
         default='AtomIndices.dat')
     metric_parser_list.append(rmsd)
 
@@ -54,7 +53,7 @@ def add_metric_parsers(parser):
         frames are computed by taking distances between these vectors in R^n. The
         euclidean distance is recommended, but other distance metrics are available
         (cityblock, etc). This code is executed in parallel on multiple cores (but
-        not multiple boxes) using OMP. ''') 
+        not multiple boxes) using OMP. ''')
     add_argument(dihedral, '-a', dest='dihedral_angles', default='phi/psi',
         help='which dihedrals. Choose from phi, psi, chi (to choose multiple, seperate them with a slash), or user')
     add_argument(dihedral, '-f', dest='dihedral_userfilename', default='DihedralIndices.dat', help='filename for dihedral indices, N lines of 4 space-separated indices (otherwise ignored)')
@@ -74,7 +73,7 @@ def add_metric_parsers(parser):
         Furthermore, the sense in which the distance between two residues is computed can be
         either specified as "CA", "closest", or "closest-heavy", which will respectively compute
         ("CA") the distance between the residues' alpha carbons, ("closest"), the closest distance between any pair of
-        atoms i and j such that i belongs to one residue and j to the other residue, ("closest-heavy"), 
+        atoms i and j such that i belongs to one residue and j to the other residue, ("closest-heavy"),
         or the closest distance between any pair of NON-HYDROGEN atoms i and j such that i belongs to
         one residue and j to the other residue. This code is executed in parallel on multiple cores (but
         not multiple boxes) using OMP.''')
@@ -89,14 +88,27 @@ def add_metric_parsers(parser):
     atompairs = metric_subparser.add_parser('atompairs',description='''ATOMPAIRS: For each frame, we
         represent the conformation as a vector of particular atom-atom distances. Then the distance
         between frames is calculated using a specified norm on these vectors. This code is executed in
-        parallel (but not multiple boxes) using OMP.''') 
+        parallel (but not multiple boxes) using OMP.''')
     add_argument(atompairs, '-a', dest='atompairs_which',
         help='path to file with 2D array of which atompairs to use.', default='AtomPairs.dat')
     add_argument(atompairs, '-p', dest='atompairs_p', default=2, help='p used for metric=minkowski (otherwise ignored)')
     add_argument(atompairs, '-m', dest='atompairs_metric', default='euclidean',
         help='which distance metric', choices=AtomPairs.allowable_scipy_metrics)
     metric_parser_list.append(atompairs)
 
+    solventfp = metric_subparser.add_parser('solventfp', description='''SOLVENTFP: Get
+        the solvent fingerprint.''')
+    add_argument(solventfp, '-w', dest='solventfp_wateri', default='WaterIndices.dat',
+                 help='Indices of the solvent (water) atoms')
+    add_argument(solventfp, '-v', dest='solventfp_proti', default='ProteinIndices.dat',
+                 help='Indices of the solute (protein) atoms')
+    add_argument(solventfp, '-s', dest='solventfp_sigma', default=0.5,
+                 type=float, help='std. dev. of gaussian kernel')
+    add_argument(solventfp, '-p', dest='solventfp_p', default=2, help='p used for metric=minkowski (otherwise ignored)')
+    add_argument(solventfp, '-m', dest='solventfp_metric', default='euclidean',
+        help='which distance metric', choices=SolventFp.allowable_scipy_metrics)
+    metric_parser_list.append(solventfp)
+
     positions = metric_subparser.add_parser('positions', description="""POSITIONS: For each frame
         we represent the conformation as a vector of atom positions, where the atoms have been
         aligned to a target structure.""")
@@ -111,13 +123,13 @@ def add_metric_parsers(parser):
     tica = metric_subparser.add_parser( 'tica', description='''
         tICA: This metric is based on a variation of PCA which looks for the slowest d.o.f.
         in the simulation data. See (Schwantes, C.R., Pande, V.S. JCTC 2013, 9 (4), 2000-09.)
-        for more details. In addition to these options, you must provide an additional 
+        for more details. In addition to these options, you must provide an additional
         metric you used to prepare the trajectories in the training step.''')
 
     add_argument(tica,'-p', dest='p', help='p value for p-norm')
     add_argument(tica,'-m', dest='projected_metric', help='metric to use in the projected space',
         choices=Vectorized.allowable_scipy_metrics, default='euclidean')
-    add_argument(tica, '-f', dest='tica_fn', 
+    add_argument(tica, '-f', dest='tica_fn',
         help='tICA Object which was prepared by tICA_train.py')
     add_argument(tica, '-n', dest='num_vecs', type=int,
         help='Choose the top <-n> eigenvectors based on their eigenvalues')
@@ -130,14 +142,12 @@ def add_metric_parsers(parser):
     add_argument(picklemetric, '-i', dest='picklemetric_input', required=True,
         help="Path to pickle file for the metric")
     metric_parser_list.append(picklemetric)
-    
+
     for add_parser in locate_metric_plugins('add_metric_parser'):
         plugin_metric_parser = add_parser(metric_subparser, add_argument)
         metric_parser_list.append(plugin_metric_parser)
-
-    return metric_parser_list
 
-    ################################################################################
+    return metric_parser_list
 
 
 def construct_metric(args):
@@ -154,27 +164,27 @@ def construct_metric(args):
         metric = Dihedral(metric=args.dihedral_metric,
             p=args.dihedral_p, angles=args.dihedral_angles,
             userfilename=args.dihedral_userfilename)
-    
+
     elif metric_name == 'contact':
         if args.contact_which != 'all':
             contact_which = np.loadtxt(args.contact_which, np.int)
         else:
             contact_which = 'all'
 
-        if args.contact_cutoff_file != None: 
-            contact_cutoff = np.loadtxt(args.contact_cutoff_file, np.float)            
+        if args.contact_cutoff_file != None:
+            contact_cutoff = np.loadtxt(args.contact_cutoff_file, np.float)
         elif args.contact_cutoff != None:
             contact_cutoff = float( args.contact_cutoff )
         else:
             contact_cutoff = None
-             
+
         if contact_cutoff != None and contact_cutoff < 0:
             metric = ContinuousContact(contacts=contact_which,
                 scheme=args.contact_scheme)
         else:
             metric = BooleanContact(contacts=contact_which,
                 cutoff=contact_cutoff, scheme=args.contact_scheme)
-     
+
     elif metric_name == 'atompairs':
         if args.atompairs_which != None:
             pairs = np.loadtxt(args.atompairs_which, np.int)
@@ -186,7 +196,7 @@ def construct_metric(args):
 
     elif metric_name == 'positions':
         target = md.load(args.target)
-        
+
         if args.pos_atom_indices != None:
             atom_indices = np.loadtxt(args.pos_atom_indices, np.int)
         else:
@@ -196,16 +206,24 @@ def construct_metric(args):
             align_indices = np.loadtxt(args.align_indices, np.int)
         else:
             align_indices = None
-        
+
         metric = Positions(target, atom_indices=atom_indices, align_indices=align_indices,
                            metric=args.positions_metric, p=args.positions_p)
 
+    elif metric_name == 'solventfp':
+        proti = np.loadtxt(args.solventfp_proti, np.int)
+        wateri = np.loadtxt(args.solventfp_wateri, np.int)
+        metric = SolventFp(solute_indices=proti,
+                           solvent_indices=wateri,
+                           sigma=args.solventfp_sigma,
+                           metric=args.solventfp_metric,
+                           p=args.solventfp_p)
+
     elif metric_name == "tica":
         tica_obj = tICA.load(args.tica_fn)
 
-        metric = RedDimPNorm(tica_obj, num_vecs=args.num_vecs, 
+        metric = RedDimPNorm(tica_obj, num_vecs=args.num_vecs,
                            metric=args.projected_metric, p=args.p)
-
     elif metric_name == 'custom':
         with open(args.picklemetric_input) as f:
             metric = pickle.load(f)
@@ -223,7 +241,7 @@ def construct_metric(args):
         except StopIteration:
             # This means that none of the plugins acceptedthe metric
             raise RuntimeError("Bad metric. Could not be constructed by any built-in or plugin metric. Perhaps you have a poorly written plugin?")
-     
+
     if not isinstance(metric, AbstractDistanceMetric):
         return ValueError("%s is not a AbstractDistanceMetric" % metric)