Enable protein mutations involving nonstandard protonation state aas (#…

…834) * attempt to enable nonstandard aas * remove unused attribute * enable hid, hie, hip * remove load nonstandard bonds * add hid, hie, hip to ring_amino_acids * expand tests to include nonstandard aas * clarify input PDB formatting * remove HIS from positivee aminos * fadd back accidentline that shouldn't be deleted * add prints to check if nonstandard pdbs are present * fix lyn as new residue * remove print statementsf * fix lyn logic according to mike's suggestion Co-authored-by: Mike Henry <[email protected]>
choderalab · Aug 31, 2021 · 2d39e8d · 2d39e8d
1 parent 82dd766
commit 2d39e8d
Show file tree

Hide file tree

Showing 9 changed files with 138 additions and 63 deletions.
diff --git a/perses/app/relative_point_mutation_setup.py b/perses/app/relative_point_mutation_setup.py
@@ -20,7 +20,7 @@
 temperature = 300 * unit.kelvin
 kT = kB * temperature
 beta = 1.0/kT
-ring_amino_acids = ['TYR', 'PHE', 'TRP', 'PRO', 'HIS']
+ring_amino_acids = ['TYR', 'PHE', 'TRP', 'PRO', 'HIS', 'HID', 'HIE', 'HIP']
 
 # Set up logger
 import logging
@@ -89,6 +89,7 @@ def __init__(self,
                  mutation_chain_id,
                  mutation_residue_id,
                  proposed_residue,
+                 old_residue=None,
                  phase='complex',
                  conduct_endstate_validation=True,
                  ligand_input=None,
@@ -113,12 +114,16 @@ def __init__(self,
         arguments
             protein_filename : str
                 path to protein (to mutate); .pdb
+                Note: if there are nonstandard residues, the PDB should contain the standard residue name but the atoms/positions should correspond to the nonstandard residue. E.g. if I want to include HID, the PDB should contain HIS for the residue name, but the atoms should correspond to the atoms present in HID. You can use openmm.app.Modeller.addHydrogens() to generate a PDB like this. The same is true for the ligand_input, if its a PDB. 
             mutation_chain_id : str
                 name of the chain to be mutated
             mutation_residue_id : str
                 residue id to change
             proposed_residue : str
                 three letter code of the residue to mutate to
+            old_residue : str, default None
+                name of the old residue, if is a nonstandard amino acid (e.g. LYN, ASH, HID, etc)
+                if not specified, the old residue name will be inferred from the input PDB.
             phase : str, default complex
                 if phase == vacuum, then the complex will not be solvated with water; else, it will be solvated with tip3p
             conduct_endstate_validation : bool, default True
@@ -249,6 +254,14 @@ def __init__(self,
         # Run pipeline...
         htfs = []
         for is_complex, (top, pos, sys) in enumerate(inputs):
+            # Change the name of the old residue to its nonstandard name, if necessary
+            # Note this needs to happen after generating the system, as the system generator requires standard residue names
+            if old_residue:
+                for residue in top.residues():
+                    if residue.id == mutation_residue_id:
+                        residue.name = old_residue
+                        print(f"Changed resid {mutation_residue_id} to {residue.name}")
+
             point_mutation_engine = PointMutationEngine(wildtype_topology=top,
                                                                  system_generator=self.system_generator,
                                                                  chain_id=mutation_chain_id, # Denote the chain id allowed to mutate (it's always a string variable)

diff --git a/perses/data/amino_acid_templates/ASH.pdb b/perses/data/amino_acid_templates/ASH.pdb
@@ -0,0 +1,15 @@
+ATOM      1  N   ASH     2      28.512  45.560   9.232  1.00  0.00           N  
+ATOM      2  CA  ASH     2      29.071  46.909   9.232  1.00  0.00           C  
+ATOM      3  C   ASH     2      30.580  46.870   9.232  1.00  0.00           C  
+ATOM      4  O   ASH     2      31.210  45.801   9.232  1.00  0.00           O  
+ATOM      5  CB  ASH     2      28.536  47.722  10.444  1.00  0.00           C  
+ATOM      6  CG  ASH     2      27.064  48.151  10.354  1.00  0.00           C  
+ATOM      7  OD1 ASH     2      26.467  48.050   9.263  1.00  0.00           O  
+ATOM      8  OD2 ASH     2      26.519  48.632  11.369  1.00  0.00           O  
+ATOM      9  H   ASH     2      28.318  45.093  10.107  1.00  0.00           H  
+ATOM     10  HA  ASH     2      28.756  47.414   8.299  1.00  0.00           H  
+ATOM     11 2HB  ASH     2      29.144  48.633  10.586  1.00  0.00           H  
+ATOM     12 3HB  ASH     2      28.640  47.146  11.378  1.00  0.00           H  
+ATOM     13 2HD  ASH     2      27.140  48.622  12.102  1.00  0.00           H  
+TER   
+END
diff --git a/perses/data/amino_acid_templates/GLH.pdb b/perses/data/amino_acid_templates/GLH.pdb
@@ -0,0 +1,18 @@
+ATOM      1  N   GLH     2      28.604  45.037   8.824  1.00  0.00           N  
+ATOM      2  CA  GLH     2      29.163  46.386   8.824  1.00  0.00           C  
+ATOM      3  C   GLH     2      30.672  46.346   8.824  1.00  0.00           C  
+ATOM      4  O   GLH     2      31.301  45.278   8.824  1.00  0.00           O  
+ATOM      5  CB  GLH     2      28.640  47.171  10.061  1.00  0.00           C  
+ATOM      6  CG  GLH     2      29.102  48.661  10.163  1.00  0.00           C  
+ATOM      7  CD  GLH     2      28.588  49.497  11.320  1.00  0.00           C  
+ATOM      8  OE1 GLH     2      27.814  49.012  12.167  1.00  0.00           O  
+ATOM      9  OE2 GLH     2      28.994  50.677  11.379  1.00  0.00           O  
+ATOM     10  H   GLH     2      28.411  44.570   9.698  1.00  0.00           H  
+ATOM     11  HA  GLH     2      28.837  46.904   7.904  1.00  0.00           H  
+ATOM     12 2HB  GLH     2      28.939  46.646  10.991  1.00  0.00           H  
+ATOM     13 3HB  GLH     2      27.533  47.151  10.080  1.00  0.00           H  
+ATOM     14 2HG  GLH     2      28.856  49.219   9.245  1.00  0.00           H  
+ATOM     15 3HG  GLH     2      30.197  48.733  10.271  1.00  0.00           H  
+ATOM     16 2HE  GLH     2      29.577  50.852  10.637  1.00  0.00           H  
+TER   
+END
diff --git a/perses/data/amino_acid_templates/HID.pdb b/perses/data/amino_acid_templates/HID.pdb
@@ -0,0 +1,19 @@
+ATOM      1  N   HID     2      29.627  44.998   8.697  1.00  0.00           N  
+ATOM      2  CA  HID     2      30.173  46.304   9.051  1.00  0.00           C  
+ATOM      3  C   HID     2      31.682  46.298   8.977  1.00  0.00           C  
+ATOM      4  O   HID     2      32.315  45.264   8.748  1.00  0.00           O  
+ATOM      5  CB  HID     2      29.658  46.744  10.457  1.00  0.00           C  
+ATOM      6  CG  HID     2      28.153  46.966  10.592  1.00  0.00           C  
+ATOM      7  CD2 HID     2      27.281  46.093  11.260  1.00  0.00           C  
+ATOM      8  ND1 HID     2      27.434  48.027  10.076  1.00  0.00           N  
+ATOM      9  CE1 HID     2      26.174  47.693  10.471  1.00  0.00           C  
+ATOM     10  NE2 HID     2      25.970  46.549  11.196  1.00  0.00           N  
+ATOM     11  H   HID     2      29.409  44.328   9.421  1.00  0.00           H  
+ATOM     12  HA  HID     2      29.855  46.999   8.255  1.00  0.00           H  
+ATOM     13 2HB  HID     2      30.161  47.676  10.783  1.00  0.00           H  
+ATOM     14 3HB  HID     2      29.966  46.025  11.243  1.00  0.00           H  
+ATOM     15 1HD  HID     2      27.755  48.812   9.501  1.00  0.00           H  
+ATOM     16 2HD  HID     2      27.587  45.182  11.754  1.00  0.00           H  
+ATOM     17 1HE  HID     2      25.335  48.323  10.204  1.00  0.00           H  
+TER   
+END
diff --git a/perses/data/amino_acid_templates/HIE.pdb b/perses/data/amino_acid_templates/HIE.pdb
@@ -0,0 +1,19 @@
+ATOM      1  N   HIE     2      29.617  45.008   8.732  1.00  0.00           N  
+ATOM      2  CA  HIE     2      30.171  46.327   9.025  1.00  0.00           C  
+ATOM      3  C   HIE     2      31.680  46.300   8.994  1.00  0.00           C  
+ATOM      4  O   HIE     2      32.314  45.263   8.747  1.00  0.00           O  
+ATOM      5  CB  HIE     2      29.670  46.704  10.429  1.00  0.00           C  
+ATOM      6  CG  HIE     2      28.838  47.953  10.421  1.00  0.00           C  
+ATOM      7  CD2 HIE     2      27.459  47.935  10.259  1.00  0.00           C  
+ATOM      8  ND1 HIE     2      29.316  49.253  10.570  1.00  0.00           N  
+ATOM      9  CE1 HIE     2      28.160  49.936  10.479  1.00  0.00           C  
+ATOM     10  NE2 HIE     2      27.011  49.230  10.297  1.00  0.00           N  
+ATOM     11  H   HIE     2      29.402  44.372   9.486  1.00  0.00           H  
+ATOM     12  HA  HIE     2      29.851  47.010   8.220  1.00  0.00           H  
+ATOM     13 2HB  HIE     2      30.516  46.863  11.127  1.00  0.00           H  
+ATOM     14 3HB  HIE     2      29.060  45.897  10.886  1.00  0.00           H  
+ATOM     15 2HD  HIE     2      26.867  47.034  10.127  1.00  0.00           H  
+ATOM     16 1HE  HIE     2      28.207  51.014  10.559  1.00  0.00           H  
+ATOM     17 2HE  HIE     2      26.044  49.574  10.207  1.00  0.00           H  
+TER   
+END
diff --git a/perses/data/amino_acid_templates/HIP.pdb b/perses/data/amino_acid_templates/HIP.pdb
@@ -0,0 +1,20 @@
+ATOM      1  N   HIP     2      29.623  45.003   8.715  1.00  0.00           N  
+ATOM      2  CA  HIP     2      30.177  46.314   9.037  1.00  0.00           C  
+ATOM      3  C   HIP     2      31.686  46.294   8.975  1.00  0.00           C  
+ATOM      4  O   HIP     2      32.313  45.262   8.746  1.00  0.00           O  
+ATOM      5  CB  HIP     2      29.649  46.724  10.451  1.00  0.00           C  
+ATOM      6  CG  HIP     2      28.143  46.924  10.601  1.00  0.00           C  
+ATOM      7  CD2 HIP     2      27.286  46.027  11.267  1.00  0.00           C  
+ATOM      8  ND1 HIP     2      27.405  47.988  10.106  1.00  0.00           N1+
+ATOM      9  CE1 HIP     2      26.142  47.655  10.504  1.00  0.00           C  
+ATOM     10  NE2 HIP     2      25.974  46.493  11.216  1.00  0.00           N  
+ATOM     11  H   HIP     2      29.405  44.351   9.455  1.00  0.00           H  
+ATOM     12  HA  HIP     2      29.865  47.007   8.235  1.00  0.00           H  
+ATOM     13 2HB  HIP     2      30.153  47.650  10.798  1.00  0.00           H  
+ATOM     14 3HB  HIP     2      29.972  45.997  11.226  1.00  0.00           H  
+ATOM     15 1HD  HIP     2      27.720  48.776   9.522  1.00  0.00           H  
+ATOM     16 2HD  HIP     2      27.617  45.105  11.739  1.00  0.00           H  
+ATOM     17 1HE  HIP     2      25.294  48.294  10.254  1.00  0.00           H  
+ATOM     18 2HE  HIP     2      25.111  46.076  11.591  1.00  0.00           H  
+TER   
+END
diff --git a/perses/data/amino_acid_templates/LYN.pdb b/perses/data/amino_acid_templates/LYN.pdb
@@ -0,0 +1,23 @@
+ATOM      1  N   LYN     2      28.674  43.813   8.197  1.00  0.00           N  
+ATOM      2  CA  LYN     2      29.233  45.162   8.197  1.00  0.00           C  
+ATOM      3  C   LYN     2      30.743  45.122   8.197  1.00  0.00           C  
+ATOM      4  O   LYN     2      31.372  44.054   8.197  1.00  0.00           O  
+ATOM      5  CB  LYN     2      28.698  45.943   9.428  1.00  0.00           C  
+ATOM      6  CG  LYN     2      29.180  47.414   9.481  1.00  0.00           C  
+ATOM      7  CD  LYN     2      28.616  48.235  10.645  1.00  0.00           C  
+ATOM      8  CE  LYN     2      29.185  49.659  10.581  1.00  0.00           C  
+ATOM      9  NZ  LYN     2      28.643  50.453  11.697  1.00  0.00           N  
+ATOM     10  H   LYN     2      28.480  43.346   9.071  1.00  0.00           H  
+ATOM     11  HA  LYN     2      28.914  45.672   7.269  1.00  0.00           H  
+ATOM     12 2HB  LYN     2      28.990  45.416  10.359  1.00  0.00           H  
+ATOM     13 3HB  LYN     2      27.590  45.925   9.428  1.00  0.00           H  
+ATOM     14 2HG  LYN     2      28.952  47.915   8.520  1.00  0.00           H  
+ATOM     15 3HG  LYN     2      30.281  47.451   9.579  1.00  0.00           H  
+ATOM     16 2HD  LYN     2      28.888  47.757  11.608  1.00  0.00           H  
+ATOM     17 3HD  LYN     2      27.510  48.238  10.599  1.00  0.00           H  
+ATOM     18 2HE  LYN     2      28.930  50.149   9.619  1.00  0.00           H  
+ATOM     19 3HE  LYN     2      30.294  49.636  10.625  1.00  0.00           H  
+ATOM     20 1HZ  LYN     2      28.553  49.875  12.542  1.00  0.00           H  
+ATOM     21 2HZ  LYN     2      29.270  51.225  11.945  1.00  0.00           H  
+TER   
+END
diff --git a/perses/rjmc/topology_proposal.py b/perses/rjmc/topology_proposal.py
@@ -442,10 +442,10 @@ class PolymerProposalEngine(ProposalEngine):
     This base class is not meant to be invoked directly.
     """
 
-    _aminos = ['ALA', 'ARG', 'ASN', 'ASP', 'CYS', 'GLN', 'GLU', 'GLY', 'HIS', 'ILE', 'LEU', 'LYS', 'MET', 'PHE',
+    _aminos = ['ALA', 'ARG', 'ASH', 'ASN', 'ASP', 'CYS', 'GLN', 'GLH', 'GLU', 'GLY', 'HID', 'HIE', 'HIP', 'HIS', 'ILE', 'LEU', 'LYN',  'LYS',  'MET', 'PHE',
                     'SER', 'THR', 'TRP', 'TYR', 'VAL'] # common naturally-occurring amino acid names
                     # Note this does not include PRO since there's a problem with OpenMM's template DEBUG
-    _positive_aminos = ['ARG', 'HIS', 'LYS']
+    _positive_aminos = ['ARG', 'LYS', 'HIP']
     _negative_aminos = ['ASP', 'GLU']
 
     def _get_neutrals(aminos, positive, negative):
@@ -479,26 +479,6 @@ def __init__(self, system_generator, chain_id, proposal_metadata=None, always_ch
         _logger.debug(f"Instantiating PolymerProposalEngine")
         super(PolymerProposalEngine,self).__init__(system_generator=system_generator, proposal_metadata=proposal_metadata, always_change=always_change)
         self._chain_id = chain_id # chain identifier defining polymer to be modified
-        self._aminos_3letter_to_1letter_map = {'ALA' : 'A' ,
-                                                'ARG' : 'R' ,
-                                                'ASN' : 'N' ,
-                                                'ASP' : 'D' ,
-                                                'CYS' : 'C' ,
-                                                'GLN' : 'Q' ,
-                                                'GLU' : 'E' ,
-                                                'GLY' : 'G' ,
-                                                'HIS' : 'H' ,
-                                                'ILE' : 'I' ,
-                                                'LEU' : 'L' ,
-                                                'LYS' : 'K' ,
-                                                'MET' : 'M' ,
-                                                'PHE' : 'F' ,
-                                                'PRO' : 'P' ,
-                                                'SER' : 'S' ,
-                                                'THR' : 'T' ,
-                                                'TRP' : 'W' ,
-                                                'TYR' : 'Y' ,
-                                                'VAL' : 'V' }
         self._aggregate = aggregate # ?????????
 
     @staticmethod
@@ -927,6 +907,13 @@ def _identify_differences(self, topology, residue_map):
                 replace_with = 'HIE'
             template = self._templates[replace_with]
 
+            if replace_with == 'LYN': # Rename HZ3 to HZ1 s.t. the topology matches the naming in amino_acid_templates/LYN.pdb
+                for atom in template.atoms:
+                    if atom.name == 'HZ3':
+                        atom.name = 'HZ1'
+                template.atomIndices['HZ1'] = template.atomIndices['HZ3']
+                del template.atomIndices['HZ3']
+
             # template_atom_names : dict, key : template atom index, value : template atom name
             template_atom_names = {}
             for atom in template.atoms:
@@ -1216,15 +1203,6 @@ def _construct_atom_map(self,
         old_res_name = old_res.name
         new_res_name = new_res.name
 
-        #make correction for HIS
-        his_templates = ['HIE', 'HID']
-        if old_res_name in his_templates:
-            old_res_name = 'HIS'
-        if new_res_name in his_templates:
-            new_res_name = 'HIS'
-        else:
-            pass
-
         current_residue_pdb_filename = resource_filename('perses', os.path.join('data', 'amino_acid_templates', f"{old_res_name}.pdb"))
         proposed_residue_pdb_filename = resource_filename('perses', os.path.join('data', 'amino_acid_templates', f"{new_res_name}.pdb"))
 
@@ -1835,22 +1813,13 @@ def _choose_mutation_from_allowed(self, topology, chain_id, allowed_mutations, i
                             " If you wish to modify one of these residues, make sure you have added cap residues to the input topology.")
 
         # Check if mutated residue's name is same as residue's name in old topology
-        if original_residue.name in ['HID', 'HIE']:
-            # original_residue.name = 'HIS'
-            pass
         if original_residue.name == residue_name: #there is no mutation to be done
             return index_to_new_residues
 
         # Save proposed mutation to index_to_new_residues
         # index_to_new_residues : dict, key : int (index of residue, 0-indexed), value : str (three letter residue name)
         index_to_new_residues[residue_id_to_index[residue_id]] = residue_name
 
-        # Randomly choose HIS template ('HIS' does not exist as a template)
-        # if residue_name == 'HIS':
-        #     his_state = ['HIE','HID']
-        #     his_prob = [1/len(his_state)] * len(his_state)
-        #     his_choice = np.random.choice(range(len(his_state)), p=his_prob)
-        #     index_to_new_residues[residue_id_to_index[residue_id]] = his_state[his_choice]
         return index_to_new_residues
 
     def _propose_mutation(self, topology, chain_id, index_to_new_residues):
@@ -1905,13 +1874,7 @@ def _propose_mutation(self, topology, chain_id, index_to_new_residues):
         # Propose a location at which to mutate the residue
         # proposed_location : int, index of chosen entry in location_prob
         proposed_location = np.random.choice(range(num_residues), p=location_prob)
-
-        # Rename residue to HIS if it uses one of the HIS-derived templates
-        # original_residue : simtk.openmm.app.topology.Residue
         original_residue = chain_residues[proposed_location]
-        if original_residue.name in ['HIE', 'HID']:
-            #original_residue.name = 'HIS'
-            pass
 
         if self._residues_allowed_to_mutate is None:
             proposed_location = original_residue.index
@@ -1933,13 +1896,6 @@ def _propose_mutation(self, topology, chain_id, index_to_new_residues):
         # index_to_new_residues : dict, key : int (index of residue, 0-indexed), value : str (three letter residue name)
         index_to_new_residues[proposed_location] = aminos[proposed_amino_index]
 
-        # Randomly choose HIS template ('HIS' does not exist as a template)
-        if aminos[proposed_amino_index] == 'HIS':
-            # his_state = ['HIE','HID']
-            # his_prob = [1 / len(his_state)] * len(his_state)
-            # his_choice = np.random.choice(range(len(his_state)), p=his_prob)
-            # index_to_new_residues[proposed_location] = his_state[his_choice]
-            pass
         return index_to_new_residues
 
     def _mutable_residues(self, chain):
@@ -2081,12 +2037,6 @@ def _choose_mutant(self, topology, metadata):
                 continue
             # index_to_new_residues : dict, key : int (index of residue, 0-indexed), value : str (three letter residue name)
             index_to_new_residues[residue_index] = residue_name
-            if residue_name == 'HIS':
-                # his_state = ['HIE','HID']
-                # his_prob = np.array([0.5 for i in range(len(his_state))])
-                # his_choice = np.random.choice(range(len(his_state)),p=his_prob)
-                # index_to_new_residues[residue_index] = his_state[his_choice]
-                pass
 
         # index_to_new_residues : dict, key : int (index of residue, 0-indexed), value : str (three letter residue name)
         return index_to_new_residues, metadata

diff --git a/perses/tests/test_topology_proposal.py b/perses/tests/test_topology_proposal.py
@@ -325,7 +325,7 @@ def test_mutate_from_alanine():
     #      generating VERY large nonbonded energies, to which numerical precision cannot achieve a proper threshold of 1e-6.
     #      in the future, we can look to use sterics or something fancy.  At the moment, we recommend conservative transforms
     #      or transforms that have more unique _old_ atoms than new
-    aminos = ['ARG','ASN','ASP','CYS','GLN','GLU','GLY','HIS','ILE','LEU','LYS','MET','PHE','SER','THR','TRP','TYR','VAL']
+    aminos = ['ARG', 'ASH', 'ASN', 'ASP', 'CYS', 'GLH', 'GLN', 'GLU', 'GLY', 'HID', 'HIE', 'HIS', 'HIP', 'ILE', 'LEU', 'LYN', 'LYS', 'MET', 'PHE', 'SER', 'THR', 'TRP', 'TYR', 'VAL']
     attempt_full_pipeline_aminos = ['CYS', 'ILE', 'SER', 'THR', 'VAL'] #let's omit rings and large perturbations for now
 
     ala, system_generator = generate_atp()
@@ -543,8 +543,6 @@ def test_mutate_from_every_amino_to_every_other():
             continue
         if residue.index == (num_residues -1):
             continue
-        if residue.name in ['HID','HIE']:
-            residue.name = 'HIS'
         new_sequence.append(residue.name)
     for i in range(len(aminos)):
         assert new_sequence[i] == aminos[i]