Skip to content

Commit

Permalink
Enable protein mutations involving nonstandard protonation state aas (#…
Browse files Browse the repository at this point in the history
…834)

* attempt to enable nonstandard aas

* remove unused attribute

* enable hid, hie, hip

* remove load nonstandard bonds

* add hid, hie, hip to ring_amino_acids

* expand tests to include nonstandard aas

* clarify input PDB formatting

* remove HIS from positivee aminos

* fadd back accidentline that shouldn't be deleted

* add prints to check if nonstandard pdbs are present

* fix lyn as new residue

* remove print statementsf

* fix lyn logic according to mike's suggestion

Co-authored-by: Mike Henry <[email protected]>
  • Loading branch information
zhang-ivy and mikemhenry authored Aug 31, 2021
1 parent 82dd766 commit 2d39e8d
Show file tree
Hide file tree
Showing 9 changed files with 138 additions and 63 deletions.
15 changes: 14 additions & 1 deletion perses/app/relative_point_mutation_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
temperature = 300 * unit.kelvin
kT = kB * temperature
beta = 1.0/kT
ring_amino_acids = ['TYR', 'PHE', 'TRP', 'PRO', 'HIS']
ring_amino_acids = ['TYR', 'PHE', 'TRP', 'PRO', 'HIS', 'HID', 'HIE', 'HIP']

# Set up logger
import logging
Expand Down Expand Up @@ -89,6 +89,7 @@ def __init__(self,
mutation_chain_id,
mutation_residue_id,
proposed_residue,
old_residue=None,
phase='complex',
conduct_endstate_validation=True,
ligand_input=None,
Expand All @@ -113,12 +114,16 @@ def __init__(self,
arguments
protein_filename : str
path to protein (to mutate); .pdb
Note: if there are nonstandard residues, the PDB should contain the standard residue name but the atoms/positions should correspond to the nonstandard residue. E.g. if I want to include HID, the PDB should contain HIS for the residue name, but the atoms should correspond to the atoms present in HID. You can use openmm.app.Modeller.addHydrogens() to generate a PDB like this. The same is true for the ligand_input, if its a PDB.
mutation_chain_id : str
name of the chain to be mutated
mutation_residue_id : str
residue id to change
proposed_residue : str
three letter code of the residue to mutate to
old_residue : str, default None
name of the old residue, if is a nonstandard amino acid (e.g. LYN, ASH, HID, etc)
if not specified, the old residue name will be inferred from the input PDB.
phase : str, default complex
if phase == vacuum, then the complex will not be solvated with water; else, it will be solvated with tip3p
conduct_endstate_validation : bool, default True
Expand Down Expand Up @@ -249,6 +254,14 @@ def __init__(self,
# Run pipeline...
htfs = []
for is_complex, (top, pos, sys) in enumerate(inputs):
# Change the name of the old residue to its nonstandard name, if necessary
# Note this needs to happen after generating the system, as the system generator requires standard residue names
if old_residue:
for residue in top.residues():
if residue.id == mutation_residue_id:
residue.name = old_residue
print(f"Changed resid {mutation_residue_id} to {residue.name}")

point_mutation_engine = PointMutationEngine(wildtype_topology=top,
system_generator=self.system_generator,
chain_id=mutation_chain_id, # Denote the chain id allowed to mutate (it's always a string variable)
Expand Down
15 changes: 15 additions & 0 deletions perses/data/amino_acid_templates/ASH.pdb
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
ATOM 1 N ASH 2 28.512 45.560 9.232 1.00 0.00 N
ATOM 2 CA ASH 2 29.071 46.909 9.232 1.00 0.00 C
ATOM 3 C ASH 2 30.580 46.870 9.232 1.00 0.00 C
ATOM 4 O ASH 2 31.210 45.801 9.232 1.00 0.00 O
ATOM 5 CB ASH 2 28.536 47.722 10.444 1.00 0.00 C
ATOM 6 CG ASH 2 27.064 48.151 10.354 1.00 0.00 C
ATOM 7 OD1 ASH 2 26.467 48.050 9.263 1.00 0.00 O
ATOM 8 OD2 ASH 2 26.519 48.632 11.369 1.00 0.00 O
ATOM 9 H ASH 2 28.318 45.093 10.107 1.00 0.00 H
ATOM 10 HA ASH 2 28.756 47.414 8.299 1.00 0.00 H
ATOM 11 2HB ASH 2 29.144 48.633 10.586 1.00 0.00 H
ATOM 12 3HB ASH 2 28.640 47.146 11.378 1.00 0.00 H
ATOM 13 2HD ASH 2 27.140 48.622 12.102 1.00 0.00 H
TER
END
18 changes: 18 additions & 0 deletions perses/data/amino_acid_templates/GLH.pdb
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
ATOM 1 N GLH 2 28.604 45.037 8.824 1.00 0.00 N
ATOM 2 CA GLH 2 29.163 46.386 8.824 1.00 0.00 C
ATOM 3 C GLH 2 30.672 46.346 8.824 1.00 0.00 C
ATOM 4 O GLH 2 31.301 45.278 8.824 1.00 0.00 O
ATOM 5 CB GLH 2 28.640 47.171 10.061 1.00 0.00 C
ATOM 6 CG GLH 2 29.102 48.661 10.163 1.00 0.00 C
ATOM 7 CD GLH 2 28.588 49.497 11.320 1.00 0.00 C
ATOM 8 OE1 GLH 2 27.814 49.012 12.167 1.00 0.00 O
ATOM 9 OE2 GLH 2 28.994 50.677 11.379 1.00 0.00 O
ATOM 10 H GLH 2 28.411 44.570 9.698 1.00 0.00 H
ATOM 11 HA GLH 2 28.837 46.904 7.904 1.00 0.00 H
ATOM 12 2HB GLH 2 28.939 46.646 10.991 1.00 0.00 H
ATOM 13 3HB GLH 2 27.533 47.151 10.080 1.00 0.00 H
ATOM 14 2HG GLH 2 28.856 49.219 9.245 1.00 0.00 H
ATOM 15 3HG GLH 2 30.197 48.733 10.271 1.00 0.00 H
ATOM 16 2HE GLH 2 29.577 50.852 10.637 1.00 0.00 H
TER
END
19 changes: 19 additions & 0 deletions perses/data/amino_acid_templates/HID.pdb
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
ATOM 1 N HID 2 29.627 44.998 8.697 1.00 0.00 N
ATOM 2 CA HID 2 30.173 46.304 9.051 1.00 0.00 C
ATOM 3 C HID 2 31.682 46.298 8.977 1.00 0.00 C
ATOM 4 O HID 2 32.315 45.264 8.748 1.00 0.00 O
ATOM 5 CB HID 2 29.658 46.744 10.457 1.00 0.00 C
ATOM 6 CG HID 2 28.153 46.966 10.592 1.00 0.00 C
ATOM 7 CD2 HID 2 27.281 46.093 11.260 1.00 0.00 C
ATOM 8 ND1 HID 2 27.434 48.027 10.076 1.00 0.00 N
ATOM 9 CE1 HID 2 26.174 47.693 10.471 1.00 0.00 C
ATOM 10 NE2 HID 2 25.970 46.549 11.196 1.00 0.00 N
ATOM 11 H HID 2 29.409 44.328 9.421 1.00 0.00 H
ATOM 12 HA HID 2 29.855 46.999 8.255 1.00 0.00 H
ATOM 13 2HB HID 2 30.161 47.676 10.783 1.00 0.00 H
ATOM 14 3HB HID 2 29.966 46.025 11.243 1.00 0.00 H
ATOM 15 1HD HID 2 27.755 48.812 9.501 1.00 0.00 H
ATOM 16 2HD HID 2 27.587 45.182 11.754 1.00 0.00 H
ATOM 17 1HE HID 2 25.335 48.323 10.204 1.00 0.00 H
TER
END
19 changes: 19 additions & 0 deletions perses/data/amino_acid_templates/HIE.pdb
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
ATOM 1 N HIE 2 29.617 45.008 8.732 1.00 0.00 N
ATOM 2 CA HIE 2 30.171 46.327 9.025 1.00 0.00 C
ATOM 3 C HIE 2 31.680 46.300 8.994 1.00 0.00 C
ATOM 4 O HIE 2 32.314 45.263 8.747 1.00 0.00 O
ATOM 5 CB HIE 2 29.670 46.704 10.429 1.00 0.00 C
ATOM 6 CG HIE 2 28.838 47.953 10.421 1.00 0.00 C
ATOM 7 CD2 HIE 2 27.459 47.935 10.259 1.00 0.00 C
ATOM 8 ND1 HIE 2 29.316 49.253 10.570 1.00 0.00 N
ATOM 9 CE1 HIE 2 28.160 49.936 10.479 1.00 0.00 C
ATOM 10 NE2 HIE 2 27.011 49.230 10.297 1.00 0.00 N
ATOM 11 H HIE 2 29.402 44.372 9.486 1.00 0.00 H
ATOM 12 HA HIE 2 29.851 47.010 8.220 1.00 0.00 H
ATOM 13 2HB HIE 2 30.516 46.863 11.127 1.00 0.00 H
ATOM 14 3HB HIE 2 29.060 45.897 10.886 1.00 0.00 H
ATOM 15 2HD HIE 2 26.867 47.034 10.127 1.00 0.00 H
ATOM 16 1HE HIE 2 28.207 51.014 10.559 1.00 0.00 H
ATOM 17 2HE HIE 2 26.044 49.574 10.207 1.00 0.00 H
TER
END
20 changes: 20 additions & 0 deletions perses/data/amino_acid_templates/HIP.pdb
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
ATOM 1 N HIP 2 29.623 45.003 8.715 1.00 0.00 N
ATOM 2 CA HIP 2 30.177 46.314 9.037 1.00 0.00 C
ATOM 3 C HIP 2 31.686 46.294 8.975 1.00 0.00 C
ATOM 4 O HIP 2 32.313 45.262 8.746 1.00 0.00 O
ATOM 5 CB HIP 2 29.649 46.724 10.451 1.00 0.00 C
ATOM 6 CG HIP 2 28.143 46.924 10.601 1.00 0.00 C
ATOM 7 CD2 HIP 2 27.286 46.027 11.267 1.00 0.00 C
ATOM 8 ND1 HIP 2 27.405 47.988 10.106 1.00 0.00 N1+
ATOM 9 CE1 HIP 2 26.142 47.655 10.504 1.00 0.00 C
ATOM 10 NE2 HIP 2 25.974 46.493 11.216 1.00 0.00 N
ATOM 11 H HIP 2 29.405 44.351 9.455 1.00 0.00 H
ATOM 12 HA HIP 2 29.865 47.007 8.235 1.00 0.00 H
ATOM 13 2HB HIP 2 30.153 47.650 10.798 1.00 0.00 H
ATOM 14 3HB HIP 2 29.972 45.997 11.226 1.00 0.00 H
ATOM 15 1HD HIP 2 27.720 48.776 9.522 1.00 0.00 H
ATOM 16 2HD HIP 2 27.617 45.105 11.739 1.00 0.00 H
ATOM 17 1HE HIP 2 25.294 48.294 10.254 1.00 0.00 H
ATOM 18 2HE HIP 2 25.111 46.076 11.591 1.00 0.00 H
TER
END
23 changes: 23 additions & 0 deletions perses/data/amino_acid_templates/LYN.pdb
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
ATOM 1 N LYN 2 28.674 43.813 8.197 1.00 0.00 N
ATOM 2 CA LYN 2 29.233 45.162 8.197 1.00 0.00 C
ATOM 3 C LYN 2 30.743 45.122 8.197 1.00 0.00 C
ATOM 4 O LYN 2 31.372 44.054 8.197 1.00 0.00 O
ATOM 5 CB LYN 2 28.698 45.943 9.428 1.00 0.00 C
ATOM 6 CG LYN 2 29.180 47.414 9.481 1.00 0.00 C
ATOM 7 CD LYN 2 28.616 48.235 10.645 1.00 0.00 C
ATOM 8 CE LYN 2 29.185 49.659 10.581 1.00 0.00 C
ATOM 9 NZ LYN 2 28.643 50.453 11.697 1.00 0.00 N
ATOM 10 H LYN 2 28.480 43.346 9.071 1.00 0.00 H
ATOM 11 HA LYN 2 28.914 45.672 7.269 1.00 0.00 H
ATOM 12 2HB LYN 2 28.990 45.416 10.359 1.00 0.00 H
ATOM 13 3HB LYN 2 27.590 45.925 9.428 1.00 0.00 H
ATOM 14 2HG LYN 2 28.952 47.915 8.520 1.00 0.00 H
ATOM 15 3HG LYN 2 30.281 47.451 9.579 1.00 0.00 H
ATOM 16 2HD LYN 2 28.888 47.757 11.608 1.00 0.00 H
ATOM 17 3HD LYN 2 27.510 48.238 10.599 1.00 0.00 H
ATOM 18 2HE LYN 2 28.930 50.149 9.619 1.00 0.00 H
ATOM 19 3HE LYN 2 30.294 49.636 10.625 1.00 0.00 H
ATOM 20 1HZ LYN 2 28.553 49.875 12.542 1.00 0.00 H
ATOM 21 2HZ LYN 2 29.270 51.225 11.945 1.00 0.00 H
TER
END
68 changes: 9 additions & 59 deletions perses/rjmc/topology_proposal.py
Original file line number Diff line number Diff line change
Expand Up @@ -442,10 +442,10 @@ class PolymerProposalEngine(ProposalEngine):
This base class is not meant to be invoked directly.
"""

_aminos = ['ALA', 'ARG', 'ASN', 'ASP', 'CYS', 'GLN', 'GLU', 'GLY', 'HIS', 'ILE', 'LEU', 'LYS', 'MET', 'PHE',
_aminos = ['ALA', 'ARG', 'ASH', 'ASN', 'ASP', 'CYS', 'GLN', 'GLH', 'GLU', 'GLY', 'HID', 'HIE', 'HIP', 'HIS', 'ILE', 'LEU', 'LYN', 'LYS', 'MET', 'PHE',
'SER', 'THR', 'TRP', 'TYR', 'VAL'] # common naturally-occurring amino acid names
# Note this does not include PRO since there's a problem with OpenMM's template DEBUG
_positive_aminos = ['ARG', 'HIS', 'LYS']
_positive_aminos = ['ARG', 'LYS', 'HIP']
_negative_aminos = ['ASP', 'GLU']

def _get_neutrals(aminos, positive, negative):
Expand Down Expand Up @@ -479,26 +479,6 @@ def __init__(self, system_generator, chain_id, proposal_metadata=None, always_ch
_logger.debug(f"Instantiating PolymerProposalEngine")
super(PolymerProposalEngine,self).__init__(system_generator=system_generator, proposal_metadata=proposal_metadata, always_change=always_change)
self._chain_id = chain_id # chain identifier defining polymer to be modified
self._aminos_3letter_to_1letter_map = {'ALA' : 'A' ,
'ARG' : 'R' ,
'ASN' : 'N' ,
'ASP' : 'D' ,
'CYS' : 'C' ,
'GLN' : 'Q' ,
'GLU' : 'E' ,
'GLY' : 'G' ,
'HIS' : 'H' ,
'ILE' : 'I' ,
'LEU' : 'L' ,
'LYS' : 'K' ,
'MET' : 'M' ,
'PHE' : 'F' ,
'PRO' : 'P' ,
'SER' : 'S' ,
'THR' : 'T' ,
'TRP' : 'W' ,
'TYR' : 'Y' ,
'VAL' : 'V' }
self._aggregate = aggregate # ?????????

@staticmethod
Expand Down Expand Up @@ -927,6 +907,13 @@ def _identify_differences(self, topology, residue_map):
replace_with = 'HIE'
template = self._templates[replace_with]

if replace_with == 'LYN': # Rename HZ3 to HZ1 s.t. the topology matches the naming in amino_acid_templates/LYN.pdb
for atom in template.atoms:
if atom.name == 'HZ3':
atom.name = 'HZ1'
template.atomIndices['HZ1'] = template.atomIndices['HZ3']
del template.atomIndices['HZ3']

# template_atom_names : dict, key : template atom index, value : template atom name
template_atom_names = {}
for atom in template.atoms:
Expand Down Expand Up @@ -1216,15 +1203,6 @@ def _construct_atom_map(self,
old_res_name = old_res.name
new_res_name = new_res.name

#make correction for HIS
his_templates = ['HIE', 'HID']
if old_res_name in his_templates:
old_res_name = 'HIS'
if new_res_name in his_templates:
new_res_name = 'HIS'
else:
pass

current_residue_pdb_filename = resource_filename('perses', os.path.join('data', 'amino_acid_templates', f"{old_res_name}.pdb"))
proposed_residue_pdb_filename = resource_filename('perses', os.path.join('data', 'amino_acid_templates', f"{new_res_name}.pdb"))

Expand Down Expand Up @@ -1835,22 +1813,13 @@ def _choose_mutation_from_allowed(self, topology, chain_id, allowed_mutations, i
" If you wish to modify one of these residues, make sure you have added cap residues to the input topology.")

# Check if mutated residue's name is same as residue's name in old topology
if original_residue.name in ['HID', 'HIE']:
# original_residue.name = 'HIS'
pass
if original_residue.name == residue_name: #there is no mutation to be done
return index_to_new_residues

# Save proposed mutation to index_to_new_residues
# index_to_new_residues : dict, key : int (index of residue, 0-indexed), value : str (three letter residue name)
index_to_new_residues[residue_id_to_index[residue_id]] = residue_name

# Randomly choose HIS template ('HIS' does not exist as a template)
# if residue_name == 'HIS':
# his_state = ['HIE','HID']
# his_prob = [1/len(his_state)] * len(his_state)
# his_choice = np.random.choice(range(len(his_state)), p=his_prob)
# index_to_new_residues[residue_id_to_index[residue_id]] = his_state[his_choice]
return index_to_new_residues

def _propose_mutation(self, topology, chain_id, index_to_new_residues):
Expand Down Expand Up @@ -1905,13 +1874,7 @@ def _propose_mutation(self, topology, chain_id, index_to_new_residues):
# Propose a location at which to mutate the residue
# proposed_location : int, index of chosen entry in location_prob
proposed_location = np.random.choice(range(num_residues), p=location_prob)

# Rename residue to HIS if it uses one of the HIS-derived templates
# original_residue : simtk.openmm.app.topology.Residue
original_residue = chain_residues[proposed_location]
if original_residue.name in ['HIE', 'HID']:
#original_residue.name = 'HIS'
pass

if self._residues_allowed_to_mutate is None:
proposed_location = original_residue.index
Expand All @@ -1933,13 +1896,6 @@ def _propose_mutation(self, topology, chain_id, index_to_new_residues):
# index_to_new_residues : dict, key : int (index of residue, 0-indexed), value : str (three letter residue name)
index_to_new_residues[proposed_location] = aminos[proposed_amino_index]

# Randomly choose HIS template ('HIS' does not exist as a template)
if aminos[proposed_amino_index] == 'HIS':
# his_state = ['HIE','HID']
# his_prob = [1 / len(his_state)] * len(his_state)
# his_choice = np.random.choice(range(len(his_state)), p=his_prob)
# index_to_new_residues[proposed_location] = his_state[his_choice]
pass
return index_to_new_residues

def _mutable_residues(self, chain):
Expand Down Expand Up @@ -2081,12 +2037,6 @@ def _choose_mutant(self, topology, metadata):
continue
# index_to_new_residues : dict, key : int (index of residue, 0-indexed), value : str (three letter residue name)
index_to_new_residues[residue_index] = residue_name
if residue_name == 'HIS':
# his_state = ['HIE','HID']
# his_prob = np.array([0.5 for i in range(len(his_state))])
# his_choice = np.random.choice(range(len(his_state)),p=his_prob)
# index_to_new_residues[residue_index] = his_state[his_choice]
pass

# index_to_new_residues : dict, key : int (index of residue, 0-indexed), value : str (three letter residue name)
return index_to_new_residues, metadata
Expand Down
4 changes: 1 addition & 3 deletions perses/tests/test_topology_proposal.py
Original file line number Diff line number Diff line change
Expand Up @@ -325,7 +325,7 @@ def test_mutate_from_alanine():
# generating VERY large nonbonded energies, to which numerical precision cannot achieve a proper threshold of 1e-6.
# in the future, we can look to use sterics or something fancy. At the moment, we recommend conservative transforms
# or transforms that have more unique _old_ atoms than new
aminos = ['ARG','ASN','ASP','CYS','GLN','GLU','GLY','HIS','ILE','LEU','LYS','MET','PHE','SER','THR','TRP','TYR','VAL']
aminos = ['ARG', 'ASH', 'ASN', 'ASP', 'CYS', 'GLH', 'GLN', 'GLU', 'GLY', 'HID', 'HIE', 'HIS', 'HIP', 'ILE', 'LEU', 'LYN', 'LYS', 'MET', 'PHE', 'SER', 'THR', 'TRP', 'TYR', 'VAL']
attempt_full_pipeline_aminos = ['CYS', 'ILE', 'SER', 'THR', 'VAL'] #let's omit rings and large perturbations for now

ala, system_generator = generate_atp()
Expand Down Expand Up @@ -543,8 +543,6 @@ def test_mutate_from_every_amino_to_every_other():
continue
if residue.index == (num_residues -1):
continue
if residue.name in ['HID','HIE']:
residue.name = 'HIS'
new_sequence.append(residue.name)
for i in range(len(aminos)):
assert new_sequence[i] == aminos[i]
Expand Down

0 comments on commit 2d39e8d

Please sign in to comment.