diff --git a/perses/app/relative_point_mutation_setup.py b/perses/app/relative_point_mutation_setup.py index 4e0721a19..b3714769f 100644 --- a/perses/app/relative_point_mutation_setup.py +++ b/perses/app/relative_point_mutation_setup.py @@ -20,7 +20,7 @@ temperature = 300 * unit.kelvin kT = kB * temperature beta = 1.0/kT -ring_amino_acids = ['TYR', 'PHE', 'TRP', 'PRO', 'HIS'] +ring_amino_acids = ['TYR', 'PHE', 'TRP', 'PRO', 'HIS', 'HID', 'HIE', 'HIP'] # Set up logger import logging @@ -89,6 +89,7 @@ def __init__(self, mutation_chain_id, mutation_residue_id, proposed_residue, + old_residue=None, phase='complex', conduct_endstate_validation=True, ligand_input=None, @@ -113,12 +114,16 @@ def __init__(self, arguments protein_filename : str path to protein (to mutate); .pdb + Note: if there are nonstandard residues, the PDB should contain the standard residue name but the atoms/positions should correspond to the nonstandard residue. E.g. if I want to include HID, the PDB should contain HIS for the residue name, but the atoms should correspond to the atoms present in HID. You can use openmm.app.Modeller.addHydrogens() to generate a PDB like this. The same is true for the ligand_input, if its a PDB. mutation_chain_id : str name of the chain to be mutated mutation_residue_id : str residue id to change proposed_residue : str three letter code of the residue to mutate to + old_residue : str, default None + name of the old residue, if is a nonstandard amino acid (e.g. LYN, ASH, HID, etc) + if not specified, the old residue name will be inferred from the input PDB. phase : str, default complex if phase == vacuum, then the complex will not be solvated with water; else, it will be solvated with tip3p conduct_endstate_validation : bool, default True @@ -249,6 +254,14 @@ def __init__(self, # Run pipeline... htfs = [] for is_complex, (top, pos, sys) in enumerate(inputs): + # Change the name of the old residue to its nonstandard name, if necessary + # Note this needs to happen after generating the system, as the system generator requires standard residue names + if old_residue: + for residue in top.residues(): + if residue.id == mutation_residue_id: + residue.name = old_residue + print(f"Changed resid {mutation_residue_id} to {residue.name}") + point_mutation_engine = PointMutationEngine(wildtype_topology=top, system_generator=self.system_generator, chain_id=mutation_chain_id, # Denote the chain id allowed to mutate (it's always a string variable) diff --git a/perses/data/amino_acid_templates/ASH.pdb b/perses/data/amino_acid_templates/ASH.pdb new file mode 100644 index 000000000..e7f74e8d2 --- /dev/null +++ b/perses/data/amino_acid_templates/ASH.pdb @@ -0,0 +1,15 @@ +ATOM 1 N ASH 2 28.512 45.560 9.232 1.00 0.00 N +ATOM 2 CA ASH 2 29.071 46.909 9.232 1.00 0.00 C +ATOM 3 C ASH 2 30.580 46.870 9.232 1.00 0.00 C +ATOM 4 O ASH 2 31.210 45.801 9.232 1.00 0.00 O +ATOM 5 CB ASH 2 28.536 47.722 10.444 1.00 0.00 C +ATOM 6 CG ASH 2 27.064 48.151 10.354 1.00 0.00 C +ATOM 7 OD1 ASH 2 26.467 48.050 9.263 1.00 0.00 O +ATOM 8 OD2 ASH 2 26.519 48.632 11.369 1.00 0.00 O +ATOM 9 H ASH 2 28.318 45.093 10.107 1.00 0.00 H +ATOM 10 HA ASH 2 28.756 47.414 8.299 1.00 0.00 H +ATOM 11 2HB ASH 2 29.144 48.633 10.586 1.00 0.00 H +ATOM 12 3HB ASH 2 28.640 47.146 11.378 1.00 0.00 H +ATOM 13 2HD ASH 2 27.140 48.622 12.102 1.00 0.00 H +TER +END diff --git a/perses/data/amino_acid_templates/GLH.pdb b/perses/data/amino_acid_templates/GLH.pdb new file mode 100644 index 000000000..3407ad990 --- /dev/null +++ b/perses/data/amino_acid_templates/GLH.pdb @@ -0,0 +1,18 @@ +ATOM 1 N GLH 2 28.604 45.037 8.824 1.00 0.00 N +ATOM 2 CA GLH 2 29.163 46.386 8.824 1.00 0.00 C +ATOM 3 C GLH 2 30.672 46.346 8.824 1.00 0.00 C +ATOM 4 O GLH 2 31.301 45.278 8.824 1.00 0.00 O +ATOM 5 CB GLH 2 28.640 47.171 10.061 1.00 0.00 C +ATOM 6 CG GLH 2 29.102 48.661 10.163 1.00 0.00 C +ATOM 7 CD GLH 2 28.588 49.497 11.320 1.00 0.00 C +ATOM 8 OE1 GLH 2 27.814 49.012 12.167 1.00 0.00 O +ATOM 9 OE2 GLH 2 28.994 50.677 11.379 1.00 0.00 O +ATOM 10 H GLH 2 28.411 44.570 9.698 1.00 0.00 H +ATOM 11 HA GLH 2 28.837 46.904 7.904 1.00 0.00 H +ATOM 12 2HB GLH 2 28.939 46.646 10.991 1.00 0.00 H +ATOM 13 3HB GLH 2 27.533 47.151 10.080 1.00 0.00 H +ATOM 14 2HG GLH 2 28.856 49.219 9.245 1.00 0.00 H +ATOM 15 3HG GLH 2 30.197 48.733 10.271 1.00 0.00 H +ATOM 16 2HE GLH 2 29.577 50.852 10.637 1.00 0.00 H +TER +END diff --git a/perses/data/amino_acid_templates/HID.pdb b/perses/data/amino_acid_templates/HID.pdb new file mode 100644 index 000000000..30bf4beb4 --- /dev/null +++ b/perses/data/amino_acid_templates/HID.pdb @@ -0,0 +1,19 @@ +ATOM 1 N HID 2 29.627 44.998 8.697 1.00 0.00 N +ATOM 2 CA HID 2 30.173 46.304 9.051 1.00 0.00 C +ATOM 3 C HID 2 31.682 46.298 8.977 1.00 0.00 C +ATOM 4 O HID 2 32.315 45.264 8.748 1.00 0.00 O +ATOM 5 CB HID 2 29.658 46.744 10.457 1.00 0.00 C +ATOM 6 CG HID 2 28.153 46.966 10.592 1.00 0.00 C +ATOM 7 CD2 HID 2 27.281 46.093 11.260 1.00 0.00 C +ATOM 8 ND1 HID 2 27.434 48.027 10.076 1.00 0.00 N +ATOM 9 CE1 HID 2 26.174 47.693 10.471 1.00 0.00 C +ATOM 10 NE2 HID 2 25.970 46.549 11.196 1.00 0.00 N +ATOM 11 H HID 2 29.409 44.328 9.421 1.00 0.00 H +ATOM 12 HA HID 2 29.855 46.999 8.255 1.00 0.00 H +ATOM 13 2HB HID 2 30.161 47.676 10.783 1.00 0.00 H +ATOM 14 3HB HID 2 29.966 46.025 11.243 1.00 0.00 H +ATOM 15 1HD HID 2 27.755 48.812 9.501 1.00 0.00 H +ATOM 16 2HD HID 2 27.587 45.182 11.754 1.00 0.00 H +ATOM 17 1HE HID 2 25.335 48.323 10.204 1.00 0.00 H +TER +END diff --git a/perses/data/amino_acid_templates/HIE.pdb b/perses/data/amino_acid_templates/HIE.pdb new file mode 100644 index 000000000..79be667e1 --- /dev/null +++ b/perses/data/amino_acid_templates/HIE.pdb @@ -0,0 +1,19 @@ +ATOM 1 N HIE 2 29.617 45.008 8.732 1.00 0.00 N +ATOM 2 CA HIE 2 30.171 46.327 9.025 1.00 0.00 C +ATOM 3 C HIE 2 31.680 46.300 8.994 1.00 0.00 C +ATOM 4 O HIE 2 32.314 45.263 8.747 1.00 0.00 O +ATOM 5 CB HIE 2 29.670 46.704 10.429 1.00 0.00 C +ATOM 6 CG HIE 2 28.838 47.953 10.421 1.00 0.00 C +ATOM 7 CD2 HIE 2 27.459 47.935 10.259 1.00 0.00 C +ATOM 8 ND1 HIE 2 29.316 49.253 10.570 1.00 0.00 N +ATOM 9 CE1 HIE 2 28.160 49.936 10.479 1.00 0.00 C +ATOM 10 NE2 HIE 2 27.011 49.230 10.297 1.00 0.00 N +ATOM 11 H HIE 2 29.402 44.372 9.486 1.00 0.00 H +ATOM 12 HA HIE 2 29.851 47.010 8.220 1.00 0.00 H +ATOM 13 2HB HIE 2 30.516 46.863 11.127 1.00 0.00 H +ATOM 14 3HB HIE 2 29.060 45.897 10.886 1.00 0.00 H +ATOM 15 2HD HIE 2 26.867 47.034 10.127 1.00 0.00 H +ATOM 16 1HE HIE 2 28.207 51.014 10.559 1.00 0.00 H +ATOM 17 2HE HIE 2 26.044 49.574 10.207 1.00 0.00 H +TER +END diff --git a/perses/data/amino_acid_templates/HIP.pdb b/perses/data/amino_acid_templates/HIP.pdb new file mode 100644 index 000000000..33d0caa91 --- /dev/null +++ b/perses/data/amino_acid_templates/HIP.pdb @@ -0,0 +1,20 @@ +ATOM 1 N HIP 2 29.623 45.003 8.715 1.00 0.00 N +ATOM 2 CA HIP 2 30.177 46.314 9.037 1.00 0.00 C +ATOM 3 C HIP 2 31.686 46.294 8.975 1.00 0.00 C +ATOM 4 O HIP 2 32.313 45.262 8.746 1.00 0.00 O +ATOM 5 CB HIP 2 29.649 46.724 10.451 1.00 0.00 C +ATOM 6 CG HIP 2 28.143 46.924 10.601 1.00 0.00 C +ATOM 7 CD2 HIP 2 27.286 46.027 11.267 1.00 0.00 C +ATOM 8 ND1 HIP 2 27.405 47.988 10.106 1.00 0.00 N1+ +ATOM 9 CE1 HIP 2 26.142 47.655 10.504 1.00 0.00 C +ATOM 10 NE2 HIP 2 25.974 46.493 11.216 1.00 0.00 N +ATOM 11 H HIP 2 29.405 44.351 9.455 1.00 0.00 H +ATOM 12 HA HIP 2 29.865 47.007 8.235 1.00 0.00 H +ATOM 13 2HB HIP 2 30.153 47.650 10.798 1.00 0.00 H +ATOM 14 3HB HIP 2 29.972 45.997 11.226 1.00 0.00 H +ATOM 15 1HD HIP 2 27.720 48.776 9.522 1.00 0.00 H +ATOM 16 2HD HIP 2 27.617 45.105 11.739 1.00 0.00 H +ATOM 17 1HE HIP 2 25.294 48.294 10.254 1.00 0.00 H +ATOM 18 2HE HIP 2 25.111 46.076 11.591 1.00 0.00 H +TER +END diff --git a/perses/data/amino_acid_templates/LYN.pdb b/perses/data/amino_acid_templates/LYN.pdb new file mode 100644 index 000000000..bf868bae1 --- /dev/null +++ b/perses/data/amino_acid_templates/LYN.pdb @@ -0,0 +1,23 @@ +ATOM 1 N LYN 2 28.674 43.813 8.197 1.00 0.00 N +ATOM 2 CA LYN 2 29.233 45.162 8.197 1.00 0.00 C +ATOM 3 C LYN 2 30.743 45.122 8.197 1.00 0.00 C +ATOM 4 O LYN 2 31.372 44.054 8.197 1.00 0.00 O +ATOM 5 CB LYN 2 28.698 45.943 9.428 1.00 0.00 C +ATOM 6 CG LYN 2 29.180 47.414 9.481 1.00 0.00 C +ATOM 7 CD LYN 2 28.616 48.235 10.645 1.00 0.00 C +ATOM 8 CE LYN 2 29.185 49.659 10.581 1.00 0.00 C +ATOM 9 NZ LYN 2 28.643 50.453 11.697 1.00 0.00 N +ATOM 10 H LYN 2 28.480 43.346 9.071 1.00 0.00 H +ATOM 11 HA LYN 2 28.914 45.672 7.269 1.00 0.00 H +ATOM 12 2HB LYN 2 28.990 45.416 10.359 1.00 0.00 H +ATOM 13 3HB LYN 2 27.590 45.925 9.428 1.00 0.00 H +ATOM 14 2HG LYN 2 28.952 47.915 8.520 1.00 0.00 H +ATOM 15 3HG LYN 2 30.281 47.451 9.579 1.00 0.00 H +ATOM 16 2HD LYN 2 28.888 47.757 11.608 1.00 0.00 H +ATOM 17 3HD LYN 2 27.510 48.238 10.599 1.00 0.00 H +ATOM 18 2HE LYN 2 28.930 50.149 9.619 1.00 0.00 H +ATOM 19 3HE LYN 2 30.294 49.636 10.625 1.00 0.00 H +ATOM 20 1HZ LYN 2 28.553 49.875 12.542 1.00 0.00 H +ATOM 21 2HZ LYN 2 29.270 51.225 11.945 1.00 0.00 H +TER +END diff --git a/perses/rjmc/topology_proposal.py b/perses/rjmc/topology_proposal.py index 9105a7498..c135f946a 100644 --- a/perses/rjmc/topology_proposal.py +++ b/perses/rjmc/topology_proposal.py @@ -442,10 +442,10 @@ class PolymerProposalEngine(ProposalEngine): This base class is not meant to be invoked directly. """ - _aminos = ['ALA', 'ARG', 'ASN', 'ASP', 'CYS', 'GLN', 'GLU', 'GLY', 'HIS', 'ILE', 'LEU', 'LYS', 'MET', 'PHE', + _aminos = ['ALA', 'ARG', 'ASH', 'ASN', 'ASP', 'CYS', 'GLN', 'GLH', 'GLU', 'GLY', 'HID', 'HIE', 'HIP', 'HIS', 'ILE', 'LEU', 'LYN', 'LYS', 'MET', 'PHE', 'SER', 'THR', 'TRP', 'TYR', 'VAL'] # common naturally-occurring amino acid names # Note this does not include PRO since there's a problem with OpenMM's template DEBUG - _positive_aminos = ['ARG', 'HIS', 'LYS'] + _positive_aminos = ['ARG', 'LYS', 'HIP'] _negative_aminos = ['ASP', 'GLU'] def _get_neutrals(aminos, positive, negative): @@ -479,26 +479,6 @@ def __init__(self, system_generator, chain_id, proposal_metadata=None, always_ch _logger.debug(f"Instantiating PolymerProposalEngine") super(PolymerProposalEngine,self).__init__(system_generator=system_generator, proposal_metadata=proposal_metadata, always_change=always_change) self._chain_id = chain_id # chain identifier defining polymer to be modified - self._aminos_3letter_to_1letter_map = {'ALA' : 'A' , - 'ARG' : 'R' , - 'ASN' : 'N' , - 'ASP' : 'D' , - 'CYS' : 'C' , - 'GLN' : 'Q' , - 'GLU' : 'E' , - 'GLY' : 'G' , - 'HIS' : 'H' , - 'ILE' : 'I' , - 'LEU' : 'L' , - 'LYS' : 'K' , - 'MET' : 'M' , - 'PHE' : 'F' , - 'PRO' : 'P' , - 'SER' : 'S' , - 'THR' : 'T' , - 'TRP' : 'W' , - 'TYR' : 'Y' , - 'VAL' : 'V' } self._aggregate = aggregate # ????????? @staticmethod @@ -927,6 +907,13 @@ def _identify_differences(self, topology, residue_map): replace_with = 'HIE' template = self._templates[replace_with] + if replace_with == 'LYN': # Rename HZ3 to HZ1 s.t. the topology matches the naming in amino_acid_templates/LYN.pdb + for atom in template.atoms: + if atom.name == 'HZ3': + atom.name = 'HZ1' + template.atomIndices['HZ1'] = template.atomIndices['HZ3'] + del template.atomIndices['HZ3'] + # template_atom_names : dict, key : template atom index, value : template atom name template_atom_names = {} for atom in template.atoms: @@ -1216,15 +1203,6 @@ def _construct_atom_map(self, old_res_name = old_res.name new_res_name = new_res.name - #make correction for HIS - his_templates = ['HIE', 'HID'] - if old_res_name in his_templates: - old_res_name = 'HIS' - if new_res_name in his_templates: - new_res_name = 'HIS' - else: - pass - current_residue_pdb_filename = resource_filename('perses', os.path.join('data', 'amino_acid_templates', f"{old_res_name}.pdb")) proposed_residue_pdb_filename = resource_filename('perses', os.path.join('data', 'amino_acid_templates', f"{new_res_name}.pdb")) @@ -1835,9 +1813,6 @@ def _choose_mutation_from_allowed(self, topology, chain_id, allowed_mutations, i " If you wish to modify one of these residues, make sure you have added cap residues to the input topology.") # Check if mutated residue's name is same as residue's name in old topology - if original_residue.name in ['HID', 'HIE']: - # original_residue.name = 'HIS' - pass if original_residue.name == residue_name: #there is no mutation to be done return index_to_new_residues @@ -1845,12 +1820,6 @@ def _choose_mutation_from_allowed(self, topology, chain_id, allowed_mutations, i # index_to_new_residues : dict, key : int (index of residue, 0-indexed), value : str (three letter residue name) index_to_new_residues[residue_id_to_index[residue_id]] = residue_name - # Randomly choose HIS template ('HIS' does not exist as a template) - # if residue_name == 'HIS': - # his_state = ['HIE','HID'] - # his_prob = [1/len(his_state)] * len(his_state) - # his_choice = np.random.choice(range(len(his_state)), p=his_prob) - # index_to_new_residues[residue_id_to_index[residue_id]] = his_state[his_choice] return index_to_new_residues def _propose_mutation(self, topology, chain_id, index_to_new_residues): @@ -1905,13 +1874,7 @@ def _propose_mutation(self, topology, chain_id, index_to_new_residues): # Propose a location at which to mutate the residue # proposed_location : int, index of chosen entry in location_prob proposed_location = np.random.choice(range(num_residues), p=location_prob) - - # Rename residue to HIS if it uses one of the HIS-derived templates - # original_residue : simtk.openmm.app.topology.Residue original_residue = chain_residues[proposed_location] - if original_residue.name in ['HIE', 'HID']: - #original_residue.name = 'HIS' - pass if self._residues_allowed_to_mutate is None: proposed_location = original_residue.index @@ -1933,13 +1896,6 @@ def _propose_mutation(self, topology, chain_id, index_to_new_residues): # index_to_new_residues : dict, key : int (index of residue, 0-indexed), value : str (three letter residue name) index_to_new_residues[proposed_location] = aminos[proposed_amino_index] - # Randomly choose HIS template ('HIS' does not exist as a template) - if aminos[proposed_amino_index] == 'HIS': - # his_state = ['HIE','HID'] - # his_prob = [1 / len(his_state)] * len(his_state) - # his_choice = np.random.choice(range(len(his_state)), p=his_prob) - # index_to_new_residues[proposed_location] = his_state[his_choice] - pass return index_to_new_residues def _mutable_residues(self, chain): @@ -2081,12 +2037,6 @@ def _choose_mutant(self, topology, metadata): continue # index_to_new_residues : dict, key : int (index of residue, 0-indexed), value : str (three letter residue name) index_to_new_residues[residue_index] = residue_name - if residue_name == 'HIS': - # his_state = ['HIE','HID'] - # his_prob = np.array([0.5 for i in range(len(his_state))]) - # his_choice = np.random.choice(range(len(his_state)),p=his_prob) - # index_to_new_residues[residue_index] = his_state[his_choice] - pass # index_to_new_residues : dict, key : int (index of residue, 0-indexed), value : str (three letter residue name) return index_to_new_residues, metadata diff --git a/perses/tests/test_topology_proposal.py b/perses/tests/test_topology_proposal.py index bbbc5ba54..49b01ec87 100644 --- a/perses/tests/test_topology_proposal.py +++ b/perses/tests/test_topology_proposal.py @@ -325,7 +325,7 @@ def test_mutate_from_alanine(): # generating VERY large nonbonded energies, to which numerical precision cannot achieve a proper threshold of 1e-6. # in the future, we can look to use sterics or something fancy. At the moment, we recommend conservative transforms # or transforms that have more unique _old_ atoms than new - aminos = ['ARG','ASN','ASP','CYS','GLN','GLU','GLY','HIS','ILE','LEU','LYS','MET','PHE','SER','THR','TRP','TYR','VAL'] + aminos = ['ARG', 'ASH', 'ASN', 'ASP', 'CYS', 'GLH', 'GLN', 'GLU', 'GLY', 'HID', 'HIE', 'HIS', 'HIP', 'ILE', 'LEU', 'LYN', 'LYS', 'MET', 'PHE', 'SER', 'THR', 'TRP', 'TYR', 'VAL'] attempt_full_pipeline_aminos = ['CYS', 'ILE', 'SER', 'THR', 'VAL'] #let's omit rings and large perturbations for now ala, system_generator = generate_atp() @@ -543,8 +543,6 @@ def test_mutate_from_every_amino_to_every_other(): continue if residue.index == (num_residues -1): continue - if residue.name in ['HID','HIE']: - residue.name = 'HIS' new_sequence.append(residue.name) for i in range(len(aminos)): assert new_sequence[i] == aminos[i]