diff --git a/perses/annihilation/ncmc_switching.py b/perses/annihilation/ncmc_switching.py index a2525d6c0..259472e7e 100644 --- a/perses/annihilation/ncmc_switching.py +++ b/perses/annihilation/ncmc_switching.py @@ -238,6 +238,7 @@ def make_alchemical_system(self, topology_proposal, current_positions, new_posit hybrid_factory._new_positions = new_positions hybrid_factory._compute_hybrid_positions() except KeyError: + hybrid_factory = HybridTopologyFactory(topology_proposal, current_positions, new_positions) try: hybrid_factory = HybridTopologyFactory(topology_proposal, current_positions, new_positions) self._hybrid_cache[topology_proposal] = hybrid_factory @@ -377,15 +378,16 @@ def integrate(self, topology_proposal, initial_sampler_state, proposed_sampler_s box_lengths_and_angles = np.stack([box_lengths, box_angles]) #write out the positions of the topology - for frame in range(nframes): - self._storage.write_configuration(position_varname, trajectory[frame, :, :], topology, iteration=iteration, frame=frame, nframes=nframes) + if self._save_configuration: + for frame in range(nframes): + self._storage.write_configuration(position_varname, trajectory[frame, :, :], topology, iteration=iteration, frame=frame, nframes=nframes) - #write out the periodict box vectors: - self._storage.write_array(box_vec_varname, box_lengths_and_angles, iteration=iteration) + #write out the periodic box vectors: + self._storage.write_array(box_vec_varname, box_lengths_and_angles, iteration=iteration) - #retrieve the protocol work and write that out too: - protocol_work = ne_move.cumulative_work - self._storage.write_array("protocolwork", protocol_work, iteration=iteration) + #retrieve the protocol work and write that out too: + protocol_work = ne_move.cumulative_work + self._storage.write_array("protocolwork", protocol_work, iteration=iteration) # Return return [final_old_sampler_state, final_sampler_state, logP_work, -initial_reduced_potential, -final_reduced_potential] diff --git a/perses/annihilation/new_relative.py b/perses/annihilation/new_relative.py index 7d24b91ea..0300f863b 100644 --- a/perses/annihilation/new_relative.py +++ b/perses/annihilation/new_relative.py @@ -1356,14 +1356,12 @@ def _handle_hybrid_exceptions(self): #now we check if the pair is in the exception dictionary if old_index_atom_pair in self._old_system_exceptions: [chargeProd, sigma, epsilon] = self._old_system_exceptions[old_index_atom_pair] - chargeProd *= 0.0 self._hybrid_system_forces['standard_nonbonded_force'].addException(atom_pair[0], atom_pair[1], chargeProd, sigma, epsilon) self._hybrid_system_forces['core_sterics_force'].addExclusion(atom_pair[0], atom_pair[1]) # add exclusion to ensure exceptions are consistent #check if the pair is in the reverse order and use that if so elif old_index_atom_pair[::-1] in self._old_system_exceptions: [chargeProd, sigma, epsilon] = self._old_system_exceptions[old_index_atom_pair[::-1]] - chargeProd *= 0.0 self._hybrid_system_forces['standard_nonbonded_force'].addException(atom_pair[0], atom_pair[1], chargeProd, sigma, epsilon) self._hybrid_system_forces['core_sterics_force'].addExclusion(atom_pair[0], atom_pair[1]) # add exclusion to ensure exceptions are consistent @@ -1372,7 +1370,6 @@ def _handle_hybrid_exceptions(self): [charge0, sigma0, epsilon0] = self._old_system_forces['NonbondedForce'].getParticleParameters(old_index_atom_pair[0]) [charge1, sigma1, epsilon1] = self._old_system_forces['NonbondedForce'].getParticleParameters(old_index_atom_pair[1]) chargeProd = charge0*charge1 - chargeProd *= 0.0 epsilon = unit.sqrt(epsilon0*epsilon1) sigma = 0.5*(sigma0+sigma1) self._hybrid_system_forces['standard_nonbonded_force'].addException(atom_pair[0], atom_pair[1], chargeProd, sigma, epsilon) @@ -1386,14 +1383,12 @@ def _handle_hybrid_exceptions(self): #now we check if the pair is in the exception dictionary if new_index_atom_pair in self._new_system_exceptions: [chargeProd, sigma, epsilon] = self._new_system_exceptions[new_index_atom_pair] - chargeProd *= 0.0 self._hybrid_system_forces['standard_nonbonded_force'].addException(atom_pair[0], atom_pair[1], chargeProd, sigma, epsilon) self._hybrid_system_forces['core_sterics_force'].addExclusion(atom_pair[0], atom_pair[1]) # add exclusion to ensure exceptions are consistent #check if the pair is present in the reverse order and use that if so elif new_index_atom_pair[::-1] in self._new_system_exceptions: [chargeProd, sigma, epsilon] = self._new_system_exceptions[new_index_atom_pair[::-1]] - chargeProd *= 0.0 self._hybrid_system_forces['standard_nonbonded_force'].addException(atom_pair[0], atom_pair[1], chargeProd, sigma, epsilon) self._hybrid_system_forces['core_sterics_force'].addExclusion(atom_pair[0], atom_pair[1]) # add exclusion to ensure exceptions are consistent @@ -1402,7 +1397,6 @@ def _handle_hybrid_exceptions(self): [charge0, sigma0, epsilon0] = self._new_system_forces['NonbondedForce'].getParticleParameters(new_index_atom_pair[0]) [charge1, sigma1, epsilon1] = self._new_system_forces['NonbondedForce'].getParticleParameters(new_index_atom_pair[1]) chargeProd = charge0*charge1 - chargeProd *= 0.0 epsilon = unit.sqrt(epsilon0*epsilon1) sigma = 0.5*(sigma0+sigma1) self._hybrid_system_forces['standard_nonbonded_force'].addException(atom_pair[0], atom_pair[1], chargeProd, sigma, epsilon) @@ -1656,18 +1650,20 @@ def _create_topology(self): atom2_index_in_hybrid = self._new_to_hybrid_map[atom2.index] #if at least one atom is in the unique new class, we need to add it to the hybrid system - if atom1_index_in_hybrid in self._atom_classes['unique_new_atoms'] or atom2_index_in_hybrid in self._atom_classes['unique_new_atoms']: - if atom1.index in self._atom_classes['unique_new_atoms']: - atom1_to_bond = added_atoms[atom1.index] - else: - atom1_to_bond = atom1 - - if atom2.index in self._atom_classes['unique_new_atoms']: - atom2_to_bond = added_atoms[atom2.index] - else: - atom2_to_bond = atom2 + if atom1_index_in_hybrid in self._atom_classes['unique_new_atoms'] and atom2_index_in_hybrid in self._atom_classes['unique_new_atoms']: + atom1_to_bond = added_atoms[atom1.index] + atom2_to_bond = added_atoms[atom2.index] + elif atom1_index_in_hybrid in self._atom_classes['unique_new_atoms']: + atom1_to_bond = added_atoms[atom1.index] + atom2_to_bond = atom2 + elif atom2_index_in_hybrid in self._atom_classes['unique_new_atoms']: + atom1_to_bond = atom1 + atom2_to_bond = added_atoms[atom2.index] + else: + atom1_to_bond = atom1 + atom2_to_bond = atom2 - hybrid_topology.add_bond(atom1_to_bond, atom2_to_bond) + hybrid_topology.add_bond(atom1_to_bond, atom2_to_bond) return hybrid_topology diff --git a/perses/data/clinical-kinase-inhibitors.csv b/perses/data/clinical-kinase-inhibitors.csv index 000dc97f4..8179188d5 100644 --- a/perses/data/clinical-kinase-inhibitors.csv +++ b/perses/data/clinical-kinase-inhibitors.csv @@ -1,27 +1,4 @@ Imatinib,C5=C(C1=CN=CC=C1)N=C(NC2=C(C=CC(=C2)NC(C3=CC=C(C=C3)CN4CCN(CC4)C)=O)C)N=C5 Dasatinib,C1=C(SC(=N1)NC2=CC(=NC(=N2)C)N3CCN(CC3)CCO)C(=O)NC4=C(C=CC=C4Cl)C Nilotinib,Cc1ccc(cc1Nc2nccc(n2)c3cccnc3)C(=O)Nc4cc(cc(c4)n5cc(nc5)C)C(F)(F)F -Bosutinib,C1=C(C(=CC2=C1C(=C(C=N2)C#N)NC3=C(C=C(C(=C3)OC)Cl)Cl)OCCCN4CCN(CC4)C)OC -Ponatinib,Cc1ccc(cc1C#Cc2cnc3n2nccc3)C(=O)Nc4ccc(c(c4)C(F)(F)F)CN5CCN(CC5)C -Gefitinib,COc1cc2c(cc1OCCCN3CCOCC3)c(ncn2)Nc4ccc(c(c4)Cl)F -Erlotinib,COCCOc1cc2c(cc1OCCOC)ncnc2Nc3cccc(c3)C#C -Afatinib,CN(C)C/C=C/C(=O)NC1=C(C=C2C(=C1)C(=NC=N2)NC3=CC(=C(C=C3)F)Cl)O[C@H]4CCOC4 -Lapatinib,CS(=O)(=O)CCNCc1ccc(o1)c2ccc3c(c2)c(ncn3)Nc4ccc(c(c4)Cl)OCc5cccc(c5)F -Sorafenib,CNC(=O)c1cc(ccn1)Oc2ccc(cc2)NC(=O)Nc3ccc(c(c3)C(F)(F)F)Cl -Sunitinib,CCN(CC)CCNC(=O)c1c(c([nH]c1C)/C=C\2/c3cc(ccc3NC2=O)F)C -Pazopanib,CC1=C(C=C(C=C1)NC2=NC=CC(=N2)N(C)C3=CC4=NN(C(=C4C=C3)C)C)S(=O)(=O)N -Vandetanib,CN1CCC(CC1)COc2cc3c(cc2OC)c(ncn3)Nc4ccc(cc4F)Br -Axitinib,CNC(=O)c1ccccc1Sc2ccc3c(c2)[nH]nc3/C=C/c4ccccn4 -Regorafenib,CNC(=O)C1=NC=CC(=C1)OC2=CC(=C(C=C2)NC(=O)NC3=CC(=C(C=C3)Cl)C(F)(F)F)F -Crizotinib,C[C@H](C1=C(C=CC(=C1Cl)F)Cl)OC2=C(N=CC(=C2)C3=CN(N=C3)C4CCNCC4)N -Ceritinib,Cc1cc(c(cc1C2CCNCC2)OC(C)C)Nc3ncc(c(n3)Nc4ccccc4S(=O)(=O)C(C)C)Cl -Vemurafenib,CCCS(=O)(=O)Nc1ccc(c(c1F)C(=O)c2c[nH]c3c2cc(cn3)c4ccc(cc4)Cl)F -Dabrafenib,CC(C)(C)c1nc(c(s1)c2ccnc(n2)N)c3cccc(c3F)NS(=O)(=O)c4c(cccc4F)F -Ruxolitinib,c1c[nH]c2c1c(ncn2)c3cnn(c3)[C@H](CC#N)C4CCCC4 -Tofacitinib,C[C@@H]1CCN(C[C@@H]1N(C)C2=NC=NC3=C2C=CN3)C(=O)CC#N -Cabozantinib,COc1cc2c(ccnc2cc1OC)Oc3ccc(cc3)NC(=O)C4(CC4)C(=O)Nc5ccc(cc5)F -Ibrutinib,C=CC(=O)N1CCC[C@H](C1)n2c3c(c(n2)c4ccc(cc4)Oc5ccccc5)c(ncn3)N -Trametinib,CC1=C2C(=C(N(C1=O)C)NC3=C(C=C(C=C3)I)F)C(=O)N(C(=O)N2C4=CC=CC(=C4)NC(=O)C)C5CC5 -Idelalisib,CC[C@@H](C1=NC2=C(C(=CC=C2)F)C(=O)N1C3=CC=CC=C3)NC4=C5C(=NC=N5)N=CN4 -Lenvatinib,COC1=CC2=NC=CC(=C2C=C1C(=O)N)OC3=CC(=C(C=C3)NC(=O)NC4CC4)Cl -Palbociclib,Cc1c2cnc(nc2n(c(=O)c1C(=O)C)C3CCCC3)Nc4ccc(cn4)N5CCNCC5 +Bosutinib,C1=C(C(=CC2=C1C(=C(C=N2)C#N)NC3=C(C=C(C(=C3)OC)Cl)Cl)OCCCN4CCN(CC4)C)OC \ No newline at end of file diff --git a/perses/data/clinical-kinase-inhibitors.xml b/perses/data/clinical-kinase-inhibitors.xml new file mode 100644 index 000000000..6edf633d1 --- /dev/null +++ b/perses/data/clinical-kinase-inhibitors.xml @@ -0,0 +1,574 @@ + + + 2018-11-21 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/perses/dispersed/feptasks.py b/perses/dispersed/feptasks.py index 50068e197..ae7003753 100644 --- a/perses/dispersed/feptasks.py +++ b/perses/dispersed/feptasks.py @@ -18,7 +18,6 @@ "lambda_step" : 0.0 }) - import openmmtools.mcmc as mcmc import openmmtools.integrators as integrators import openmmtools.states as states @@ -719,6 +718,7 @@ def compute_reduced_potential(thermodynamic_state: states.ThermodynamicState, sa else: context, integrator = cache.global_context_cache.get_context(thermodynamic_state) sampler_state.apply_to_context(context, ignore_velocities=True) + return thermodynamic_state.reduced_potential(context) def write_nonequilibrium_trajectory(nonequilibrium_result: NonequilibriumResult, nonequilibrium_trajectory: md.Trajectory, trajectory_filename: str) -> float: diff --git a/perses/rjmc/geometry.py b/perses/rjmc/geometry.py index 44d73e411..d75f46ebd 100644 --- a/perses/rjmc/geometry.py +++ b/perses/rjmc/geometry.py @@ -4,20 +4,25 @@ """ import parmed import simtk.unit as units -import logging import numpy as np import copy -#from perses.rjmc import coordinate_numba +from perses.rjmc import coordinate_numba import simtk.openmm as openmm import collections import openeye.oechem as oechem import openeye.oeomega as oeomega import simtk.openmm.app as app +import tempfile +import itertools +from openmoltools import forcefield_generators +from perses.tests import utils +import os import time import logging from perses.storage import NetCDFStorage, NetCDFStorageView _logger = logging.getLogger("geometry") + class GeometryEngine(object): """ This is the base class for the geometry engine. @@ -48,7 +53,7 @@ def propose(self, top_proposal, current_positions, beta): new_positions : [n, 3] ndarray The new positions of the system """ - return np.array([0.0,0.0,0.0]) + return np.array([0.0, 0.0, 0.0]) def logp_reverse(self, top_proposal, new_coordinates, old_coordinates, beta): """ @@ -88,9 +93,9 @@ class FFAllAngleGeometryEngine(GeometryEngine): """ def __init__(self, metadata=None, use_sterics=False, n_torsion_divisions=180, verbose=True, storage=None): self._metadata = metadata - self.write_proposal_pdb = False # if True, will write PDB for sequential atom placements - self.pdb_filename_prefix = 'geometry-proposal' # PDB file prefix for writing sequential atom placements - self.nproposed = 0 # number of times self.propose() has been called + self.write_proposal_pdb = False # if True, will write PDB for sequential atom placements + self.pdb_filename_prefix = 'geometry-proposal' # PDB file prefix for writing sequential atom placements + self.nproposed = 0 # number of times self.propose() has been called self._energy_time = 0.0 self._torsion_coordinate_time = 0.0 self._position_set_time = 0.0 @@ -130,7 +135,6 @@ def propose(self, top_proposal, current_positions, beta): self.nproposed += 1 return new_positions, logp_proposal - def logp_reverse(self, top_proposal, new_coordinates, old_coordinates, beta): """ Calculate the logp for the given geometry proposal @@ -155,7 +159,8 @@ def logp_reverse(self, top_proposal, new_coordinates, old_coordinates, beta): return 0.0 new_coordinates = new_coordinates.in_units_of(units.nanometers) old_coordinates = old_coordinates.in_units_of(units.nanometers) - logp_proposal, _ = self._logp_propose(top_proposal, old_coordinates, beta, new_positions=new_coordinates, direction='reverse') + logp_proposal, _ = self._logp_propose(top_proposal, old_coordinates, beta, new_positions=new_coordinates, + direction='reverse') return logp_proposal def _write_partial_pdb(self, pdbfile, topology, positions, atoms_with_positions, model_index): @@ -165,8 +170,9 @@ def _write_partial_pdb(self, pdbfile, topology, positions, atoms_with_positions, """ from simtk.openmm.app import Modeller modeller = Modeller(topology, positions) - atom_indices_with_positions = [ atom.idx for atom in atoms_with_positions ] - atoms_to_delete = [ atom for atom in modeller.topology.atoms() if (atom.index not in atom_indices_with_positions) ] + atom_indices_with_positions = [atom.idx for atom in atoms_with_positions] + atoms_to_delete = [atom for atom in modeller.topology.atoms() if + (atom.index not in atom_indices_with_positions)] modeller.delete(atoms_to_delete) pdbfile.write('MODEL %5d\n' % model_index) @@ -207,26 +213,34 @@ def _logp_propose(self, top_proposal, old_positions, beta, new_positions=None, d proposal_order_tool = ProposalOrderTools(top_proposal) proposal_order_time = time.time() - initial_time growth_parameter_name = 'growth_stage' - if direction=="forward": + if direction == "forward": forward_init = time.time() atom_proposal_order, logp_choice = proposal_order_tool.determine_proposal_order(direction='forward') proposal_order_forward = time.time() - forward_init structure = parmed.openmm.load_topology(top_proposal.new_topology, top_proposal.new_system) - #find and copy known positions + # find and copy known positions atoms_with_positions = [structure.atoms[atom_idx] for atom_idx in top_proposal.new_to_old_atom_map.keys()] new_positions = self._copy_positions(atoms_with_positions, top_proposal, old_positions) system_init = time.time() - growth_system_generator = GeometrySystemGenerator(top_proposal.new_system, atom_proposal_order.keys(), growth_parameter_name, reference_topology=top_proposal.new_topology, use_sterics=self.use_sterics) + growth_system_generator = GeometrySystemGenerator(top_proposal.new_system, atom_proposal_order.keys(), + growth_parameter_name, + reference_topology=top_proposal.new_topology, + reference_state_key=top_proposal.new_chemical_state_key, + use_sterics=self.use_sterics) growth_system = growth_system_generator.get_modified_system() growth_system_time = time.time() - system_init - elif direction=='reverse': + elif direction == 'reverse': if new_positions is None: raise ValueError("For reverse proposals, new_positions must not be none.") atom_proposal_order, logp_choice = proposal_order_tool.determine_proposal_order(direction='reverse') structure = parmed.openmm.load_topology(top_proposal.old_topology, top_proposal.old_system) atoms_with_positions = [structure.atoms[atom_idx] for atom_idx in top_proposal.old_to_new_atom_map.keys()] - growth_system_generator = GeometrySystemGenerator(top_proposal.old_system, atom_proposal_order.keys(), growth_parameter_name, reference_topology=top_proposal.old_topology, use_sterics=self.use_sterics) + growth_system_generator = GeometrySystemGenerator(top_proposal.old_system, atom_proposal_order.keys(), + growth_parameter_name, + reference_topology=top_proposal.old_topology, + reference_state_key=top_proposal.old_chemical_state_key, + use_sterics=self.use_sterics) growth_system = growth_system_generator.get_modified_system() else: raise ValueError("Parameter 'direction' must be forward or reverse") @@ -234,14 +248,13 @@ def _logp_propose(self, top_proposal, old_positions, beta, new_positions=None, d logp_proposal = logp_choice if self._storage: self._storage.write_object("{}_proposal_order".format(direction), proposal_order_tool, iteration=self.nproposed) - if self.use_sterics: platform_name = 'CPU' else: platform_name = 'Reference' platform = openmm.Platform.getPlatformByName(platform_name) - integrator = openmm.VerletIntegrator(1*units.femtoseconds) + integrator = openmm.VerletIntegrator(1 * units.femtoseconds) context = openmm.Context(growth_system, integrator, platform) growth_system_generator.set_growth_parameter_index(len(atom_proposal_order.keys())+1, context) growth_parameter_value = 1 @@ -255,45 +268,48 @@ def _logp_propose(self, top_proposal, old_positions, beta, new_positions=None, d bond_atom = torsion.atom2 angle_atom = torsion.atom3 torsion_atom = torsion.atom4 - if self.verbose: _logger.info("Proposing atom %s from torsion %s" %(str(atom), str(torsion))) + if self.verbose: _logger.info("Proposing atom %s from torsion %s" % (str(atom), str(torsion))) if atom != torsion.atom1: raise Exception('atom != torsion.atom1') - #get internal coordinates if direction is reverse - if direction=='reverse': + # get internal coordinates if direction is reverse + if direction == 'reverse': atom_coords = old_positions[atom.idx] bond_coords = old_positions[bond_atom.idx] angle_coords = old_positions[angle_atom.idx] torsion_coords = old_positions[torsion_atom.idx] - internal_coordinates, detJ = self._cartesian_to_internal(atom_coords, bond_coords, angle_coords, torsion_coords) - r = internal_coordinates[0]*atom_coords.unit - theta = internal_coordinates[1]*units.radian - phi = internal_coordinates[2]*units.radian + internal_coordinates, detJ = self._cartesian_to_internal(atom_coords, bond_coords, angle_coords, + torsion_coords) + r = internal_coordinates[0] * atom_coords.unit + theta = internal_coordinates[1] * units.radian + phi = internal_coordinates[2] * units.radian bond = self._get_relevant_bond(atom, bond_atom) if bond is not None: - if direction=='forward': + if direction == 'forward': r = self._propose_bond(bond, beta) bond_k = bond.type.k - sigma_r = units.sqrt(1/(beta*bond_k)) - logZ_r = np.log((np.sqrt(2*np.pi)*(sigma_r.value_in_unit(units.angstrom)))) + sigma_r = units.sqrt(1 / (beta * bond_k)) + logZ_r = np.log((np.sqrt(2 * np.pi) * (sigma_r.value_in_unit(units.angstrom)))) logp_r = self._bond_logq(r, bond, beta) - logZ_r else: if direction == 'forward': constraint = self._get_bond_constraint(atom, bond_atom, top_proposal.new_system) if constraint is None: - raise ValueError("Structure contains a topological bond [%s - %s] with no constraint or bond information." % (str(atom), str(bond_atom))) - r = constraint #set bond length to exactly constraint + raise ValueError( + "Structure contains a topological bond [%s - %s] with no constraint or bond information." % ( + str(atom), str(bond_atom))) + r = constraint # set bond length to exactly constraint logp_r = 0.0 - #propose an angle and calculate its probability + # propose an angle and calculate its probability angle = self._get_relevant_angle(atom, bond_atom, angle_atom) - if direction=='forward': + if direction == 'forward': theta = self._propose_angle(angle, beta) angle_k = angle.type.k - sigma_theta = units.sqrt(1/(beta*angle_k)) - logZ_theta = np.log((np.sqrt(2*np.pi)*(sigma_theta.value_in_unit(units.radians)))) + sigma_theta = units.sqrt(1 / (beta * angle_k)) + logZ_theta = np.log((np.sqrt(2 * np.pi) * (sigma_theta.value_in_unit(units.radians)))) logp_theta = self._angle_logq(theta, angle, beta) - logZ_theta #propose a torsion angle and calcualate its probability @@ -315,7 +331,6 @@ def _logp_propose(self, top_proposal, old_positions, beta, new_positions=None, d logp_r, logp_theta, logp_phi, np.log(detJ)]) atom_placements.append(atom_placement_array) - logp_proposal += logp_r + logp_theta + logp_phi + np.log(detJ) growth_parameter_value += 1 @@ -336,7 +351,7 @@ def _logp_propose(self, top_proposal, old_positions, beta, new_positions=None, d return logp_proposal, new_positions @staticmethod - def _oemol_from_residue(res, verbose=True): + def oemol_from_residue(res, verbose=False): """ Get an OEMol from a residue, even if that residue is polymeric. In the latter case, external bonds @@ -354,49 +369,36 @@ def _oemol_from_residue(res, verbose=True): oemol : openeye.oechem.OEMol an oemol representation of the residue with topology indices """ - # TODO: This seems to be broken. Can we fix it? - from openmoltools.forcefield_generators import generateOEMolFromTopologyResidue external_bonds = list(res.external_bonds()) for bond in external_bonds: - if verbose: print(bond) + if verbose: print("external bond: ", bond) new_atoms = {} - highest_index = 0 if external_bonds: new_topology = app.Topology() new_chain = new_topology.addChain(0) - new_res = new_topology.addResidue("new_res", new_chain) + new_res = new_topology.addResidue(res.name, new_chain) for atom in res.atoms(): - new_atom = new_topology.addAtom(atom.name, atom.element, new_res, atom.id) - new_atom.index = atom.index + new_atom = new_topology.addAtom(atom.name, atom.element, new_res) + new_atom.topology_index = atom.index # Save the atom index from the input topology new_atoms[atom] = new_atom - highest_index = max(highest_index, atom.index) for bond in res.internal_bonds(): - new_topology.addBond(new_atoms[bond[0]], new_atoms[bond[1]]) + new_bond = new_topology.addBond(new_atoms[bond[0]], new_atoms[bond[1]]) for bond in res.external_bonds(): - internal_atom = [atom for atom in bond if atom.residue==res][0] - if verbose: - print('internal atom') - print(internal_atom) - highest_index += 1 - if internal_atom.name=='N': + internal_atom = bond[0] if bond[0].residue == res else bond[1] + if verbose: print('internal atom: ', internal_atom) + if internal_atom.name == 'N': if verbose: print('Adding H to N') - new_atom = new_topology.addAtom("H2", app.Element.getByAtomicNumber(1), new_res, -1) - new_atom.index = -1 + new_atom = new_topology.addAtom("H2", app.Element.getByAtomicNumber(1), new_res) new_topology.addBond(new_atoms[internal_atom], new_atom) - if internal_atom.name=='C': + if internal_atom.name == 'C': if verbose: print('Adding OH to C') - new_atom = new_topology.addAtom("O2", app.Element.getByAtomicNumber(8), new_res, -1) - new_atom.index = -1 + new_atom = new_topology.addAtom("O2", app.Element.getByAtomicNumber(8), new_res) new_topology.addBond(new_atoms[internal_atom], new_atom) - highest_index += 1 - new_hydrogen = new_topology.addAtom("HO", app.Element.getByAtomicNumber(1), new_res, -1) - new_hydrogen.index = -1 + new_hydrogen = new_topology.addAtom("HO", app.Element.getByAtomicNumber(1), new_res) new_topology.addBond(new_hydrogen, new_atom) - res_to_use = new_res - external_bonds = list(res_to_use.external_bonds()) + oemol = forcefield_generators.generateOEMolFromTopologyResidue(new_res) else: - res_to_use = res - oemol = generateOEMolFromTopologyResidue(res_to_use, geometry=False) + oemol = forcefield_generators.generateOEMolFromTopologyResidue(res) oechem.OEAddExplicitHydrogens(oemol) return oemol @@ -422,7 +424,7 @@ def _copy_positions(self, atoms_with_positions, top_proposal, current_positions) new_positions = units.Quantity(np.random.random([top_proposal.n_atoms_new, 3]), unit=units.nanometers) current_positions = current_positions.in_units_of(units.nanometers) - #copy positions + # copy positions for atom in atoms_with_positions: old_index = top_proposal.new_to_old_atom_map[atom.idx] new_positions[atom.idx] = current_positions[old_index] @@ -481,7 +483,7 @@ def _get_bond_constraint(self, atom1, atom2, system): for i in range(n_constraints): constraint_parameters = system.getConstraintParameters(i) constraint_atoms = set(constraint_parameters[:2]) - if len(constraint_atoms.intersection(atom_indices))==2: + if len(constraint_atoms.intersection(atom_indices)) == 2: constraint = constraint_parameters[2] return constraint @@ -524,10 +526,10 @@ def _add_bond_units(self, bond): ------- """ - if type(bond.type.k)==units.Quantity: + if type(bond.type.k) == units.Quantity: return bond bond.type.req = units.Quantity(bond.type.req, unit=units.angstrom) - bond.type.k = units.Quantity(2.0*bond.type.k, unit=units.kilocalorie_per_mole/units.angstrom**2) + bond.type.k = units.Quantity(2.0 * bond.type.k, unit=units.kilocalorie_per_mole / units.angstrom ** 2) return bond def _add_angle_units(self, angle): @@ -544,10 +546,10 @@ def _add_angle_units(self, angle): angle_with_units : parmed angle The angle, but with units on its parameters """ - if type(angle.type.k)==units.Quantity: + if type(angle.type.k) == units.Quantity: return angle angle.type.theteq = units.Quantity(angle.type.theteq, unit=units.degree) - angle.type.k = units.Quantity(2.0*angle.type.k, unit=units.kilocalorie_per_mole/units.radian**2) + angle.type.k = units.Quantity(2.0 * angle.type.k, unit=units.kilocalorie_per_mole / units.radian ** 2) return angle def _add_torsion_units(self, torsion): @@ -574,21 +576,21 @@ def _rotation_matrix(self, axis, angle): """ This method produces a rotation matrix given an axis and an angle. """ - axis = axis/np.linalg.norm(axis) + axis = axis / np.linalg.norm(axis) axis_squared = np.square(axis) cos_angle = np.cos(angle) sin_angle = np.sin(angle) - rot_matrix_row_one = np.array([cos_angle+axis_squared[0]*(1-cos_angle), - axis[0]*axis[1]*(1-cos_angle) - axis[2]*sin_angle, - axis[0]*axis[2]*(1-cos_angle)+axis[1]*sin_angle]) + rot_matrix_row_one = np.array([cos_angle + axis_squared[0] * (1 - cos_angle), + axis[0] * axis[1] * (1 - cos_angle) - axis[2] * sin_angle, + axis[0] * axis[2] * (1 - cos_angle) + axis[1] * sin_angle]) - rot_matrix_row_two = np.array([axis[1]*axis[0]*(1-cos_angle)+axis[2]*sin_angle, - cos_angle+axis_squared[1]*(1-cos_angle), - axis[1]*axis[2]*(1-cos_angle) - axis[0]*sin_angle]) + rot_matrix_row_two = np.array([axis[1] * axis[0] * (1 - cos_angle) + axis[2] * sin_angle, + cos_angle + axis_squared[1] * (1 - cos_angle), + axis[1] * axis[2] * (1 - cos_angle) - axis[0] * sin_angle]) - rot_matrix_row_three = np.array([axis[2]*axis[0]*(1-cos_angle)-axis[1]*sin_angle, - axis[2]*axis[1]*(1-cos_angle)+axis[0]*sin_angle, - cos_angle+axis_squared[2]*(1-cos_angle)]) + rot_matrix_row_three = np.array([axis[2] * axis[0] * (1 - cos_angle) - axis[1] * sin_angle, + axis[2] * axis[1] * (1 - cos_angle) + axis[0] * sin_angle, + cos_angle + axis_squared[2] * (1 - cos_angle)]) rotation_matrix = np.array([rot_matrix_row_one, rot_matrix_row_two, rot_matrix_row_three]) return rotation_matrix @@ -598,16 +600,16 @@ def _cartesian_to_internal(self, atom_position, bond_position, angle_position, t Cartesian to internal function """ from perses.rjmc import coordinate_numba - #ensure we have the correct units, then remove them + # ensure we have the correct units, then remove them atom_position = atom_position.value_in_unit(units.nanometers).astype(np.float64) bond_position = bond_position.value_in_unit(units.nanometers).astype(np.float64) angle_position = angle_position.value_in_unit(units.nanometers).astype(np.float64) torsion_position = torsion_position.value_in_unit(units.nanometers).astype(np.float64) - internal_coords = coordinate_numba.cartesian_to_internal(atom_position, bond_position, angle_position, torsion_position) - + internal_coords = coordinate_numba.cartesian_to_internal(atom_position, bond_position, angle_position, + torsion_position) - return internal_coords, np.abs(internal_coords[0]**2*np.sin(internal_coords[1])) + return internal_coords, np.abs(internal_coords[0] ** 2 * np.sin(internal_coords[1])) def _internal_to_cartesian(self, bond_position, angle_position, torsion_position, r, theta, phi): """ @@ -620,10 +622,11 @@ def _internal_to_cartesian(self, bond_position, angle_position, torsion_position bond_position = bond_position.value_in_unit(units.nanometers).astype(np.float64) angle_position = angle_position.value_in_unit(units.nanometers).astype(np.float64) torsion_position = torsion_position.value_in_unit(units.nanometers).astype(np.float64) - xyz = coordinate_numba.internal_to_cartesian(bond_position, angle_position, torsion_position, np.array([r, theta, phi], dtype=np.float64)) + xyz = coordinate_numba.internal_to_cartesian(bond_position, angle_position, torsion_position, + np.array([r, theta, phi], dtype=np.float64)) xyz = units.Quantity(xyz, unit=units.nanometers) - return xyz, np.abs(r**2*np.sin(theta)) + return xyz, np.abs(r ** 2 * np.sin(theta)) def _bond_logq(self, r, bond, beta): """ @@ -642,7 +645,7 @@ def _bond_logq(self, r, bond, beta): """ k_eq = bond.type.k r0 = bond.type.req - logq = -beta*0.5*k_eq*(r-r0)**2 + logq = -beta * 0.5 * k_eq * (r - r0) ** 2 return logq def _angle_logq(self, theta, angle, beta): @@ -660,7 +663,7 @@ def _angle_logq(self, theta, angle, beta): """ k_eq = angle.type.k theta0 = angle.type.theteq - logq = -beta*k_eq*0.5*(theta-theta0)**2 + logq = -beta * k_eq * 0.5 * (theta - theta0) ** 2 return logq def _propose_bond(self, bond, beta): @@ -669,8 +672,8 @@ def _propose_bond(self, bond, beta): """ r0 = bond.type.req k = bond.type.k - sigma_r = units.sqrt(1.0/(beta*k)) - r = sigma_r*np.random.randn() + r0 + sigma_r = units.sqrt(1.0 / (beta * k)) + r = sigma_r * np.random.randn() + r0 return r def _propose_angle(self, angle, beta): @@ -679,8 +682,8 @@ def _propose_angle(self, angle, beta): """ theta0 = angle.type.theteq k = angle.type.k - sigma_theta = units.sqrt(1.0/(beta*k)) - theta = sigma_theta*np.random.randn() + theta0 + sigma_theta = units.sqrt(1.0 / (beta * k)) + theta = sigma_theta * np.random.randn() + theta0 return theta def _torsion_scan(self, torsion, positions, r, theta, n_divisions=360): @@ -714,15 +717,17 @@ def _torsion_scan(self, torsion, positions, r, theta, n_divisions=360): bond_atom = torsion.atom2 angle_atom = torsion.atom3 torsion_atom = torsion.atom4 - phis = np.arange(-np.pi, +np.pi, (2.0*np.pi)/n_divisions) # Can't use units here. - xyzs = coordinate_numba.torsion_scan(positions_copy[bond_atom.idx], positions_copy[angle_atom.idx], positions_copy[torsion_atom.idx], np.array([r, theta, 0.0]), phis) - xyzs_quantity = units.Quantity(xyzs, unit=units.nanometers) #have to put the units back now + phis = np.arange(-np.pi, +np.pi, (2.0 * np.pi) / n_divisions) # Can't use units here. + xyzs = coordinate_numba.torsion_scan(positions_copy[bond_atom.idx], positions_copy[angle_atom.idx], + positions_copy[torsion_atom.idx], np.array([r, theta, 0.0]), phis) + xyzs_quantity = units.Quantity(xyzs, unit=units.nanometers) # have to put the units back now phis = units.Quantity(phis, unit=units.radians) torsion_scan_time = time.time() - torsion_scan_init self._torsion_coordinate_time += torsion_scan_time return xyzs_quantity, phis - def _torsion_log_probability_mass_function(self, growth_context, torsion, positions, r, theta, beta, n_divisions=360): + def _torsion_log_probability_mass_function(self, growth_context, torsion, positions, r, theta, beta, + n_divisions=360): """ Calculate the torsion logp pmf using OpenMM @@ -756,7 +761,7 @@ def _torsion_log_probability_mass_function(self, growth_context, torsion, positi xyzs = xyzs.value_in_unit_system(units.md_unit_system) positions = positions.value_in_unit_system(units.md_unit_system) for i, xyz in enumerate(xyzs): - positions[atom_idx,:] = xyz + positions[atom_idx, :] = xyz position_set = time.time() growth_context.setPositions(positions) position_time = time.time() - position_set @@ -766,7 +771,7 @@ def _torsion_log_probability_mass_function(self, growth_context, torsion, positi potential_energy = state.getPotentialEnergy() energy_computation_time = time.time() - energy_computation_init self._energy_time += energy_computation_time - logq_i = -beta*potential_energy + logq_i = -beta * potential_energy logq[i] = logq_i if np.sum(np.isnan(logq)) == n_divisions: @@ -780,11 +785,13 @@ def _torsion_log_probability_mass_function(self, growth_context, torsion, positi if hasattr(self, '_proposal_pdbfile'): # Write proposal probabilities to PDB file as B-factors for inert atoms f_i = -logp_torsions - f_i -= f_i.min() # minimum free energy is zero + f_i -= f_i.min() # minimum free energy is zero f_i[f_i > 999.99] = 999.99 self._proposal_pdbfile.write('MODEL\n') for i, xyz in enumerate(xyzs): - self._proposal_pdbfile.write('ATOM %5d %4s %3s %c%4d %8.3f%8.3f%8.3f%6.2f%6.2f\n' % (i+1, ' Ar ', 'Ar ', ' ', atom_idx+1, 10*xyz[0], 10*xyz[1], 10*xyz[2], np.exp(logp_torsions[i]), f_i[i])) + self._proposal_pdbfile.write('ATOM %5d %4s %3s %c%4d %8.3f%8.3f%8.3f%6.2f%6.2f\n' % ( + i + 1, ' Ar ', 'Ar ', ' ', atom_idx + 1, 10 * xyz[0], 10 * xyz[1], 10 * xyz[2], + np.exp(logp_torsions[i]), f_i[i])) self._proposal_pdbfile.write('TER\n') self._proposal_pdbfile.write('ENDMDL\n') # TODO: Write proposal PMFs to storage @@ -794,7 +801,6 @@ def _torsion_log_probability_mass_function(self, growth_context, torsion, positi return logp_torsions, phis - def _propose_torsion(self, growth_context, torsion, positions, r, theta, beta, n_divisions=360): """ Propose a torsion using OpenMM @@ -823,13 +829,15 @@ def _propose_torsion(self, growth_context, torsion, positions, r, theta, beta, n logp : float The log probability of the proposal. """ - logp_torsions, phis = self._torsion_log_probability_mass_function(growth_context, torsion, positions, r, theta, beta, n_divisions=n_divisions) - division = units.Quantity(2*np.pi/n_divisions, unit=units.radian) + logp_torsions, phis = self._torsion_log_probability_mass_function(growth_context, torsion, positions, r, theta, + beta, n_divisions=n_divisions) + division = units.Quantity(2 * np.pi / n_divisions, unit=units.radian) phi_median_idx = np.random.choice(range(len(phis)), p=np.exp(logp_torsions)) - phi_min = phis[phi_median_idx] - division/2.0 - phi_max = phis[phi_median_idx] + division/2.0 + phi_min = phis[phi_median_idx] - division / 2.0 + phi_max = phis[phi_median_idx] + division / 2.0 phi = np.random.uniform(phi_min.value_in_unit(units.radian), phi_max.value_in_unit(units.radian)) - logp = logp_torsions[phi_median_idx] - np.log(2*np.pi / n_divisions) # convert from probability mass function to probability density function so that sum(dphi*p) = 1, with dphi = (2*pi)/n_divisions + logp = logp_torsions[phi_median_idx] - np.log( + 2 * np.pi / n_divisions) # convert from probability mass function to probability density function so that sum(dphi*p) = 1, with dphi = (2*pi)/n_divisions return units.Quantity(phi, unit=units.radian), logp def _torsion_logp(self, growth_context, torsion, positions, r, theta, phi, beta, n_divisions=360): @@ -860,11 +868,14 @@ def _torsion_logp(self, growth_context, torsion, positions, r, theta, phi, beta, torsion_logp : float the logp of this torsion """ - logp_torsions, phis = self._torsion_log_probability_mass_function(growth_context, torsion, positions, r, theta, beta, n_divisions=n_divisions) - phi_idx = np.argmin(np.abs(phi-phis)) # WARNING: This assumes both phi and phis have domain of [-pi,+pi) - torsion_logp = logp_torsions[phi_idx] - np.log(2*np.pi / n_divisions) # convert from probability mass function to probability density function so that sum(dphi*p) = 1, with dphi = (2*pi)/n_divisions. + logp_torsions, phis = self._torsion_log_probability_mass_function(growth_context, torsion, positions, r, theta, + beta, n_divisions=n_divisions) + phi_idx = np.argmin(np.abs(phi - phis)) # WARNING: This assumes both phi and phis have domain of [-pi,+pi) + torsion_logp = logp_torsions[phi_idx] - np.log( + 2 * np.pi / n_divisions) # convert from probability mass function to probability density function so that sum(dphi*p) = 1, with dphi = (2*pi)/n_divisions. return torsion_logp + class PredAtomTopologyIndex(oechem.OEUnaryAtomPred): def __init__(self, topology_index): @@ -919,10 +930,10 @@ def __init__(self, growth_context, atom_torsions, initial_positions, beta, n_par self._n_new_atoms = len(self._atom_torsions) self._initial_positions = initial_positions self._new_indices = [atom.idx for atom in self._atom_torsions.keys()] - #create a matrix for log weights (n_particles, n_stages) + # create a matrix for log weights (n_particles, n_stages) self._Wij = np.zeros([self._n_particles, self._n_new_atoms]) - #create an array for positions--only store new positions to avoid - #consuming way too much memory + # create an array for positions--only store new positions to avoid + # consuming way too much memory self._new_positions = np.zeros([self._n_particles, self._n_new_atoms, 3]) self._generate_configurations() @@ -937,8 +948,9 @@ def _internal_to_cartesian(self, bond_position, angle_position, torsion_position bond_position = bond_position.astype(np.float64) angle_position = angle_position.astype(np.float64) torsion_position = torsion_position.astype(np.float64) - xyz = coordinate_numba.internal_to_cartesian(bond_position, angle_position, torsion_position, np.array([r, theta, phi], dtype=np.float64)) - return xyz, r**2*np.sin(theta) + xyz = coordinate_numba.internal_to_cartesian(bond_position, angle_position, torsion_position, + np.array([r, theta, phi], dtype=np.float64)) + return xyz, r ** 2 * np.sin(theta) def _get_bond_constraint(self, atom1, atom2, system): """ @@ -965,7 +977,7 @@ def _get_bond_constraint(self, atom1, atom2, system): for i in range(n_constraints): constraint_parameters = system.getConstraintParameters(i) constraint_atoms = set(constraint_parameters[:2]) - if len(constraint_atoms.intersection(atom_indices))==2: + if len(constraint_atoms.intersection(atom_indices)) == 2: constraint = constraint_parameters[2] return constraint @@ -989,7 +1001,7 @@ def _log_unnormalized_target(self, new_positions): self._growth_context.setParameter('growth_stage', self._growth_stage) self._growth_context.setPositions(positions) energy = self._growth_context.getState(getEnergy=True).getPotentialEnergy() - return -self._beta*energy + return -self._beta * energy def _get_relevant_angle(self, atom1, atom2, atom3): """ @@ -1019,10 +1031,10 @@ def _add_bond_units(self, bond): ------- """ - if type(bond.type.k)==units.Quantity: + if type(bond.type.k) == units.Quantity: return bond bond.type.req = units.Quantity(bond.type.req, unit=units.angstrom) - bond.type.k = units.Quantity(2.0*bond.type.k, unit=units.kilocalorie_per_mole/units.angstrom**2) + bond.type.k = units.Quantity(2.0 * bond.type.k, unit=units.kilocalorie_per_mole / units.angstrom ** 2) return bond def _add_angle_units(self, angle): @@ -1039,10 +1051,10 @@ def _add_angle_units(self, angle): angle_with_units : parmed angle The angle, but with units on its parameters """ - if type(angle.type.k)==units.Quantity: + if type(angle.type.k) == units.Quantity: return angle angle.type.theteq = units.Quantity(angle.type.theteq, unit=units.degree) - angle.type.k = units.Quantity(2.0*angle.type.k, unit=units.kilocalorie_per_mole/units.radian**2) + angle.type.k = units.Quantity(2.0 * angle.type.k, unit=units.kilocalorie_per_mole / units.radian ** 2) return angle def _get_relevant_bond(self, atom1, atom2): @@ -1090,7 +1102,7 @@ def _bond_logq(self, r, bond): """ k_eq = bond.type.k r0 = bond.type.req - logq = -self._beta*0.5*k_eq*(r-r0)**2 + logq = -self._beta * 0.5 * k_eq * (r - r0) ** 2 return logq def _angle_logq(self, theta, angle): @@ -1108,7 +1120,7 @@ def _angle_logq(self, theta, angle): """ k_eq = angle.type.k theta0 = angle.type.theteq - logq = -self._beta*k_eq*0.5*(theta-theta0)**2 + logq = -self._beta * k_eq * 0.5 * (theta - theta0) ** 2 return logq def _propose_bond(self, bond): @@ -1117,8 +1129,8 @@ def _propose_bond(self, bond): """ r0 = bond.type.req k = bond.type.k - sigma_r = units.sqrt(1.0/(self._beta*k)) - r = sigma_r*np.random.randn() + r0 + sigma_r = units.sqrt(1.0 / (self._beta * k)) + r = sigma_r * np.random.randn() + r0 return r def _propose_angle(self, angle): @@ -1127,8 +1139,8 @@ def _propose_angle(self, angle): """ theta0 = angle.type.theteq k = angle.type.k - sigma_theta = units.sqrt(1.0/(self._beta*k)) - theta = sigma_theta*np.random.randn() + theta0 + sigma_theta = units.sqrt(1.0 / (self._beta * k)) + theta = sigma_theta * np.random.randn() + theta0 return theta def _propose_atom(self, atom, torsion, new_positions): @@ -1167,29 +1179,30 @@ def _propose_atom(self, atom, torsion, new_positions): if bond is not None: r = self._propose_bond(bond) bond_k = bond.type.k - sigma_r = units.sqrt(1/(self._beta*bond_k)) - logZ_r = np.log((np.sqrt(2*np.pi)*(sigma_r/units.angstroms))) # CHECK DOMAIN AND UNITS + sigma_r = units.sqrt(1 / (self._beta * bond_k)) + logZ_r = np.log((np.sqrt(2 * np.pi) * (sigma_r / units.angstroms))) # CHECK DOMAIN AND UNITS logp_r = self._bond_logq(r, bond) - logZ_r else: constraint = self._get_bond_constraint(atom, bond_atom, self._system) - r = constraint #set bond length to exactly constraint + r = constraint # set bond length to exactly constraint logp_r = 0.0 - #propose an angle and calculate its probability + # propose an angle and calculate its probability angle = self._get_relevant_angle(atom, bond_atom, angle_atom) theta = self._propose_angle(angle) angle_k = angle.type.k - sigma_theta = units.sqrt(1/(self._beta*angle_k)) - logZ_theta = np.log((np.sqrt(2*np.pi)*(sigma_theta/units.radians))) # CHECK DOMAIN AND UNITS + sigma_theta = units.sqrt(1 / (self._beta * angle_k)) + logZ_theta = np.log((np.sqrt(2 * np.pi) * (sigma_theta / units.radians))) # CHECK DOMAIN AND UNITS logp_theta = self._angle_logq(theta, angle) - logZ_theta - #propose a torsion angle uniformly (this can be dramatically improved) + # propose a torsion angle uniformly (this can be dramatically improved) phi = np.random.uniform(-np.pi, np.pi) - logp_phi = -np.log(2*np.pi) + logp_phi = -np.log(2 * np.pi) - #get the new cartesian coordinates and detJ: - new_xyz, detJ = self._internal_to_cartesian(positions[bond_atom.idx], positions[angle_atom.idx], positions[torsion_atom.idx], r, theta, phi) - #accumulate logp + # get the new cartesian coordinates and detJ: + new_xyz, detJ = self._internal_to_cartesian(positions[bond_atom.idx], positions[angle_atom.idx], + positions[torsion_atom.idx], r, theta, phi) + # accumulate logp logp_proposal = logp_r + logp_theta + logp_phi + np.log(np.abs(detJ)) return new_xyz, logp_proposal @@ -1199,10 +1212,10 @@ def _resample(self): Resample from the current set of weights and positions. """ particle_indices = range(self._n_particles) - new_indices = np.random.choice(particle_indices, size=self._n_particles, p=self._Wij[:, self._growth_stage-1]) + new_indices = np.random.choice(particle_indices, size=self._n_particles, p=self._Wij[:, self._growth_stage - 1]) for particle_index in particle_indices: self._new_positions[particle_index, :, :] = self._new_positions[new_indices[particle_index], :, :] - self._Wij[:, self._growth_stage-1] = -np.log(self._n_particles) #set particle weights to be equal + self._Wij[:, self._growth_stage - 1] = -np.log(self._n_particles) # set particle weights to be equal def _generate_configurations(self): """ @@ -1210,16 +1223,17 @@ def _generate_configurations(self): from p(x_new | x_common). """ for i, atom_torsion in enumerate(self._atom_torsions.items()): - self._growth_stage = i+1 + self._growth_stage = i + 1 for particle_index in range(self._n_particles): proposed_xyz, logp_proposal = self._propose_atom(atom_torsion[0], atom_torsion[1]) self._new_positions[particle_index, i, :] = proposed_xyz - unnormalized_log_target = self._log_unnormalized_target(self._new_positions[particle_index, :,:]) + unnormalized_log_target = self._log_unnormalized_target(self._new_positions[particle_index, :, :]) if i > 0: - self._Wij = [particle_index, i] = (unnormalized_log_target - logp_proposal) + self._Wij[particle_index, i-1] + self._Wij = [particle_index, i] = (unnormalized_log_target - logp_proposal) + self._Wij[ + particle_index, i - 1] else: self._Wij = [particle_index, i] = unnormalized_log_target - logp_proposal - sum_log_weights = np.sum(np.exp(self._Wij[:,i])) + sum_log_weights = np.sum(np.exp(self._Wij[:, i])) self._Wij -= np.log(sum_log_weights) if i % self._resample_frequency == 0 and i != 0: self._resample() @@ -1286,8 +1300,10 @@ class GeometrySystemGenerator(object): _HarmonicBondForceEnergy = "select(step({}+0.1 - growth_idx), (K/2)*(r-r0)^2, 0);" _HarmonicAngleForceEnergy = "select(step({}+0.1 - growth_idx), (K/2)*(theta-theta0)^2, 0);" _PeriodicTorsionForceEnergy = "select(step({}+0.1 - growth_idx), k*(1+cos(periodicity*theta-phase)), 0);" + _VonMisesTorsionForceEnergy = "select(step({}+0.1 - growth_idx), (1/(sigma^2))*(cos(theta-phase)), 0);" - def __init__(self, reference_system, growth_indices, parameter_name, add_extra_torsions=True, add_extra_angles=True, reference_topology=None, use_sterics=False, force_names=None, force_parameters=None, verbose=True): + def __init__(self, reference_system, growth_indices, parameter_name, add_extra_torsions=True, add_extra_angles=True, + reference_topology=None, reference_state_key=None, use_sterics=False, force_names=None, force_parameters=None, verbose=True): """ Parameters ---------- @@ -1307,10 +1323,10 @@ def __init__(self, reference_system, growth_indices, parameter_name, add_extra_t If True, will print verbose output. """ - ONE_4PI_EPS0 = 138.935456 # OpenMM constant for Coulomb interactions (openmm/platforms/reference/include/SimTKOpenMMRealType.h) in OpenMM units - # TODO: Replace this with an import from simtk.openmm.constants once these constants are available there + ONE_4PI_EPS0 = 138.935456 # OpenMM constant for Coulomb interactions (openmm/platforms/reference/include/SimTKOpenMMRealType.h) in OpenMM units + # TODO: Replace this with an import from simtk.openmm.constants once these constants are available there - default_growth_index = len(growth_indices) # default value of growth index to use in System that is returned + default_growth_index = len(growth_indices) # default value of growth index to use in System that is returned self.current_growth_index = default_growth_index # Nonbonded sterics and electrostatics. @@ -1329,17 +1345,22 @@ def __init__(self, reference_system, growth_indices, parameter_name, add_extra_t self._nonbondedExceptionEnergy += "U_exception = ONE_4PI_EPS0*chargeprod/r + 4*epsilon*x*(x-1.0); x = (sigma/r)^6;" self._nonbondedExceptionEnergy += "ONE_4PI_EPS0 = %f;" % ONE_4PI_EPS0 - self.sterics_cutoff_distance = 9.0 * units.angstroms # cutoff for sterics + self.sterics_cutoff_distance = 9.0 * units.angstroms # cutoff for sterics self.verbose = verbose + self._aminos = ['ALA', 'ARG', 'ASN', 'ASP', 'CYS', 'GLN', 'GLU', 'GLY', 'HIS', 'ILE', 'LEU', 'LYS', 'MET', + 'PHE', 'PRO', 'SER', 'THR', 'TRP', 'TYR', 'VAL'] + # Get list of particle indices for new and old atoms. - new_particle_indices = [ atom.idx for atom in growth_indices ] - old_particle_indices = [idx for idx in range(reference_system.getNumParticles()) if idx not in new_particle_indices] + new_particle_indices = [atom.idx for atom in growth_indices] + old_particle_indices = [idx for idx in range(reference_system.getNumParticles()) if + idx not in new_particle_indices] - reference_forces = {reference_system.getForce(index).__class__.__name__ : reference_system.getForce(index) for index in range(reference_system.getNumForces())} + reference_forces = {reference_system.getForce(index).__class__.__name__: reference_system.getForce(index) for + index in range(reference_system.getNumForces())} growth_system = openmm.System() - #create the forces: + # create the forces: modified_bond_force = openmm.CustomBondForce(self._HarmonicBondForceEnergy.format(parameter_name)) modified_bond_force.addPerBondParameter("r0") modified_bond_force.addPerBondParameter("K") @@ -1359,40 +1380,52 @@ def __init__(self, reference_system, growth_indices, parameter_name, add_extra_t modified_torsion_force.addPerTorsionParameter("growth_idx") modified_torsion_force.addGlobalParameter(parameter_name, default_growth_index) + modified_stereochemistry_force = openmm.CustomTorsionForce(self._VonMisesTorsionForceEnergy.format(parameter_name)) + modified_stereochemistry_force.addPerTorsionParameter("phase") + modified_stereochemistry_force.addPerTorsionParameter("sigma") + modified_stereochemistry_force.addPerTorsionParameter("growth_idx") + modified_stereochemistry_force.addGlobalParameter(parameter_name, default_growth_index) + growth_system.addForce(modified_bond_force) growth_system.addForce(modified_angle_force) growth_system.addForce(modified_torsion_force) + growth_system.addForce(modified_stereochemistry_force) - #copy the particles over + # copy the particles over for i in range(reference_system.getNumParticles()): growth_system.addParticle(reference_system.getParticleMass(i)) - #copy each bond, adding the per-particle parameter as well + # copy each bond, adding the per-particle parameter as well reference_bond_force = reference_forces['HarmonicBondForce'] for bond in range(reference_bond_force.getNumBonds()): bond_parameters = reference_bond_force.getBondParameters(bond) growth_idx = self._calculate_growth_idx(bond_parameters[:2], growth_indices) - if growth_idx==0: + if growth_idx == 0: continue - modified_bond_force.addBond(bond_parameters[0], bond_parameters[1], [bond_parameters[2], bond_parameters[3], growth_idx]) + modified_bond_force.addBond(bond_parameters[0], bond_parameters[1], + [bond_parameters[2], bond_parameters[3], growth_idx]) - #copy each angle, adding the per particle parameter as well + # copy each angle, adding the per particle parameter as well reference_angle_force = reference_forces['HarmonicAngleForce'] for angle in range(reference_angle_force.getNumAngles()): angle_parameters = reference_angle_force.getAngleParameters(angle) growth_idx = self._calculate_growth_idx(angle_parameters[:3], growth_indices) - if growth_idx==0: + if growth_idx == 0: continue - modified_angle_force.addAngle(angle_parameters[0], angle_parameters[1], angle_parameters[2], [angle_parameters[3], angle_parameters[4], growth_idx]) + modified_angle_force.addAngle(angle_parameters[0], angle_parameters[1], angle_parameters[2], + [angle_parameters[3], angle_parameters[4], growth_idx]) - #copy each torsion, adding the per particle parameter as well + # copy each torsion, adding the per particle parameter as well reference_torsion_force = reference_forces['PeriodicTorsionForce'] for torsion in range(reference_torsion_force.getNumTorsions()): torsion_parameters = reference_torsion_force.getTorsionParameters(torsion) growth_idx = self._calculate_growth_idx(torsion_parameters[:4], growth_indices) - if growth_idx==0: + if growth_idx == 0: continue - modified_torsion_force.addTorsion(torsion_parameters[0], torsion_parameters[1], torsion_parameters[2], torsion_parameters[3], [torsion_parameters[4], torsion_parameters[5], torsion_parameters[6], growth_idx]) + modified_torsion_force.addTorsion(torsion_parameters[0], torsion_parameters[1], torsion_parameters[2], + torsion_parameters[3], + [torsion_parameters[4], torsion_parameters[5], torsion_parameters[6], + growth_idx]) # Add (1,4) exceptions, regardless of whether 'use_sterics' is specified, because these are part of the valence forces. if 'NonbondedForce' in reference_forces.keys(): @@ -1405,19 +1438,27 @@ def __init__(self, reference_system, growth_indices, parameter_name, add_extra_t growth_system.addForce(custom_bond_force) # Add exclusions, which are active at all times. # (1,4) exceptions are always included, since they are part of the valence terms. - #print('growth_indices:', growth_indices) + # print('growth_indices:', growth_indices) reference_nonbonded_force = reference_forces['NonbondedForce'] for exception_index in range(reference_nonbonded_force.getNumExceptions()): - [particle_index_1, particle_index_2, chargeprod, sigma, epsilon] = reference_nonbonded_force.getExceptionParameters(exception_index) - growth_idx_1 = new_particle_indices.index(particle_index_1) + 1 if particle_index_1 in new_particle_indices else 0 - growth_idx_2 = new_particle_indices.index(particle_index_2) + 1 if particle_index_2 in new_particle_indices else 0 + [particle_index_1, particle_index_2, chargeprod, sigma, + epsilon] = reference_nonbonded_force.getExceptionParameters(exception_index) + growth_idx_1 = new_particle_indices.index( + particle_index_1) + 1 if particle_index_1 in new_particle_indices else 0 + growth_idx_2 = new_particle_indices.index( + particle_index_2) + 1 if particle_index_2 in new_particle_indices else 0 growth_idx = max(growth_idx_1, growth_idx_2) # Only need to add terms that are nonzero and involve newly added atoms. - if (growth_idx > 0) and ((chargeprod.value_in_unit_system(units.md_unit_system) != 0.0) or (epsilon.value_in_unit_system(units.md_unit_system) != 0.0)): - if self.verbose: _logger.info('Adding CustomBondForce: %5d %5d : chargeprod %8.3f e^2, sigma %8.3f A, epsilon %8.3f kcal/mol, growth_idx %5d' % (particle_index_1, particle_index_2, chargeprod/units.elementary_charge**2, sigma/units.angstrom, epsilon/units.kilocalorie_per_mole, growth_idx)) - custom_bond_force.addBond(particle_index_1, particle_index_2, [chargeprod, sigma, epsilon, growth_idx]) - - #copy parameters for sterics parameters in nonbonded force + if (growth_idx > 0) and ((chargeprod.value_in_unit_system(units.md_unit_system) != 0.0) or ( + epsilon.value_in_unit_system(units.md_unit_system) != 0.0)): + if self.verbose: _logger.info( + 'Adding CustomBondForce: %5d %5d : chargeprod %8.3f e^2, sigma %8.3f A, epsilon %8.3f kcal/mol, growth_idx %5d' % ( + particle_index_1, particle_index_2, chargeprod / units.elementary_charge ** 2, + sigma / units.angstrom, epsilon / units.kilocalorie_per_mole, growth_idx)) + custom_bond_force.addBond(particle_index_1, particle_index_2, + [chargeprod, sigma, epsilon, growth_idx]) + + # copy parameters for sterics parameters in nonbonded force if 'NonbondedForce' in reference_forces.keys() and use_sterics: modified_sterics_force = openmm.CustomNonbondedForce(self._nonbondedEnergy.format(parameter_name)) modified_sterics_force.addPerParticleParameter("charge") @@ -1430,20 +1471,26 @@ def __init__(self, reference_system, growth_indices, parameter_name, add_extra_t reference_nonbonded_force = reference_forces['NonbondedForce'] if reference_nonbonded_force in [openmm.NonbondedForce.NoCutoff, openmm.NonbondedForce.CutoffNonPeriodic]: modified_sterics_force.setNonbondedMethod(openmm.CustomNonbondedForce.CutoffNonPeriodic) - elif reference_nonbonded_force in [openmm.NonbondedForce.CutoffPeriodic, openmm.NonbondedForce.PME, openmm.NonbondedForce.Ewald]: + elif reference_nonbonded_force in [openmm.NonbondedForce.CutoffPeriodic, openmm.NonbondedForce.PME, + openmm.NonbondedForce.Ewald]: modified_sterics_force.setNonbondedMethod(openmm.CustomNonbondedForce.CutoffPeriodic) modified_sterics_force.setCutoffDistance(self.sterics_cutoff_distance) # Add particle parameters. for particle_index in range(reference_nonbonded_force.getNumParticles()): [charge, sigma, epsilon] = reference_nonbonded_force.getParticleParameters(particle_index) - growth_idx = new_particle_indices.index(particle_index) + 1 if particle_index in new_particle_indices else 0 + growth_idx = new_particle_indices.index( + particle_index) + 1 if particle_index in new_particle_indices else 0 modified_sterics_force.addParticle([charge, sigma, epsilon, growth_idx]) if self.verbose and (growth_idx > 0): - _logger.info('Adding NonbondedForce particle %5d : charge %8.3f |e|, sigma %8.3f A, epsilon %8.3f kcal/mol, growth_idx %5d' % (particle_index, charge/units.elementary_charge, sigma/units.angstrom, epsilon/units.kilocalorie_per_mole, growth_idx)) + _logger.info( + 'Adding NonbondedForce particle %5d : charge %8.3f |e|, sigma %8.3f A, epsilon %8.3f kcal/mol, growth_idx %5d' % ( + particle_index, charge / units.elementary_charge, sigma / units.angstrom, + epsilon / units.kilocalorie_per_mole, growth_idx)) # Add exclusions, which are active at all times. # (1,4) exceptions are always included, since they are part of the valence terms. for exception_index in range(reference_nonbonded_force.getNumExceptions()): - [particle_index_1, particle_index_2, chargeprod, sigma, epsilon] = reference_nonbonded_force.getExceptionParameters(exception_index) + [particle_index_1, particle_index_2, chargeprod, sigma, + epsilon] = reference_nonbonded_force.getExceptionParameters(exception_index) modified_sterics_force.addExclusion(particle_index_1, particle_index_2) # Only compute interactions of new particles with all other particles # TODO: Allow inteactions to be resticted to only the residue being grown. @@ -1452,11 +1499,11 @@ def __init__(self, reference_system, growth_indices, parameter_name, add_extra_t # Add extra ring-closing torsions, if requested. if add_extra_torsions: - if reference_topology==None: + if reference_topology == None: raise ValueError("Need to specify topology in order to add extra torsions.") - self._determine_extra_torsions(modified_torsion_force, reference_topology, growth_indices) + self._determine_extra_torsions(modified_torsion_force, modified_stereochemistry_force, reference_topology, reference_state_key, growth_indices) if add_extra_angles: - if reference_topology==None: + if reference_topology == None: raise ValueError("Need to specify topology in order to add extra angles") self._determine_extra_angles(modified_angle_force, reference_topology, growth_indices) @@ -1485,7 +1532,7 @@ def get_modified_system(self): """ return self._growth_system - def _determine_extra_torsions(self, torsion_force, reference_topology, growth_indices): + def _determine_extra_torsions(self, torsion_force, stereochemistry_force, reference_topology, reference_state_key, growth_indices): """ Determine which atoms need an extra torsion. First figure out which residue is covered by the new atoms, then determine the rotatable bonds. Finally, construct @@ -1510,17 +1557,83 @@ def _determine_extra_torsions(self, torsion_force, reference_topology, growth_in if len(growth_indices) == 0: return torsion_force - import openmoltools.forcefield_generators as forcefield_generators + # Identify modified residue depending on the type of reference_topology (small molecule or polymer) atoms = list(reference_topology.atoms()) growth_indices = list(growth_indices) - #get residue from first atom - residue = atoms[growth_indices[0].idx].residue + residues = [res for res in reference_topology.residues() if res.modified_aa] # Polymer topology + if len(residues) > 1: + raise Exception("Please only modify one residue at a time. The residues you tried to modify are: ", + residues) + elif not residues: # Small molecule topology + residues = [atoms[growth_indices[0].idx].residue] + + # Interpret as HIE, HID templates as HIS + res_name = residues[0].name + if res_name in ['HIE', 'HID']: + res_name = 'HIS' + + # Create oemol from reference_state_key (contains stereochemistry) + if res_name not in self._aminos: + stereo_mol = utils.smiles_to_oemol(reference_state_key) + oechem.OEPerceiveChiral(stereo_mol) + + # Generate oemol using reference topology residue. Also get the omega geometry of the molecule. try: - oemol = FFAllAngleGeometryEngine._oemol_from_residue(residue) + oemol = FFAllAngleGeometryEngine.oemol_from_residue(residues[0]) except Exception as e: print("Could not generate an oemol from the residue.") print(e) + # Set atom stereochemistry + oechem.OEPerceiveChiral(oemol) + is_stereo_set = False + for atom in oemol.GetAtoms(): + neighbors = [nbr.GetName() for nbr in atom.GetAtoms()] + if atom.IsChiral() and len(neighbors) >= 4: # Exclude improper torsions for nitrogen chiral centers + if res_name in self._aminos: + calpha_neighbors = ['C', 'CB', 'HA', 'N'] + if set(neighbors) == set(calpha_neighbors): + if res_name == 'CYS': + oechem.OESetCIPStereo(oemol, atom, oechem.OECIPAtomStereo_R) + else: + oechem.OESetCIPStereo(oemol, atom, oechem.OECIPAtomStereo_S) + else: + if res_name == 'THR': + oechem.OESetCIPStereo(oemol, atom, oechem.OECIPAtomStereo_R) + elif res_name == 'ILE': + oechem.OESetCIPStereo(oemol, atom, oechem.OECIPAtomStereo_S) + is_stereo_set = True + else: # For small molecule, use stereo from stereo_mol + match_found = False + for stereo_atom in stereo_mol.GetAtoms(): + if stereo_atom.GetIdx() == atom.GetIdx(): + stereo = oechem.OEPerceiveCIPStereo(stereo_mol, stereo_atom) + oechem.OESetCIPStereo(oemol, atom, stereo) + match_found = True + break + if not match_found: + raise Exception("Error: Stereochemistry was not assigned to all chiral atoms from the smiles string.") + is_stereo_set = True + + # Set bond stereochemistry for small molecules + if res_name not in self._aminos: + for bond in oemol.GetBonds(): + if bond.GetOrder() == 2: + match_found = False + for stereo_bond in stereo_mol.GetBonds(): + if stereo_bond.GetIdx() == bond.GetIdx(): + stereo = oechem.OEPerceiveCIPStereo(stereo_mol, stereo_bond) + oechem.OESetCIPStereo(oemol, bond, stereo) + match_found = True + break + if not match_found: + raise Exception("Error: Stereochemistry was not assigned to all chiral atoms from the smiles string.") + + omega = oeomega.OEOmega() + omega.SetMaxConfs(1) + omega.SetStrictStereo(is_stereo_set) + omega(oemol) + # DEBUG: Write mol2 file. debug = True if debug: @@ -1532,51 +1645,95 @@ def _determine_extra_torsions(self, torsion_force, reference_topology, growth_in oemol_copy = oechem.OEMol(oemol) ofs = oechem.oemolostream(filename) oechem.OETriposAtomTypeNames(oemol_copy) - oechem.OEWriteMol2File(ofs, oemol_copy) # Preserve atom naming + oechem.OEWriteMol2File(ofs, oemol_copy) # Preserve atom naming ofs.close() - #get the omega geometry of the molecule: - omega = oeomega.OEOmega() - omega.SetMaxConfs(1) - omega.SetStrictStereo(False) #TODO: fix stereochem - omega(oemol) - - #get the list of torsions in the molecule that are not about a rotatable bond + # Get the list of torsions in the molecule that are not about a rotatable bond # Note that only torsions involving heavy atoms are enumerated here. rotor = oechem.OEIsRotor() torsion_predicate = oechem.OENotBond(rotor) non_rotor_torsions = list(oechem.OEGetTorsions(oemol, torsion_predicate)) relevant_torsion_list = self._select_torsions_without_h(non_rotor_torsions) - #now, for each torsion, extract the set of indices and the angle periodicity = 1 - k = 120.0*units.kilocalories_per_mole # stddev of 12 degrees - #print([atom.name for atom in growth_indices]) + k = 120.0 * units.kilocalories_per_mole # stddev of 12 degrees + # Now, for each proper torsion, extract the set of indices and the angle. Then, add it to torsion_force for torsion in relevant_torsion_list: - #make sure to get the atom index that corresponds to the topology - atom_indices = [torsion.a.GetData("topology_index"), torsion.b.GetData("topology_index"), torsion.c.GetData("topology_index"), torsion.d.GetData("topology_index")] - # Determine phase in [-pi,+pi) interval - #phase = (np.pi)*units.radians+angle - phase = torsion.radians + np.pi # TODO: Check that this is the correct convention? + # Make sure to get the atom index that corresponds to the topology + atom_indices = [torsion.a.GetData("topology_index"), torsion.b.GetData("topology_index"), + torsion.c.GetData("topology_index"), torsion.d.GetData("topology_index")] + # Determine phase in [-pi,+pi) interval + # phase = (np.pi)*units.radians+angle + phase = torsion.radians + np.pi # TODO: Check that this is the correct convention? while (phase >= np.pi): - phase -= 2*np.pi + phase -= 2 * np.pi while (phase < -np.pi): - phase += 2*np.pi + phase += 2 * np.pi phase *= units.radian - #print('PHASE>>>> ' + str(phase)) # DEBUG growth_idx = self._calculate_growth_idx(atom_indices, growth_indices) - atom_names = [torsion.a.GetName(), torsion.b.GetName(), torsion.c.GetName(), torsion.d.GetName()] - #print("Adding torsion with atoms %s and growth index %d" %(str(atom_names), growth_idx)) - #If this is a CustomTorsionForce, we need to pass the parameters as a list, and it will have the growth_idx parameter. - #If it's a regular PeriodicTorsionForce, there is no growth_index and the parameters are passed separately. + # If this is a CustomTorsionForce, we need to pass the parameters as a list, and it will have the growth_idx parameter. + # If it's a regular PeriodicTorsionForce, there is no growth_index and the parameters are passed separately. if isinstance(torsion_force, openmm.CustomTorsionForce): - torsion_force.addTorsion(atom_indices[0], atom_indices[1], atom_indices[2], atom_indices[3], [periodicity, phase, k, growth_idx]) + torsion_force.addTorsion(atom_indices[0], atom_indices[1], atom_indices[2], atom_indices[3], + [periodicity, phase, k, growth_idx]) elif isinstance(torsion_force, openmm.PeriodicTorsionForce): - torsion_force.addTorsion(atom_indices[0], atom_indices[1], atom_indices[2], atom_indices[3], periodicity, phase, k) + torsion_force.addTorsion(atom_indices[0], atom_indices[1], atom_indices[2], atom_indices[3], + periodicity, + phase, k) else: - raise ValueError("The force supplied to this method must be either a CustomTorsionForce or a PeriodicTorsionForce") - - return torsion_force + raise ValueError( + "The force supplied to this method must be either a CustomTorsionForce or a PeriodicTorsionForce") + + + # Now, add improper torsions + # coords : dict, key : index of atom in oemol (int), value : (x, y, z) coordinates of atom (3-element tuple) + coords = oemol.GetCoords() + sigma = 15 + for atom in oemol.GetAtoms(): + if atom.IsChiral(): + # Get neighbors + # neighbors_top : list(int) contains topology indices of the neighbors + # neighbors_oemol : list(int) contains oemol indices of the neighbors + # neighbors: list(OEAtomBase) contains oemol atoms representing the neighbors + neighbors_top = [] + neighbors_oemol = [] + neighbors = [] + for nbr in atom.GetAtoms(): + neighbors_top.append(nbr.GetData("topology_index")) + neighbors_oemol.append(nbr.GetIdx()) + neighbors.append(nbr) + if len(neighbors) >= 4: # Exclude improper torsions for nitrogen chiral centers + # Specify atom order for calculating angle + contains_H = False + for i, nbr in enumerate(neighbors): # Replace H (if it exists in neighbors) with chiral center + if nbr.GetAtomicNum() == oechem.OEElemNo_H: + neighbors_top[i] = atom.GetData("topology_index") + neighbors_oemol[i] = atom.GetIdx() + contains_H = True + break + if not contains_H: # Replace first neighbor with chiral center if H doesn't exist + neighbors_top[0] = atom.GetData("topology_index") + neighbors_oemol[0] = atom.GetIdx() + + # Calculate improper angles + phase = coordinate_numba.cartesian_to_internal(np.array(coords[neighbors_oemol[0]], dtype='float64'), + np.array(coords[neighbors_oemol[1]], dtype='float64'), + np.array(coords[neighbors_oemol[2]], dtype='float64'), + np.array(coords[neighbors_oemol[3]], + dtype='float64'))[2] + # Determine phase in [-pi,+pi) interval + # phase = (np.pi)*units.radians+angle + phase = phase + np.pi # TODO: Check that this is the correct convention? + while (phase >= np.pi): + phase -= 2 * np.pi + while (phase < -np.pi): + phase += 2 * np.pi + phase *= units.radian + + growth_idx = self._calculate_growth_idx(neighbors_top, growth_indices) + stereochemistry_force.addTorsion(neighbors_top[0], neighbors_top[1], neighbors_top[2], neighbors_top[3], + [phase, sigma, growth_idx]) + return torsion_force, stereochemistry_force def _select_torsions_without_h(self, torsion_list): """ @@ -1616,55 +1773,62 @@ def _determine_extra_angles(self, angle_force, reference_topology, growth_indice angle_force : simtk.openmm.CustomAngleForce The modified angle force """ - import itertools - if len(growth_indices)==0: + if len(growth_indices) == 0: return - angle_force_constant = 400.0*units.kilojoules_per_mole/units.radians**2 + angle_force_constant = 400.0 * units.kilojoules_per_mole / units.radians ** 2 + + # Identify modified residue(s) atoms = list(reference_topology.atoms()) growth_indices = list(growth_indices) - #get residue from first atom - residue = atoms[growth_indices[0].idx].residue try: - oemol = FFAllAngleGeometryEngine._oemol_from_residue(residue) - except Exception as e: - print("Could not generate an oemol from the residue.") - print(e) - - #get the omega geometry of the molecule: - omega = oeomega.OEOmega() - omega.SetMaxConfs(1) - omega.SetStrictStereo(False) #TODO: fix stereochem - omega(oemol) - - #we now have the residue as an oemol. Time to find the relevant angles. - #There's no equivalent to OEGetTorsions, so first find atoms that are relevant - #TODO: find out if that's really true - aromatic_pred = oechem.OEIsAromaticAtom() - heavy_pred = oechem.OEIsHeavy() - angle_criteria = oechem.OEAndAtom(aromatic_pred, heavy_pred) - - #get all heavy aromatic atoms: - #TODO: do this more efficiently - heavy_aromatics = list(oemol.GetAtoms(angle_criteria)) - for atom in heavy_aromatics: - #bonded_atoms = [bonded_atom for bonded_atom in list(atom.GetAtoms()) if bonded_atom in heavy_aromatics] - bonded_atoms = list(atom.GetAtoms()) - for angle_atoms in itertools.combinations(bonded_atoms, 2): + residues = [res for res in reference_topology.residues() if res.modified] + except AttributeError: + residues = [atoms[growth_indices[0].idx].residue] + + for residue in residues: + try: + oemol = FFAllAngleGeometryEngine.oemol_from_residue(residue) + except Exception as e: + print("Could not generate an oemol from the residue.") + print(e) + + # Get the omega geometry of the molecule: + omega = oeomega.OEOmega() + omega.SetMaxConfs(1) + omega.SetStrictStereo(False) # TODO: fix stereochem + omega(oemol) + + # We now have the residue as an oemol. Time to find the relevant angles. + # There's no equivalent to OEGetTorsions, so first find atoms that are relevant + # TODO: find out if that's really true + aromatic_pred = oechem.OEIsAromaticAtom() + heavy_pred = oechem.OEIsHeavy() + angle_criteria = oechem.OEAndAtom(aromatic_pred, heavy_pred) + + # Get all heavy aromatic atoms: + # TODO: do this more efficiently + heavy_aromatics = list(oemol.GetAtoms(angle_criteria)) + for atom in heavy_aromatics: + # bonded_atoms = [bonded_atom for bonded_atom in list(atom.GetAtoms()) if bonded_atom in heavy_aromatics] + bonded_atoms = list(atom.GetAtoms()) + for angle_atoms in itertools.combinations(bonded_atoms, 2): angle = oechem.OEGetAngle(oemol, angle_atoms[0], atom, angle_atoms[1]) - atom_indices = [angle_atoms[0].GetData("topology_index"), atom.GetData("topology_index"), angle_atoms[1].GetData("topology_index")] - angle_radians = angle*units.radian + atom_indices = [angle_atoms[0].GetData("topology_index"), atom.GetData("topology_index"), + angle_atoms[1].GetData("topology_index")] + angle_radians = angle * units.radian growth_idx = self._calculate_growth_idx(atom_indices, growth_indices) - #If this is a CustomAngleForce, we need to pass the parameters as a list, and it will have the growth_idx parameter. - #If it's a regular HarmonicAngleForce, there is no growth_index and the parameters are passed separately. + # If this is a CustomAngleForce, we need to pass the parameters as a list, and it will have the growth_idx parameter. + # If it's a regular HarmonicAngleForce, there is no growth_index and the parameters are passed separately. if isinstance(angle_force, openmm.CustomAngleForce): - angle_force.addAngle(atom_indices[0], atom_indices[1], atom_indices[2], [angle_radians, angle_force_constant, growth_idx]) + angle_force.addAngle(atom_indices[0], atom_indices[1], atom_indices[2], + [angle_radians, angle_force_constant, growth_idx]) elif isinstance(angle_force, openmm.HarmonicAngleForce): - angle_force.addAngle(atom_indices[0], atom_indices[1], atom_indices[2], angle_radians, angle_force_constant) + angle_force.addAngle(atom_indices[0], atom_indices[1], atom_indices[2], angle_radians, + angle_force_constant) else: raise ValueError("Angle force must be either CustomAngleForce or HarmonicAngleForce") return angle_force - def _calculate_growth_idx(self, particle_indices, growth_indices): """ Utility function to calculate the growth index of a particular force. @@ -1688,15 +1852,17 @@ def _calculate_growth_idx(self, particle_indices, growth_indices): new_atoms_in_force = particle_indices_set.intersection(growth_indices_set) if len(new_atoms_in_force) == 0: return 0 - new_atom_growth_order = [growth_indices_list.index(atom_idx)+1 for atom_idx in new_atoms_in_force] + new_atom_growth_order = [growth_indices_list.index(atom_idx) + 1 for atom_idx in new_atoms_in_force] return max(new_atom_growth_order) + class GeometrySystemGeneratorFast(GeometrySystemGenerator): """ Use updateParametersInContext to make energy evaluation fast. """ - def __init__(self, reference_system, growth_indices, parameter_name, add_extra_torsions=True, add_extra_angles=True, reference_topology=None, use_sterics=True, force_names=None, force_parameters=None, verbose=True): + def __init__(self, reference_system, growth_indices, parameter_name, add_extra_torsions=True, add_extra_angles=True, + reference_topology=None, reference_state_key=None, use_sterics=True, force_names=None, force_parameters=None, verbose=True): """ Parameters ---------- @@ -1718,13 +1884,17 @@ def __init__(self, reference_system, growth_indices, parameter_name, add_extra_t NB: We assume `reference_system` remains unmodified """ - self.sterics_cutoff_distance = 9.0 * units.angstroms # cutoff for sterics + self.sterics_cutoff_distance = 9.0 * units.angstroms # cutoff for sterics self.verbose = verbose + self._aminos = ['ALA', 'ARG', 'ASN', 'ASP', 'CYS', 'GLN', 'GLU', 'GLY', 'HIS', 'ILE', 'LEU', 'LYS', 'MET', + 'PHE', 'PRO', 'SER', 'THR', 'TRP', 'TYR', 'VAL'] + # Get list of particle indices for new and old atoms. - self._new_particle_indices = [ atom.idx for atom in growth_indices ] - self._old_particle_indices = [idx for idx in range(reference_system.getNumParticles()) if idx not in self._new_particle_indices] + self._new_particle_indices = [atom.idx for atom in growth_indices] + self._old_particle_indices = [idx for idx in range(reference_system.getNumParticles()) if + idx not in self._new_particle_indices] self._growth_indices = growth_indices # Determine forces to keep @@ -1746,20 +1916,30 @@ def __init__(self, reference_system, growth_indices, parameter_name, add_extra_t # Create new system, copying forces we will keep. self._growth_system = copy.deepcopy(self._reference_system) - #Extract the forces from the system to use for adding auxiliary angles and torsions - reference_forces = {reference_system.getForce(index).__class__.__name__ : reference_system.getForce(index) for index in range(reference_system.getNumForces())} + # Extract the forces from the system to use for adding auxiliary angles and torsions + reference_forces = {reference_system.getForce(index).__class__.__name__: reference_system.getForce(index) for + index in range(reference_system.getNumForces())} # Ensure 'canonical form' of System has all parameters turned on, or else we'll run into nonbonded exceptions self.current_growth_index = -1 + self._growth_parameter_name = parameter_name self.set_growth_parameter_index(len(self._growth_indices)) + # Add stereochemistry force to growth system + modified_stereochemistry_force = openmm.CustomTorsionForce(self._VonMisesTorsionForceEnergy.format(parameter_name)) + modified_stereochemistry_force.addPerTorsionParameter("phase") + modified_stereochemistry_force.addPerTorsionParameter("sigma") + modified_stereochemistry_force.addPerTorsionParameter("growth_idx") + modified_stereochemistry_force.addGlobalParameter(parameter_name, self.current_growth_index) + self._growth_system.addForce(modified_stereochemistry_force) + # Add extra ring-closing torsions, if requested. if add_extra_torsions: - if reference_topology==None: + if reference_topology == None: raise ValueError("Need to specify topology in order to add extra torsions.") - self._determine_extra_torsions(reference_forces['PeriodicTorsionForce'], reference_topology, growth_indices) + self._determine_extra_torsions(reference_forces['PeriodicTorsionForce'], modified_stereochemistry_force, reference_topology, reference_state_key, growth_indices) if add_extra_angles: - if reference_topology==None: + if reference_topology == None: raise ValueError("Need to specify topology in order to add extra angles") self._determine_extra_angles(reference_forces['HarmonicAngleForce'], reference_topology, growth_indices) # TODO: Precompute growth indices for force terms for speed @@ -1804,19 +1984,22 @@ def set_growth_parameter_index(self, growth_index, context=None): parameters = reference_force.getExceptionParameters(exception_index) this_growth_index = self._calculate_growth_idx(parameters[:2], self._growth_indices) if (growth_index < this_growth_index): - parameters[2] *= 1.0e-6 # WORKAROUND // TODO: Change to zero when OpenMM issue is fixed - parameters[4] *= 1.0e-6 # WORKAROUND // TODO: Change to zero when OpenMM issue is fixed + parameters[2] *= 1.0e-6 # WORKAROUND // TODO: Change to zero when OpenMM issue is fixed + parameters[4] *= 1.0e-6 # WORKAROUND // TODO: Change to zero when OpenMM issue is fixed growth_force.setExceptionParameters(exception_index, *parameters) # Update parameters in context if context is not None: growth_force.updateParametersInContext(context) + context.setParameter(self._growth_parameter_name, growth_index) + class PredHBond(oechem.OEUnaryBondPred): """ Example elaborating usage on: https://docs.eyesopen.com/toolkits/python/oechemtk/predicates.html#section-predicates-match """ + def __call__(self, bond): atom1 = bond.GetBgn() atom2 = bond.GetEnd() @@ -1842,7 +2025,7 @@ class ProposalOrderTools(object): def __init__(self, topology_proposal, verbose=True): self._topology_proposal = topology_proposal - self.verbose = True # DEBUG + self.verbose = True # DEBUG def determine_proposal_order(self, direction='forward'): """ @@ -1861,32 +2044,38 @@ def determine_proposal_order(self, direction='forward'): logp_torsion_choice : float log probability of the chosen torsions """ - if direction=='forward': + if direction == 'forward': topology = self._topology_proposal.new_topology system = self._topology_proposal.new_system - structure = parmed.openmm.load_topology(self._topology_proposal.new_topology, self._topology_proposal.new_system) + structure = parmed.openmm.load_topology(self._topology_proposal.new_topology, + self._topology_proposal.new_system) unique_atoms = self._topology_proposal.unique_new_atoms - #atoms_with_positions = [structure.atoms[atom_idx] for atom_idx in range(self._topology_proposal.n_atoms_new) if atom_idx not in self._topology_proposal.unique_new_atoms] - atoms_with_positions = [structure.atoms[atom_idx] for atom_idx in self._topology_proposal.new_to_old_atom_map.keys()] - elif direction=='reverse': + # atoms_with_positions = [structure.atoms[atom_idx] for atom_idx in range(self._topology_proposal.n_atoms_new) if atom_idx not in self._topology_proposal.unique_new_atoms] + old_structure = parmed.openmm.load_topology(self._topology_proposal.old_topology, + self._topology_proposal.old_system) + atoms_with_positions = [structure.atoms[atom_idx] for atom_idx in + self._topology_proposal.new_to_old_atom_map.keys()] + elif direction == 'reverse': topology = self._topology_proposal.old_topology system = self._topology_proposal.old_system - structure = parmed.openmm.load_topology(self._topology_proposal.old_topology, self._topology_proposal.old_system) + structure = parmed.openmm.load_topology(self._topology_proposal.old_topology, + self._topology_proposal.old_system) unique_atoms = self._topology_proposal.unique_old_atoms - atoms_with_positions = [structure.atoms[atom_idx] for atom_idx in self._topology_proposal.old_to_new_atom_map.keys()] + atoms_with_positions = [structure.atoms[atom_idx] for atom_idx in + self._topology_proposal.old_to_new_atom_map.keys()] else: raise ValueError("direction parameter must be either forward or reverse.") # Determine list of atoms to be added. - new_hydrogen_atoms = [ structure.atoms[idx] for idx in unique_atoms if structure.atoms[idx].atomic_number == 1 ] - new_heavy_atoms = [ structure.atoms[idx] for idx in unique_atoms if structure.atoms[idx].atomic_number != 1 ] + new_hydrogen_atoms = [structure.atoms[idx] for idx in unique_atoms if structure.atoms[idx].atomic_number == 1] + new_heavy_atoms = [structure.atoms[idx] for idx in unique_atoms if structure.atoms[idx].atomic_number != 1] # DEBUG - #print('STRUCTURE') - #print(structure) - #for atom in structure.atoms: + # print('STRUCTURE') + # print(structure) + # for atom in structure.atoms: # print(atom, atom.bonds, atom.angles, atom.dihedrals) - #print('') + # print('') def add_atoms(new_atoms, atoms_torsions): """ @@ -1907,20 +2096,21 @@ def add_atoms(new_atoms, atoms_torsions): """ from scipy import special logp_torsion_choice = 0.0 - while(len(new_atoms))>0: + while (len(new_atoms)) > 0: eligible_atoms = self._atoms_eligible_for_proposal(new_atoms, atoms_with_positions) - #randomize positions + # randomize positions eligible_atoms_in_order = np.random.choice(eligible_atoms, size=len(eligible_atoms), replace=False) - #the logp of this choice is log(1/n!) - #gamma is (n-1)!, log-gamma is more numerically stable. - logp_torsion_choice += -special.gammaln(len(eligible_atoms)+1) + # the logp of this choice is log(1/n!) + # gamma is (n-1)!, log-gamma is more numerically stable. + logp_torsion_choice += -special.gammaln(len(eligible_atoms) + 1) if (len(new_atoms) > 0) and (len(eligible_atoms) == 0): - raise Exception('new_atoms (%s) has remaining atoms to place, but eligible_atoms is empty.' % str(new_atoms)) + raise Exception( + 'new_atoms (%s) has remaining atoms to place, but eligible_atoms is empty.' % str(new_atoms)) - #choose the torsions + # choose the torsions for atom in eligible_atoms_in_order: chosen_torsion, logp_choice = self._choose_torsion(atoms_with_positions, atom) atoms_torsions[atom] = chosen_torsion @@ -1938,7 +2128,6 @@ def add_atoms(new_atoms, atoms_torsions): return atoms_torsions, logp_torsion_choice - def _atoms_eligible_for_proposal(self, new_atoms, atoms_with_positions): """ Get the set of atoms currently eligible for proposal @@ -1952,11 +2141,11 @@ def _atoms_eligible_for_proposal(self, new_atoms, atoms_with_positions): """ eligible_atoms = [] for atom in new_atoms: - #get all topological torsions for the appropriate atom + # get all topological torsions for the appropriate atom torsions = self._get_topological_torsions(atoms_with_positions, atom) - #go through the topological torsions (atom1 is always the new atom), and if one of them has - #atoms 2, 3, 4 in atoms_with_positions, the atom is eligible. + # go through the topological torsions (atom1 is always the new atom), and if one of them has + # atoms 2, 3, 4 in atoms_with_positions, the atom is eligible. for torsion in torsions: if torsion.atom2 in atoms_with_positions and torsion.atom3 in atoms_with_positions and torsion.atom4 in atoms_with_positions: eligible_atoms.append(atom) @@ -1985,7 +2174,7 @@ def _choose_torsion(self, atoms_with_positions, atom_for_proposal): raise NoTorsionError("No eligible torsions found for placing atom %s." % str(atom_for_proposal)) torsion_idx = np.random.randint(0, len(eligible_torsions)) torsion_selected = eligible_torsions[torsion_idx] - return torsion_selected, np.log(1.0/len(eligible_torsions)) + return torsion_selected, np.log(1.0 / len(eligible_torsions)) def _get_topological_torsions(self, atoms_with_positions, new_atom): """ @@ -2007,15 +2196,15 @@ def _get_topological_torsions(self, atoms_with_positions, new_atom): topological_torsions = list() atom1 = new_atom for bond12 in atom1.bonds: - atom2 = bond12.atom2 if bond12.atom1==atom1 else bond12.atom1 + atom2 = bond12.atom2 if bond12.atom1 == atom1 else bond12.atom1 if atom2 not in atoms_with_positions: continue for bond23 in atom2.bonds: - atom3 = bond23.atom2 if bond23.atom1==atom2 else bond23.atom1 + atom3 = bond23.atom2 if bond23.atom1 == atom2 else bond23.atom1 if (atom3 not in atoms_with_positions) or (atom3 in set([atom1, atom2])): continue for bond34 in atom3.bonds: - atom4 = bond34.atom2 if bond34.atom1==atom3 else bond34.atom1 + atom4 = bond34.atom2 if bond34.atom1 == atom3 else bond34.atom1 if (atom4 not in atoms_with_positions) or (atom4 in set([atom1, atom2, atom3])): continue topological_torsions.append((atom1, atom2, atom3, atom4)) @@ -2033,7 +2222,8 @@ def _get_topological_torsions(self, atoms_with_positions, new_atom): _logger.debug(new_atom.dihedrals) # Recode topological torsions as parmed Dihedral objects - topological_torsions = [ parmed.Dihedral(atoms[0], atoms[1], atoms[2], atoms[3]) for atoms in topological_torsions ] + topological_torsions = [parmed.Dihedral(atoms[0], atoms[1], atoms[2], atoms[3]) for atoms in + topological_torsions] return topological_torsions diff --git a/perses/rjmc/topology_proposal.py b/perses/rjmc/topology_proposal.py index 6f989979a..775335146 100644 --- a/perses/rjmc/topology_proposal.py +++ b/perses/rjmc/topology_proposal.py @@ -5,6 +5,7 @@ import simtk.openmm as openmm import simtk.openmm.app as app from collections import namedtuple +import mdtraj as md import copy import warnings import logging @@ -20,15 +21,17 @@ import openeye.oegraphsim as oegraphsim from perses.rjmc.geometry import FFAllAngleGeometryEngine from perses.storage import NetCDFStorageView +from perses.tests import utils try: from StringIO import StringIO except ImportError: from io import StringIO -import openmoltools +from openmoltools import forcefield_generators import base64 import logging import time import progressbar +import math from typing import List, Dict try: from subprocess import getoutput # If python 3 @@ -39,7 +42,7 @@ # CONSTANTS ################################################################################ -OESMILES_OPTIONS = oechem.OESMILESFlag_DEFAULT | oechem.OESMILESFlag_ISOMERIC | oechem.OESMILESFlag_Hydrogens +OESMILES_OPTIONS = oechem.OESMILESFlag_ISOMERIC | oechem.OESMILESFlag_Hydrogens #DEFAULT_ATOM_EXPRESSION = oechem.OEExprOpts_Aromaticity | oechem.OEExprOpts_Hybridization #| oechem.OEExprOpts_EqAromatic | oechem.OEExprOpts_EqHalogen | oechem.OEExprOpts_RingMember | oechem.OEExprOpts_EqCAliphaticONS #DEFAULT_BOND_EXPRESSION = oechem.OEExprOpts_Aromaticity | oechem.OEExprOpts_RingMember @@ -70,21 +73,24 @@ def append_topology(destination_topology, source_topology, exclude_residue_name= If specified, any residues matching this name are excluded. """ - newAtoms = {} + if exclude_residue_name is None: + exclude_residue_name = " " # something with 3 characters that is never a residue name + new_atoms = {} for chain in source_topology.chains(): - newChain = destination_topology.addChain(chain.id) + new_chain = destination_topology.addChain(chain.id) for residue in chain.residues(): - if (residue.name == exclude_residue_name): + if (residue.name[:3] == exclude_residue_name[:3]): continue - newResidue = destination_topology.addResidue(residue.name, newChain, residue.id) + new_residue = destination_topology.addResidue(residue.name, new_chain, residue.id) for atom in residue.atoms(): - newAtom = destination_topology.addAtom(atom.name, atom.element, newResidue, atom.id) - newAtoms[atom] = newAtom + new_atom = destination_topology.addAtom(atom.name, atom.element, new_residue, atom.id) + new_atoms[atom] = new_atom for bond in source_topology.bonds(): - if (bond[0].residue.name==exclude_residue_name) or (bond[1].residue.name==exclude_residue_name): + if (bond[0].residue.name[:3] == exclude_residue_name[:3]) or (bond[1].residue.name[:3] == exclude_residue_name[:3]): continue - # TODO: Preserve bond order info using extended OpenMM API - destination_topology.addBond(newAtoms[bond[0]], newAtoms[bond[1]]) + order = bond.order + destination_topology.addBond(new_atoms[bond[0]], new_atoms[bond[1]], order=order) + def deepcopy_topology(source_topology): """ @@ -763,8 +769,8 @@ def __init__(self, self._old_chemical_state_key = old_chemical_state_key self._new_to_old_atom_map = new_to_old_atom_map self._old_to_new_atom_map = {old_atom : new_atom for new_atom, old_atom in new_to_old_atom_map.items()} - self._unique_new_atoms = list(set(range(self._new_topology._numAtoms))-set(self._new_to_old_atom_map.keys())) - self._unique_old_atoms = list(set(range(self._old_topology._numAtoms))-set(self._new_to_old_atom_map.values())) + self._unique_new_atoms = list(set(range(self._new_topology.getNumAtoms()))-set(self._new_to_old_atom_map.keys())) + self._unique_old_atoms = list(set(range(self._old_topology.getNumAtoms()))-set(self._new_to_old_atom_map.values())) self._old_alchemical_atoms = set(old_alchemical_atoms) if (old_alchemical_atoms is not None) else {atom for atom in range(old_system.getNumParticles())} self._new_alchemical_atoms = set(self._old_to_new_atom_map.values()).union(self._unique_new_atoms) self._old_environment_atoms = set(range(old_system.getNumParticles())) - self._old_alchemical_atoms @@ -894,9 +900,12 @@ def chemical_state_list(self): raise NotImplementedError("This ProposalEngine does not expose a list of possible chemical states.") class PolymerProposalEngine(ProposalEngine): - def __init__(self, system_generator, chain_id, proposal_metadata=None, verbose=False, always_change=True): + def __init__(self, system_generator, chain_id, proposal_metadata=None, verbose=False, always_change=True, aggregate=False): super(PolymerProposalEngine,self).__init__(system_generator, proposal_metadata=proposal_metadata, verbose=verbose, always_change=always_change) self._chain_id = chain_id + self._aminos = ['ALA', 'ARG', 'ASN', 'ASP', 'CYS', 'GLN', 'GLU', 'GLY', 'HIS', 'ILE', 'LEU', 'LYS', 'MET', 'PHE', + 'SER', 'THR', 'TRP', 'TYR', 'VAL'] # Note this does not include PRO since there's a problem with OpenMM's template DEBUG + self._aggregate = aggregate def propose(self, current_system, current_topology, current_metadata=None): """ @@ -920,13 +929,9 @@ def propose(self, current_system, current_topology, current_metadata=None): old_topology = app.Topology() append_topology(old_topology, current_topology) - # new_topology : simtk.openmm.app.Topology - new_topology = app.Topology() - append_topology(new_topology, current_topology) - # Check that old_topology and old_system have same number of atoms. old_system = current_system - old_topology_natoms = sum([1 for atom in old_topology.atoms()]) # number of topology atoms + old_topology_natoms = old_topology.getNumAtoms() # number of topology atoms old_system_natoms = old_system.getNumParticles() if old_topology_natoms != old_system_natoms: msg = 'PolymerProposalEngine: old_topology has %d atoms, while old_system has %d atoms' % (old_topology_natoms, old_system_natoms) @@ -934,67 +939,84 @@ def propose(self, current_system, current_topology, current_metadata=None): # metadata : dict, key = 'chain_id' , value : str metadata = current_metadata - if metadata == None: + if metadata is None: metadata = dict() + # old_chemical_state_key : str old_chemical_state_key = self.compute_state_key(old_topology) - # chain_id : str - chain_id = self._chain_id - # save old indices for mapping -- could just directly save positions instead + # index_to_new_residues : dict, key : int (index) , value : str (three letter name of proposed residue) + index_to_new_residues, metadata = self._choose_mutant(old_topology, metadata) - # EDITING new_topology - # atom : simtk.openmm.app.topology.Atom - for atom in new_topology.atoms(): - # atom.old_index : int - atom.old_index = atom.index - - index_to_new_residues, metadata = self._choose_mutant(new_topology, metadata) - # residue_map : list(tuples : simtk.openmm.app.topology.Residue (existing residue), str (three letter residue name of proposed residue)) - residue_map = self._generate_residue_map(new_topology, index_to_new_residues) + # residue_map : list(tuples : simtk.openmm.app.topology.Residue (existing residue), str (three letter name of proposed residue)) + residue_map = self._generate_residue_map(old_topology, index_to_new_residues) for (res, new_name) in residue_map: if res.name == new_name: del(index_to_new_residues[res.index]) if len(index_to_new_residues) == 0: atom_map = dict() - for atom in new_topology.atoms(): + for atom in old_topology.atoms(): atom_map[atom.index] = atom.index if self.verbose: print('PolymerProposalEngine: No changes to topology proposed, returning old system and topology') topology_proposal = TopologyProposal(new_topology=old_topology, new_system=old_system, old_topology=old_topology, old_system=old_system, old_chemical_state_key=old_chemical_state_key, new_chemical_state_key=old_chemical_state_key, logp_proposal=0.0, new_to_old_atom_map=atom_map) return topology_proposal + elif len(index_to_new_residues) > 1: + raise Exception("Attempting to mutate more than one residue at once: ", index_to_new_residues, " The geometry engine cannot handle this.") + # Add modified_aa property to residues in old topology + for res in old_topology.residues(): + res.modified_aa = True if res.index in index_to_new_residues.keys() else False - # new_topology : simtk.openmm.app.Topology extra atoms from old residue have been deleted, missing atoms in new residue not yet added + # Identify differences between old topology and proposed changes + # excess_atoms : list(simtk.openmm.app.topology.Atom) atoms from existing residue not in new residue + # excess_bonds : list(tuple (simtk.openmm.app.topology.Atom, simtk.openmm.app.topology.Atom)) bonds from existing residue not in new residue # missing_atoms : dict, key : simtk.openmm.app.topology.Residue, value : list(simtk.openmm.app.topology._TemplateAtomData) - new_topology, missing_atoms = self._delete_excess_atoms(new_topology, residue_map) - # new_topology : simtk.openmm.app.Topology new residue has all correct atoms for desired mutation - new_topology = self._add_new_atoms(new_topology, missing_atoms, residue_map) + # missing_bonds : list(tuple (simtk.openmm.app.topology.Atom, simtk.openmm.app.topology.Atom)) bonds from new residue not in existing residue + excess_atoms, excess_bonds, missing_atoms, missing_bonds = self._identify_differences(old_topology, residue_map) - atom_map = self._construct_atom_map(residue_map, old_topology, index_to_new_residues, new_topology) + # Delete excess atoms and bonds from old topology + excess_atoms_bonds = excess_atoms + excess_bonds + new_topology = self._delete_atoms(old_topology, excess_atoms_bonds) + + # Add missing atoms and bonds to new topology + new_topology = self._add_new_atoms(new_topology, missing_atoms, missing_bonds, residue_map) + # index_to_new_residues : dict, key : int (index) , value : str (three letter name of proposed residue) + atom_map = self._construct_atom_map(residue_map, old_topology, index_to_new_residues, new_topology) # new_chemical_state_key : str new_chemical_state_key = self.compute_state_key(new_topology) # new_system : simtk.openmm.System new_system = self._system_generator.build_system(new_topology) + # Adjust logp_propose based on HIS presence + his_residues = ['HID', 'HIE'] + old_residue = residue_map[0][0] + proposed_residue = residue_map[0][1] + if old_residue.name in his_residues and proposed_residue not in his_residues: + logp_propose = math.log(2) + elif old_residue.name not in his_residues and proposed_residue in his_residues: + logp_propose = math.log(0.5) + else: + logp_propose = 0.0 + # Create TopologyProposal. - topology_proposal = TopologyProposal(new_topology=new_topology, new_system=new_system, old_topology=old_topology, old_system=old_system, old_chemical_state_key=old_chemical_state_key, new_chemical_state_key=new_chemical_state_key, logp_proposal=0.0, new_to_old_atom_map=atom_map) + topology_proposal = TopologyProposal(new_topology=new_topology, new_system=new_system, old_topology=old_topology, old_system=old_system, old_chemical_state_key=old_chemical_state_key, new_chemical_state_key=new_chemical_state_key, logp_proposal=logp_propose, new_to_old_atom_map=atom_map) # Check that old_topology and old_system have same number of atoms. -# old_system = current_system - old_topology_natoms = sum([1 for atom in old_topology.atoms()]) # number of topology atoms + old_topology_natoms = old_topology.getNumAtoms() # number of topology atoms old_system_natoms = old_system.getNumParticles() if old_topology_natoms != old_system_natoms: msg = 'PolymerProposalEngine: old_topology has %d atoms, while old_system has %d atoms' % (old_topology_natoms, old_system_natoms) raise Exception(msg) # Check that new_topology and new_system have same number of atoms. - new_topology_natoms = sum([1 for atom in new_topology.atoms()]) # number of topology atoms + new_topology_natoms = new_topology.getNumAtoms() # number of topology atoms new_system_natoms = new_system.getNumParticles() if new_topology_natoms != new_system_natoms: msg = 'PolymerProposalEngine: new_topology has %d atoms, while new_system has %d atoms' % (new_topology_natoms, new_system_natoms) raise Exception(msg) - # Check to make sure no out-of-bounds atoms are present in new_to_old_atom_map + + # Check to make sure no out-of-bounds atoms are present in new_to_old_atom_map natoms_old = topology_proposal.old_system.getNumParticles() natoms_new = topology_proposal.new_system.getNumParticles() if not set(topology_proposal.new_to_old_atom_map.values()).issubset(range(natoms_old)): @@ -1002,7 +1024,7 @@ def propose(self, current_system, current_topology, current_metadata=None): msg += str(topology_proposal.new_to_old_atom_map) raise Exception(msg) if not set(topology_proposal.new_to_old_atom_map.keys()).issubset(range(natoms_new)): - msg = "Some new atoms in TopologyProposal.new_to_old_atom_map are not in span of old atoms (1..%d):\n" % natoms_new + msg = "Some new atoms in TopologyProposal.new_to_old_atom_map are not in span of new atoms (1..%d):\n" % natoms_new msg += str(topology_proposal.new_to_old_atom_map) raise Exception(msg) @@ -1021,141 +1043,168 @@ def _generate_residue_map(self, topology, index_to_new_residues): topology : simtk.openmm.app.Topology index_to_new_residues : dict key : int (index, zero-indexed in chain) - value : str (three letter residue name) + value : str (three letter name of proposed residue) Returns ------- residue_map : list(tuples) - simtk.openmm.app.topology.Residue (existing residue), str (three letter residue name of proposed residue) + simtk.openmm.app.topology.Residue (existing residue), str (three letter name of proposed residue) """ - # residue_map : list(tuples : simtk.openmm.app.topology.Residue (existing residue), str (three letter residue name of proposed residue)) + # residue_map : list(tuples : simtk.openmm.app.topology.Residue (existing residue), str (three letter name of proposed residue)) # r : simtk.openmm.app.topology.Residue, r.index : int, 0-indexed residue_map = [(r, index_to_new_residues[r.index]) for r in topology.residues() if r.index in index_to_new_residues] return residue_map - def _delete_excess_atoms(self, topology, residue_map): + def _identify_differences(self, topology, residue_map): + """ - delete excess atoms from old residues and identify new atoms for new residues + Identify excess atoms, excess bonds, missing atoms, and missing bonds. Arguments --------- topology : simtk.openmm.app.Topology residue_map : list(tuples) - simtk.openmm.app.topology.Residue (existing residue), str (three letter residue name of proposed residue) + simtk.openmm.app.topology.Residue (existing residue), str (three letter name of proposed residue) Returns ------- - topology : simtk.openmm.app.Topology - extra atoms from old residue have been deleted, missing atoms in new residue not yet added + excess_atoms : list(simtk.openmm.app.topology.Atom) + atoms from existing residue not in new residue + excess_bonds : list(tuple (simtk.openmm.app.topology.Atom, simtk.openmm.app.topology.Atom)) + bonds from existing residue not in new residue missing_atoms : dict key : simtk.openmm.app.topology.Residue value : list(simtk.openmm.app.topology._TemplateAtomData) + missing_bonds : list(tuple (simtk.openmm.app.topology.Atom, simtk.openmm.app.topology.Atom)) + bonds from new residue not in existing residue """ - # delete excess atoms from old residues and identify new atoms for new residues - # delete_atoms : list(simtk.openmm.app.topology.Atom) atoms from existing residue not in new residue - delete_atoms = list() + + # excess_atoms : list(simtk.openmm.app.topology.Atom) atoms from existing residue not in new residue + excess_atoms = list() + # excess_bonds : list(tuple (simtk.openmm.app.topology.Atom, simtk.openmm.app.topology.Atom)) bonds from existing residue not in new residue + excess_bonds = list() # missing_atoms : dict, key : simtk.openmm.app.topology.Residue, value : list(simtk.openmm.app.topology._TemplateAtomData) missing_atoms = dict() + # missing_bonds : list(tuple (simtk.openmm.app.topology.Atom, simtk.openmm.app.topology.Atom)) bonds from new residue not in existing residue + missing_bonds = list() + # residue : simtk.openmm.app.topology.Residue (existing residue) - # replace_with : str (three letter residue name of proposed residue) + # replace_with : str (three letter name of proposed residue) for k, (residue, replace_with) in enumerate(residue_map): - # chain_residues : list(simtk.openmm.app.topology.Residue) all residues in chain ==> why - chain_residues = list(residue.chain.residues()) - if residue == chain_residues[0]: - replace_with = 'N'+replace_with - residue_map[k] = (residue, replace_with) - if residue == chain_residues[-1]: - replace_with = 'C'+replace_with - residue_map[k] = (residue, replace_with) - # template : simtk.openmm.app.topology._TemplateData + + # Load residue template for residue to replace with template = self._templates[replace_with] - # standard_atoms : set of unique atom names within new residue : str - standard_atoms = set(atom.name for atom in template.atoms) + + # template_atom_names : dict, key : template atom index, value : template atom name + template_atom_names = {} + for atom in template.atoms: + template_atom_names[template.getAtomIndexByName(atom.name)] = atom.name # template_atoms : list(simtk.openmm.app.topology._TemplateAtomData) atoms in new residue template_atoms = list(template.atoms) - # atom_names : set of unique atom names within existing residue : str - atom_names = set(atom.name for atom in residue.atoms()) + # template_bonds : dict, key : simtk.openmm.app.topology.Atom, value : str template atom name + template_bonds = [(template_atom_names[bond[0]], template_atom_names[bond[1]]) for bond in template.bonds] + # old_atom_names : set of unique atom names within existing residue : str + old_atom_names = set(atom.name for atom in residue.atoms()) + + # Make a list of atoms in the existing residue that are not in the new residue # atom : simtk.openmm.app.topology.Atom in existing residue - for atom in residue.atoms(): # shouldn't remove hydrogen - if atom.name not in standard_atoms: - delete_atoms.append(atom) -# if residue == chain_residues[0]: # this doesn't apply? -# template_atoms = [atom for atom in template_atoms if atom.name not in ('P', 'OP1', 'OP2')] + for atom in residue.atoms(): + if atom.name not in template_atom_names.values(): + excess_atoms.append(atom) + + # if residue == chain_residues[0]: # this doesn't apply? + # template_atoms = [atom for atom in template_atoms if atom.name not in ('P', 'OP1', 'OP2')] + + # Make a list of atoms in the new residue that are not in the existing residue # missing : list(simtk.openmm.app.topology._TemplateAtomData) atoms in new residue not found in existing residue missing = list() # atom : simtk.openmm.app.topology._TemplateAtomData atoms in new residue for atom in template_atoms: - if atom.name not in atom_names: + if atom.name not in old_atom_names: missing.append(atom) - # BUG : error if missing = 0? + if len(missing) > 0: missing_atoms[residue] = missing - # topology : simtk.openmm.app.Topology extra atoms from old residue have been deleted, missing atoms in new residue not yet added - topology = self._to_delete(topology, delete_atoms) - topology = self._to_delete_bonds(topology, residue_map) - topology._numAtoms = len(list(topology.atoms())) + else: + missing_atoms[residue] = [] - return(topology, missing_atoms) + # Make a dictionary to map atom names in old residue to atom object + # old_atom_map : dict, key : str (atom name) , value : simtk.openmm.app.topology.Atom + old_atom_map = dict() + # atom : simtk.openmm.app.topology.Atom + for atom in residue.atoms(): + # atom.name : str + old_atom_map[atom.name] = atom + + # Make a dictionary to map atom names in new residue to atom object + # new_atom_map : dict, key : str (atom name) , value : simtk.openmm.app.topology.Atom + new_atom_map = dict() + # atom : simtk.openmm.app.topology.Atom + for atom in template_atoms: + # atom.name : str + new_atom_map[atom.name] = atom - def _to_delete(self, topology, delete_atoms): + # Make a list of bonds already existing in new residue + # old_bonds : list(tuple(str (atom name), str (atom name))) bonds between atoms both within old residue + old_bonds = list() + # bond : tuple(simtk.openmm.app.topology.Atom, simtk.openmm.app.topology.Atom) + for bond in topology.bonds(): + if bond[0].residue == residue and bond[1].residue == residue: + old_bonds.append((bond[0].name, bond[1].name)) + + # Add any bonds that exist in old residue but not in template to excess_bonds + for bond in old_bonds: + if bond not in template_bonds and (bond[1], bond[0]) not in template_bonds: + excess_bonds.append((old_atom_map[bond[0]], old_atom_map[bond[1]])) + # Add any bonds that exist in template but not in old residue to missing_bonds + for bond in template_bonds: + if bond not in old_bonds and (bond[1], bond[0]) not in old_bonds: + missing_bonds.append((new_atom_map[bond[0]], new_atom_map[bond[1]])) + + return(excess_atoms, excess_bonds, missing_atoms, missing_bonds) + + def _delete_atoms(self, topology, to_delete): """ - remove instances of atoms and corresponding bonds from topology + Delete excess atoms (and corresponding bonds) from specified topology - Arguments - --------- - topology : simtk.openmm.app.Topology - delete_atoms : list(simtk.openmm.app.topology.Atom) + Arguments + --------- + topology : simtk.openmm.app.Topology + excess_atoms : list(simtk.openmm.app.topology.Atom) atoms from existing residue not in new residue - Returns - ------- - topology : simtk.openmm.app.Topology - extra atoms from old residue have been deleted, missing atoms in new residue not yet added - """ - # delete_atoms : list(simtk.openmm.app.topology.Atom) atoms from existing residue not in new residue - # atom : simtk.openmm.app.topology.Atom - for atom in delete_atoms: - atom.residue._atoms.remove(atom) - for bond in topology._bonds: - if atom in bond: - topology._bonds = list(filter(lambda a: a != bond, topology._bonds)) - # topology : simtk.openmm.app.Topology extra atoms from old residue have been deleted, missing atoms in new residue not yet added - return topology - - def _to_delete_bonds(self, topology, residue_map): - """ - Remove any bonds between atoms in both new and old residue that do not belong in new residue - (e.g. breaking the ring in PRO) - Arguments - --------- - topology : simtk.openmm.app.Topology - residue_map : list(tuples) - simtk.openmm.app.topology.Residue (existing residue), str (three letter residue name of proposed residue) - Returns - ------- - topology : simtk.openmm.app.Topology - extra atoms and bonds from old residue have been deleted, missing atoms in new residue not yet added - """ + excess_bonds : list(tuple (simtk.openmm.app.topology.Atom, simtk.openmm.app.topology.Atom)) + bonds from old residue not in new residue - for residue, replace_with in residue_map: - # template : simtk.openmm.app.topology._TemplateData - template = self._templates[replace_with] + Returns + ------- + topology : simtk.openmm.app.Topology + extra atoms and bonds from old residue have been deleted, missing atoms and bottoms in new residue not yet added + """ - old_res_bonds = list() - # bond : tuple(simtk.openmm.app.topology.Atom, simtk.openmm.app.topology.Atom) - for atom1, atom2 in topology._bonds: - if atom1.residue == residue or atom2.residue == residue: - old_res_bonds.append((atom1.name, atom2.name)) - # make a list of bonds that should exist in new residue - # template_bonds : list(tuple(str (atom name), str (atom name))) bonds in template - template_bonds = [(template.atoms[bond[0]].name, template.atoms[bond[1]].name) for bond in template.bonds] - # add any bonds that exist in template but not in new residue - for bond in old_res_bonds: - if bond not in template_bonds and (bond[1],bond[0]) not in template_bonds: - topology._bonds = list(filter(lambda a: a != bond, topology._bonds)) - topology._bonds = list(filter(lambda a: a != (bond[1],bond[0]), topology._bonds)) - return topology - - def _add_new_atoms(self, topology, missing_atoms, residue_map): - """ - add new atoms (and corresponding bonds) to new residues + new_topology = app.Topology() + new_topology.setPeriodicBoxVectors(topology.getPeriodicBoxVectors()) + + # new_atoms : dict, key : simtk.openmm.app.topology.Atom, value : simtk.openmm.app.topology.Atom maps old atoms to the corresponding Atom in the new residue + new_atoms = {} + delete_set = set(to_delete) + + for chain in topology.chains(): + if chain not in delete_set: + new_chain = new_topology.addChain(chain.id) + for residue in chain.residues(): + new_residue = new_topology.addResidue(residue.name, new_chain, residue.id) + for atom in residue.atoms(): + if atom not in delete_set: + new_atom = new_topology.addAtom(atom.name, atom.element, new_residue, atom.id) + new_atom.old_index = atom.index + new_atoms[atom] = new_atom + for bond in topology.bonds(): + if bond[0] in new_atoms and bond[1] in new_atoms: + if bond not in delete_set and (bond[1], bond[0]) not in delete_set: + new_topology.addBond(new_atoms[bond[0]], new_atoms[bond[1]]) + return new_topology + + def _add_new_atoms(self, topology, missing_atoms, missing_bonds, residue_map): + """ + Add new atoms (and corresponding bonds) to new residues Arguments --------- @@ -1164,78 +1213,92 @@ def _add_new_atoms(self, topology, missing_atoms, residue_map): missing_atoms : dict key : simtk.openmm.app.topology.Residue value : list(simtk.openmm.app.topology._TemplateAtomData) + missing_bonds : list(tuple (simtk.openmm.app.topology.Atom, simtk.openmm.app.topology.Atom)) + bonds from new residue not in existing residue residue_map : list(tuples) simtk.openmm.app.topology.Residue, str (three letter residue name of new residue) Returns ------- topology : simtk.openmm.app.Topology - new residue has all correct atoms for desired mutation + new residues have all correct atoms and bonds for desired mutation """ - # add new atoms to new residues - # topology : simtk.openmm.app.Topology extra atoms from old residue have been deleted, missing atoms in new residue not yet added - # missing_atoms : dict, key : simtk.openmm.app.topology.Residue, value : list(simtk.openmm.app.topology._TemplateAtomData) - # residue_map : list(tuples : simtk.openmm.app.topology.Residue (old residue), str (three letter residue name of new residue)) - # new_atoms : list(simtk.openmm.app.topology.Atom) atoms that have been added to new residue - new_atoms = list() - # k : int - # residue_ent : tuple(simtk.openmm.app.topology.Residue (old residue), str (three letter residue name of new residue)) - for k, residue_ent in enumerate(residue_map): - # residue : simtk.openmm.app.topology.Residue (old residue) BUG : wasn't this editing the residue in place what is old and new map - residue = residue_ent[0] - # replace_with : str (three letter residue name of new residue) - replace_with = residue_ent[1] - # directly edit the simtk.openmm.app.topology.Residue instance - residue.name = replace_with - # load template to compare bonds - # template : simtk.openmm.app.topology._TemplateData - template = self._templates[replace_with] - # add each missing atom - # atom : simtk.openmm.app.topology._TemplateAtomData - try: - for atom in missing_atoms[residue]: + new_topology = app.Topology() + new_topology.setPeriodicBoxVectors(topology.getPeriodicBoxVectors()) + # new_atoms : dict, key : simtk.openmm.app.topology.Atom, value : simtk.openmm.app.topology.Atom maps old atoms to the corresponding Atom in the new residue + new_atoms = {} + # new_atom_names : dict, key : str new atom name, value : simtk.openmm.app.topology.Atom maps name of new atom to the corresponding Atom in the new residue + new_atom_names = {} + # old_residues : list(simtk.openmm.app.topology.Residue) + old_residues = [old.index for old, new in residue_map] + for chain in topology.chains(): + new_chain = new_topology.addChain(chain.id) + for residue in chain.residues(): + new_residue = new_topology.addResidue(residue.name, new_chain, residue.id) + # Add modified property to residues in new topology + new_residue.modified_aa = True if residue.index in old_residues else False + # Copy over atoms from old residue to new residue + for atom in residue.atoms(): # new_atom : simtk.openmm.app.topology.Atom - new_atom = topology.addAtom(atom.name, atom.element, residue) - # new_atoms : list(simtk.openmm.app.topology.Atom) - new_atoms.append(new_atom) - except KeyError: - pass - # make a dictionary to map atom names in new residue to atom object - # new_res_atoms : dict, key : str (atom name) , value : simtk.openmm.app.topology.Atom - new_res_atoms = dict() - # atom : simtk.openmm.app.topology.Atom - for atom in residue.atoms(): - # atom.name : str - new_res_atoms[atom.name] = atom - # make a list of bonds already existing in new residue - # new_res_bonds : list(tuple(str (atom name), str (atom name))) bonds between atoms both within new residue - new_res_bonds = list() - # bond : tuple(simtk.openmm.app.topology.Atom, simtk.openmm.app.topology.Atom) - for bond in topology._bonds: - if bond[0].residue == residue and bond[1].residue == residue: - new_res_bonds.append((bond[0].name, bond[1].name)) - # make a list of bonds that should exist in new residue - # template_bonds : list(tuple(str (atom name), str (atom name))) bonds in template - template_bonds = [(template.atoms[bond[0]].name, template.atoms[bond[1]].name) for bond in template.bonds] - # add any bonds that exist in template but not in new residue - for bond in new_res_bonds: - if bond not in template_bonds and (bond[1],bond[0]) not in template_bonds: - bonded_0 = new_res_atoms[bond[0]] - bonded_1 = new_res_atoms[bond[1]] - topology._bonds.remove((bonded_0, bonded_1)) - for bond in template_bonds: - if bond not in new_res_bonds and (bond[1],bond[0]) not in new_res_bonds: - # new_bonded_0 : simtk.openmm.app.topology.Atom - new_bonded_0 = new_res_atoms[bond[0]] - # new_bonded_1 : simtk.openmm.app.topology.Atom - new_bonded_1 = new_res_atoms[bond[1]] - topology.addBond(new_bonded_0, new_bonded_1) - topology._numAtoms = len(list(topology.atoms())) + new_atom = new_topology.addAtom(atom.name, atom.element, new_residue) + new_atom.old_index = atom.old_index + new_atoms[atom] = new_atom + new_atom_names[new_atom.name] = new_atom + # Check if old residue is in residue_map + # old_residue : simtk.openmm.app.topology.Residue (old residue) + # new_residue_name : str (three letter residue name of new residue) + for i, (old_residue, new_residue_name) in enumerate(residue_map): + if self._is_residue_equal(residue, old_residue): + # Add missing atoms to new residue + # atom : simtk.openmm.app.topology._TemplateAtomData + for atom in missing_atoms[old_residue]: + new_atom = new_topology.addAtom(atom.name, atom.element, new_residue) + new_atoms[atom] = new_atom + new_atom_names[new_atom.name] = new_atom + new_residue.name = residue_map[i][1] + + # Copy over bonds from topology to new topology + for bond in topology.bonds(): + new_topology.addBond(new_atoms[bond[0]], new_atoms[bond[1]]) + + for bond in missing_bonds: + new_topology.addBond(new_atom_names[bond[0].name], new_atom_names[bond[1].name]) + + return new_topology + + def _is_residue_equal(self, residue, other_residue): + """ + Check if residue is equal to other_residue based on their names, indices, ids, and chain ids. - # add new bonds to the new residues - return topology + Arguments + --------- + residue : simtk.openmm.app.topology.Residue + other_residue : simtk.openmm.app.topology.Residue + + Returns + ------- + boolean True if residues are equal, otherwise False + """ + return residue.name == other_residue.name and residue.index == other_residue.index and residue.chain.id == other_residue.chain.id and residue.id == other_residue.id def _construct_atom_map(self, residue_map, old_topology, index_to_new_residues, new_topology): + """ + Construct atom map (key: index to new residue, value: index to old residue) to supply as an argument to the TopologyProposal. + + Arguments + --------- + residue_map : list(tuples) + simtk.openmm.app.topology.Residue, str (three letter residue name of new residue) + old_topology : simtk.openmm.app.Topology + index_to_new_residues : dict, key : int (index) , value : str (three letter name of proposed residue) + new_topology : simtk.openmm.app.Topology + + Returns + ------- + atom_map : dict, key: int (index + new residues have all correct atoms and bonds for desired mutation + """ + # atom_map : dict, key : int (index of atom in old topology) , value : int (index of same atom in new topology) atom_map = dict() @@ -1263,35 +1326,59 @@ def match_backbone(old_residue, new_residue, atom_name): assert found_new_atom return new_atom.index, old_atom.index + # old_to_new_residues : dict, key : str old residue name, key : simtk.openmm.app.topology.Residue new residue + old_to_new_residues = {} + for old_residue in old_topology.residues(): + for new_residue in new_topology.residues(): + if old_residue.index == new_residue.index: + old_to_new_residues[old_residue.name] = new_residue + break + + # modified_residues : dict, key : index of old residue, value : proposed residue modified_residues = dict() - old_residues = dict() for map_entry in residue_map: - modified_residues[map_entry[0].index] = map_entry[0] + old_residue = map_entry[0] + modified_residues[old_residue.index] = old_to_new_residues[old_residue.name] + + # old_residues : dict, key : index of old residue, value : old residue + old_residues = dict() for residue in old_topology.residues(): if residue.index in index_to_new_residues.keys(): old_residues[residue.index] = residue - for k, atom in enumerate(new_topology.atoms()): - atom.index=k + + # Create initial atom map for atoms in new topology that are not part of modified residues + for atom in new_topology.atoms(): if atom.residue in modified_residues.values(): continue try: atom_map[atom.index] = atom.old_index except AttributeError: pass + + # Update atom map with atom mappings for residues that have been modified for index in index_to_new_residues.keys(): - old_oemol_res = FFAllAngleGeometryEngine._oemol_from_residue(old_residues[index]) - new_oemol_res = FFAllAngleGeometryEngine._oemol_from_residue(modified_residues[index]) - _ , local_atom_map = self._get_mol_atom_matches(old_oemol_res, new_oemol_res) + old_res = old_residues[index] + new_res = modified_residues[index] + + # Save index of first atom in old residue and new residue + for atom in old_res.atoms(): + first_atom_index_old = atom.index + break + for atom in new_res.atoms(): + first_atom_index_new = atom.index + break + old_oemol_res = FFAllAngleGeometryEngine.oemol_from_residue(old_res) + new_oemol_res = FFAllAngleGeometryEngine.oemol_from_residue(new_res) + # local_atom_map : dict, key : index of atom in new residue, value : index of atom in old residue. + local_atom_map = self._get_mol_atom_matches(old_oemol_res, new_oemol_res, first_atom_index_old, first_atom_index_new) for backbone_name in ['CA','N']: new_index, old_index = match_backbone(old_residues[index], modified_residues[index], backbone_name) local_atom_map[new_index] = old_index - atom_map.update(local_atom_map) - return atom_map - def _get_mol_atom_matches(self, current_molecule, proposed_molecule): + def _get_mol_atom_matches(self, current_molecule, proposed_molecule, first_atom_index_old, first_atom_index_new): """ Given two molecules, returns the mapping of atoms between them. @@ -1301,16 +1388,27 @@ def _get_mol_atom_matches(self, current_molecule, proposed_molecule): The current molecule in the sampler proposed_molecule : openeye.oechem.oemol object The proposed new molecule + first_atom_index_old : int + The index of the first atom in the old resiude/current molecule + first_atom_index_new : int + The index of the first atom in the new residue/proposed molecule + + Note: Since FFAllAngleGeometryEngine.oemol_from_residue creates a new topology for the specified residue, + the atom indices in the output oemol (i.e. current_molecule and proposed_molecule) are reset to start at 0. + Therefore, first_atom_index_old and first_atom_index_new are used to correct the indices such that they match + the atom indices of the original old and new residues. Returns ------- - matches : list of match - list of the matches between the molecules + new_to_old_atom_map : dict, key : index of atom in new residue, value : index of atom in old residue + """ + # Load current and proposed residues as OEGraphMol objects oegraphmol_current = oechem.OEGraphMol(current_molecule) oegraphmol_proposed = oechem.OEGraphMol(proposed_molecule) - mcs = oechem.OEMCSSearch(oechem.OEMCSType_Exhaustive) + # Instantiate Maximum Common Substructures (MCS) object and intialize properties + mcs = oechem.OEMCSSearch(oechem.OEMCSType_Exhaustive) atomexpr = oechem.OEExprOpts_Aromaticity | oechem.OEExprOpts_RingMember | oechem.OEExprOpts_Degree | oechem.OEExprOpts_AtomicNumber bondexpr = oechem.OEExprOpts_Aromaticity | oechem.OEExprOpts_RingMember mcs.Init(oegraphmol_current, atomexpr, bondexpr) @@ -1325,19 +1423,20 @@ def forcibly_matched(mcs, proposed, atom_name): new_atom = new_atom[0] return old_atom, new_atom - force_matches = list() + # Forcibly match C and O atoms in proposed residue to atoms in current residue for matched_atom_name in ['C','O']: - force_matches.append(oechem.OEHasAtomName(matched_atom_name)) - - for force_match in force_matches: + force_match = oechem.OEHasAtomName(matched_atom_name) old_atom, new_atom = forcibly_matched(mcs, oegraphmol_proposed, force_match) this_match = oechem.OEMatchPairAtom(old_atom, new_atom) assert mcs.AddConstraint(this_match) + # Generate matches using MCS mcs.SetMCSFunc(oechem.OEMCSMaxBondsCompleteCycles()) unique = True matches = [m for m in mcs.Match(oegraphmol_proposed, unique)] - if len(matches)==0: + + # Handle case where there are no matches + if len(matches) == 0: from perses.tests.utils import describe_oemol msg = 'No matches found in _get_mol_atom_matches.\n' msg += '\n' @@ -1347,24 +1446,30 @@ def forcibly_matched(mcs, proposed, atom_name): msg += 'oegraphmol_proposed:\n' msg += describe_oemol(oegraphmol_proposed) raise Exception(msg) + + # Select match and generate atom map match = np.random.choice(matches) new_to_old_atom_map = {} - for matchpair in match.GetAtoms(): - old_index = matchpair.pattern.GetData("topology_index") - new_index = matchpair.target.GetData("topology_index") - if old_index < 0 or new_index < 0: + for match_pair in match.GetAtoms(): + if match_pair.pattern.GetAtomicNum() == 1 and match_pair.target.GetAtomicNum() == 1: # Do not map hydrogens + continue + O2_index_current = current_molecule.NumAtoms() - 2 + O2_index_proposed = proposed_molecule.NumAtoms() -2 + if 'O2' in match_pair.pattern.GetName() and 'O2' in match_pair.target.GetName() and match_pair.pattern.GetIdx() == O2_index_current and match_pair.target.GetIdx() == O2_index_proposed: # Do not map O2 if its second to last index in atom (this O2 was added to oemol to complete the residue) continue + old_index = match_pair.pattern.GetData("topology_index") + new_index = match_pair.target.GetData("topology_index") new_to_old_atom_map[new_index] = old_index - return matches, new_to_old_atom_map + return new_to_old_atom_map def compute_state_key(self, topology): for chain in topology.chains(): if chain.id == self._chain_id: break chemical_state_key = '' - for (index, res) in enumerate(chain._residues): - if (index > 0): + for index, res in enumerate(chain.residues()): + if index > 0: chemical_state_key += '-' chemical_state_key += res.name @@ -1381,68 +1486,70 @@ class PointMutationEngine(PolymerProposalEngine): (using the first chain with the id, if there are multiple) proposal_metadata : dict -- OPTIONAL Contains information necessary to initialize proposal engine - max_point_mutants : int -- OPTIONAL - default = None residues_allowed_to_mutate : list(str) -- OPTIONAL default = None - allowed_mutations : list(list(tuple)) -- OPTIONAL + Contains residue ids + If not specified, engine assumes all residues (except ACE and NME caps) may be mutated. + allowed_mutations : list(tuple) -- OPTIONAL default = None ('residue id to mutate','desired mutant residue name (3-letter code)') Example: Desired systems are wild type T4 lysozyme, T4 lysozyme L99A, and T4 lysozyme L99A/M102Q allowed_mutations = [ - [('99','ALA')], - [('99','ALA'),('102','GLN')] + ('99','ALA'), + ('102','GLN') ] + If this is not specified, the engine will propose a random amino acid at a random location. always_change : Boolean -- OPTIONAL, default True - Have the proposal engine always propose another mutation - If allowed_mutations is not specified, always_change will require ALL - point mutations to be different - The proposal engine will choose number of locations specified by - max_point_mutants, and will require all of those residues to change - eg: if old topology included L99A and M102Q, the new proposal cannot - include L99A OR M102Q - (This is only relevant in cases where max_point_mutants > 1) + Have the proposal engine always propose a state different from the current state. + If the current state is WT, always propose a mutation. + If the current state is mutant, always propose a different mutant or WT. + aggregate : Boolean -- OPTIONAL + default = False + Have the proposal engine aggregate mutations. + If aggregate is set to False, the engine will undo mutants in the current topology such that the next proposal + if the WT state + If aggregate is set to True, the engine will not undo the mutants from the current topology, thereby allowing + each proposal to contain multiple mutations. """ - def __init__(self, wildtype_topology, system_generator, chain_id, proposal_metadata=None, max_point_mutants=None, residues_allowed_to_mutate=None, allowed_mutations=None, verbose=False, always_change=True): - super(PointMutationEngine,self).__init__(system_generator, chain_id, proposal_metadata=proposal_metadata, verbose=verbose, always_change=always_change) + def __init__(self, wildtype_topology, system_generator, chain_id, proposal_metadata=None, residues_allowed_to_mutate=None, allowed_mutations=None, verbose=False, always_change=True, aggregate=False): + super(PointMutationEngine,self).__init__(system_generator, chain_id, proposal_metadata=proposal_metadata, verbose=verbose, always_change=always_change, aggregate=aggregate) + + # Convert wildtype_topology to openmm + wildtype_topology = md.Topology.from_openmm(wildtype_topology) + wildtype_topology = wildtype_topology.to_openmm() # Check that provided topology has specified chain. - chain_ids_in_topology = [ chain.id for chain in wildtype_topology.chains() ] + chain_ids_in_topology = [chain.id for chain in wildtype_topology.chains()] if chain_id not in chain_ids_in_topology: raise Exception("Specified chain_id '%s' not found in provided wildtype_topology. Choices are: %s" % (chain_id, str(chain_ids_in_topology))) self._wildtype = wildtype_topology - self._max_point_mutants = max_point_mutants self._ff = system_generator.forcefield self._templates = self._ff._templates self._residues_allowed_to_mutate = residues_allowed_to_mutate - if allowed_mutations is not None: - for mutation in allowed_mutations: - mutation.sort() self._allowed_mutations = allowed_mutations if proposal_metadata is None: proposal_metadata = dict() self._metadata = proposal_metadata - if max_point_mutants is None and allowed_mutations is None: - raise Exception("Must specify either max_point_mutants or allowed_mutations.") - if max_point_mutants is not None and allowed_mutations is not None: - warnings.warn("PointMutationEngine: max_point_mutants and allowed_mutations were both specified -- max_point_mutants will be ignored") def _choose_mutant(self, topology, metadata): chain_id = self._chain_id - old_key = self.compute_state_key(topology) + old_key = self._compute_mutant_key(topology, chain_id) index_to_new_residues = self._undo_old_mutants(topology, chain_id, old_key) - if self._allowed_mutations is not None: - allowed_mutations = self._allowed_mutations - index_to_new_residues = self._choose_mutation_from_allowed(topology, chain_id, allowed_mutations, index_to_new_residues, old_key) - else: - # index_to_new_residues : dict, key : int (index) , value : str (three letter residue name) - index_to_new_residues = self._propose_mutations(topology, chain_id, index_to_new_residues, old_key) + if index_to_new_residues and not self._aggregate: # Starting state is mutant and mutations should not be aggregated + pass # At this point, index_to_new_residues contains mutations that need to be undone. This iteration will result in WT, + else: # Starting state is WT or starting state is mutant and mutations should be aggregated + index_to_new_residues = dict() + if self._allowed_mutations is not None: + allowed_mutations = self._allowed_mutations + index_to_new_residues = self._choose_mutation_from_allowed(topology, chain_id, allowed_mutations, index_to_new_residues, old_key) + else: + # index_to_new_residues : dict, key : int (index) , value : str (three letter residue name) + index_to_new_residues = self._propose_mutation(topology, chain_id, index_to_new_residues, old_key) # metadata['mutations'] : list(str (three letter WT residue name - index - three letter MUT residue name) ) metadata['mutations'] = self._save_mutations(topology, index_to_new_residues) - return index_to_new_residues, metadata def _undo_old_mutants(self, topology, chain_id, old_key): @@ -1452,11 +1559,10 @@ def _undo_old_mutants(self, topology, chain_id, old_key): for chain in topology.chains(): if chain.id == chain_id: break - residue_id_to_index = {residue.id : residue.index for residue in chain._residues} + residue_id_to_index = {residue.id : residue.index for residue in chain.residues()} for mutant in old_key.split('-'): old_res = mutant[:3] residue_id = mutant[3:-3] - new_res = mutant[-3:] index_to_new_residues[residue_id_to_index[residue_id]] = old_res return index_to_new_residues @@ -1469,9 +1575,14 @@ def _choose_mutation_from_allowed(self, topology, chain_id, allowed_mutations, i --------- topology : simtk.openmm.app.Topology chain_id : str - allowed_mutations : list(list(tuple)) - list of allowed mutant states; each entry in the list is a list because multiple mutations may be desired - tuple : (str, str) -- residue id and three-letter amino acid code of desired mutant + allowed_mutations : list(tuple) + list of allowed mutations -- each mutation is a tuple of the residue id and three-letter amino acid code of desired mutant + index_to_new_residues : dict + key : int (index, zero-indexed in chain) + value : str (three letter residue name) + contains information to mutate back to WT as starting point for new mutants + old_key : str + chemical_state_key for old topology Returns ------- @@ -1480,64 +1591,102 @@ def _choose_mutation_from_allowed(self, topology, chain_id, allowed_mutations, i value : str (three letter residue name) """ - # chain : simtk.openmm.app.topology.Chain + # Set chain and create id-index mapping for residues in chain chain_found = False for anychain in topology.chains(): if anychain.id == chain_id: + # chain : simtk.openmm.app.topology.Chain chain = anychain chain_found = True break if not chain_found: - chains = [ chain.id for chain in topology.chains() ] + chains = [chain.id for chain in topology.chains()] raise Exception("Chain '%s' not found in Topology. Chains present are: %s" % (chain_id, str(chains))) - residue_id_to_index = {residue.id : residue.index for residue in chain._residues} - # location_prob : np.array, probability value for each residue location (uniform) + residue_id_to_index = {residue.id : residue.index for residue in chain.residues()} + + # Define location probabilities and propose a location/mutant state if self._always_change: - location_prob = np.array([1.0/len(allowed_mutations) for i in range(len(allowed_mutations)+1)]) + # location_prob : np.array, probability value for each mutant state at their respective locations in allowed_mutations (uniform). if old_key == 'WT': - location_prob[len(allowed_mutations)] = 0.0 + location_prob = [1.0 / len(allowed_mutations)] * len(allowed_mutations) + proposed_location = np.random.choice(range(len(allowed_mutations)), p=location_prob) else: - current_mutation = list() + current_mutations = [] for mutant in old_key.split('-'): residue_id = mutant[3:-3] new_res = mutant[-3:] - current_mutation.append((residue_id,new_res)) - current_mutation.sort() - location_prob[allowed_mutations.index(current_mutation)] = 0.0 - else: - location_prob = np.array([1.0/(len(allowed_mutations)+1.0) for i in range(len(allowed_mutations)+1)]) - proposed_location = np.random.choice(range(len(allowed_mutations)+1), p=location_prob) - if proposed_location == len(allowed_mutations): - # choose WT - pass + current_mutations.append((residue_id, new_res)) + + new_mutations = [] + for mutation in allowed_mutations: + if mutation not in current_mutations: + new_mutations.append(mutation) + if not new_mutations: + raise Exception("The old topology state contains all allowed mutations (%s). Please specify additional mutations." % allowed_mutations[0]) + + location_prob = [1.0 / (len(new_mutations))] * len(new_mutations) + proposed_location = np.random.choice(range(len(new_mutations)), p=location_prob) + else: - for residue_id, residue_name in allowed_mutations[proposed_location]: - # original_residue : simtk.openmm.app.topology.Residue - original_residue = chain._residues[residue_id_to_index[residue_id]] - if original_residue.name in ['HID','HIE']: - original_residue.name = 'HIS' - if original_residue.name == residue_name: - continue - # index_to_new_residues : dict, key : int (index of residue, 0-indexed), value : str (three letter residue name) - index_to_new_residues[residue_id_to_index[residue_id]] = residue_name - if residue_name == 'HIS': - his_state = ['HIE','HID'] - his_prob = np.array([0.5 for i in range(len(his_state))]) - his_choice = np.random.choice(range(len(his_state)),p=his_prob) - index_to_new_residues[residue_id_to_index[residue_id]] = his_state[his_choice] - # DEBUG - if self.verbose: print('Proposed mutation: %s %s %s' % (original_residue.name, residue_id, residue_name)) + if old_key == 'WT': + location_prob = [1.0 / (len(allowed_mutations)+1.0)] * (len(allowed_mutations)+1) + proposed_location = np.random.choice(range(len(allowed_mutations) + 1), p=location_prob) + else: + location_prob = [1.0 / len(allowed_mutations)] * len(allowed_mutations) + proposed_location = np.random.choice(range(len(allowed_mutations)), p=location_prob) + + # If the proposed state is the same as the current state + # index_to_new_residues : dict, key : int (index of residue, 0-indexed), value : str (three letter residue name) + if old_key == 'WT' and proposed_location == len(allowed_mutations): + # Choose WT + return index_to_new_residues + elif old_key != 'WT': + for mutant in old_key.split('-'): + residue_id = mutant[3:-3] + new_res = mutant[-3:] + if allowed_mutations.index((residue_id, new_res)) == proposed_location: + return index_to_new_residues + + residue_id = allowed_mutations[proposed_location][0] + residue_name = allowed_mutations[proposed_location][1] + # Verify residue with mutation exists in old topology + # original_residue : simtk.openmm.app.topology.Residue + original_residue = '' + for res in chain.residues(): + if res.index == residue_id_to_index[residue_id]: + original_residue = res + break + if not original_residue: + raise Exception("User-specified an allowed mutation at residue %s , but that residue does not exist" % residue_id) + + # Check if mutated residue's name is same as residue's name in old topology + if original_residue.name in ['HID', 'HIE']: + original_residue.name = 'HIS' + if original_residue.name == residue_name: + return index_to_new_residues + # Save proposed mutation to index_to_new_residues # index_to_new_residues : dict, key : int (index of residue, 0-indexed), value : str (three letter residue name) + index_to_new_residues[residue_id_to_index[residue_id]] = residue_name + + # Randomly choose HIS template ('HIS' does not exist as a template) + if residue_name == 'HIS': + his_state = ['HIE','HID'] + his_prob = [1/len(his_state)] * len(his_state) + his_choice = np.random.choice(range(len(his_state)), p=his_prob) + index_to_new_residues[residue_id_to_index[residue_id]] = his_state[his_choice] + print(index_to_new_residues) ## IVY return index_to_new_residues - def _propose_mutations(self, topology, chain_id, index_to_new_residues, old_key): + def _propose_mutation(self, topology, chain_id, index_to_new_residues): """ Arguments --------- topology : simtk.openmm.app.Topology chain_id : str index_to_new_residues : dict + key : int (index, zero-indexed in chain) + value : str (three letter residue name) contains information to mutate back to WT as starting point for new mutants old_key : str chemical_state_key for old topology @@ -1548,65 +1697,76 @@ def _propose_mutations(self, topology, chain_id, index_to_new_residues, old_key) key : int (index, zero-indexed in chain) value : str (three letter residue name) """ - # this shouldn't be here - aminos = ['ALA','ARG','ASN','ASP','CYS','GLN','GLU','GLY','HIS','ILE','LEU','LYS','MET','PHE','PRO','SER','THR','TRP','TYR','VAL'] + + # Set chain and create id-index mapping for residues in chain # chain : simtk.openmm.app.topology.Chain chain_found = False for anychain in topology.chains(): if anychain.id == chain_id: chain = anychain + chain_found = True + residue_id_to_index = {residue.id: residue.index for residue in chain.residues()} if self._residues_allowed_to_mutate is None: + chain_residues = [res for res in chain.residues() if res.name != 'ACE' and res.name != 'NME'] # num_residues : int - num_residues = len(chain._residues) - chain_residues = chain._residues - chain_found = True - residue_id_to_index = {residue.id : residue.index for residue in chain._residues} + num_residues = len(chain_residues) + else: + for res_id in self._residues_allowed_to_mutate: + if res_id not in residue_id_to_index.keys(): + raise Exception( + "Residue id '%s' not found in Topology. Residue ids present are: %s. " + "\n\t Note: The type of the residue id must be 'str'" % (res_id, str(residue_id_to_index.keys()))) + num_residues = len(self._residues_allowed_to_mutate) + chain_residues = self._mutable_residues(chain) break if not chain_found: - chains = [ chain.id for chain in topology.chains() ] + chains = [chain.id for chain in topology.chains()] raise Exception("Chain '%s' not found in Topology. Chains present are: %s" % (chain_id, str(chains))) - if self._residues_allowed_to_mutate is not None: - num_residues = len(self._residues_allowed_to_mutate) - chain_residues = self._mutable_residues(chain) + + # Define location probabilities # location_prob : np.array, probability value for each residue location (uniform) - location_prob = np.array([1.0/num_residues for i in range(num_residues)]) + location_prob = [1.0/num_residues] * num_residues + + # Propose a location at which to mutate the residue + # proposed_location : int, index of chosen entry in location_prob + proposed_location = np.random.choice(range(num_residues), p=location_prob) + + # Rename residue to HIS if it uses one of the HIS-derived templates + # original_residue : simtk.openmm.app.topology.Residue + original_residue = chain_residues[proposed_location] + if original_residue.name in ['HIE', 'HID']: + original_residue.name = 'HIS' + + if self._residues_allowed_to_mutate is None: + proposed_location = original_residue.index + else: + proposed_location = residue_id_to_index[self._residues_allowed_to_mutate[proposed_location]] + + # Define probabilities for amino acid options and choose one + # amino_prob : np.array, probability value for each amino acid option (uniform) + aminos = self._aminos if self._always_change: - residue_id_to_index = {residue.id : residue.index for residue in chain._residues} - current_mutation = dict() - if old_key != 'WT': - for mutant in old_key.split('-'): - residue_id = mutant[3:-3] - new_res = mutant[-3:] - current_mutation[residue_id] = new_res + amino_prob = [1.0/(len(aminos)-1)] * len(aminos) + amino_prob[aminos.index(original_residue.name)] = 0.0 + else: + amino_prob = [1.0/len(aminos)] * len(aminos) + # proposed_amino_index : int, index of three letter residue name in aminos list + proposed_amino_index = np.random.choice(range(len(aminos)), p=amino_prob) - for i in range(self._max_point_mutants): - # proposed_location : int, index of chosen entry in location_prob - proposed_location = np.random.choice(range(num_residues), p=location_prob) - # original_residue : simtk.openmm.app.topology.Residue - original_residue = chain_residues[proposed_location] - if original_residue.name in ['HIE','HID']: - original_residue.name = 'HIS' - # amino_prob : np.array, probability value for each amino acid option (uniform) - if self._always_change: - amino_prob = np.array([1.0/(len(aminos)-1) for l in range(len(aminos))]) - amino_prob[aminos.index(original_residue.name)] = 0.0 - else: - amino_prob = np.array([1.0/(len(aminos)) for k in range(len(aminos))]) - # proposed_amino_index : int, index of three letter residue name in aminos list - proposed_amino_index = np.random.choice(range(len(aminos)), p=amino_prob) - # index_to_new_residues : dict, key : int (index of residue, 0-indexed), value : str (three letter residue name) - if self._residues_allowed_to_mutate is not None: - proposed_location = residue_id_to_index[self._residues_allowed_to_mutate[proposed_location]] - index_to_new_residues[proposed_location] = aminos[proposed_amino_index] - if aminos[proposed_amino_index] == 'HIS': - his_state = ['HIE','HID'] - his_prob = np.array([0.5 for j in range(len(his_state))]) - his_choice = np.random.choice(range(len(his_state)),p=his_prob) - index_to_new_residues[proposed_location] = his_state[his_choice] + # Save proposed mutation to index_to_new_residues + # index_to_new_residues : dict, key : int (index of residue, 0-indexed), value : str (three letter residue name) + index_to_new_residues[proposed_location] = aminos[proposed_amino_index] + + # Randomly choose HIS template ('HIS' does not exist as a template) + if aminos[proposed_amino_index] == 'HIS': + his_state = ['HIE','HID'] + his_prob = [1 / len(his_state)] * len(his_state) + his_choice = np.random.choice(range(len(his_state)), p=his_prob) + index_to_new_residues[proposed_location] = his_state[his_choice] return index_to_new_residues def _mutable_residues(self, chain): - chain_residues = [residue for residue in chain._residues if residue.id in self._residues_allowed_to_mutate] + chain_residues = [residue for residue in chain.residues() if residue.id in self._residues_allowed_to_mutate] return chain_residues def _save_mutations(self, topology, index_to_new_residues): @@ -1627,25 +1787,26 @@ def _save_mutations(self, topology, index_to_new_residues): """ return [r.name+'-'+str(r.id)+'-'+index_to_new_residues[r.index] for r in topology.residues() if r.index in index_to_new_residues] - def compute_state_key(self, topology): - chemical_state_key = '' + def _compute_mutant_key(self, topology, chain_id): + mutant_key = '' + chain = '' wildtype = self._wildtype for anychain in topology.chains(): - if anychain.id == self._chain_id: + if anychain.id == chain_id: chain = anychain break for anywt_chain in wildtype.chains(): - if anywt_chain.id == self._chain_id: + if anywt_chain.id == chain_id: wt_chain = anywt_chain break - for wt_res, res in zip(wt_chain._residues, chain._residues): + for wt_res, res in zip(wt_chain.residues(), chain.residues()): if wt_res.name != res.name: - if chemical_state_key: - chemical_state_key+='-' - chemical_state_key+= str(wt_res.name)+str(res.id)+str(res.name) - if not chemical_state_key: - chemical_state_key = 'WT' - return chemical_state_key + if mutant_key: + mutant_key+='-' + mutant_key += str(wt_res.name)+str(res.id)+str(res.name) + if not mutant_key: + mutant_key = 'WT' + return mutant_key class PeptideLibraryEngine(PolymerProposalEngine): """ @@ -1871,13 +2032,15 @@ class SmallMoleculeSetProposalEngine(ProposalEngine): SystemGenerator initialized with the appropriate forcefields residue_name : str The name that will be used for small molecule residues in the topology + smiles : str + If specified, will use this smiles string (instead of generating it from the topology) to find match in molecule set proposal_metadata : dict metadata for the proposal engine storage : NetCDFStorageView, optional, default=None If specified, write statistics to this storage """ - def __init__(self, list_of_smiles, system_generator, residue_name='MOL', + def __init__(self, list_of_smiles, system_generator, residue_name='MOL', smiles=None, atom_expr=None, bond_expr=None, proposal_metadata=None, storage=None, always_change=True, atom_map=None): @@ -1887,7 +2050,7 @@ def __init__(self, list_of_smiles, system_generator, residue_name='MOL', self._allow_ring_breaking = True # allow ring breaking # Canonicalize all SMILES strings - self._smiles_list = [SmallMoleculeSetProposalEngine.canonicalize_smiles(smiles) for smiles in set(list_of_smiles)] + self._smiles_list = [SmallMoleculeSetProposalEngine.canonicalize_smiles(smiles) for smiles in set(list_of_smiles)] ## IVY self._n_molecules = len(self._smiles_list) @@ -1928,11 +2091,15 @@ def propose(self, current_system, current_topology, current_mol=None, proposed_m proposal : TopologyProposal object topology proposal object """ + # Determine SMILES string for current small molecule if current_mol is None: - current_mol_smiles, current_mol = self._topology_to_smiles(current_topology) + if current_metadata is None or current_metadata['smiles'] is None: + current_mol_smiles, current_mol = self._topology_to_smiles(current_topology) + else: + current_mol_smiles = current_metadata['smiles'] # Read smiles directly (instead of using topology_to_smiles) because otherwise stereochemistry is lost + current_mol = utils.smiles_to_oemol(current_mol_smiles) else: - # TODO: Make sure we're using canonical mol to smiles conversion current_mol_smiles = oechem.OEMolToSmiles(current_mol) # Remove the small molecule from the current Topology object @@ -1948,12 +2115,21 @@ def propose(self, current_system, current_topology, current_mol=None, proposed_m if proposed_mol is None: proposed_mol_smiles, proposed_mol, logp_proposal = self._propose_molecule(current_system, current_topology, current_mol_smiles) else: - # TODO: Make sure we're using canonical mol to smiles conversion - proposed_mol_smiles = oechem.OEMolToSmiles(current_mol) + proposed_mol_smiles = oechem.OEMolToSmiles(proposed_mol) logp_proposal = 0.0 + print("proposed smiles: ", proposed_mol_smiles) ## IVY # Build the new Topology object, including the proposed molecule new_topology = self._build_new_topology(current_receptor_topology, proposed_mol) + + # Add modified property to residues in the old topology + for res in current_topology.residues(): + res.modified_aa = False + + # Add modified_aa property to residues in the new topology + for res in new_topology.residues(): + res.modified_aa = False + new_mol_start_index, len_new_mol = self._find_mol_start_index(new_topology) # Generate an OpenMM System from the proposed Topology @@ -1981,6 +2157,8 @@ def propose(self, current_system, current_topology, current_mol=None, proposed_m old_idx = i adjusted_atom_map[i] = old_idx + utils.render_atom_mapping("atom_mapping.pdf", current_mol, proposed_mol, adjusted_atom_map) ## IVY + # Create the TopologyProposal onbject proposal = TopologyProposal(logp_proposal=logp_proposal, new_to_old_atom_map=adjusted_atom_map, old_topology=current_topology, new_topology=new_topology, @@ -2012,8 +2190,8 @@ def canonicalize_smiles(smiles): mol = oechem.OEMol() oechem.OESmilesToMol(mol, smiles) oechem.OEAddExplicitHydrogens(mol) - iso_can_smiles = oechem.OECreateSmiString(mol, OESMILES_OPTIONS) - return iso_can_smiles + new_smiles = oechem.OECreateSmiString(mol, OESMILES_OPTIONS) + return new_smiles def _topology_to_smiles(self, topology): """ @@ -2033,14 +2211,15 @@ def _topology_to_smiles(self, topology): molecule """ molecule_name = self._residue_name - matching_molecules = [res for res in topology.residues() if res.name==molecule_name] + + # matching_molecules = [res for res in topology.residues() if res.name == molecule_name] ## IVY # Find residue in topology by searching for residues with name "MOL" + matching_molecules = [res for res in topology.residues() if res.name[:3] == molecule_name[:3]] # Find residue in topology by searching for residues with name "MOL" if len(matching_molecules) != 1: raise ValueError("More than one residue with the same name!") mol_res = matching_molecules[0] oemol = forcefield_generators.generateOEMolFromTopologyResidue(mol_res) smiles_string = oechem.OECreateSmiString(oemol, OESMILES_OPTIONS) - final_smiles_string = smiles_string - return final_smiles_string, oemol + return smiles_string, oemol def compute_state_key(self, topology): """ @@ -2079,7 +2258,8 @@ def _find_mol_start_index(self, topology): the number of atoms in the molecule """ resname = self._residue_name - mol_residues = [res for res in topology.residues() if res.name==resname] + # mol_residues = [res for res in topology.residues() if res.name == resname] ## IVY # Find the residue by searching for residues with "MOL" + mol_residues = [res for res in topology.residues() if res.name[:3]==resname[:3]] # Find the residue by searching for residues with "MOL" if len(mol_residues)!=1: raise ValueError("There must be exactly one residue with a specific name in the topology. Found %d residues with name '%s'" % (len(mol_residues), resname)) mol_residue = mol_residues[0] @@ -2107,7 +2287,7 @@ def _build_new_topology(self, current_receptor_topology, oemol_proposed): _logger.info('Building new Topology object...') timer_start = time.time() - oemol_proposed.SetTitle(self._residue_name) + oemol_proposed.SetTitle(oemol_proposed.GetTitle()) mol_topology = forcefield_generators.generateTopologyFromOEMol(oemol_proposed) new_topology = app.Topology() append_topology(new_topology, current_receptor_topology) @@ -2322,7 +2502,8 @@ def _propose_molecule(self, system, topology, molecule_smiles, exclude_self=Fals proposed_smiles = self._smiles_list[proposed_smiles_idx] logp = np.log(reverse_probability) - np.log(forward_probability) from perses.tests.utils import smiles_to_oemol - proposed_mol = smiles_to_oemol(proposed_smiles) + # proposed_mol = smiles_to_oemol(proposed_smiles) + proposed_mol = smiles_to_oemol(proposed_smiles, "MOL%d" %proposed_smiles_idx) ## IVY add this back in return proposed_smiles, proposed_mol, logp def _calculate_probability_matrix(self, molecule_smiles_list): diff --git a/perses/samplers/samplers.py b/perses/samplers/samplers.py index 305d7af4f..2f9f395eb 100644 --- a/perses/samplers/samplers.py +++ b/perses/samplers/samplers.py @@ -463,7 +463,8 @@ def update_state(self): [system, topology, positions] = [self.sampler.thermodynamic_state.get_system(remove_thermostat=True), self.topology, self.sampler.sampler_state.positions] omm_topology = topology.to_openmm() #convert to OpenMM topology for proposal engine omm_topology.setPeriodicBoxVectors(self.sampler.sampler_state.box_vectors) #set the box vectors because in OpenMM topology has these... - topology_proposal = self.proposal_engine.propose(system, omm_topology) + current_metadata = {"smiles": self.state_key} + topology_proposal = self.proposal_engine.propose(system, omm_topology, current_metadata=current_metadata) if self.verbose: print("Proposed transformation: %s => %s" % (topology_proposal.old_chemical_state_key, topology_proposal.new_chemical_state_key)) # Determine state keys diff --git a/perses/tests/test_ivy.py b/perses/tests/test_ivy.py new file mode 100644 index 000000000..5600f7222 --- /dev/null +++ b/perses/tests/test_ivy.py @@ -0,0 +1,15 @@ +from openmmtools import cache +from simtk import openmm +from perses.tests.testsystems import AlanineDipeptideTestSystem +from perses.tests.testsystems import AlkanesTestSystem +from perses.tests.testsystems import KinaseInhibitorsTestSystem + +# testsystem = AlanineDipeptideTestSystem() +# testsystem = AlkanesTestSystem(storage_filename='output.nc') +testsystem = KinaseInhibitorsTestSystem() +# Build a system +system = testsystem.system_generators['vacuum'].build_system(testsystem.topologies['vacuum']) +# Retrieve a SAMSSampler +sams_sampler = testsystem.sams_samplers['explicit'] ## For alkanes test system and kinase inhibitor test system +# sams_sampler = testsystem.sams_samplers['implicit'] ## For alanine dipeptide test system +testsystem.exen_samplers['vacuum'].run(niterations=20) diff --git a/perses/tests/testsystems.py b/perses/tests/testsystems.py index 7a2698540..a0a295fdb 100644 --- a/perses/tests/testsystems.py +++ b/perses/tests/testsystems.py @@ -182,6 +182,7 @@ def __init__(self, constraints=app.HBonds, **kwargs): topologies = dict() positions = dict() pdbfile = PDBFile(pdb_filename) + topologies['vacuum'] = pdbfile.getTopology() positions['vacuum'] = pdbfile.getPositions(asNumpy=True) topologies['implicit'] = pdbfile.getTopology() @@ -200,8 +201,10 @@ def __init__(self, constraints=app.HBonds, **kwargs): 'always_change' : True # don't propose self-transitions } proposal_engines = dict() - chain_id = ' ' - allowed_mutations = [[('2','VAL')],[('2','LEU')],[('2','ILE')]] + chain_id = '1' + # allowed_mutations = [[('2','VAL')],[('2','LEU')],[('2','ILE')]] + allowed_mutations = [('2', 'ILE'), ('2', 'VAL')] + for environment in environments: proposal_engines[environment] = PointMutationEngine(topologies[environment],system_generators[environment], chain_id, proposal_metadata=proposal_metadata, allowed_mutations=allowed_mutations) @@ -1681,39 +1684,54 @@ class SmallMoleculeLibraryTestSystem(PersesTestSystem): """ def __init__(self, constraints=app.HBonds, premapped_json_dict=None, **kwargs): super(SmallMoleculeLibraryTestSystem, self).__init__(**kwargs) + # Expand molecules without explicit stereochemistry and make canonical isomeric SMILES. molecules = sanitizeSMILES(self.molecules) - molecules = canonicalize_SMILES(molecules) environments = ['explicit', 'vacuum'] temperature = 300*unit.kelvin pressure = 1.0*unit.atmospheres # Create a system generator for our desired forcefields. from perses.rjmc.topology_proposal import SystemGenerator - system_generators = dict() from pkg_resources import resource_filename + system_generators = dict() gaff_xml_filename = resource_filename('perses', 'data/gaff.xml') barostat = openmm.MonteCarloBarostat(pressure, temperature) - system_generators['explicit'] = SystemGenerator([gaff_xml_filename, 'tip3p.xml'], + system_generators['explicit'] = SystemGenerator([gaff_xml_filename, 'tip3p.xml'], use_antechamber=False, forcefield_kwargs={ 'nonbondedMethod' : app.CutoffPeriodic, 'nonbondedCutoff' : 9.0 * unit.angstrom, 'implicitSolvent' : None, 'constraints' : constraints }, barostat=barostat) - system_generators['vacuum'] = SystemGenerator([gaff_xml_filename], + system_generators['vacuum'] = SystemGenerator([gaff_xml_filename], use_antechamber=False, forcefield_kwargs={ 'nonbondedMethod' : app.NoCutoff, 'implicitSolvent' : None, 'constraints' : constraints }) - # # Create topologies and positions - # topologies = dict() positions = dict() + # Parametrize and generate residue templates for small molecule set from openmoltools import forcefield_generators - forcefield = app.ForceField(gaff_xml_filename, 'tip3p.xml') - forcefield.registerTemplateGenerator(forcefield_generators.gaffTemplateGenerator) + from io import StringIO + from perses.tests.utils import smiles_to_oemol, extractPositionsFromOEMOL + clinical_kinase_inhibitors_filename = resource_filename('perses', 'data/clinical-kinase-inhibitors.xml') + forcefield = app.ForceField(gaff_xml_filename, 'tip3p.xml', clinical_kinase_inhibitors_filename) + d_smiles_to_oemol = {smiles : smiles_to_oemol(smiles, "MOL%d" % i)for i, smiles in enumerate(molecules)} + + # ## Generate and ffxml, then add to forcefield + # ffxml, failed_molecule_list = forcefield_generators.generateForceFieldFromMolecules(list(d_smiles_to_oemol.values()), ignoreFailures=True) + # f = open('clinical-kinase-inhibitors.xml', 'w') + # f.write(ffxml) + # f.close() + # if failed_molecule_list: + # raise Exception("Failed to generate forcefield for the following molecules: ", failed_molecule_list) + # forcefield.loadFile(StringIO(ffxml)) # Create molecule in vacuum. - from perses.tests.utils import createOEMolFromSMILES, extractPositionsFromOEMOL - smiles = molecules[0] # current sampler state - molecule = createOEMolFromSMILES(smiles) + smiles = molecules[0] # current sampler state + print("smiles: ", smiles) + molecule = smiles_to_oemol(smiles, title=d_smiles_to_oemol[smiles].GetTitle()) + topologies['vacuum'] = forcefield_generators.generateTopologyFromOEMol(molecule) + for residue in topologies['vacuum'].residues(): ## IVY + print("residue name: ", residue.name) + positions['vacuum'] = extractPositionsFromOEMOL(molecule) # Create molecule in solvent. @@ -1724,29 +1742,26 @@ def __init__(self, constraints=app.HBonds, premapped_json_dict=None, **kwargs): # Set up the proposal engines. from perses.rjmc.topology_proposal import SmallMoleculeSetProposalEngine, PremappedSmallMoleculeSetProposalEngine, SmallMoleculeAtomMapper - proposal_metadata = { } + proposal_metadata = {} proposal_engines = dict() if not premapped_json_dict: for environment in environments: - proposal_engines[environment] = SmallMoleculeSetProposalEngine(molecules, system_generators[environment]) - + proposal_engines[environment] = SmallMoleculeSetProposalEngine(molecules, system_generators[environment], residue_name=d_smiles_to_oemol[smiles].GetTitle()) + else: atom_mapper = SmallMoleculeAtomMapper.from_json(premapped_json_dict) for environment in environments: - proposal_engines[environment] = PremappedSmallMoleculeSetProposalEngine(atom_mapper, system_generators[environment]) + proposal_engines[environment] = PremappedSmallMoleculeSetProposalEngine(atom_mapper, system_generators[environment], residue_name=d_smiles_to_oemol[smiles].GetTitle()) # Generate systems systems = dict() for environment in environments: systems[environment] = system_generators[environment].build_system(topologies[environment]) - # Define thermodynamic state of interest. - thermodynamic_states = dict() thermodynamic_states['explicit'] = states.ThermodynamicState(system=systems['explicit'], temperature=temperature, pressure=pressure) - thermodynamic_states['vacuum'] = states.ThermodynamicState(system=systems['vacuum'], temperature=temperature) - + thermodynamic_states['vacuum'] = states.ThermodynamicState(system=systems['vacuum'], temperature=temperature) # Create SAMS samplers from perses.samplers.samplers import ExpandedEnsembleSampler, SAMSSampler mcmc_samplers = dict() @@ -1764,8 +1779,8 @@ def __init__(self, constraints=app.HBonds, premapped_json_dict=None, **kwargs): sampler_state = states.SamplerState(positions=positions[environment]) mcmc_samplers[environment] = MCMCSampler(thermodynamic_states[environment], sampler_state, copy.deepcopy(self._move)) # reduce number of steps for testing - - exen_samplers[environment] = ExpandedEnsembleSampler(mcmc_samplers[environment], topologies[environment], chemical_state_key, proposal_engines[environment], self.geometry_engine, options={'nsteps':self._ncmc_nsteps}, storage=storage) + + exen_samplers[environment] = ExpandedEnsembleSampler(mcmc_samplers[environment], topologies[environment], chemical_state_key, proposal_engines[environment], self.geometry_engine, options={'nsteps':5000}, storage=storage) exen_samplers[environment].verbose = True sams_samplers[environment] = SAMSSampler(exen_samplers[environment], storage=storage) sams_samplers[environment].verbose = True @@ -1774,7 +1789,6 @@ def __init__(self, constraints=app.HBonds, premapped_json_dict=None, **kwargs): from perses.samplers.samplers import MultiTargetDesign target_samplers = { sams_samplers['explicit'] : 1.0, sams_samplers['vacuum'] : -1.0 } designer = MultiTargetDesign(target_samplers, storage=self.storage) - # Store things. self.molecules = molecules self.environments = environments @@ -1808,6 +1822,7 @@ def __init__(self, **kwargs): molecules = list() with open(smiles_filename, 'r') as csvfile: csvreader = csv.reader(csvfile, delimiter=',', quotechar='"') + # next(csvreader) ## Only use this if using the new version of clinical-kinase-inhibitors.csv (with 44 inhibitors) for row in csvreader: name = row[0] smiles = row[1] diff --git a/perses/tests/utils.py b/perses/tests/utils.py index c901a1cb9..763ce0946 100644 --- a/perses/tests/utils.py +++ b/perses/tests/utils.py @@ -322,13 +322,7 @@ def get_atoms_with_undefined_stereocenters(molecule, verbose=False): undefined_stereocenters = list() for atom in molecule.GetAtoms(): chiral = atom.IsChiral() - stereo = OEAtomStereo_Undefined - if atom.HasStereoSpecified(OEAtomStereo_Tetrahedral): - v = list() - for nbr in atom.GetAtoms(): - v.append(nbr) - stereo = atom.GetStereo(v, OEAtomStereo_Tetrahedral) - + stereo = oechem.OEPerceiveCIPStereo(molecule, atom) if chiral and (stereo == OEAtomStereo_Undefined): undefined_stereocenters.append(atom) if verbose: @@ -439,6 +433,7 @@ def smiles_to_oemol(smiles_string, title="MOL"): oechem.OETriposAtomNames(mol) oechem.OETriposBondTypeNames(mol) omega = oeomega.OEOmega() + omega.SetStrictStereo(True) omega.SetMaxConfs(1) omega(mol) return mol @@ -488,11 +483,13 @@ def sanitizeSMILES(smiles_list, mode='drop', verbose=False): 4 """ - from openeye.oechem import OEGraphMol, OESmilesToMol, OECreateIsoSmiString + from openeye.oechem import OEGraphMol, OESmilesToMol sanitized_smiles_set = set() + OESMILES_OPTIONS = oechem.OESMILESFlag_ISOMERIC | oechem.OESMILESFlag_Hydrogens for smiles in smiles_list: molecule = OEGraphMol() OESmilesToMol(molecule, smiles) + oechem.OEAddExplicitHydrogens(molecule) if verbose: molecule.SetTitle(smiles) @@ -511,13 +508,13 @@ def sanitizeSMILES(smiles_list, mode='drop', verbose=False): print('original: %s', smiles) molecules = enumerate_undefined_stereocenters(molecule, verbose=verbose) for molecule in molecules: - isosmiles = OECreateIsoSmiString(molecule) - if verbose: print('expanded: %s', isosmiles) - sanitized_smiles_set.add(isosmiles) + smiles_string = oechem.OECreateSmiString(molecule, OESMILES_OPTIONS) + sanitized_smiles_set.add(smiles_string) + if verbose: print('expanded: %s', smiles_string) else: # Convert to OpenEye's canonical isomeric SMILES. - isosmiles = OECreateIsoSmiString(molecule) - sanitized_smiles_set.add(isosmiles) + smiles_string = oechem.OECreateSmiString(molecule, OESMILES_OPTIONS) + sanitized_smiles_set.add(smiles_string) sanitized_smiles_list = list(sanitized_smiles_set) return sanitized_smiles_list @@ -613,8 +610,7 @@ def add_molecule(mol): oedepict.OERenderMolecule(ofs, ext, rdisp) ofs.close() - -def canonicalize_SMILES(smiles_list): +def canonicalize_SMILES(smiles_list): # This is not used, use SmallMoleculeSetProposalEngine.canonicalize_smiles() in topology_proposal.py instead """Ensure all SMILES strings end up in canonical form. Stereochemistry must already have been expanded. SMILES strings are converted to a OpenEye Topology and back again. @@ -629,13 +625,13 @@ def canonicalize_SMILES(smiles_list): """ # Round-trip each molecule to a Topology to end up in canonical form - from openmoltools.forcefield_generators import generateOEMolFromTopologyResidue, generateTopologyFromOEMol + from openmoltools.forcefield_generators import generateTopologyFromOEMol, generateOEMolFromTopologyResidue from openeye import oechem canonical_smiles_list = list() for smiles in smiles_list: molecule = smiles_to_oemol(smiles) topology = generateTopologyFromOEMol(molecule) - residues = [ residue for residue in topology.residues() ] + residues = [residue for residue in topology.residues()] new_molecule = generateOEMolFromTopologyResidue(residues[0]) new_smiles = oechem.OECreateIsoSmiString(new_molecule) canonical_smiles_list.append(new_smiles)