From faa9a9b277595e30161c83fe84042831cfcbfd48 Mon Sep 17 00:00:00 2001 From: richard Date: Wed, 22 Feb 2023 18:24:43 +0000 Subject: [PATCH] WIP Use PDBInf --- environment.yml | 1 + gufe/components/proteincomponent.py | 157 ++++------------------------ setup.cfg | 1 + 3 files changed, 25 insertions(+), 134 deletions(-) diff --git a/environment.yml b/environment.yml index 3c667a9a..4bd3d089 100644 --- a/environment.yml +++ b/environment.yml @@ -11,6 +11,7 @@ dependencies: - openff-toolkit - openff-units >=0.2.0 - openff-models >=0.0.4 + - pdbinf - pip - pydantic - pytest diff --git a/gufe/components/proteincomponent.py b/gufe/components/proteincomponent.py index 3059fceb..9edcbdde 100644 --- a/gufe/components/proteincomponent.py +++ b/gufe/components/proteincomponent.py @@ -1,15 +1,11 @@ # This code is part of OpenFE and is licensed under the MIT license. # For details, see https://github.com/OpenFreeEnergy/gufe -import ast -import json import io import numpy as np from os import PathLike -from typing import Union, Optional -from collections import defaultdict +from typing import Union -from openmm import app -from openmm import unit as omm_unit +import pdbinf from rdkit import Chem from rdkit.Chem.rdchem import Mol, Atom, Conformer, EditableMol, BondType @@ -46,10 +42,6 @@ } -negative_ions = ["F", "CL", "Br", "I"] -positive_ions = ["NA", "MG", "ZN"] - - class ProteinComponent(ExplicitMoleculeComponent): """Wrapper around a Protein representation. @@ -67,7 +59,6 @@ class ProteinComponent(ExplicitMoleculeComponent): of the protein, by default "" """ - # FROM @classmethod def from_pdb_file(cls, pdb_file: str, name: str = ""): """ @@ -87,9 +78,10 @@ def from_pdb_file(cls, pdb_file: str, name: str = ""): ProteinComponent the deserialized molecule """ - openmm_PDBFile = PDBFile(pdb_file) - return cls._from_openmmPDBFile( - openmm_PDBFile=openmm_PDBFile, name=name + return cls( + rdkit=pdbinf.load_pdb_file(pdb_file, + templates=[pdbinf.STANDARD_AA_DOC]), + name=name, ) @classmethod @@ -101,7 +93,7 @@ def from_pdbx_file(cls, pdbx_file: str, name=""): Parameters ---------- - pdbxfile : str + pdbx_file : str path to the pdb file. name : str, optional name of the input protein, by default "" @@ -111,117 +103,12 @@ def from_pdbx_file(cls, pdbx_file: str, name=""): ProteinComponent the deserialized molecule """ - openmm_PDBxFile = PDBxFile(pdbx_file) - return cls._from_openmmPDBFile( - openmm_PDBFile=openmm_PDBxFile, name=name + return cls( + rdkit=pdbinf.load_pdbx_file(pdbx_file, + templates=[pdbinf.STANDARD_AA_DOC]), + name=name, ) - @classmethod - def _from_openmmPDBFile(cls, openmm_PDBFile: Union[PDBFile, PDBxFile], - name: str = ""): - """Converts to our internal representation (rdkit Mol) - - Parameters - ---------- - openmm_PDBFile : PDBFile or PDBxFile - object of the protein - name : str - name of the protein - - Returns - ------- - ProteinComponent - the deserialized molecule - """ - periodicTable = Chem.GetPeriodicTable() - mol_topology = openmm_PDBFile.getTopology() - - rd_mol = Mol() - editable_rdmol = EditableMol(rd_mol) - - # Add Atoms - for atom in mol_topology.atoms(): - a = Atom(atom.element.atomic_number) - - atom_monomerInfo = Chem.AtomPDBResidueInfo() - atom_monomerInfo.SetChainId(atom.residue.chain.id) - atom_monomerInfo.SetSerialNumber(int(atom.id)) - atom_monomerInfo.SetSegmentNumber(int(atom.residue.chain.index)) - atom_monomerInfo.SetInsertionCode(atom.residue.insertionCode) - atom_monomerInfo.SetName(atom.name) - atom_monomerInfo.SetResidueName(atom.residue.name) - atom_monomerInfo.SetResidueNumber(int(atom.residue.id)) - atom_monomerInfo.SetIsHeteroAtom(False) # TODO: Do hetatoms - - a.SetMonomerInfo(atom_monomerInfo) - - # additonally possible: - # atom_monomerInfo.SetSecondaryStructure - # atom_monomerInfo.SetMonomerType - # atom_monomerInfo.SetAltLoc - - editable_rdmol.AddAtom(a) - - # Add Bonds - for bond in mol_topology.bonds(): - bond_order = _BONDORDERS_OPENMM_TO_RDKIT[bond.order] - editable_rdmol.AddBond( - beginAtomIdx=bond.atom1.index, - endAtomIdx=bond.atom2.index, - order=bond_order, - ) - - # Set Positions - rd_mol = editable_rdmol.GetMol() - positions = np.array( - openmm_PDBFile.positions.value_in_unit(omm_unit.angstrom), ndmin=3 - ) - - for frame_id, frame in enumerate(positions): - conf = Conformer(frame_id) - for atom_id, atom_pos in enumerate(frame): - conf.SetAtomPosition(atom_id, atom_pos) - rd_mol.AddConformer(conf) - - # Add Additionals - # Formal Charge - netcharge = 0 - for a in rd_mol.GetAtoms(): - atomic_num = a.GetAtomicNum() - atom_name = a.GetMonomerInfo().GetName() - - connectivity = sum( - int(bond.GetBondType()) for bond in a.GetBonds() - ) - default_valence = periodicTable.GetDefaultValence(atomic_num) - - if connectivity == 0: # ions: - if atom_name in positive_ions: - fc = default_valence # e.g. Sodium ions - elif atom_name in negative_ions: - fc = - default_valence # e.g. Chlorine ions - else: # -no-cov- - resn = a.GetMonomerInfo().GetResidueName() - resind = int(a.GetMonomerInfo().GetResidueNumber()) - raise ValueError( - "I don't know this Ion or something really went " - f"wrong! \t{atom_name}\t{resn}\t-{resind}\t" - f"connectivity{connectivity}" - ) - elif default_valence > connectivity: - fc = - (default_valence - connectivity) # negative charge - elif default_valence < connectivity: - fc = + (connectivity - default_valence) # positive charge - else: - fc = 0 # neutral - - a.SetFormalCharge(fc) - a.UpdatePropertyCache(strict=True) - - netcharge += fc - - return cls(rdkit=rd_mol, name=name) - @classmethod def _from_dict(cls, ser_dict: dict, name: str = ""): """Deserialize from dict representation""" @@ -283,8 +170,7 @@ def _from_dict(cls, ser_dict: dict, name: str = ""): return cls(rdkit=rd_mol, name=name) - # TO - def to_openmm_topology(self) -> app.Topology: + def to_openmm_topology(self) -> "app.Topology": """Convert to an openmm Topology object Returns @@ -292,6 +178,8 @@ def to_openmm_topology(self) -> app.Topology: openmm.app.Topology resulting topology obj. """ + from openmm import app + def reskey(m): """key for defining when a residue has changed from previous @@ -358,9 +246,9 @@ def chainkey(m): return top - def to_openmm_positions(self) -> omm_unit.Quantity: - """ - serialize the positions to openmm.unit.Quantity + def to_openmm_positions(self) -> "openmm.app.unit.Quantity": + """serialize the positions to openmm.unit.Quantity + ! only one frame at the moment! Returns @@ -368,6 +256,8 @@ def to_openmm_positions(self) -> omm_unit.Quantity: omm_unit.Quantity Quantity containing protein atom positions """ + from openmm import unit as omm_unit + np_pos = deserialize_numpy(self.to_dict()["conformers"][0]) openmm_pos = ( list(map(lambda x: np.array(x), np_pos)) * omm_unit.angstrom @@ -376,8 +266,7 @@ def to_openmm_positions(self) -> omm_unit.Quantity: return openmm_pos def to_pdb_file(self, out_path: Union[str, bytes, PathLike[str], PathLike[bytes], io.TextIOBase]) -> str: - """ - serialize protein to pdb file. + """Write protein to pdb file. Parameters ---------- @@ -422,8 +311,7 @@ def to_pdb_file(self, out_path: Union[str, bytes, PathLike[str], PathLike[bytes] def to_pdbx_file( self, out_path: Union[str, bytes, PathLike[str], PathLike[bytes], io.TextIOBase] ) -> str: - """ - serialize protein to pdbx file. + """Write protein to pdbx file. Parameters ---------- @@ -435,6 +323,8 @@ def to_pdbx_file( str string path to the resulting pdbx. """ + from openmm import unit as omm_unit + # get top: top = self.to_openmm_topology() @@ -460,7 +350,6 @@ def to_pdbx_file( PDBxFile.writeFile(topology=top, positions=openmm_pos, file=out_file) - if must_close: # we only close the file if we had to open it out_file.close() diff --git a/setup.cfg b/setup.cfg index f1279b5e..220abbc6 100644 --- a/setup.cfg +++ b/setup.cfg @@ -24,6 +24,7 @@ python_requires = >= 3.8 install_requires = numpy networkx + pdbinf include_package_data = True packages = find: