Skip to content

Commit

Permalink
WIP Use PDBInf
Browse files Browse the repository at this point in the history
  • Loading branch information
richardjgowers committed Feb 22, 2023
1 parent 628a4e0 commit faa9a9b
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 134 deletions.
1 change: 1 addition & 0 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ dependencies:
- openff-toolkit
- openff-units >=0.2.0
- openff-models >=0.0.4
- pdbinf
- pip
- pydantic
- pytest
Expand Down
157 changes: 23 additions & 134 deletions gufe/components/proteincomponent.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,11 @@
# This code is part of OpenFE and is licensed under the MIT license.
# For details, see https://github.com/OpenFreeEnergy/gufe
import ast
import json
import io
import numpy as np
from os import PathLike
from typing import Union, Optional
from collections import defaultdict
from typing import Union

from openmm import app
from openmm import unit as omm_unit
import pdbinf

from rdkit import Chem
from rdkit.Chem.rdchem import Mol, Atom, Conformer, EditableMol, BondType
Expand Down Expand Up @@ -46,10 +42,6 @@
}


negative_ions = ["F", "CL", "Br", "I"]
positive_ions = ["NA", "MG", "ZN"]


class ProteinComponent(ExplicitMoleculeComponent):
"""Wrapper around a Protein representation.
Expand All @@ -67,7 +59,6 @@ class ProteinComponent(ExplicitMoleculeComponent):
of the protein, by default ""
"""

# FROM
@classmethod
def from_pdb_file(cls, pdb_file: str, name: str = ""):
"""
Expand All @@ -87,9 +78,10 @@ def from_pdb_file(cls, pdb_file: str, name: str = ""):
ProteinComponent
the deserialized molecule
"""
openmm_PDBFile = PDBFile(pdb_file)
return cls._from_openmmPDBFile(
openmm_PDBFile=openmm_PDBFile, name=name
return cls(
rdkit=pdbinf.load_pdb_file(pdb_file,
templates=[pdbinf.STANDARD_AA_DOC]),
name=name,
)

@classmethod
Expand All @@ -101,7 +93,7 @@ def from_pdbx_file(cls, pdbx_file: str, name=""):
Parameters
----------
pdbxfile : str
pdbx_file : str
path to the pdb file.
name : str, optional
name of the input protein, by default ""
Expand All @@ -111,117 +103,12 @@ def from_pdbx_file(cls, pdbx_file: str, name=""):
ProteinComponent
the deserialized molecule
"""
openmm_PDBxFile = PDBxFile(pdbx_file)
return cls._from_openmmPDBFile(
openmm_PDBFile=openmm_PDBxFile, name=name
return cls(
rdkit=pdbinf.load_pdbx_file(pdbx_file,
templates=[pdbinf.STANDARD_AA_DOC]),
name=name,
)

@classmethod
def _from_openmmPDBFile(cls, openmm_PDBFile: Union[PDBFile, PDBxFile],
name: str = ""):
"""Converts to our internal representation (rdkit Mol)
Parameters
----------
openmm_PDBFile : PDBFile or PDBxFile
object of the protein
name : str
name of the protein
Returns
-------
ProteinComponent
the deserialized molecule
"""
periodicTable = Chem.GetPeriodicTable()
mol_topology = openmm_PDBFile.getTopology()

rd_mol = Mol()
editable_rdmol = EditableMol(rd_mol)

# Add Atoms
for atom in mol_topology.atoms():
a = Atom(atom.element.atomic_number)

atom_monomerInfo = Chem.AtomPDBResidueInfo()
atom_monomerInfo.SetChainId(atom.residue.chain.id)
atom_monomerInfo.SetSerialNumber(int(atom.id))
atom_monomerInfo.SetSegmentNumber(int(atom.residue.chain.index))
atom_monomerInfo.SetInsertionCode(atom.residue.insertionCode)
atom_monomerInfo.SetName(atom.name)
atom_monomerInfo.SetResidueName(atom.residue.name)
atom_monomerInfo.SetResidueNumber(int(atom.residue.id))
atom_monomerInfo.SetIsHeteroAtom(False) # TODO: Do hetatoms

a.SetMonomerInfo(atom_monomerInfo)

# additonally possible:
# atom_monomerInfo.SetSecondaryStructure
# atom_monomerInfo.SetMonomerType
# atom_monomerInfo.SetAltLoc

editable_rdmol.AddAtom(a)

# Add Bonds
for bond in mol_topology.bonds():
bond_order = _BONDORDERS_OPENMM_TO_RDKIT[bond.order]
editable_rdmol.AddBond(
beginAtomIdx=bond.atom1.index,
endAtomIdx=bond.atom2.index,
order=bond_order,
)

# Set Positions
rd_mol = editable_rdmol.GetMol()
positions = np.array(
openmm_PDBFile.positions.value_in_unit(omm_unit.angstrom), ndmin=3
)

for frame_id, frame in enumerate(positions):
conf = Conformer(frame_id)
for atom_id, atom_pos in enumerate(frame):
conf.SetAtomPosition(atom_id, atom_pos)
rd_mol.AddConformer(conf)

# Add Additionals
# Formal Charge
netcharge = 0
for a in rd_mol.GetAtoms():
atomic_num = a.GetAtomicNum()
atom_name = a.GetMonomerInfo().GetName()

connectivity = sum(
int(bond.GetBondType()) for bond in a.GetBonds()
)
default_valence = periodicTable.GetDefaultValence(atomic_num)

if connectivity == 0: # ions:
if atom_name in positive_ions:
fc = default_valence # e.g. Sodium ions
elif atom_name in negative_ions:
fc = - default_valence # e.g. Chlorine ions
else: # -no-cov-
resn = a.GetMonomerInfo().GetResidueName()
resind = int(a.GetMonomerInfo().GetResidueNumber())
raise ValueError(
"I don't know this Ion or something really went "
f"wrong! \t{atom_name}\t{resn}\t-{resind}\t"
f"connectivity{connectivity}"
)
elif default_valence > connectivity:
fc = - (default_valence - connectivity) # negative charge
elif default_valence < connectivity:
fc = + (connectivity - default_valence) # positive charge
else:
fc = 0 # neutral

a.SetFormalCharge(fc)
a.UpdatePropertyCache(strict=True)

netcharge += fc

return cls(rdkit=rd_mol, name=name)

@classmethod
def _from_dict(cls, ser_dict: dict, name: str = ""):
"""Deserialize from dict representation"""
Expand Down Expand Up @@ -283,15 +170,16 @@ def _from_dict(cls, ser_dict: dict, name: str = ""):

return cls(rdkit=rd_mol, name=name)

# TO
def to_openmm_topology(self) -> app.Topology:
def to_openmm_topology(self) -> "app.Topology":
"""Convert to an openmm Topology object
Returns
-------
openmm.app.Topology
resulting topology obj.
"""
from openmm import app

def reskey(m):
"""key for defining when a residue has changed from previous
Expand Down Expand Up @@ -358,16 +246,18 @@ def chainkey(m):

return top

def to_openmm_positions(self) -> omm_unit.Quantity:
"""
serialize the positions to openmm.unit.Quantity
def to_openmm_positions(self) -> "openmm.app.unit.Quantity":
"""serialize the positions to openmm.unit.Quantity
! only one frame at the moment!
Returns
-------
omm_unit.Quantity
Quantity containing protein atom positions
"""
from openmm import unit as omm_unit

np_pos = deserialize_numpy(self.to_dict()["conformers"][0])
openmm_pos = (
list(map(lambda x: np.array(x), np_pos)) * omm_unit.angstrom
Expand All @@ -376,8 +266,7 @@ def to_openmm_positions(self) -> omm_unit.Quantity:
return openmm_pos

def to_pdb_file(self, out_path: Union[str, bytes, PathLike[str], PathLike[bytes], io.TextIOBase]) -> str:
"""
serialize protein to pdb file.
"""Write protein to pdb file.
Parameters
----------
Expand Down Expand Up @@ -422,8 +311,7 @@ def to_pdb_file(self, out_path: Union[str, bytes, PathLike[str], PathLike[bytes]
def to_pdbx_file(
self, out_path: Union[str, bytes, PathLike[str], PathLike[bytes], io.TextIOBase]
) -> str:
"""
serialize protein to pdbx file.
"""Write protein to pdbx file.
Parameters
----------
Expand All @@ -435,6 +323,8 @@ def to_pdbx_file(
str
string path to the resulting pdbx.
"""
from openmm import unit as omm_unit

# get top:
top = self.to_openmm_topology()

Expand All @@ -460,7 +350,6 @@ def to_pdbx_file(

PDBxFile.writeFile(topology=top, positions=openmm_pos, file=out_file)


if must_close:
# we only close the file if we had to open it
out_file.close()
Expand Down
1 change: 1 addition & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ python_requires = >= 3.8
install_requires =
numpy
networkx
pdbinf

include_package_data = True
packages = find:
Expand Down

0 comments on commit faa9a9b

Please sign in to comment.