From f386ccb5874d82147c10358a638ddf6ba93a848d Mon Sep 17 00:00:00 2001 From: Patrick Kunzmann Date: Fri, 24 Jan 2025 15:12:06 +0100 Subject: [PATCH 1/9] Add `explicit_hydrogen` parameter --- src/biotite/interface/rdkit/mol.py | 27 +++++++++++++++++++++++---- tests/interface/test_rdkit.py | 10 ++++------ 2 files changed, 27 insertions(+), 10 deletions(-) diff --git a/src/biotite/interface/rdkit/mol.py b/src/biotite/interface/rdkit/mol.py index eef433179..500807cb0 100644 --- a/src/biotite/interface/rdkit/mol.py +++ b/src/biotite/interface/rdkit/mol.py @@ -76,6 +76,7 @@ def to_mol( kekulize=False, use_dative_bonds=False, include_extra_annotations=(), + explicit_hydrogen=True, ): """ Convert an :class:`.AtomArray` or :class:`.AtomArrayStack` into a @@ -105,6 +106,11 @@ def to_mol( are always included per default. These standard annotations can be accessed with :meth:`rdkit.Chem.rdchem.Atom.GetPDBResidueInfo()` for each atom in the returned :class:`rdkit.Chem.rdchem.Mol`. + explicit_hydrogen : bool, optional + If set to true, the conversion process expects that all hydrogen atoms are + explicit, i.e. each each hydrogen atom must be part of the :class:`AtomArray`. + If set to false, the conversion process treats all hydrogen atoms as implicit + and all hydrogen atoms in the :class:`AtomArray` are removed. Returns ------- @@ -141,12 +147,24 @@ def to_mol( HB3 HXT """ + hydrogen_mask = atoms.element == "H" + if explicit_hydrogen: + if not hydrogen_mask.any(): + warnings.warn( + "No hydrogen found in the input, although 'explicit_hydrogen' is 'True'" + ) + else: + atoms = atoms[..., ~hydrogen_mask] + mol = EditableMol(Mol()) has_annot = frozenset(atoms.get_annotation_categories()) extra_annot = set(include_extra_annotations) - _STANDARD_ANNOTATIONS + for i in range(atoms.array_length()): rdkit_atom = Atom(atoms.element[i].capitalize()) + if explicit_hydrogen: + rdkit_atom.SetNoImplicit(True) if "charge" in has_annot: rdkit_atom.SetFormalCharge(atoms.charge[i].item()) @@ -176,11 +194,12 @@ def to_mol( if atoms.bonds is None: raise BadStructureError("An AtomArray with associated BondList is required") - bonds = atoms.bonds.as_array() if kekulize: - bonds = bonds.copy() + bonds = atoms.bonds.copy() bonds.remove_aromaticity() - for atom_i, atom_j, bond_type in atoms.bonds.as_array(): + else: + bonds = atoms.bonds + for atom_i, atom_j, bond_type in bonds.as_array(): if not use_dative_bonds and bond_type == BondType.COORDINATION: bond_type = BondType.SINGLE mol.AddBond( @@ -367,7 +386,7 @@ def from_mol(mol, conformer_id=None, add_hydrogen=None): except KekulizeException: warnings.warn( "Kekulization failed, " - "using 'BondType.ANY' instead for aromatic bonds instead", + "using 'BondType.AROMATIC' instead for aromatic bonds instead", LossyConversionWarning, ) rdkit_bonds = list(mol.GetBonds()) diff --git a/tests/interface/test_rdkit.py b/tests/interface/test_rdkit.py index 2e342ae92..f9fc6245d 100644 --- a/tests/interface/test_rdkit.py +++ b/tests/interface/test_rdkit.py @@ -20,11 +20,9 @@ def _load_smiles(): return file.read().splitlines() -@pytest.mark.filterwarnings( - "ignore:" - "The coordinates are missing for some atoms. " - "The fallback coordinates will be used instead" -) +@pytest.mark.filterwarnings("ignore:Missing coordinates.*") +@pytest.mark.filterwarnings("ignore:.*coordinates are missing.*") +@pytest.mark.filterwarnings("ignore::biotite.interface.LossyConversionWarning") @pytest.mark.parametrize( "res_name", np.random.default_rng(0).choice(info.all_residues(), size=200).tolist() ) @@ -46,7 +44,7 @@ def test_conversion_from_biotite(res_name): # Some compounds in the CCD have missing coordinates assert np.allclose(test_atoms.coord, ref_atoms.coord, equal_nan=True) - # There should be now undefined bonds + # There should be no undefined bonds assert (test_atoms.bonds.as_array()[:, 2] != struc.BondType.ANY).all() # Kekulization returns one of multiple resonance structures, so the returned one # might not be the same as the input From 167c31a0d75990686d1f44be467ad11eda21e585 Mon Sep 17 00:00:00 2001 From: Patrick Kunzmann Date: Sun, 2 Feb 2025 11:15:24 +0100 Subject: [PATCH 2/9] Use canonical rdkit import --- doc/tutorial/interface/rdkit.rst | 20 ++++----- src/biotite/interface/rdkit/mol.py | 66 +++++++++++++----------------- 2 files changed, 36 insertions(+), 50 deletions(-) diff --git a/doc/tutorial/interface/rdkit.rst b/doc/tutorial/interface/rdkit.rst index ff9954a52..e91ee55d2 100644 --- a/doc/tutorial/interface/rdkit.rst +++ b/doc/tutorial/interface/rdkit.rst @@ -27,15 +27,14 @@ For a proper structural formula, we need to compute proper 2D coordinates first. import biotite.interface.rdkit as rdkit_interface import biotite.structure.info as struc + import rdkit.Chem.AllChem as Chem from rdkit.Chem.Draw import MolToImage - from rdkit.Chem.rdDepictor import Compute2DCoords - from rdkit.Chem.rdmolops import RemoveHs penicillin = struc.residue("PNN") mol = rdkit_interface.to_mol(penicillin) # We do not want to include explicit hydrogen atoms in the structural formula - mol = RemoveHs(mol) - Compute2DCoords(mol) + mol = Chem.RemoveHs(mol) + Chem.Compute2DCoords(mol) image = MolToImage(mol, size=(600, 400)) display(image) @@ -49,18 +48,13 @@ One way to to obtain them as :class:`.AtomArray` is passing a *SMILES* string to .. jupyter-execute:: - from rdkit.Chem import MolFromSmiles - from rdkit.Chem.rdDistGeom import EmbedMolecule - from rdkit.Chem.rdForceFieldHelpers import UFFOptimizeMolecule - from rdkit.Chem.rdmolops import AddHs - ERTAPENEM_SMILES = "C[C@@H]1[C@@H]2[C@H](C(=O)N2C(=C1S[C@H]3C[C@H](NC3)C(=O)NC4=CC=CC(=C4)C(=O)O)C(=O)O)[C@@H](C)O" - mol = MolFromSmiles(ERTAPENEM_SMILES) + mol = Chem.MolFromSmiles(ERTAPENEM_SMILES) # RDKit uses implicit hydrogen atoms by default, but Biotite requires explicit ones - mol = AddHs(mol) + mol = Chem.AddHs(mol) # Create a 3D conformer - conformer_id = EmbedMolecule(mol) - UFFOptimizeMolecule(mol) + conformer_id = Chem.EmbedMolecule(mol) + Chem.UFFOptimizeMolecule(mol) ertapenem = rdkit_interface.from_mol(mol, conformer_id) print(ertapenem) \ No newline at end of file diff --git a/src/biotite/interface/rdkit/mol.py b/src/biotite/interface/rdkit/mol.py index 500807cb0..eb31a0854 100644 --- a/src/biotite/interface/rdkit/mol.py +++ b/src/biotite/interface/rdkit/mol.py @@ -9,16 +9,8 @@ import warnings from collections import defaultdict import numpy as np -from rdkit.Chem.rdchem import ( - Atom, - AtomPDBResidueInfo, - Conformer, - EditableMol, - KekulizeException, - Mol, -) -from rdkit.Chem.rdchem import BondType as RDKitBondType -from rdkit.Chem.rdmolops import AddHs, Kekulize, SanitizeFlags, SanitizeMol +import rdkit.Chem.AllChem as Chem +from rdkit.Chem import SanitizeFlags from biotite.interface.version import requires_version from biotite.interface.warning import LossyConversionWarning from biotite.structure.atoms import AtomArray, AtomArrayStack @@ -31,26 +23,26 @@ BondType.TRIPLE: BondType.AROMATIC_TRIPLE, } _BIOTITE_TO_RDKIT_BOND_TYPE = { - BondType.ANY: RDKitBondType.UNSPECIFIED, - BondType.SINGLE: RDKitBondType.SINGLE, - BondType.DOUBLE: RDKitBondType.DOUBLE, - BondType.TRIPLE: RDKitBondType.TRIPLE, - BondType.QUADRUPLE: RDKitBondType.QUADRUPLE, - BondType.AROMATIC_SINGLE: RDKitBondType.AROMATIC, - BondType.AROMATIC_DOUBLE: RDKitBondType.AROMATIC, - BondType.AROMATIC_TRIPLE: RDKitBondType.AROMATIC, - BondType.AROMATIC: RDKitBondType.AROMATIC, + BondType.ANY: Chem.BondType.UNSPECIFIED, + BondType.SINGLE: Chem.BondType.SINGLE, + BondType.DOUBLE: Chem.BondType.DOUBLE, + BondType.TRIPLE: Chem.BondType.TRIPLE, + BondType.QUADRUPLE: Chem.BondType.QUADRUPLE, + BondType.AROMATIC_SINGLE: Chem.BondType.AROMATIC, + BondType.AROMATIC_DOUBLE: Chem.BondType.AROMATIC, + BondType.AROMATIC_TRIPLE: Chem.BondType.AROMATIC, + BondType.AROMATIC: Chem.BondType.AROMATIC, # Dative bonds may lead to a KekulizeException and may potentially be deprecated # in the future (https://github.com/rdkit/rdkit/discussions/6995) - BondType.COORDINATION: RDKitBondType.SINGLE, + BondType.COORDINATION: Chem.BondType.SINGLE, } _RDKIT_TO_BIOTITE_BOND_TYPE = { - RDKitBondType.UNSPECIFIED: BondType.ANY, - RDKitBondType.SINGLE: BondType.SINGLE, - RDKitBondType.DOUBLE: BondType.DOUBLE, - RDKitBondType.TRIPLE: BondType.TRIPLE, - RDKitBondType.QUADRUPLE: BondType.QUADRUPLE, - RDKitBondType.DATIVE: BondType.COORDINATION, + Chem.BondType.UNSPECIFIED: BondType.ANY, + Chem.BondType.SINGLE: BondType.SINGLE, + Chem.BondType.DOUBLE: BondType.DOUBLE, + Chem.BondType.TRIPLE: BondType.TRIPLE, + Chem.BondType.QUADRUPLE: BondType.QUADRUPLE, + Chem.BondType.DATIVE: BondType.COORDINATION, } _STANDARD_ANNOTATIONS = frozenset( { @@ -156,20 +148,20 @@ def to_mol( else: atoms = atoms[..., ~hydrogen_mask] - mol = EditableMol(Mol()) + mol = Chem.EditableMol(Chem.Mol()) has_annot = frozenset(atoms.get_annotation_categories()) extra_annot = set(include_extra_annotations) - _STANDARD_ANNOTATIONS for i in range(atoms.array_length()): - rdkit_atom = Atom(atoms.element[i].capitalize()) + rdkit_atom = Chem.Atom(atoms.element[i].capitalize()) if explicit_hydrogen: rdkit_atom.SetNoImplicit(True) if "charge" in has_annot: rdkit_atom.SetFormalCharge(atoms.charge[i].item()) # add standard pdb annotations - rdkit_atom_res_info = AtomPDBResidueInfo( + rdkit_atom_res_info = Chem.AtomPDBResidueInfo( atomName=atoms.atom_name[i].item(), residueName=atoms.res_name[i].item(), chainId=atoms.chain_id[i].item(), @@ -213,7 +205,7 @@ def to_mol( # Handle AtomArray and AtomArrayStack consistently coord = coord[None, :, :] for model_coord in coord: - conformer = Conformer(mol.GetNumAtoms()) + conformer = Chem.Conformer(mol.GetNumAtoms()) # RDKit silently expects the data to be in C-contiguous order # Otherwise the coordinates would be completely misassigned # (https://github.com/rdkit/rdkit/issues/8221) @@ -290,8 +282,8 @@ def from_mol(mol, conformer_id=None, add_hydrogen=None): if add_hydrogen is None: add_hydrogen = not _has_explicit_hydrogen(mol) if add_hydrogen: - SanitizeMol(mol, SanitizeFlags.SANITIZE_ADJUSTHS) - mol = AddHs(mol, addCoords=False, addResidueInfo=False) + Chem.SanitizeMol(mol, SanitizeFlags.SANITIZE_ADJUSTHS) + mol = Chem.AddHs(mol, addCoords=False, addResidueInfo=False) rdkit_atoms = mol.GetAtoms() if rdkit_atoms is None: @@ -328,7 +320,7 @@ def from_mol(mol, conformer_id=None, add_hydrogen=None): residue_info = rdkit_atom.GetPDBResidueInfo() if residue_info is None: # ... default values for atoms with missing residue information - residue_info = AtomPDBResidueInfo( + residue_info = Chem.AtomPDBResidueInfo( atomName="", occupancy=0.0, tempFactor=float("nan"), @@ -375,15 +367,15 @@ def from_mol(mol, conformer_id=None, add_hydrogen=None): rdkit_bonds = list(mol.GetBonds()) is_aromatic = np.array( - [bond.GetBondType() == RDKitBondType.AROMATIC for bond in rdkit_bonds] + [bond.GetBondType() == Chem.BondType.AROMATIC for bond in rdkit_bonds] ) if np.any(is_aromatic): # Determine the kekulized order of aromatic bonds # Copy as 'Kekulize()' modifies the molecule in-place - mol = Mol(mol) + mol = Chem.Mol(mol) try: - Kekulize(mol) - except KekulizeException: + Chem.Kekulize(mol) + except Chem.KekulizeException: warnings.warn( "Kekulization failed, " "using 'BondType.AROMATIC' instead for aromatic bonds instead", From d232eab9c8b915a928f230b30b1d47373dfb14e3 Mon Sep 17 00:00:00 2001 From: Simon Mathis Date: Wed, 12 Feb 2025 12:05:21 +0000 Subject: [PATCH 3/9] fix: enable returning molecules without conformers (with nan coords). Also set coordinates of non-3d conformers to nan even when explicitly requested, as they are meaningless. --- src/biotite/interface/rdkit/mol.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/biotite/interface/rdkit/mol.py b/src/biotite/interface/rdkit/mol.py index eb31a0854..8d632d3ae 100644 --- a/src/biotite/interface/rdkit/mol.py +++ b/src/biotite/interface/rdkit/mol.py @@ -290,14 +290,20 @@ def from_mol(mol, conformer_id=None, add_hydrogen=None): raise BadStructureError("Could not obtains atoms from Mol") if conformer_id is None: - conformers = [conf for conf in mol.GetConformers() if conf.Is3D()] + conformers = [conf for conf in mol.GetConformers()] atoms = AtomArrayStack(len(conformers), len(rdkit_atoms)) for i, conformer in enumerate(conformers): - atoms.coord[i] = np.array(conformer.GetPositions()) + if conformer.Is3D(): + atoms.coord[i] = np.array(conformer.GetPositions()) + else: + atoms.coord[i] = np.full((len(rdkit_atoms), 3), np.nan) else: conformer = mol.GetConformer(conformer_id) atoms = AtomArray(len(rdkit_atoms)) - atoms.coord = np.array(conformer.GetPositions()) + if conformer.Is3D(): + atoms.coord = np.array(conformer.GetPositions()) + else: + atoms.coord = np.full((len(rdkit_atoms), 3), np.nan) extra_annotations = defaultdict( # Use 'object' dtype first, as the maximum string length is unknown From e36303c42f270f3cb755def4f0526899c14c9825 Mon Sep 17 00:00:00 2001 From: Simon Mathis Date: Wed, 12 Feb 2025 12:06:07 +0000 Subject: [PATCH 4/9] feat: attempt to type-cast extra annotations instead of always leaving them as 'str' objects. Also type-cast the rdkit internal 'isImplicit' flag. --- src/biotite/interface/rdkit/mol.py | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/src/biotite/interface/rdkit/mol.py b/src/biotite/interface/rdkit/mol.py index 8d632d3ae..09bc66ab6 100644 --- a/src/biotite/interface/rdkit/mol.py +++ b/src/biotite/interface/rdkit/mol.py @@ -369,7 +369,25 @@ def from_mol(mol, conformer_id=None, add_hydrogen=None): extra_annotations[annot_name][_atom_idx] = rdkit_atom.GetProp(annot_name) for annot_name, array in extra_annotations.items(): - atoms.set_annotation(annot_name, array.astype(str)) + # ... handle special case of implicit hydrogen atom flags + if annot_name == "isImplicit": + atoms.set_annotation(annot_name, array.astype(bool)) + continue + + # ... for all custom annotations, try numeric conversions in order of preference + for dtype in (bool, int, float): + try: + converted = array.astype(dtype) + # Verify the conversion was lossless by converting back + if np.array_equal( + converted.astype(str), array.astype(str), equal_nan=True + ): + atoms.set_annotation(annot_name, converted) + break + except (ValueError, TypeError): + continue + else: # ... if no numeric conversion succeeded + atoms.set_annotation(annot_name, array.astype(str)) rdkit_bonds = list(mol.GetBonds()) is_aromatic = np.array( From c1b64c807091953cfc4fb0d479c90e356ee145dc Mon Sep 17 00:00:00 2001 From: Simon Mathis Date: Wed, 12 Feb 2025 12:15:25 +0000 Subject: [PATCH 5/9] fix: suggestion for default case & warnings for explicit hydrogens --- src/biotite/interface/rdkit/mol.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/src/biotite/interface/rdkit/mol.py b/src/biotite/interface/rdkit/mol.py index 09bc66ab6..21ccf9561 100644 --- a/src/biotite/interface/rdkit/mol.py +++ b/src/biotite/interface/rdkit/mol.py @@ -68,7 +68,7 @@ def to_mol( kekulize=False, use_dative_bonds=False, include_extra_annotations=(), - explicit_hydrogen=True, + explicit_hydrogen=False, ): """ Convert an :class:`.AtomArray` or :class:`.AtomArrayStack` into a @@ -103,6 +103,9 @@ def to_mol( explicit, i.e. each each hydrogen atom must be part of the :class:`AtomArray`. If set to false, the conversion process treats all hydrogen atoms as implicit and all hydrogen atoms in the :class:`AtomArray` are removed. + By default, explicit_hydrogen=False, i.e. hydrogen atoms are never assumed, to + avoid accidentally introducing radicals into the molecule upon calling + :func:`Chem.SanitizeMol()`. Returns ------- @@ -140,12 +143,24 @@ def to_mol( HXT """ hydrogen_mask = atoms.element == "H" + _has_hydrogen = hydrogen_mask.any() if explicit_hydrogen: - if not hydrogen_mask.any(): + if not _has_hydrogen: warnings.warn( - "No hydrogen found in the input, although 'explicit_hydrogen' is 'True'" + "No hydrogen found in the input, although 'explicit_hydrogen' is 'True'. " + "This may lead to unexpected results, as hydrogen atoms are not " + "implicitly added, which often results in radicals after sanitization " + "in RDKit. Make sure this is what you want.", + UserWarning, ) else: + if _has_hydrogen: + warnings.warn( + "Hydrogen atoms are present in the input, although 'explicit_hydrogen' " + "is 'False'. All hydrogen atoms will be removed and then re-inferred " + "by RDKit. Make sure this is what you want.", + UserWarning, + ) atoms = atoms[..., ~hydrogen_mask] mol = Chem.EditableMol(Chem.Mol()) From f0cc55081391752adfb8af6d7f768a9ff82fb5ae Mon Sep 17 00:00:00 2001 From: Patrick Kunzmann Date: Sat, 15 Feb 2025 22:23:33 +0100 Subject: [PATCH 6/9] Update src/biotite/interface/rdkit/mol.py Co-authored-by: Simon Mathis --- src/biotite/interface/rdkit/mol.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/biotite/interface/rdkit/mol.py b/src/biotite/interface/rdkit/mol.py index 21ccf9561..40a2d5b26 100644 --- a/src/biotite/interface/rdkit/mol.py +++ b/src/biotite/interface/rdkit/mol.py @@ -171,6 +171,7 @@ def to_mol( for i in range(atoms.array_length()): rdkit_atom = Chem.Atom(atoms.element[i].capitalize()) if explicit_hydrogen: + # ... tell RDKit to not assume any implicit hydrogens rdkit_atom.SetNoImplicit(True) if "charge" in has_annot: rdkit_atom.SetFormalCharge(atoms.charge[i].item()) From 6ade4ce2d5a313a311fa59d2ceb7d47d9a807a66 Mon Sep 17 00:00:00 2001 From: Patrick Kunzmann Date: Sat, 15 Feb 2025 22:44:20 +0100 Subject: [PATCH 7/9] Infer `explicit_hydrogen` from presence of hydrogen atoms --- src/biotite/interface/rdkit/mol.py | 34 +++++++++++++++++------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/src/biotite/interface/rdkit/mol.py b/src/biotite/interface/rdkit/mol.py index 40a2d5b26..a2e855230 100644 --- a/src/biotite/interface/rdkit/mol.py +++ b/src/biotite/interface/rdkit/mol.py @@ -68,7 +68,7 @@ def to_mol( kekulize=False, use_dative_bonds=False, include_extra_annotations=(), - explicit_hydrogen=False, + explicit_hydrogen=None, ): """ Convert an :class:`.AtomArray` or :class:`.AtomArrayStack` into a @@ -78,6 +78,7 @@ def to_mol( ---------- atoms : AtomArray or AtomArrayStack The molecule to be converted. + Must have an associated :class:`BondList`. kekulize : bool, optional If set to true, aromatic bonds are represented by single, double and triple bonds. @@ -101,11 +102,9 @@ def to_mol( explicit_hydrogen : bool, optional If set to true, the conversion process expects that all hydrogen atoms are explicit, i.e. each each hydrogen atom must be part of the :class:`AtomArray`. - If set to false, the conversion process treats all hydrogen atoms as implicit - and all hydrogen atoms in the :class:`AtomArray` are removed. - By default, explicit_hydrogen=False, i.e. hydrogen atoms are never assumed, to - avoid accidentally introducing radicals into the molecule upon calling - :func:`Chem.SanitizeMol()`. + If set to false, the conversion process treats all hydrogen atoms as implicit. + By default, explicit hydrogen atoms are only assumed of any hydrogen atoms are + present in `atoms`. Returns ------- @@ -114,6 +113,13 @@ def to_mol( If the input `atoms` is an :class:`AtomArrayStack`, all models are included as conformers with conformer IDs starting from ``0``. + Raised + ------ + BadStructureError + If the input `atoms` does not have an associated :class:`BondList`. + Also raises a :class:`BadStructureError`, if `explicit_hydrogen` is set to + ``False`` despite hydrogen atoms being present in `atoms`. + Examples -------- @@ -144,22 +150,20 @@ def to_mol( """ hydrogen_mask = atoms.element == "H" _has_hydrogen = hydrogen_mask.any() - if explicit_hydrogen: + if explicit_hydrogen is None: + explicit_hydrogen = _has_hydrogen + elif explicit_hydrogen: if not _has_hydrogen: warnings.warn( - "No hydrogen found in the input, although 'explicit_hydrogen' is 'True'. " - "This may lead to unexpected results, as hydrogen atoms are not " - "implicitly added, which often results in radicals after sanitization " - "in RDKit. Make sure this is what you want.", + "No hydrogen found, although 'explicit_hydrogen' is 'True'. " + "This may lead to radicals after sanitization in RDKit.", UserWarning, ) else: if _has_hydrogen: - warnings.warn( + raise BadStructureError( "Hydrogen atoms are present in the input, although 'explicit_hydrogen' " - "is 'False'. All hydrogen atoms will be removed and then re-inferred " - "by RDKit. Make sure this is what you want.", - UserWarning, + "is set to 'False'" ) atoms = atoms[..., ~hydrogen_mask] From 8afb1ae833d04f73ba4d68ebe3e103691c54bff8 Mon Sep 17 00:00:00 2001 From: Patrick Kunzmann Date: Sat, 15 Feb 2025 22:45:32 +0100 Subject: [PATCH 8/9] Add author --- src/biotite/interface/rdkit/mol.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/biotite/interface/rdkit/mol.py b/src/biotite/interface/rdkit/mol.py index a2e855230..7484842b7 100644 --- a/src/biotite/interface/rdkit/mol.py +++ b/src/biotite/interface/rdkit/mol.py @@ -3,7 +3,7 @@ # information. __name__ = "biotite.interface.rdkit" -__author__ = "Patrick Kunzmann" +__author__ = "Patrick Kunzmann, Simon Mathis" __all__ = ["to_mol", "from_mol"] import warnings From e4b4083035ec1815cc2cf73cbc303d9fb804022c Mon Sep 17 00:00:00 2001 From: Patrick Kunzmann Date: Sat, 15 Feb 2025 23:38:09 +0100 Subject: [PATCH 9/9] Make 2D and 3D conformers selectable --- src/biotite/interface/rdkit/mol.py | 41 +++++++++++++++---------- tests/interface/test_rdkit.py | 49 +++++++++++++++++++++--------- 2 files changed, 59 insertions(+), 31 deletions(-) diff --git a/src/biotite/interface/rdkit/mol.py b/src/biotite/interface/rdkit/mol.py index 7484842b7..9540a4210 100644 --- a/src/biotite/interface/rdkit/mol.py +++ b/src/biotite/interface/rdkit/mol.py @@ -246,9 +246,12 @@ def from_mol(mol, conformer_id=None, add_hydrogen=None): ---------- mol : rdkit.Chem.rdchem.Mol The molecule to be converted. - conformer_id : int, optional - The conformer to be converted. - By default, an :class:`AtomArrayStack` with all conformers is returned. + conformer_id : int or {"2D", "3D"}, optional + The ID of the conformer to be converted. + If set to "2D" or "3D", an :class:`AtomArrayStack` with only the 2D or 3D + conformer is returned, respectively. + By default, an :class:`AtomArrayStack` with all conformers (2D and 3D) is + returned. add_hydrogen : bool, optional If set to true, explicit hydrogen atoms are always added. If set to false, explicit hydrogen atoms are never added. @@ -259,8 +262,10 @@ def from_mol(mol, conformer_id=None, add_hydrogen=None): ------- atoms : AtomArray or AtomArrayStack The converted atoms. - An :class:`AtomArrayStack` is only returned, if the `conformer_id` parameter - is not set. + An :class:`AtomArray` is returned if an integer `conformer_id` is given. + Otherwise, an :class:`AtomArrayStack` is returned. + If the input `mol` does not have a conformer, an `AtomArrayStack` with a + single model, where all coordinates are *NaN*, is returned. Notes ----- @@ -309,21 +314,25 @@ def from_mol(mol, conformer_id=None, add_hydrogen=None): if rdkit_atoms is None: raise BadStructureError("Could not obtains atoms from Mol") - if conformer_id is None: + if conformer_id in (None, "2D", "3D"): conformers = [conf for conf in mol.GetConformers()] - atoms = AtomArrayStack(len(conformers), len(rdkit_atoms)) - for i, conformer in enumerate(conformers): - if conformer.Is3D(): - atoms.coord[i] = np.array(conformer.GetPositions()) - else: - atoms.coord[i] = np.full((len(rdkit_atoms), 3), np.nan) + if conformer_id == "2D": + conformers = [conf for conf in conformers if not conf.Is3D()] + elif conformer_id == "3D": + conformers = [conf for conf in conformers if conf.Is3D()] + if len(conformers) == 0: + # No conformer in 'Mol' that fulfills the criteria + # -> create a single model with all coordinates set to NaN + atoms = AtomArrayStack(1, len(rdkit_atoms)) + atoms.coord = np.full((1, len(rdkit_atoms), 3), np.nan) + else: + atoms = AtomArrayStack(len(conformers), len(rdkit_atoms)) + for i, conformer in enumerate(conformers): + atoms.coord[i] = np.array(conformer.GetPositions(), dtype=np.float32) else: conformer = mol.GetConformer(conformer_id) atoms = AtomArray(len(rdkit_atoms)) - if conformer.Is3D(): - atoms.coord = np.array(conformer.GetPositions()) - else: - atoms.coord = np.full((len(rdkit_atoms), 3), np.nan) + atoms.coord = np.array(conformer.GetPositions(), dtype=np.float32) extra_annotations = defaultdict( # Use 'object' dtype first, as the maximum string length is unknown diff --git a/tests/interface/test_rdkit.py b/tests/interface/test_rdkit.py index f9fc6245d..15d907842 100644 --- a/tests/interface/test_rdkit.py +++ b/tests/interface/test_rdkit.py @@ -1,13 +1,7 @@ from pathlib import Path import numpy as np import pytest -from rdkit.Chem import MolFromSmiles, MolToSmiles -from rdkit.Chem.rdchem import Atom, EditableMol, Mol -from rdkit.Chem.rdchem import BondType as RDKitBondType -from rdkit.Chem.rdmolops import ( - AddHs, - RemoveStereochemistry, -) +import rdkit.Chem.AllChem as Chem import biotite.interface.rdkit as rdkit_interface import biotite.structure as struc import biotite.structure.info as info @@ -102,19 +96,44 @@ def test_conversion_from_rdkit(smiles): Start from SMILES string to ensure that built-in functionality of RDKit is used to create the initial molecule. """ - ref_mol = MolFromSmiles(smiles) + ref_mol = Chem.MolFromSmiles(smiles) atoms = rdkit_interface.from_mol(ref_mol) test_mol = rdkit_interface.to_mol(atoms) # The intermediate AtomArray has explicit hydrogen atoms so add them explicitly # to the reference as well for fair comparison - ref_mol = AddHs(ref_mol) + ref_mol = Chem.AddHs(ref_mol) # The intermediate AtomArray does not have stereochemistry information, # so this info cannot be preserved in the comparison - RemoveStereochemistry(ref_mol) + Chem.RemoveStereochemistry(ref_mol) # RDKit does not support equality checking -> Use SMILES string as proxy - assert MolToSmiles(test_mol) == MolToSmiles(ref_mol) + assert Chem.MolToSmiles(test_mol) == Chem.MolToSmiles(ref_mol) + + +@pytest.mark.parametrize("selected_conformer_type", ["2D", "3D"]) +@pytest.mark.parametrize("present_conformer_type", ["2D", "3D"]) +def test_conformer_selection(present_conformer_type, selected_conformer_type): + """ + Check if 2D and 3D conformers can be selected and that a model with *NaN* + coordinates is returned if an absent conformer type is selected. + """ + mol = Chem.MolFromSmiles("C1=CC=CC=C1") + conformer = Chem.Conformer(mol.GetNumAtoms()) + conformer.Set3D(present_conformer_type=="3D") + mol.AddConformer(conformer) + + atoms = rdkit_interface.from_mol(mol, conformer_id=selected_conformer_type) + + # In any case there should be a single model + assert atoms.stack_depth() == 1 + if selected_conformer_type != present_conformer_type: + # In case the selected conformer type is not present, expect a model + # with all coordinates set to NaN + assert np.all(np.isnan(atoms.coord)) + else: + # The default conformer set above contains zeros + assert np.all(atoms.coord == 0) def test_kekulization(): @@ -147,11 +166,11 @@ def test_unmappable_bond_type(): """ Test that a warning is raised when a bond type cannot be mapped to Biotite. """ - mol = EditableMol(Mol()) - mol.AddAtom(Atom("F")) - mol.AddAtom(Atom("F")) + mol = Chem.EditableMol(Chem.Mol()) + mol.AddAtom(Chem.Atom("F")) + mol.AddAtom(Chem.Atom("F")) # 'HEXTUPLE' has no corresponding Biotite bond type - mol.AddBond(0, 1, RDKitBondType.HEXTUPLE) + mol.AddBond(0, 1, Chem.BondType.HEXTUPLE) mol = mol.GetMol() with pytest.warns(LossyConversionWarning):