Skip to content

Commit

Permalink
Add functionalities related to aromatic rings
Browse files Browse the repository at this point in the history
  • Loading branch information
padix-key committed Feb 11, 2025
1 parent d7df095 commit 13df200
Show file tree
Hide file tree
Showing 4 changed files with 380 additions and 0 deletions.
5 changes: 5 additions & 0 deletions doc/apidoc.json
Original file line number Diff line number Diff line change
Expand Up @@ -347,6 +347,11 @@
"dot_bracket",
"dot_bracket_from_structure",
"base_pairs_from_dot_bracket"
],
"Aromatic rings": [
"find_aromatic_rings",
"find_stacking_interactions",
"PiStacking"
]
},
"biotite.structure.info" : {
Expand Down
1 change: 1 addition & 0 deletions src/biotite/structure/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@
from .rdf import *
from .repair import *
from .residues import *
from .rings import *
from .sasa import *
from .sequence import *
from .sse import *
Expand Down
204 changes: 204 additions & 0 deletions src/biotite/structure/rings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,204 @@
# This source code is part of the Biotite package and is distributed
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
# information.

"""
This module provides functions related to aromatic rings.
"""

__name__ = "biotite.structure"
__author__ = "Patrick Kunzmann"
__all__ = ["find_aromatic_rings", "find_stacking_interactions", "PiStacking"]


from enum import IntEnum
import networkx as nx
import numpy as np
from biotite.structure.bonds import BondType
from biotite.structure.error import BadStructureError
from biotite.structure.geometry import displacement
from biotite.structure.util import vector_dot

_MAX_CENTROID_DIST = 6.0
_MAX_NORMAL_CENTROID_ANGLE = np.deg2rad(10)
_MAX_PLANE_ANGLE_DEVIATION = np.deg2rad(15)


class PiStacking(IntEnum):
"""
The type of pi-stacking interaction.
- ``PARALLEL``: parallel pi-stacking (also called *staggered* or *Sandwich*)
- ``PERPENDICULAR``: perpendicular pi-stacking (also called *T-shaped*)
"""

PARALLEL = 0
PERPENDICULAR = 1


def find_aromatic_rings(atoms):
"""
Find (anti-)aromatic rings in a structure.
Parameters
----------
atoms : AtomArray or AtomArrayStack
The atoms to be searched for aromatic rings.
Requires an associated :class:`BondList`.
Returns
-------
rings : list of ndarray
The indices of the atoms that form aromatic rings.
Each ring is represented by a list of indices.
Only rings with minimum size are returned, i.e. two connected rings
(e.g. in tryptophan) are reported as separate rings.
Notes
-----
This function does not distinguish between aromatic and antiaromatic rings.
All cycles containing atoms that are completely connected by aromatic bonds
are considered aromatic rings.
"""
if atoms.bonds is None:
raise BadStructureError("Structure must have an associated BondList")
bond_array = atoms.bonds.as_array()
# To detect aromatic rings, only keep bonds that are aromatic
aromatic_bond_array = bond_array[
np.isin(
bond_array[:, 2],
[
BondType.AROMATIC,
BondType.AROMATIC_SINGLE,
BondType.AROMATIC_DOUBLE,
BondType.AROMATIC_TRIPLE,
],
),
# We can omit the bond type now
:2,
]
aromatic_bond_graph = nx.from_edgelist(aromatic_bond_array.tolist())
# Find the cycles with minimum size -> cycle basis
rings = nx.cycle_basis(aromatic_bond_graph)
return [np.array(ring, dtype=int) for ring in rings]


def find_stacking_interactions(atoms):
"""
Find pi-stacking interactions between aromatic rings.
Parameters
----------
atoms : AtomArray
The atoms to be searched for aromatic rings.
Requires an associated :class:`BondList`.
Returns
-------
interactions : list of tuple(ndarray, ndarray, PiStacking)
The stacking interactions between aromatic rings.
Each element in the list represents one stacking interaction.
The first two elements of each tuple represent atom indices of the stacked
rings.
The third element of each tuple is the type of stacking interaction.
"""
rings = find_aromatic_rings(atoms)
ring_centroids = np.array(
[atoms.coord[atom_indices].mean(axis=0) for atom_indices in rings]
)
ring_normals = np.array(
[_get_ring_normal(atoms.coord[atom_indices] for atom_indices in rings)]
)

# Create an index array that contains the Cartesian product of all rings
indices = np.stack(
[
np.repeat(np.arange(len(rings)), len(rings)),
np.tile(np.arange(len(rings)), len(rings)),
]
)
# Do not include duplicates
indices = indices[indices[:, 0] < indices[:, 1]]

is_interacting = np.full(len(indices), True, dtype=bool)

## Condition one: Ring centroids are close enough to each other
diff = displacement(ring_centroids[indices[:, 0]], ring_centroids[indices[:, 1]])
# Use squared distance to avoid time consuming sqrt computation
sq_distance = vector_dot(diff, diff)
is_interacting = sq_distance < _MAX_CENTROID_DIST**2
indices = indices[is_interacting]
diff = diff[is_interacting]

## Condition two: The ring centroid are not shifted too much
## (in terms of normal-centroid angle)
angles = np.stack(
[_minimum_angle(ring_normals[indices[:, i]], diff) for i in range(2)]
)
is_interacting = np.any(angles < _MAX_NORMAL_CENTROID_ANGLE, axis=0)
indices = indices[is_interacting]

## Condition three: Ring planes are parallel or perpendicular
plane_angles = _minimum_angle(
ring_normals[indices[:, 0]], ring_normals[indices[:, 1]]
)
is_parallel = plane_angles < _MAX_PLANE_ANGLE_DEVIATION
is_perpendicular = np.abs(np.pi / 2 - plane_angles) < _MAX_PLANE_ANGLE_DEVIATION
is_interacting &= is_parallel | is_perpendicular
indices = indices[is_interacting]
is_parallel = is_parallel[is_interacting]

# Only return pairs of rings where all conditions were fulfilled
return [
(
rings[ring_i],
rings[ring_j],
PiStacking.PARALLEL if is_parallel[i] else PiStacking.PERPENDICULAR,
)
for i, (ring_i, ring_j) in enumerate(indices)
]


def _get_ring_normal(ring_coord):
"""
Get the normal vector perpendicular to the ring plane.
Parameters
----------
ring_coord : ndarray
The coordinates of the atoms in the ring.
Returns
-------
normal : ndarray
The normal vector of the ring plane.
"""
# Simply use any three atoms in the ring to calculate the normal vector
# We can also safely assume that there are at least three atoms in the ring,
# as otherwise it would not be a ring
normal = np.cross(ring_coord[1] - ring_coord[0], ring_coord[2] - ring_coord[0])
return normal / np.linalg.norm(normal)


def _minimum_angle(v1, v2):
"""
Get the minimum angle between two vectors, i.e. the possible angle range is
``[0, pi/2]``.
Parameters
----------
v1, v2 : ndarray, shape=(n,3), dtype=float
The vectors to measure the angle between.
Returns
-------
angle : ndarray, shape=(n,), dtype=float
The minimum angle between the two vectors.
Notes
-----
This restriction is added here as the normal vectors of the ring planes
have no 'preferred side'.
"""
# Do not distinguish between the 'sides' of the rings -> take absolute of cosine
return np.arccos(np.abs(vector_dot(v1, v2)))
170 changes: 170 additions & 0 deletions tests/structure/test_rings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
from pathlib import Path
import pytest
import biotite.structure as struc
import biotite.structure.info as info
import biotite.structure.io.pdbx as pdbx
from tests.util import data_dir


@pytest.fixture
def riboswitch_structure():
"""
Get a nucleotide structure with a complex fold, to include a variety of aromatic
ring interactions.
"""
pdbx_file = pdbx.BinaryCIFFile.read(Path(data_dir("structure")) / "4gxy.bcif")
atoms = pdbx.get_structure(pdbx_file, model=1, include_bonds=True)
atoms = atoms[struc.filter_nucleotides(atoms)]
return atoms


@pytest.mark.parametrize(
"res_name, ref_ring_members",
[
# No aromatic rings at all
("ALA", []),
# Rings, but no aromatic ones
("GLC", []),
# One aromatic ring
(
"TYR",
[
("CG", "CD1", "CD2", "CE1", "CE2", "CZ"),
],
),
# Aromatic ring with heteroatoms
(
"HIS",
[
("CG", "CD2", "NE2", "CE1", "ND1"),
],
),
# Two fused aromatic rings
(
"TRP",
[
("CG", "CD1", "CD2", "CE2", "NE1"),
("CD2", "CE2", "CZ2", "CH2", "CZ3", "CE3"),
],
),
# Disconnected aromatic rings
(
"BP5",
[
("N1", "C1", "C2", "C3", "C4", "C5"),
("N2", "C6", "C7", "C8", "C9", "C11"),
],
),
],
# Keep only the residue name as ID
ids=lambda x: x if isinstance(x, str) else "",
)
def test_find_known_aromatic_rings(res_name, ref_ring_members):
"""
Check if aromatic rings are correctly identified by :func:`find_aromatic_rings()` in
known molecules.
"""
molecule = info.residue(res_name)
rings = struc.find_aromatic_rings(molecule)
test_ring_members = set(
[frozenset(molecule.atom_name[atom_indices].tolist()) for atom_indices in rings]
)
ref_ring_members = set([frozenset(ring) for ring in ref_ring_members])

assert test_ring_members == ref_ring_members


@pytest.mark.parametrize(
"stacking_type, included_interactions, excluded_interactions",
[
(
struc.PiStacking.PARALLEL,
[
(13, 14),
(32, 33),
(109, 121),
],
[
(109, 120),
(109, 122),
],
),
(
struc.PiStacking.PERPENDICULAR,
[
(16, 30),
(86, 145),
(99, 100),
],
[
(97, 130),
(16, 29),
],
),
],
)
def test_find_known_stacking_interactions(
riboswitch_structure, stacking_type, included_interactions, excluded_interactions
):
"""
Check if :func:`find_stacking_interactions()` correctly identifies pi-stacking
interactions in a known complex folded nucleic acid structure.
Due to the high number of interactions, check this exemplarily, i.e.
check if interactions between certain residues are reported and others are
definitely absent.
"""
interactions = struc.find_stacking_interactions(riboswitch_structure, stacking_type)
interaction_res_ids = []
for ring_indices_1, ring_indices_2, s_type in interactions:
if s_type == stacking_type:
interaction_res_ids.append(
frozenset(
(
# Taking the first atom index is sufficient,
# as all atoms in the same ring are part of the same residue
riboswitch_structure.res_id[ring_indices_1[0]],
riboswitch_structure.res_id[ring_indices_2[0]],
)
)
)

included_interactions = set(
[frozenset(interaction) for interaction in included_interactions]
)
excluded_interactions = set(
[frozenset(interaction) for interaction in excluded_interactions]
)

assert included_interactions.issubset(interaction_res_ids)
assert excluded_interactions.isdisjoint(interaction_res_ids)


def test_no_duplicate_stacking_interactions(riboswitch_structure):
"""
Check if :func:`find_stacking_interactions()` does not report duplicate
interactions.
"""
interactions = struc.find_stacking_interactions(riboswitch_structure)
original_length = len(interactions)

interactions = set(
[
frozenset((frozenset(ring_indices_1), frozenset(ring_indices_2)))
for ring_indices_1, ring_indices_2, _ in interactions
]
)
deduplicated_length = len(interactions)

assert deduplicated_length == original_length


def test_no_adjacent_stacking_interactions():
"""
Ensure that :func:`find_stacking_interactions()` does not report interactions
between adjacent (fused) aromatic rings.
"""
# Tryptophan contains two fused aromatic rings
molecule = info.residue("TRP")
interactions = struc.find_stacking_interactions(molecule)

assert len(interactions) == 0

0 comments on commit 13df200

Please sign in to comment.