Skip to content

Commit

Permalink
add missing files
Browse files Browse the repository at this point in the history
  • Loading branch information
rvhonorato committed Apr 30, 2024
1 parent 37ff1b8 commit 8165e99
Show file tree
Hide file tree
Showing 9 changed files with 295 additions and 8 deletions.
8 changes: 4 additions & 4 deletions src/prodigy_cryst/interface_classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,11 @@
logging.error("[!] The interface classifier tool requires Biopython")
raise ImportError(e)

from prodigy_cryst.lib import aa_properties
from prodigy_cryst.lib.parsers import parse_structure
from prodigy_cryst.modules import aa_properties
from prodigy_cryst.modules.parsers import parse_structure

# from prodigy_cryst.lib.freesasa import execute_freesasa
from prodigy_cryst.lib.utils import _check_path
from prodigy_cryst.modules.utils import _check_path


def calculate_ic(structure, d_cutoff=5.0, selection=None):
Expand Down Expand Up @@ -99,7 +99,7 @@ def analyse_contacts(contact_list):
}

_data = aa_properties.aa_character_ic
for (res_i, res_j) in contact_list:
for res_i, res_j in contact_list:
contact_type = (_data.get(res_i.resname), _data.get(res_j.resname))
contact_type = "".join(sorted(contact_type))
bins[contact_type] += 1
Expand Down
9 changes: 9 additions & 0 deletions src/prodigy_cryst/modules/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#!/usr/bin/env python
#
# This code is part of the interface classifier tool distribution
# and governed by its license. Please see the LICENSE file that should
# have been included as part of this package.
#
"""
Interface classification methods developed by the Bonvin Lab.
"""
133 changes: 133 additions & 0 deletions src/prodigy_cryst/modules/aa_properties.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
#!/usr/bin/env python
#
# This code is part of the interface classifier tool distribution
# and governed by its license. Please see the LICENSE file that should
# have been included as part of this package.
#

"""
Generic properties of amino acids required for the interface classification methods.
"""

__author__ = ["Anna Vangone", "Joao Rodrigues"]

# Maps each of the 20 standard residues (three-letter, upper-case name) to a
# one-letter character class: 'A', 'C' or 'P'.
# NOTE(review): the letters presumably stand for Apolar / Charged / Polar --
# confirm against the method's publication.
# Lookups are done by residue name; a name missing from this table yields
# None when read with ``.get``.
aa_character_ic = {
    'ALA': 'A',
    'CYS': 'A',  # ? (class assignment flagged as uncertain by the original authors)
    'GLU': 'C',
    'ASP': 'C',
    'GLY': 'A',
    'PHE': 'A',
    'ILE': 'A',
    'HIS': 'C',
    'LYS': 'C',
    'MET': 'A',
    'LEU': 'A',
    'ASN': 'P',
    'GLN': 'P',
    'PRO': 'A',
    'SER': 'P',
    'ARG': 'C',
    'THR': 'P',
    'TRP': 'A',
    'VAL': 'A',
    'TYR': 'A',
}

# Alternative residue-name -> character-class table ('A', 'C' or 'P') for the
# 20 standard residues.
# NOTE(review): the name suggests this follows the PROTORP server's
# classification, and the letters presumably mean Apolar / Charged / Polar --
# confirm before relying on either.
# Compared with ``aa_character_ic``, CYS, HIS, TRP and TYR are classed 'P' here.
aa_character_protorp = {
    'ALA': 'A',
    'CYS': 'P',
    'GLU': 'C',
    'ASP': 'C',
    'GLY': 'A',
    'PHE': 'A',
    'ILE': 'A',
    'HIS': 'P',
    'LYS': 'C',
    'MET': 'A',
    'LEU': 'A',
    'ASN': 'P',
    'GLN': 'P',
    'PRO': 'A',
    'SER': 'P',
    'ARG': 'C',
    'THR': 'P',
    'TRP': 'P',
    'VAL': 'A',
    'TYR': 'P',
}

# Scaling factors for relative ASA (accessible surface area).
# Calculated using extended ALA-X-ALA peptides.
# Taken from NACCESS.
#
# Layout: rel_asa[<kind>][<three-letter residue name>] -> float, where <kind>
# is one of 'total', 'bb' or 'sc'.
# NOTE(review): 'bb' / 'sc' presumably mean backbone / side chain; for every
# residue below bb + sc equals total (SER differs by 0.01, i.e. rounding).
# Units are not stated here -- presumably square Angstroms; confirm against
# the NACCESS documentation.
rel_asa = {
    'total':
    {
        'ALA': 107.95,
        'CYS': 134.28,
        'ASP': 140.39,
        'GLU': 172.25,
        'PHE': 199.48,
        'GLY': 80.10,
        'HIS': 182.88,
        'ILE': 175.12,
        'LYS': 200.81,
        'LEU': 178.63,
        'MET': 194.15,
        'ASN': 143.94,
        'PRO': 136.13,
        'GLN': 178.50,
        'ARG': 238.76,
        'SER': 116.50,
        'THR': 139.27,
        'VAL': 151.44,
        'TRP': 249.36,
        'TYR': 212.76,
    },
    'bb':
    {
        'ALA': 38.54,
        'CYS': 37.53,
        'ASP': 37.70,
        'GLU': 37.51,
        'PHE': 35.37,
        'GLY': 47.77,
        'HIS': 35.80,
        'ILE': 37.16,
        'LYS': 37.51,
        'LEU': 37.51,
        'MET': 37.51,
        'ASN': 37.70,
        'PRO': 16.23,
        'GLN': 37.51,
        'ARG': 37.51,
        'SER': 38.40,
        'THR': 37.57,
        'VAL': 37.16,
        'TRP': 38.10,
        'TYR': 35.38,
    },
    'sc':
    {
        'ALA': 69.41,
        'CYS': 96.75,
        'ASP': 102.69,
        'GLU': 134.74,
        'PHE': 164.11,
        'GLY': 32.33,
        'HIS': 147.08,
        'ILE': 137.96,
        'LYS': 163.30,
        'LEU': 141.12,
        'MET': 156.64,
        'ASN': 106.24,
        'PRO': 119.90,
        'GLN': 140.99,
        'ARG': 201.25,
        'SER': 78.11,
        'THR': 101.70,
        'VAL': 114.28,
        'TRP': 211.26,
        'TYR': 177.38,
    }
}
120 changes: 120 additions & 0 deletions src/prodigy_cryst/modules/parsers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
#!/usr/bin/env python
#
# This code is part of the interface classifier tool distribution
# and governed by its license. Please see the LICENSE file that should
# have been included as part of this package.
#

"""
Functions to read PDB/mmCIF files
"""

from __future__ import division, print_function

import logging
import os

try:
from Bio.PDB import MMCIFParser, PDBParser
from Bio.PDB.Polypeptide import PPBuilder, is_aa
except ImportError as e:
logging.error("[!] The interface classifier tool requires Biopython")
raise ImportError(e)


def parse_structure(path):
    """
    Parse a PDB/mmCIF file with Biopython and clean it for the calculation.

    The parser is chosen from the file extension, matched case-insensitively:
    ``pdb``/``ent`` use ``PDBParser`` and ``cif`` uses ``MMCIFParser``. The
    parsed structure is then modified in place:

    * only the first model is kept (a warning is logged if there were more);
    * every disordered atom is replaced by its currently selected alternate
      location, with the altloc and disorder flag cleared;
    * residues whose hetero flag starts with "W" or "H" (solvent, HETATM)
      are removed;
    * atoms whose element is "H" are removed.

    Finally the number of peptides built by ``PPBuilder`` is compared with the
    number of distinct chain ids; a mismatch is only logged as a warning
    ("gaps"), the calculation is not aborted.

    Args:
        path: path to the structure file; its extension selects the parser.

    Returns:
        A tuple ``(structure, n_chains, n_res)``: the cleaned Biopython
        structure, the number of distinct chain ids and the number of
        residues left after HETATM/solvent removal.

    Raises:
        IOError: if the extension is not one of ``pdb``, ``ent`` or ``cif``.
        ValueError: if a remaining residue is not a standard amino acid.
        Exception: if Biopython fails to read or parse the file.
    """
    log = logging.getLogger("Prodigy")
    log.info("[+] Reading structure file: {0}".format(path))
    fname = os.path.basename(path)
    sname = ".".join(fname.split(".")[:-1])
    s_ext = fname.split(".")[-1]

    # Match the extension case-insensitively so that e.g. "1ABC.PDB" is
    # accepted; the error message still reports the extension as given.
    ext_key = s_ext.lower()
    if ext_key in ("pdb", "ent"):
        sparser = PDBParser(QUIET=1)
    elif ext_key == "cif":
        sparser = MMCIFParser()
    else:
        raise IOError(
            "[!] Structure format '{0}' is not supported. Use '.pdb' or '.cif'.".format(
                s_ext
            )
        )

    try:
        s = sparser.get_structure(sname, path)
    except Exception as e:
        log.error("[!] Structure '{0}' could not be parsed".format(sname))
        raise Exception(e)

    # Keep first model only
    if len(s) > 1:
        log.warning(
            "[!] Structure contains more than one model. Only the first one will be kept"
        )
        model_one = s[0].id
        # Iterate over a copy: detaching mutates s.child_list.
        for m in s.child_list[:]:
            if m.id != model_one:
                s.detach_child(m.id)

    # Double occupancy check: swap each disordered atom for its selected
    # alternate location. The atom list is materialised first because the
    # residues are modified while looping.
    for atom in list(s.get_atoms()):
        if atom.is_disordered():
            residue = atom.parent
            sel_at = atom.selected_child
            sel_at.altloc = " "
            sel_at.disordered_flag = 0
            residue.detach_child(atom.id)
            residue.add(sel_at)

    # Remove HETATMs and solvent
    def _is_hetero_or_solvent(r):
        # r.id[0] is the residue's hetero flag.
        return r.id[0][0] == "W" or r.id[0][0] == "H"

    for res in list(s.get_residues()):
        if _is_hetero_or_solvent(res):
            chain = res.parent
            chain.detach_child(res.id)
        elif not is_aa(res, standard=True):
            raise ValueError(
                "Unsupported non-standard amino acid found: {0}".format(res.resname)
            )
    n_res = len(list(s.get_residues()))

    # Remove Hydrogens
    def _is_hydrogen(x):
        return x.element == "H"

    for atom in list(s.get_atoms()):
        if _is_hydrogen(atom):
            residue = atom.parent
            residue.detach_child(atom.name)

    # Detect gaps and compare with no. of chains
    pep_builder = PPBuilder()
    peptides = pep_builder.build_peptides(s)
    n_peptides = len(peptides)
    n_chains = len({c.id for c in s.get_chains()})

    if n_peptides != n_chains:
        log.warning("[!] Structure contains gaps:")
        for i_pp, pp in enumerate(peptides):
            log.warning(
                "\t{1.parent.id} {1.resname}{1.id[1]} < Fragment {0} > {2.parent.id} {2.resname}{2.id[1]}".format(
                    i_pp, pp[0], pp[-1]
                )
            )
        # Gaps are reported but deliberately do not stop the calculation.

    return (s, n_chains, n_res)
25 changes: 25 additions & 0 deletions src/prodigy_cryst/modules/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#!/usr/bin/env python
#
# This code is part of the interface classifier tool distribution
# and governed by its license. Please see the LICENSE file that should
# have been included as part of this package.
#

"""
Assorted utility functions.
"""

from __future__ import division, print_function

import os


def _check_path(path):
    """
    Resolve ``path`` to an absolute path and make sure it is an existing file.

    Returns the absolute path when it points to a regular file; raises
    IOError (quoting the path as given) otherwise.
    """
    resolved = os.path.abspath(path)
    if os.path.isfile(resolved):
        return resolved
    raise IOError('Could not read file: {0}'.format(path))
2 changes: 1 addition & 1 deletion tests/test_aa_properties.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import pytest

from prodigy_cryst.lib.aa_properties import (
from prodigy_cryst.modules.aa_properties import (
aa_character_ic,
aa_character_protorp,
rel_asa,
Expand Down
2 changes: 1 addition & 1 deletion tests/test_interface_classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
analyse_contacts,
calculate_ic,
)
from prodigy_cryst.lib.parsers import parse_structure
from prodigy_cryst.modules.parsers import parse_structure
from tests import DATA_FOLDER


Expand Down
2 changes: 1 addition & 1 deletion tests/test_parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import pytest
from Bio.PDB.Structure import Structure

from prodigy_cryst.lib.parsers import parse_structure
from prodigy_cryst.modules.parsers import parse_structure

from . import DATA_FOLDER

Expand Down
2 changes: 1 addition & 1 deletion tests/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from tempfile import NamedTemporaryFile

from prodigy_cryst.lib.utils import _check_path
from prodigy_cryst.modules.utils import _check_path


def test__check_path():
Expand Down

0 comments on commit 8165e99

Please sign in to comment.