add missing files

haddocking · Apr 30, 2024 · 8165e99 · 8165e99
1 parent 37ff1b8
commit 8165e99
Show file tree

Hide file tree

Showing 9 changed files with 295 additions and 8 deletions.
diff --git a/src/prodigy_cryst/interface_classifier.py b/src/prodigy_cryst/interface_classifier.py
@@ -27,11 +27,11 @@
     logging.error("[!] The interface classifier tool requires Biopython")
     raise ImportError(e)
 
-from prodigy_cryst.lib import aa_properties
-from prodigy_cryst.lib.parsers import parse_structure
+from prodigy_cryst.modules import aa_properties
+from prodigy_cryst.modules.parsers import parse_structure
 
 # from prodigy_cryst.lib.freesasa import execute_freesasa
-from prodigy_cryst.lib.utils import _check_path
+from prodigy_cryst.modules.utils import _check_path
 
 
 def calculate_ic(structure, d_cutoff=5.0, selection=None):
@@ -99,7 +99,7 @@ def analyse_contacts(contact_list):
     }
 
     _data = aa_properties.aa_character_ic
-    for (res_i, res_j) in contact_list:
+    for res_i, res_j in contact_list:
         contact_type = (_data.get(res_i.resname), _data.get(res_j.resname))
         contact_type = "".join(sorted(contact_type))
         bins[contact_type] += 1

diff --git a/src/prodigy_cryst/modules/__init__.py b/src/prodigy_cryst/modules/__init__.py
@@ -0,0 +1,9 @@
+#!/usr/bin/env python
+#
+# This code is part of the interface classifier tool distribution
+# and governed by its license.  Please see the LICENSE file that should
+# have been included as part of this package.
+#
+"""
+Interface classification methods developed by the Bonvin Lab.
+"""
diff --git a/src/prodigy_cryst/modules/aa_properties.py b/src/prodigy_cryst/modules/aa_properties.py
@@ -0,0 +1,133 @@
+#!/usr/bin/env python
+#
+# This code is part of the interface classifier tool distribution
+# and governed by its license.  Please see the LICENSE file that should
+# have been included as part of this package.
+#
+
+"""
+Generic properties of amino acids required for the interface classification methods.
+"""
+
+__author__ = ["Anna Vangone", "Joao Rodrigues"]
+
+# Maps a residue's three-letter code to a one-letter character class.
+# The interface classifier looks residues up here by `resname` and joins the
+# two classes of a contacting pair into a contact-type key (e.g. "AC").
+# NOTE(review): presumably 'A' = apolar, 'C' = charged, 'P' = polar - confirm.
+aa_character_ic = {
+    'ALA': 'A',
+    'CYS': 'A',  # ? (open question from the authors; aa_character_protorp uses 'P')
+    'GLU': 'C',
+    'ASP': 'C',
+    'GLY': 'A',
+    'PHE': 'A',
+    'ILE': 'A',
+    'HIS': 'C',
+    'LYS': 'C',
+    'MET': 'A',
+    'LEU': 'A',
+    'ASN': 'P',
+    'GLN': 'P',
+    'PRO': 'A',
+    'SER': 'P',
+    'ARG': 'C',
+    'THR': 'P',
+    'TRP': 'A',
+    'VAL': 'A',
+    'TYR': 'A',
+}
+
+# Alternative three-letter code -> character class table, using the same
+# one-letter class codes as aa_character_ic.
+# It differs from aa_character_ic only for CYS, HIS, TRP and TYR, which are
+# all classified as 'P' here.
+# NOTE(review): the name suggests this follows the PROTORP classification -
+# confirm against the original publication/tool.
+aa_character_protorp = {
+    'ALA': 'A',
+    'CYS': 'P',
+    'GLU': 'C',
+    'ASP': 'C',
+    'GLY': 'A',
+    'PHE': 'A',
+    'ILE': 'A',
+    'HIS': 'P',
+    'LYS': 'C',
+    'MET': 'A',
+    'LEU': 'A',
+    'ASN': 'P',
+    'GLN': 'P',
+    'PRO': 'A',
+    'SER': 'P',
+    'ARG': 'C',
+    'THR': 'P',
+    'TRP': 'P',
+    'VAL': 'A',
+    'TYR': 'P',
+}
+
+# Scaling factors for relative ASA
+# Calculated using extended ALA-X-ALA peptides
+# Taken from NACCESS
+#
+# Layout: rel_asa[<category>][<three-letter residue code>] -> float, where
+# <category> is one of 'total', 'bb' or 'sc'. For every residue the 'total'
+# value equals 'bb' + 'sc' up to rounding (SER differs by 0.01).
+# NOTE(review): 'bb' / 'sc' presumably stand for backbone / side chain and the
+# values are presumably in square Angstroms - confirm against NACCESS.
+rel_asa = {
+    'total':
+        {
+            'ALA': 107.95,
+            'CYS': 134.28,
+            'ASP': 140.39,
+            'GLU': 172.25,
+            'PHE': 199.48,
+            'GLY': 80.10,
+            'HIS': 182.88,
+            'ILE': 175.12,
+            'LYS': 200.81,
+            'LEU': 178.63,
+            'MET': 194.15,
+            'ASN': 143.94,
+            'PRO': 136.13,
+            'GLN': 178.50,
+            'ARG': 238.76,
+            'SER': 116.50,
+            'THR': 139.27,
+            'VAL': 151.44,
+            'TRP': 249.36,
+            'TYR': 212.76,
+        },
+    'bb':
+        {
+            'ALA': 38.54,
+            'CYS': 37.53,
+            'ASP': 37.70,
+            'GLU': 37.51,
+            'PHE': 35.37,
+            'GLY': 47.77,
+            'HIS': 35.80,
+            'ILE': 37.16,
+            'LYS': 37.51,
+            'LEU': 37.51,
+            'MET': 37.51,
+            'ASN': 37.70,
+            'PRO': 16.23,
+            'GLN': 37.51,
+            'ARG': 37.51,
+            'SER': 38.40,
+            'THR': 37.57,
+            'VAL': 37.16,
+            'TRP': 38.10,
+            'TYR': 35.38,
+        },
+    'sc':
+        {
+            'ALA': 69.41,
+            'CYS': 96.75,
+            'ASP': 102.69,
+            'GLU': 134.74,
+            'PHE': 164.11,
+            'GLY': 32.33,
+            'HIS': 147.08,
+            'ILE': 137.96,
+            'LYS': 163.30,
+            'LEU': 141.12,
+            'MET': 156.64,
+            'ASN': 106.24,
+            'PRO': 119.90,
+            'GLN': 140.99,
+            'ARG': 201.25,
+            'SER': 78.11,
+            'THR': 101.70,
+            'VAL': 114.28,
+            'TRP': 211.26,
+            'TYR': 177.38,
+        }
+}
diff --git a/src/prodigy_cryst/modules/parsers.py b/src/prodigy_cryst/modules/parsers.py
@@ -0,0 +1,120 @@
+#!/usr/bin/env python
+#
+# This code is part of the interface classifier tool distribution
+# and governed by its license.  Please see the LICENSE file that should
+# have been included as part of this package.
+#
+
+"""
+Functions to read PDB/mmCIF files
+"""
+
+from __future__ import division, print_function
+
+import logging
+import os
+
+try:
+    from Bio.PDB import MMCIFParser, PDBParser
+    from Bio.PDB.Polypeptide import PPBuilder, is_aa
+except ImportError as e:
+    logging.error("[!] The interface classifier tool requires Biopython")
+    raise ImportError(e)
+
+
+def parse_structure(path):
+    """
+    Parses a structure using Biopython's PDB/mmCIF Parser and cleans it
+    up for the interface calculations.
+
+    The parser is chosen from the file extension ('pdb', 'ent' or 'cif',
+    matched case-insensitively). After parsing, the structure is modified
+    in place:
+
+    - only the first model is kept;
+    - disordered atoms are replaced by their selected alternate location;
+    - HETATM and water residues are removed;
+    - hydrogen atoms are removed.
+
+    Chain breaks (gaps) are detected and reported as warnings only.
+
+    Args:
+        path: path to the structure file.
+
+    Returns:
+        A tuple (structure, n_chains, n_res) with the cleaned Biopython
+        structure, the number of distinct chain ids and the number of
+        residues left after removing HETATMs and waters.
+
+    Raises:
+        IOError: if the file extension is not supported.
+        ValueError: if a residue that is neither HETATM nor water is not
+            a standard amino acid.
+        Exception: any error raised by the Biopython parser is logged and
+            re-raised with its original type.
+    """
+    log = logging.getLogger("Prodigy")
+    log.info("[+] Reading structure file: {0}".format(path))
+    fname = os.path.basename(path)
+    sname = ".".join(fname.split(".")[:-1])
+    s_ext = fname.split(".")[-1]
+
+    # Match the extension case-insensitively so e.g. 'model.PDB' is accepted;
+    # the error message still reports the extension exactly as given.
+    ext_key = s_ext.lower()
+    if ext_key in ("pdb", "ent"):
+        sparser = PDBParser(QUIET=1)
+    elif ext_key == "cif":
+        sparser = MMCIFParser()
+    else:
+        raise IOError(
+            "[!] Structure format '{0}' is not supported. Use '.pdb' or '.cif'.".format(
+                s_ext
+            )
+        )
+
+    try:
+        s = sparser.get_structure(sname, path)
+    except Exception:
+        log.error("[!] Structure '{0}' could not be parsed".format(sname))
+        # Bare raise keeps the parser's exception type and traceback intact
+        raise
+
+    # Keep first model only
+    if len(s) > 1:
+        log.warning(
+            "[!] Structure contains more than one model. Only the first one will be kept"
+        )
+        first_model_id = s.child_list[0].id
+        # Iterate over a copy: detaching mutates s.child_list
+        for model in s.child_list[:]:
+            if model.id != first_model_id:
+                s.detach_child(model.id)
+
+    # Double occupancy check: swap each disordered atom for its selected
+    # alternate location, flagged as an ordinary atom
+    for atom in list(s.get_atoms()):
+        if atom.is_disordered():
+            residue = atom.parent
+            sel_at = atom.selected_child
+            sel_at.altloc = " "
+            sel_at.disordered_flag = 0
+            residue.detach_child(atom.id)
+            residue.add(sel_at)
+
+    # Remove HETATMs and solvent
+    def _is_hetero_or_water(res):
+        # First field of the residue id is the hetero flag; it starts with
+        # "W" for waters and "H" for other HETATM records
+        return res.id[0][0] in ("W", "H")
+
+    for res in list(s.get_residues()):
+        if _is_hetero_or_water(res):
+            res.parent.detach_child(res.id)
+        elif not is_aa(res, standard=True):
+            raise ValueError(
+                "Unsupported non-standard amino acid found: {0}".format(res.resname)
+            )
+    n_res = len(list(s.get_residues()))
+
+    # Remove Hydrogens
+    for atom in list(s.get_atoms()):
+        if atom.element == "H":
+            # Detach by id, the key used above for disordered atoms as well
+            atom.parent.detach_child(atom.id)
+
+    # Detect gaps and compare with no. of chains: with no chain breaks the
+    # builder yields exactly one polypeptide per chain
+    peptides = PPBuilder().build_peptides(s)
+    n_chains = len({c.id for c in s.get_chains()})
+
+    if len(peptides) != n_chains:
+        # Gaps are only reported; no exception is raised
+        log.warning("[!] Structure contains gaps:")
+        for i_pp, pp in enumerate(peptides):
+            log.warning(
+                "\t{1.parent.id} {1.resname}{1.id[1]} < Fragment {0} > {2.parent.id} {2.resname}{2.id[1]}".format(
+                    i_pp, pp[0], pp[-1]
+                )
+            )
+
+    return (s, n_chains, n_res)
diff --git a/src/prodigy_cryst/modules/utils.py b/src/prodigy_cryst/modules/utils.py
@@ -0,0 +1,25 @@
+#!/usr/bin/env python
+#
+# This code is part of the interface classifier tool distribution
+# and governed by its license.  Please see the LICENSE file that should
+# have been included as part of this package.
+#
+
+"""
+Assorted utility functions.
+"""
+
+from __future__ import division, print_function
+
+import os
+
+
+def _check_path(path):
+    """
+    Resolves `path` to an absolute path and makes sure it points to an
+    existing regular file.
+
+    Returns the absolute path; raises IOError (quoting the path as it was
+    given) when no such file exists.
+    """
+
+    resolved = os.path.abspath(path)
+    if os.path.isfile(resolved):
+        return resolved
+    raise IOError('Could not read file: {0}'.format(path))
diff --git a/tests/test_aa_properties.py b/tests/test_aa_properties.py
@@ -1,6 +1,6 @@
 import pytest
 
-from prodigy_cryst.lib.aa_properties import (
+from prodigy_cryst.modules.aa_properties import (
     aa_character_ic,
     aa_character_protorp,
     rel_asa,

diff --git a/tests/test_interface_classifier.py b/tests/test_interface_classifier.py
@@ -9,7 +9,7 @@
     analyse_contacts,
     calculate_ic,
 )
-from prodigy_cryst.lib.parsers import parse_structure
+from prodigy_cryst.modules.parsers import parse_structure
 from tests import DATA_FOLDER
 
 

diff --git a/tests/test_parsers.py b/tests/test_parsers.py
@@ -3,7 +3,7 @@
 import pytest
 from Bio.PDB.Structure import Structure
 
-from prodigy_cryst.lib.parsers import parse_structure
+from prodigy_cryst.modules.parsers import parse_structure
 
 from . import DATA_FOLDER
 

diff --git a/tests/test_utils.py b/tests/test_utils.py
@@ -1,6 +1,6 @@
 from tempfile import NamedTemporaryFile
 
-from prodigy_cryst.lib.utils import _check_path
+from prodigy_cryst.modules.utils import _check_path
 
 
 def test__check_path():