diff --git a/.conda/arm64/meta.yaml b/.conda/arm64/meta.yaml
index 3ccae7e..500f569 100644
--- a/.conda/arm64/meta.yaml
+++ b/.conda/arm64/meta.yaml
@@ -25,12 +25,12 @@ requirements:
- wheel
- setuptools
run:
- - numpy>=1.17
+ - numpy>=1.21,<1.25
- editdistance>=0.6.0
- Biopython==1.79
- click>=8.1.3
- biopandas>=0.4.1
- - boto3==1.24.59
+ - boto3
- p-tqdm
- networkx==2.8.8
- einops
@@ -41,8 +41,10 @@ requirements:
- awscli==1.25.60
- bs4
- rcsbsearch
- - tmtools
- - fair-esm
+ - pyyaml
+ - rdkit
+ - pypdb
+ - joblib
about:
home: https://github.com/adaptyvbio/ProteinFlow
diff --git a/.conda/default/meta.yaml b/.conda/default/meta.yaml
index ec132c5..cebb308 100644
--- a/.conda/default/meta.yaml
+++ b/.conda/default/meta.yaml
@@ -25,26 +25,28 @@ requirements:
- wheel
- setuptools
run:
- - numpy>=1.17
+ - numpy>=1.21,<1.25
- editdistance>=0.6.0
- Biopython==1.79
- click>=8.1.3
- biopandas>=0.4.1
- - boto3==1.24.59
+ - boto3
- p-tqdm
- networkx==2.8.8
- einops
- pandas
- pytorch>=1.10.0
- biotite==0.35.0
- - aiobotocore==2.4.2
- - awscli==1.25.60
+ - aiobotocore
+ - awscli
- bs4
- rcsbsearch
- - tmtools
- - fair-esm
- mmseqs2
- foldseek
+ - pyyaml
+ - rdkit
+ - pypdb
+ - joblib
about:
home: https://github.com/adaptyvbio/ProteinFlow
diff --git a/channeldata.json b/channeldata.json
new file mode 100644
index 0000000..d4798e0
--- /dev/null
+++ b/channeldata.json
@@ -0,0 +1,39 @@
+{
+ "channeldata_version": 1,
+ "packages": {
+ "proteinflow": {
+ "activate.d": false,
+ "binary_prefix": false,
+ "deactivate.d": false,
+ "description": null,
+ "dev_url": "https://github.com/adaptyvbio/ProteinFlow",
+ "doc_source_url": null,
+ "doc_url": "https://adaptyvbio.github.io/ProteinFlow/",
+ "home": "https://github.com/adaptyvbio/ProteinFlow",
+ "icon_hash": null,
+ "icon_url": null,
+ "identifiers": null,
+ "keywords": null,
+ "license": "BSD-3-Clause",
+ "post_link": false,
+ "pre_link": false,
+ "pre_unlink": false,
+ "recipe_origin": null,
+ "run_exports": {},
+ "source_git_url": "https://github.com/adaptyvbio/ProteinFlow.git",
+ "source_url": null,
+ "subdirs": [
+ "linux-64"
+ ],
+ "summary": "Versatile pipeline for processing protein structure data for deep learning applications.",
+ "tags": null,
+ "text_prefix": true,
+ "timestamp": 1700064405,
+ "version": "2.4.1"
+ }
+ },
+ "subdirs": [
+ "linux-64",
+ "noarch"
+ ]
+}
diff --git a/index.html b/index.html
new file mode 100644
index 0000000..3e4df35
--- /dev/null
+++ b/index.html
@@ -0,0 +1,90 @@
+
+
+ proteinflow
+
+
+
+ proteinflow
+
+linux-64 noarch
+
+ Package |
+ Latest Version |
+ Doc |
+ Dev |
+ License |
+linux-64 | noarch | Summary |
+
+
+ proteinflow |
+ 2.4.1 |
+ doc |
+ dev |
+ BSD-3-Clause |
+X | | Versatile pipeline for processing protein structure data for deep... |
+
+ Updated: 2023-11-15 16:08:06 +0000 - Files: 1
+
+
\ No newline at end of file
diff --git a/install_optional.sh b/install_optional.sh
index 5b58e49..28b0e65 100644
--- a/install_optional.sh
+++ b/install_optional.sh
@@ -8,7 +8,5 @@ python -m pip install "fair-esm[esmfold]"
python -m pip install 'dllogger @ git+https://github.com/NVIDIA/dllogger.git'
python -m pip install 'openfold @ git+https://github.com/aqlaboratory/openfold.git@4b41059694619831a7db195b7e0988fc4ff3a307'
-python -m pip install ablang igfold immunebuilder
-
python -m pip install -e .
-python -m pip install ipykernel
\ No newline at end of file
+# python -m pip install ipykernel
\ No newline at end of file
diff --git a/proteinflow/__init__.py b/proteinflow/__init__.py
index 9344021..abf911b 100644
--- a/proteinflow/__init__.py
+++ b/proteinflow/__init__.py
@@ -171,6 +171,7 @@
"split": False,
"cli": False,
"ligand": False,
+ "extra": False,
}
__docformat__ = "numpy"
diff --git a/proteinflow/data/__init__.py b/proteinflow/data/__init__.py
index 5a7e837..680fa0a 100644
--- a/proteinflow/data/__init__.py
+++ b/proteinflow/data/__init__.py
@@ -18,18 +18,31 @@
from collections import defaultdict
import Bio.PDB
-import MDAnalysis as mda
import numpy as np
import pandas as pd
-import py3Dmol
from Bio import pairwise2
from biopandas.pdb import PandasPdb
-from methodtools import lru_cache
from torch import Tensor, from_numpy
+try:
+ import MDAnalysis as mda
+except ImportError:
+ pass
+try:
+ from methodtools import lru_cache
+except ImportError:
+
+    def lru_cache(*args, **kwargs):
+        """Make a dummy decorator factory that accepts and ignores any cache options (e.g. `maxsize`)."""
+
+        def wrapper(func):
+            return func
+
+        return wrapper
+
+
from proteinflow.constants import (
_PMAP,
- ACCENT_COLOR,
ALPHABET,
ALPHABET_REVERSE,
ATOM_MASKS,
@@ -52,6 +65,7 @@
_retrieve_chain_names,
)
from proteinflow.download import download_fasta, download_pdb
+from proteinflow.extra import _get_view, requires_extra
from proteinflow.ligand import _get_ligands
from proteinflow.metrics import (
ablang_pll,
@@ -1979,6 +1993,7 @@ def align_structure(self, reference_pdb_path, save_pdb_path, chain_ids=None):
io.save(save_pdb_path)
@staticmethod
+ @requires_extra("MDAnalysis")
def combine_multiple_frames(files, output_path="combined.pdb"):
"""Combine multiple PDB files into a single multiframe PDB file.
@@ -2570,7 +2585,7 @@ def visualize(
accent_color=accent_color,
)
vis_string = "".join([str(x) for x in outstr])
- view = py3Dmol.view(width=canvas_size[0], height=canvas_size[1])
+ view = _get_view(canvas_size)
view.addModelsAsFrames(vis_string)
for i, at in enumerate(outstr):
view.setStyle(
diff --git a/proteinflow/extra.py b/proteinflow/extra.py
new file mode 100644
index 0000000..c894c8b
--- /dev/null
+++ b/proteinflow/extra.py
@@ -0,0 +1,55 @@
+"""Handling optional dependencies."""
+
+try:
+    import py3Dmol
+except ImportError:
+    pass
+
+import functools
+import sys
+
+
+def requires_extra(module_name, install_name=None):
+    """Generate a decorator to require an optional dependency for the given function.
+
+    The check happens when the decorated function is called, not when it is
+    defined, and only looks at `sys.modules`, so the caller is expected to have
+    attempted the import beforehand (e.g. in a `try` / `except ImportError` block).
+
+    Parameters
+    ----------
+    module_name : str
+        Name of the module to check for
+    install_name : str, optional
+        Name of the module to install if it is not found. If not specified, `module_name` is used
+
+    Returns
+    -------
+    decorator : callable
+        A decorator that wraps a function so that it raises an `ImportError`
+        when `module_name` has not been imported
+
+    """
+    if install_name is None:
+        install_name = module_name
+
+    def decorator(func):
+        # preserve the name and docstring of the decorated function for documentation and debugging
+        @functools.wraps(func)
+        def wrapper(*args, **kwargs):
+            if module_name not in sys.modules:
+                raise ImportError(
+                    f"{install_name} must be installed to use this function. "
+                    f"Install it with `pip install {install_name}` or together with most other optional dependencies with `pip install proteinflow[processing]`."
+                )
+            return func(*args, **kwargs)
+
+        return wrapper
+
+    return decorator
+
+
+@requires_extra("py3Dmol")
+def _get_view(canvas_size):
+    """Create a `py3Dmol` view with width `canvas_size[0]` and height `canvas_size[1]`."""
+    return py3Dmol.view(width=canvas_size[0], height=canvas_size[1])
diff --git a/proteinflow/metrics/__init__.py b/proteinflow/metrics/__init__.py
index fc02841..4b83e09 100644
--- a/proteinflow/metrics/__init__.py
+++ b/proteinflow/metrics/__init__.py
@@ -2,16 +2,26 @@
import os
-import Bio.PDB
import biotite.structure.io as bsio
-import blosum as bl
-import esm
import numpy as np
import torch
-from tmtools import tm_align
from torch.nn import functional as F
from tqdm import tqdm
+from proteinflow.extra import requires_extra
+
+try:
+ import blosum as bl
+except ImportError:
+ pass
+try:
+ import esm
+except ImportError:
+ pass
+try:
+ from tmtools import tm_align
+except ImportError:
+ pass
try:
import ablang
except ImportError:
@@ -26,6 +36,7 @@
pass
+@requires_extra("blosum")
def blosum62_score(seq_before, seq_after):
"""Calculate the BLOSUM62 score between two sequences.
@@ -78,6 +89,7 @@ def long_repeat_num(seq, thr=5):
return count
+@requires_extra("esm", install_name="fair-esm")
def _get_esm_model(esm_model_name):
"""Get ESM model, batch converter and tok_to_idx dictionary."""
model_dict = {
@@ -96,6 +108,7 @@ def _get_esm_model(esm_model_name):
return esm_model, batch_converter, tok_to_idx
+@requires_extra("ablang")
def ablang_pll(
sequence,
predict_mask,
@@ -149,6 +162,7 @@ def ablang_pll(
return pll
+@requires_extra("esm", install_name="fair-esm")
def esm_pll(
chain_sequences,
predict_masks,
@@ -229,6 +243,7 @@ def ca_rmsd(coordinates1, coordinates2):
return np.sqrt(((coordinates1 - coordinates2) ** 2).sum(axis=-1).mean())
+@requires_extra("tmtools")
def tm_score(coordinates1, coordinates2, sequence1, sequence2):
"""Calculate TM-score between two structures.
@@ -253,6 +268,7 @@ def tm_score(coordinates1, coordinates2, sequence1, sequence2):
return (res.tm_norm_chain1 + res.tm_norm_chain2) / 2
+@requires_extra("esm", install_name="fair-esm[esmfold]")
def esmfold_generate(sequences, filepaths=None):
"""Generate PDB structures using ESMFold.
@@ -286,6 +302,7 @@ def esmfold_generate(sequences, filepaths=None):
f.write(output)
+@requires_extra("igfold")
def igfold_generate(sequence_dicts, filepaths=None, use_openmm=False):
"""Generate PDB structures using IgFold.
@@ -320,6 +337,7 @@ def igfold_generate(sequence_dicts, filepaths=None, use_openmm=False):
)
+@requires_extra("ImmuneBuilder")
def immunebuilder_generate(sequence_dicts, filepaths=None, protein_type="antibody"):
"""Generate PDB structures using ImmuneBuilder.
diff --git a/proteinflow/visualize.py b/proteinflow/visualize.py
index 07083f1..e05c5f5 100644
--- a/proteinflow/visualize.py
+++ b/proteinflow/visualize.py
@@ -3,9 +3,9 @@
import string
import numpy as np
-import py3Dmol
from proteinflow.data import PDBEntry, ProteinEntry
+from proteinflow.extra import _get_view
def show_animation_from_pdb(
@@ -55,7 +55,7 @@ def show_animation_from_pdb(
models += "".join([str(x) for x in atoms])
models += "ENDMDL\n"
- view = py3Dmol.view(width=canvas_size[0], height=canvas_size[1])
+ view = _get_view(canvas_size)
view.addModelsAsFrames(models)
for i, at in enumerate(atoms):
@@ -116,7 +116,7 @@ def show_animation_from_pickle(
models += "".join([str(x) for x in atoms])
models += "ENDMDL\n"
- view = py3Dmol.view(width=canvas_size[0], height=canvas_size[1])
+ view = _get_view(canvas_size)
view.addModelsAsFrames(models)
for i, at in enumerate(atoms):
diff --git a/pyproject.toml b/pyproject.toml
index ac764df..dbf15c2 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -30,23 +30,29 @@ dependencies = [
"bs4>=0.0.1",
"pyyaml>=6",
"rcsbsearch",
- "blosum>=2.0",
"pre-commit",
"rdkit",
"pypdb",
- "prody",
"joblib",
- "methodtools",
- "py3Dmol",
- "tmtools",
- "fair-esm",
- "MDAnalysis",
]
keywords = ["bioinformatics", "dataset", "protein", "PDB", "deep learning", "antibody"]
[project.scripts]
proteinflow = "proteinflow.cli:cli"
+[project.optional-dependencies]
+processing = [
+ "py3Dmol",
+ "methodtools",
+ "tmtools",
+ "fair-esm",
+ "MDAnalysis",
+ "ablang",
+ "igfold",
+ "immunebuilder",
+ "blosum>=2.0",
+]
+
[tool.setuptools.packages]
find = {}