diff --git a/.conda/arm64/meta.yaml b/.conda/arm64/meta.yaml index 3ccae7e..500f569 100644 --- a/.conda/arm64/meta.yaml +++ b/.conda/arm64/meta.yaml @@ -25,12 +25,12 @@ requirements: - wheel - setuptools run: - - numpy>=1.17 + - numpy>=1.21,<1.25 - editdistance>=0.6.0 - Biopython==1.79 - click>=8.1.3 - biopandas>=0.4.1 - - boto3==1.24.59 + - boto3 - p-tqdm - networkx==2.8.8 - einops @@ -41,8 +41,10 @@ requirements: - awscli==1.25.60 - bs4 - rcsbsearch - - tmtools - - fair-esm + - pyyaml + - rdkit + - pypdb + - joblib about: home: https://github.com/adaptyvbio/ProteinFlow diff --git a/.conda/default/meta.yaml b/.conda/default/meta.yaml index ec132c5..cebb308 100644 --- a/.conda/default/meta.yaml +++ b/.conda/default/meta.yaml @@ -25,26 +25,28 @@ requirements: - wheel - setuptools run: - - numpy>=1.17 + - numpy>=1.21,<1.25 - editdistance>=0.6.0 - Biopython==1.79 - click>=8.1.3 - biopandas>=0.4.1 - - boto3==1.24.59 + - boto3 - p-tqdm - networkx==2.8.8 - einops - pandas - pytorch>=1.10.0 - biotite==0.35.0 - - aiobotocore==2.4.2 - - awscli==1.25.60 + - aiobotocore + - awscli - bs4 - rcsbsearch - - tmtools - - fair-esm - mmseqs2 - foldseek + - pyyaml + - rdkit + - pypdb + - joblib about: home: https://github.com/adaptyvbio/ProteinFlow diff --git a/channeldata.json b/channeldata.json new file mode 100644 index 0000000..d4798e0 --- /dev/null +++ b/channeldata.json @@ -0,0 +1,39 @@ +{ + "channeldata_version": 1, + "packages": { + "proteinflow": { + "activate.d": false, + "binary_prefix": false, + "deactivate.d": false, + "description": null, + "dev_url": "https://github.com/adaptyvbio/ProteinFlow", + "doc_source_url": null, + "doc_url": "https://adaptyvbio.github.io/ProteinFlow/", + "home": "https://github.com/adaptyvbio/ProteinFlow", + "icon_hash": null, + "icon_url": null, + "identifiers": null, + "keywords": null, + "license": "BSD-3-Clause", + "post_link": false, + "pre_link": false, + "pre_unlink": false, + "recipe_origin": null, + "run_exports": {},
"source_git_url": "https://github.com/adaptyvbio/ProteinFlow.git", + "source_url": null, + "subdirs": [ + "linux-64" + ], + "summary": "Versatile pipeline for processing protein structure data for deep learning applications.", + "tags": null, + "text_prefix": true, + "timestamp": 1700064405, + "version": "2.4.1" + } + }, + "subdirs": [ + "linux-64", + "noarch" + ] +} diff --git a/index.html b/index.html new file mode 100644 index 0000000..3e4df35 --- /dev/null +++ b/index.html @@ -0,0 +1,90 @@ + + + proteinflow + + + +

proteinflow

+

RSS Feed   channeldata.json

+linux-64   noarch    + + + + + + + + + + + + + + + +
Package | Latest Version | Doc | Dev | License | linux-64 | noarch | Summary
proteinflow | 2.4.1 | doc | dev | BSD-3-Clause | X | | Versatile pipeline for processing protein structure data for deep...
+
Updated: 2023-11-15 16:08:06 +0000 - Files: 1
+ + \ No newline at end of file diff --git a/install_optional.sh b/install_optional.sh index 5b58e49..28b0e65 100644 --- a/install_optional.sh +++ b/install_optional.sh @@ -8,7 +8,5 @@ python -m pip install "fair-esm[esmfold]" python -m pip install 'dllogger @ git+https://github.com/NVIDIA/dllogger.git' python -m pip install 'openfold @ git+https://github.com/aqlaboratory/openfold.git@4b41059694619831a7db195b7e0988fc4ff3a307' -python -m pip install ablang igfold immunebuilder - python -m pip install -e . -python -m pip install ipykernel \ No newline at end of file +# python -m pip install ipykernel \ No newline at end of file diff --git a/proteinflow/__init__.py b/proteinflow/__init__.py index 9344021..abf911b 100644 --- a/proteinflow/__init__.py +++ b/proteinflow/__init__.py @@ -171,6 +171,7 @@ "split": False, "cli": False, "ligand": False, + "extra": False, } __docformat__ = "numpy" diff --git a/proteinflow/data/__init__.py b/proteinflow/data/__init__.py index 5a7e837..680fa0a 100644 --- a/proteinflow/data/__init__.py +++ b/proteinflow/data/__init__.py @@ -18,18 +18,31 @@ from collections import defaultdict import Bio.PDB -import MDAnalysis as mda import numpy as np import pandas as pd -import py3Dmol from Bio import pairwise2 from biopandas.pdb import PandasPdb -from methodtools import lru_cache from torch import Tensor, from_numpy +try: + import MDAnalysis as mda +except ImportError: + pass +try: + from methodtools import lru_cache +except ImportError: + + def lru_cache(): + """Make a dummy decorator.""" + + def wrapper(func): + return func + + return wrapper + + from proteinflow.constants import ( _PMAP, - ACCENT_COLOR, ALPHABET, ALPHABET_REVERSE, ATOM_MASKS, @@ -52,6 +65,7 @@ _retrieve_chain_names, ) from proteinflow.download import download_fasta, download_pdb +from proteinflow.extra import _get_view, requires_extra from proteinflow.ligand import _get_ligands from proteinflow.metrics import ( ablang_pll, @@ -1979,6 +1993,7 @@ def align_structure(self, 
reference_pdb_path, save_pdb_path, chain_ids=None):
         io.save(save_pdb_path)
 
     @staticmethod
+    @requires_extra("MDAnalysis")
     def combine_multiple_frames(files, output_path="combined.pdb"):
         """Combine multiple PDB files into a single multiframe PDB file.
 
@@ -2570,7 +2585,7 @@ def visualize(
             accent_color=accent_color,
         )
         vis_string = "".join([str(x) for x in outstr])
-        view = py3Dmol.view(width=canvas_size[0], height=canvas_size[1])
+        view = _get_view(canvas_size)
         view.addModelsAsFrames(vis_string)
         for i, at in enumerate(outstr):
             view.setStyle(
diff --git a/proteinflow/extra.py b/proteinflow/extra.py
new file mode 100644
index 0000000..c894c8b
--- /dev/null
+++ b/proteinflow/extra.py
@@ -0,0 +1,59 @@
+"""Handling optional dependencies."""
+
+try:
+    import py3Dmol
+except ImportError:
+    pass
+
+import functools
+import sys
+
+
+def requires_extra(module_name, install_name=None):
+    """Generate a decorator to require an optional dependency for the given function.
+
+    The check runs each time the decorated function is called, not when the
+    decorator is applied, so decorating a function never raises by itself.
+    A module counts as available when its name is present in `sys.modules`.
+
+    Parameters
+    ----------
+    module_name : str
+        Name of the module to check for
+    install_name : str, optional
+        Name of the module to install if it is not found. If not specified, `module_name` is used
+
+    Returns
+    -------
+    decorator : callable
+        A decorator that wraps a function with the availability check
+
+    Raises
+    ------
+    ImportError
+        Raised by the decorated function when `module_name` is not in `sys.modules`
+
+    """
+    if install_name is None:
+        install_name = module_name
+
+    def decorator(func):
+        # Preserve the wrapped function's name and docstring for docs and introspection
+        @functools.wraps(func)
+        def wrapper(*args, **kwargs):
+            if module_name not in sys.modules:
+                raise ImportError(
+                    f"{install_name} must be installed to use this function. "
+                    f"Install it with `pip install {install_name}` or together with most other optional dependencies with `pip install proteinflow[processing]`."
+                )
+            return func(*args, **kwargs)
+
+        return wrapper
+
+    return decorator
+
+
+@requires_extra("py3Dmol")
+def _get_view(canvas_size):
+    """Create a `py3Dmol` view; `canvas_size` is indexed as (width, height)."""
+    return py3Dmol.view(width=canvas_size[0], height=canvas_size[1])
diff --git a/proteinflow/metrics/__init__.py b/proteinflow/metrics/__init__.py
index fc02841..4b83e09 100644
--- a/proteinflow/metrics/__init__.py
+++ b/proteinflow/metrics/__init__.py
@@ -2,16 +2,26 @@
 
 import os
 
-import Bio.PDB
 import biotite.structure.io as bsio
-import blosum as bl
-import esm
 import numpy as np
 import torch
-from tmtools import tm_align
 from torch.nn import functional as F
 from tqdm import tqdm
 
+from proteinflow.extra import requires_extra
+
+try:
+    import blosum as bl
+except ImportError:
+    pass
+try:
+    import esm
+except ImportError:
+    pass
+try:
+    from tmtools import tm_align
+except ImportError:
+    pass
 try:
     import ablang
 except ImportError:
@@ -26,6 +36,7 @@
     pass
 
 
+@requires_extra("blosum")
 def blosum62_score(seq_before, seq_after):
     """Calculate the BLOSUM62 score between two sequences.
 
@@ -78,6 +89,7 @@ def long_repeat_num(seq, thr=5):
     return count
 
 
+@requires_extra("esm", install_name="fair-esm")
 def _get_esm_model(esm_model_name):
     """Get ESM model, batch converter and tok_to_idx dictionary."""
     model_dict = {
@@ -96,6 +108,7 @@ def _get_esm_model(esm_model_name):
     return esm_model, batch_converter, tok_to_idx
 
 
+@requires_extra("ablang")
 def ablang_pll(
     sequence,
     predict_mask,
@@ -149,6 +162,7 @@ def ablang_pll(
     return pll
 
 
+@requires_extra("esm", install_name="fair-esm")
 def esm_pll(
     chain_sequences,
     predict_masks,
@@ -229,6 +243,7 @@ def ca_rmsd(coordinates1, coordinates2):
     return np.sqrt(((coordinates1 - coordinates2) ** 2).sum(axis=-1).mean())
 
 
+@requires_extra("tmtools")
 def tm_score(coordinates1, coordinates2, sequence1, sequence2):
     """Calculate TM-score between two structures.
 
@@ -253,6 +268,7 @@ def tm_score(coordinates1, coordinates2, sequence1, sequence2): return (res.tm_norm_chain1 + res.tm_norm_chain2) / 2 +@requires_extra("esm", install_name="fair-esm[esmfold]") def esmfold_generate(sequences, filepaths=None): """Generate PDB structures using ESMFold. @@ -286,6 +302,7 @@ def esmfold_generate(sequences, filepaths=None): f.write(output) +@requires_extra("igfold") def igfold_generate(sequence_dicts, filepaths=None, use_openmm=False): """Generate PDB structures using IgFold. @@ -320,6 +337,7 @@ def igfold_generate(sequence_dicts, filepaths=None, use_openmm=False): ) +@requires_extra("ImmuneBuilder") def immunebuilder_generate(sequence_dicts, filepaths=None, protein_type="antibody"): """Generate PDB structures using ImmuneBuilder. diff --git a/proteinflow/visualize.py b/proteinflow/visualize.py index 07083f1..e05c5f5 100644 --- a/proteinflow/visualize.py +++ b/proteinflow/visualize.py @@ -3,9 +3,9 @@ import string import numpy as np -import py3Dmol from proteinflow.data import PDBEntry, ProteinEntry +from proteinflow.extra import _get_view def show_animation_from_pdb( @@ -55,7 +55,7 @@ def show_animation_from_pdb( models += "".join([str(x) for x in atoms]) models += "ENDMDL\n" - view = py3Dmol.view(width=canvas_size[0], height=canvas_size[1]) + view = _get_view(canvas_size) view.addModelsAsFrames(models) for i, at in enumerate(atoms): @@ -116,7 +116,7 @@ def show_animation_from_pickle( models += "".join([str(x) for x in atoms]) models += "ENDMDL\n" - view = py3Dmol.view(width=canvas_size[0], height=canvas_size[1]) + view = _get_view(canvas_size) view.addModelsAsFrames(models) for i, at in enumerate(atoms): diff --git a/pyproject.toml b/pyproject.toml index ac764df..dbf15c2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,23 +30,29 @@ dependencies = [ "bs4>=0.0.1", "pyyaml>=6", "rcsbsearch", - "blosum>=2.0", "pre-commit", "rdkit", "pypdb", - "prody", "joblib", - "methodtools", - "py3Dmol", - "tmtools", - "fair-esm", - 
"MDAnalysis", ] keywords = ["bioinformatics", "dataset", "protein", "PDB", "deep learning", "antibody"] [project.scripts] proteinflow = "proteinflow.cli:cli" +[project.optional-dependencies] +processing = [ + "py3Dmol", + "methodtools", + "tmtools", + "fair-esm", + "MDAnalysis", + "ablang", + "igfold", + "immunebuilder", + "blosum>=2.0", +] + [tool.setuptools.packages] find = {}