Skip to content

Commit

Permalink
Merge pull request #112 from adaptyvbio/foldseek
Browse files (browse the repository at this point in the history)
Foldseek
  • Loading branch information
elkoz authored Sep 13, 2023
2 parents 4036af3 + b88c84c commit 64eff0f
Show file tree
Hide file tree
Showing 3 changed files with 130 additions and 53 deletions.
4 changes: 4 additions & 0 deletions proteinflow/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -444,6 +444,7 @@ def split_data(
random_seed=42,
exclude_chains_without_ligands=False,
tanimoto_clustering=False,
foldseek=False,
):
"""Split `proteinflow` entry files into training, test and validation.
Expand Down Expand Up @@ -496,6 +497,8 @@ def split_data(
if `True`, exclude biounits that don't contain ligands
tanimoto_clustering: bool, default False
cluster chains based on the tanimoto similarity of their ligands
foldseek: bool, default False
if `True`, use FoldSeek to cluster chains based on their structure similarity
Returns
-------
Expand Down Expand Up @@ -545,6 +548,7 @@ def split_data(
out_split_dict_folder=out_split_dict_folder,
min_seq_id=min_seq_id,
tanimoto_clustering=tanimoto_clustering,
foldseek=foldseek,
)
shutil.rmtree(temp_folder)

Expand Down
5 changes: 5 additions & 0 deletions proteinflow/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,11 @@ def generate(**kwargs):
is_flag=True,
help="Whether to use Tanimoto Clustering instead of MMSeqs2. Only works if the dataset contains ligands",
)
@click.option(
"--foldseek",
is_flag=True,
help="Whether to use FoldSeek to cluster the dataset",
)
@click.option(
"--random_seed",
default=42,
Expand Down
Loading

0 comments on commit 64eff0f

Please sign in to comment.