diff --git a/training/requirements.txt b/training/requirements.txt index 015e8dc..669771f 100644 --- a/training/requirements.txt +++ b/training/requirements.txt @@ -1,10 +1,9 @@ pandas pillow -keras +torch scikit-learn matplotlib numpy -pandas pyyaml kaleido faiss-cpu diff --git a/training/tests/test_create_embedding.py b/training/tests/test_create_embedding.py index 66ecead..934eaa6 100644 --- a/training/tests/test_create_embedding.py +++ b/training/tests/test_create_embedding.py @@ -2,12 +2,10 @@ import re import tempfile from pathlib import Path - import pandas as pd import pytest from alphafind_training.create_embedding import create_embedding - @pytest.fixture(scope="function") def output_file(): # Setup: Define the output file path @@ -32,17 +30,15 @@ def test_create_embedding(): with tempfile.TemporaryDirectory() as tmpdir: cif_path = "./data/cifs" output_path = f"{tmpdir}/embedding.pkl" - granularity = 10 - # 45 features for each protein - (10x10 - 10) / 2 - expected_dimensionality = 45 + expected_dimensionality = 121 - create_embedding(Path(cif_path), Path(output_path), granularity) + create_embedding(Path(cif_path), Path(output_path)) assert os.path.exists(output_path) assert os.path.getsize(output_path) > 0 # load embedding.pkl and check if it has the correct shape - df = pd.read_pickle(output_path) + df = pd.read_parquet(output_path) assert df.shape[0] == len(os.listdir(cif_path)) assert df.shape[1] == expected_dimensionality diff --git a/training/tests/test_create_kmeans.py b/training/tests/test_create_kmeans.py index 3a8dd8b..d77ee89 100644 --- a/training/tests/test_create_kmeans.py +++ b/training/tests/test_create_kmeans.py @@ -10,8 +10,7 @@ def _setup(tempdir: str): create_embedding( input_path=Path("./data/cifs"), - output_path=Path(f"{tempdir}/embedding.pkl"), - granularity=10 + output_path=Path(f"{tempdir}/embedding.pkl") ) diff --git a/training/tests/test_model_creation.py b/training/tests/test_model_creation.py index 7544815..c5d00f2 100644 --- a/training/tests/test_model_creation.py +++ b/training/tests/test_model_creation.py @@ -11,8 +11,7 @@ def _setup(tempdir: str): create_embedding( input_path=Path("./data/cifs"), - output_path=Path(f"{tempdir}/embedding.pkl"), - granularity=10 + output_path=Path(f"{tempdir}/embedding.pkl") ) create_kmeans(