From d4f836ae1393c0e621593cfaa037f51cd32a6237 Mon Sep 17 00:00:00 2001 From: Corentin Meyer Date: Thu, 14 Mar 2024 08:59:39 +0100 Subject: [PATCH] feat(FastEmbed): renaming SPLADE to Sparse because it makes more sense --- integrations/fastembed/README.md | 10 ++-- .../embedders/fastembed/__init__.py | 6 +- ... => fastembed_sparse_document_embedder.py} | 10 ++-- ...r.py => fastembed_sparse_text_embedder.py} | 12 ++-- ...est_fastembed_sparse_document_embedder.py} | 58 +++++++++---------- ...=> test_fastembed_sparse_text_embedder.py} | 54 ++++++++--------- 6 files changed, 75 insertions(+), 75 deletions(-) rename integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/{fastembed_document_SPLADE_embedder.py => fastembed_sparse_document_embedder.py} (95%) rename integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/{fastembed_text_SPLADE_embedder.py => fastembed_sparse_text_embedder.py} (93%) rename integrations/fastembed/tests/{test_fastembed_document_SPLADE_embedder.py => test_fastembed_sparse_document_embedder.py} (84%) rename integrations/fastembed/tests/{test_fastembed_text_SPLADE_embedder.py => test_fastembed_sparse_text_embedder.py} (82%) diff --git a/integrations/fastembed/README.md b/integrations/fastembed/README.md index 70c10087f..d8bd3ce7a 100644 --- a/integrations/fastembed/README.md +++ b/integrations/fastembed/README.md @@ -43,13 +43,13 @@ doc = Document(content="fastembed is supported by and maintained by Qdrant.", me result = embedder.run(documents=[doc]) ``` -You can use `FastembedTextSPLADEEmbedder` and `FastembedDocumentSPLADEEmbedder` by importing as: +You can use `FastembedSparseTextEmbedder` and `FastembedSparseDocumentEmbedder` by importing as: ```python -from haystack_integrations.components.embedders.fastembed import FastembedTextSPLADEEmbedder +from haystack_integrations.components.embedders.fastembed import FastembedSparseTextEmbedder text = "fastembed is supported by and maintained by Qdrant." -text_embedder = FastembedTextSPLADEEmbedder( +text_embedder = FastembedSparseTextEmbedder( model="prithvida/SPLADE_PP_en_v1" ) text_embedder.warm_up() @@ -57,10 +57,10 @@ embedding = text_embedder.run(text)["embedding"] ``` ```python -from haystack_integrations.components.embedders.fastembed import FastembedDocumentSPLADEEmbedder +from haystack_integrations.components.embedders.fastembed import FastembedSparseDocumentEmbedder from haystack.dataclasses import Document -embedder = FastembedDocumentSPLADEEmbedder( +embedder = FastembedSparseDocumentEmbedder( model="prithvida/SPLADE_PP_en_v1", ) embedder.warm_up() diff --git a/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/__init__.py b/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/__init__.py index fa1ae6043..57c88e3c2 100644 --- a/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/__init__.py +++ b/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/__init__.py @@ -3,7 +3,7 @@ # SPDX-License-Identifier: Apache-2.0 from .fastembed_document_embedder import FastembedDocumentEmbedder from .fastembed_text_embedder import FastembedTextEmbedder -from .fastembed_document_SPLADE_embedder import FastembedDocumentSPLADEEmbedder -from .fastembed_text_SPLADE_embedder import FastembedTextSPLADEEmbedder +from .fastembed_sparse_document_embedder import FastembedSparseDocumentEmbedder +from .fastembed_sparse_text_embedder import FastembedSparseTextEmbedder -__all__ = ["FastembedDocumentEmbedder", "FastembedTextEmbedder", "FastembedDocumentSPLADEEmbedder", "FastembedTextSPLADEEmbedder"] +__all__ = ["FastembedDocumentEmbedder", "FastembedTextEmbedder", "FastembedSparseDocumentEmbedder", "FastembedSparseTextEmbedder"] diff --git a/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_document_SPLADE_embedder.py b/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_sparse_document_embedder.py similarity index 95% rename from integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_document_SPLADE_embedder.py rename to integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_sparse_document_embedder.py index 6ec71ff56..be06aa1a9 100644 --- a/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_document_SPLADE_embedder.py +++ b/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_sparse_document_embedder.py @@ -6,9 +6,9 @@ @component -class FastembedDocumentSPLADEEmbedder: +class FastembedSparseDocumentEmbedder: """ - FastembedDocumentSPLADEEmbedder computes Document embeddings using Fastembed SPLADE models. + FastembedSparseDocumentEmbedder computes Document embeddings using Fastembed sparse models. The embedding of each Document is stored in the `meta["_sparse_vector"]` field of the Document. @@ -17,10 +17,10 @@ class FastembedDocumentSPLADEEmbedder: # To use this component, install the "fastembed-haystack" package. # pip install fastembed-haystack - from haystack_integrations.components.embedders.fastembed import FastembedDocumentSPLADEEmbedder + from haystack_integrations.components.embedders.fastembed import FastembedSparseDocumentEmbedder from haystack.dataclasses import Document - doc_embedder = FastembedDocumentSPLADEEmbedder( + doc_embedder = FastembedSparseDocumentEmbedder( model="prithvida/SPLADE_PP_en_v1", batch_size=256, ) @@ -150,7 +150,7 @@ def run(self, documents: List[Document]): """ if not isinstance(documents, list) or documents and not isinstance(documents[0], Document): msg = ( - "FastembedDocumentSPLADEEmbedder expects a list of Documents as input. " + "FastembedSparseDocumentEmbedder expects a list of Documents as input. " "In case you want to embed a list of strings, please use the FastembedTextEmbedder." ) raise TypeError(msg) diff --git a/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_text_SPLADE_embedder.py b/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_sparse_text_embedder.py similarity index 93% rename from integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_text_SPLADE_embedder.py rename to integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_sparse_text_embedder.py index 6703efa19..094f29624 100644 --- a/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_text_SPLADE_embedder.py +++ b/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_sparse_text_embedder.py @@ -6,20 +6,20 @@ @component -class FastembedTextSPLADEEmbedder: +class FastembedSparseTextEmbedder: """ - FastembedTextSPLADEEmbedder computes string embedding using fastembed SPLADE models. + FastembedSparseTextEmbedder computes string embedding using fastembed sparse models. Usage example: ```python # To use this component, install the "fastembed-haystack" package. # pip install fastembed-haystack - from haystack_integrations.components.embedders.fastembed import FastembedTextSPLADEEmbedder + from haystack_integrations.components.embedders.fastembed import FastembedSparseTextEmbedder text = "It clearly says online this will work on a Mac OS system. The disk comes and it does not, only Windows. Do Not order this if you have a Mac!!" - text_embedder = FastembedTextSPLADEEmbedder( + text_embedder = FastembedSparseTextEmbedder( model="prithvida/SPLADE_PP_en_v1" ) text_embedder.warm_up() @@ -40,7 +40,7 @@ def __init__( parallel: Optional[int] = None, ): """ - Create a FastembedTextSPLADEEmbedder component. + Create a FastembedSparseTextEmbedder component. :param model: Local path or name of the model in Fastembed's model hub, such as `prithvida/SPLADE_PP_en_v1` :param cache_dir: The path to the cache directory. @@ -107,7 +107,7 @@ def run(self, text: str): """ if not isinstance(text, str): msg = ( - "FastembedTextSPLADEEmbedder expects a string as input. " + "FastembedSparseTextEmbedder expects a string as input. " "In case you want to embed a list of Documents, please use the FastembedDocumentEmbedder." ) raise TypeError(msg) diff --git a/integrations/fastembed/tests/test_fastembed_document_SPLADE_embedder.py b/integrations/fastembed/tests/test_fastembed_sparse_document_embedder.py similarity index 84% rename from integrations/fastembed/tests/test_fastembed_document_SPLADE_embedder.py rename to integrations/fastembed/tests/test_fastembed_sparse_document_embedder.py index 384b3d9a5..35cd521ba 100644 --- a/integrations/fastembed/tests/test_fastembed_document_SPLADE_embedder.py +++ b/integrations/fastembed/tests/test_fastembed_sparse_document_embedder.py @@ -3,17 +3,17 @@ import numpy as np import pytest from haystack import Document, default_from_dict -from haystack_integrations.components.embedders.fastembed.fastembed_document_SPLADE_embedder import ( - FastembedDocumentSPLADEEmbedder, +from haystack_integrations.components.embedders.fastembed.fastembed_sparse_document_embedder import ( + FastembedSparseDocumentEmbedder, ) -class TestFastembedDocumentSPLADEEmbedderDoc: +class TestFastembedSparseDocumentEmbedderDoc: def test_init_default(self): """ - Test default initialization parameters for FastembedDocumentSPLADEEmbedder. + Test default initialization parameters for FastembedSparseDocumentEmbedder. """ - embedder = FastembedDocumentSPLADEEmbedder(model="prithvida/SPLADE_PP_en_v1") + embedder = FastembedSparseDocumentEmbedder(model="prithvida/SPLADE_PP_en_v1") assert embedder.model_name == "prithvida/SPLADE_PP_en_v1" assert embedder.cache_dir is None assert embedder.threads is None @@ -27,9 +27,9 @@ def test_init_default(self): def test_init_with_parameters(self): """ - Test custom initialization parameters for FastembedDocumentSPLADEEmbedder. + Test custom initialization parameters for FastembedSparseDocumentEmbedder. """ - embedder = FastembedDocumentSPLADEEmbedder( + embedder = FastembedSparseDocumentEmbedder( model="prithvida/SPLADE_PP_en_v1", cache_dir="fake_dir", threads=2, @@ -54,12 +54,12 @@ def test_init_with_parameters(self): def test_to_dict(self): """ - Test serialization of FastembedDocumentSPLADEEmbedder to a dictionary, using default initialization parameters. + Test serialization of FastembedSparseDocumentEmbedder to a dictionary, using default initialization parameters. """ - embedder = FastembedDocumentSPLADEEmbedder(model="prithvida/SPLADE_PP_en_v1") + embedder = FastembedSparseDocumentEmbedder(model="prithvida/SPLADE_PP_en_v1") embedder_dict = embedder.to_dict() assert embedder_dict == { - "type": "haystack_integrations.components.embedders.fastembed.fastembed_document_SPLADE_embedder.FastembedDocumentSPLADEEmbedder", # noqa + "type": "haystack_integrations.components.embedders.fastembed.fastembed_sparse_document_embedder.FastembedSparseDocumentEmbedder", # noqa "init_parameters": { "model": "prithvida/SPLADE_PP_en_v1", "cache_dir": None, @@ -76,9 +76,9 @@ def test_to_dict(self): def test_to_dict_with_custom_init_parameters(self): """ - Test serialization of FastembedDocumentSPLADEEmbedder to a dictionary, using custom initialization parameters. + Test serialization of FastembedSparseDocumentEmbedder to a dictionary, using custom initialization parameters. """ - embedder = FastembedDocumentSPLADEEmbedder( + embedder = FastembedSparseDocumentEmbedder( model="prithvida/SPLADE_PP_en_v1", cache_dir="fake_dir", threads=2, @@ -92,7 +92,7 @@ def test_to_dict_with_custom_init_parameters(self): ) embedder_dict = embedder.to_dict() assert embedder_dict == { - "type": "haystack_integrations.components.embedders.fastembed.fastembed_document_SPLADE_embedder.FastembedDocumentSPLADEEmbedder", # noqa + "type": "haystack_integrations.components.embedders.fastembed.fastembed_sparse_document_embedder.FastembedSparseDocumentEmbedder", # noqa "init_parameters": { "model": "prithvida/SPLADE_PP_en_v1", "cache_dir": "fake_dir", @@ -109,10 +109,10 @@ def test_to_dict_with_custom_init_parameters(self): def test_from_dict(self): """ - Test deserialization of FastembedDocumentSPLADEEmbedder from a dictionary, using default initialization parameters. + Test deserialization of FastembedSparseDocumentEmbedder from a dictionary, using default initialization parameters. """ embedder_dict = { - "type": "haystack_integrations.components.embedders.fastembed.fastembed_document_SPLADE_embedder.FastembedDocumentSPLADEEmbedder", # noqa + "type": "haystack_integrations.components.embedders.fastembed.fastembed_sparse_document_embedder.FastembedSparseDocumentEmbedder", # noqa "init_parameters": { "model": "prithvida/SPLADE_PP_en_v1", "cache_dir": None, @@ -126,7 +126,7 @@ def test_from_dict(self): "embedding_separator": "\n", }, } - embedder = default_from_dict(FastembedDocumentSPLADEEmbedder, embedder_dict) + embedder = default_from_dict(FastembedSparseDocumentEmbedder, embedder_dict) assert embedder.model_name == "prithvida/SPLADE_PP_en_v1" assert embedder.cache_dir is None assert embedder.threads is None @@ -140,10 +140,10 @@ def test_from_dict(self): def test_from_dict_with_custom_init_parameters(self): """ - Test deserialization of FastembedDocumentSPLADEEmbedder from a dictionary, using custom initialization parameters. + Test deserialization of FastembedSparseDocumentEmbedder from a dictionary, using custom initialization parameters. """ embedder_dict = { - "type": "haystack_integrations.components.embedders.fastembed.fastembed_document_SPLADE_embedder.FastembedDocumentSPLADEEmbedder", # noqa + "type": "haystack_integrations.components.embedders.fastembed.fastembed_sparse_document_embedder.FastembedSparseDocumentEmbedder", # noqa "init_parameters": { "model": "prithvida/SPLADE_PP_en_v1", "cache_dir": "fake_dir", @@ -157,7 +157,7 @@ def test_from_dict_with_custom_init_parameters(self): "embedding_separator": " | ", }, } - embedder = default_from_dict(FastembedDocumentSPLADEEmbedder, embedder_dict) + embedder = default_from_dict(FastembedSparseDocumentEmbedder, embedder_dict) assert embedder.model_name == "prithvida/SPLADE_PP_en_v1" assert embedder.cache_dir == "fake_dir" assert embedder.threads == 2 @@ -170,13 +170,13 @@ def test_from_dict_with_custom_init_parameters(self): assert embedder.embedding_separator == " | " @patch( - "haystack_integrations.components.embedders.fastembed.fastembed_document_SPLADE_embedder._FastembedSparseEmbeddingBackendFactory" + "haystack_integrations.components.embedders.fastembed.fastembed_sparse_document_embedder._FastembedSparseEmbeddingBackendFactory" ) def test_warmup(self, mocked_factory): """ Test for checking embedder instances after warm-up. """ - embedder = FastembedDocumentSPLADEEmbedder(model="prithvida/SPLADE_PP_en_v1") + embedder = FastembedSparseDocumentEmbedder(model="prithvida/SPLADE_PP_en_v1") mocked_factory.get_embedding_backend.assert_not_called() embedder.warm_up() mocked_factory.get_embedding_backend.assert_called_once_with( @@ -184,13 +184,13 @@ def test_warmup(self, mocked_factory): ) @patch( - "haystack_integrations.components.embedders.fastembed.fastembed_document_SPLADE_embedder._FastembedSparseEmbeddingBackendFactory" + "haystack_integrations.components.embedders.fastembed.fastembed_sparse_document_embedder._FastembedSparseEmbeddingBackendFactory" ) def test_warmup_does_not_reload(self, mocked_factory): """ Test for checking backend instances after multiple warm-ups. """ - embedder = FastembedDocumentSPLADEEmbedder(model="prithvida/SPLADE_PP_en_v1") + embedder = FastembedSparseDocumentEmbedder(model="prithvida/SPLADE_PP_en_v1") mocked_factory.get_embedding_backend.assert_not_called() embedder.warm_up() embedder.warm_up() @@ -211,7 +211,7 @@ def test_embed(self): """ Test for checking output dimensions and embedding dimensions. """ - embedder = FastembedDocumentSPLADEEmbedder(model="prithvida/SPLADE_PP_en_v1") + embedder = FastembedSparseDocumentEmbedder(model="prithvida/SPLADE_PP_en_v1") embedder.embedding_backend = MagicMock() embedder.embedding_backend.embed = lambda x, **kwargs: self._generate_mocked_sparse_embedding(len(x)) # noqa: ARG005 @@ -233,20 +233,20 @@ def test_embed_incorrect_input_format(self): """ Test for checking incorrect input format when creating embedding. """ - embedder = FastembedDocumentSPLADEEmbedder(model="prithvida/SPLADE_PP_en_v1") + embedder = FastembedSparseDocumentEmbedder(model="prithvida/SPLADE_PP_en_v1") string_input = "text" list_integers_input = [1, 2, 3] with pytest.raises( TypeError, - match="FastembedDocumentSPLADEEmbedder expects a list of Documents as input.", + match="FastembedSparseDocumentEmbedder expects a list of Documents as input.", ): embedder.run(documents=string_input) with pytest.raises( TypeError, - match="FastembedDocumentSPLADEEmbedder expects a list of Documents as input.", + match="FastembedSparseDocumentEmbedder expects a list of Documents as input.", ): embedder.run(documents=list_integers_input) @@ -255,7 +255,7 @@ def test_embed_metadata(self): Test for checking output dimensions and embedding dimensions for documents with a custom instruction and metadata. """ - embedder = FastembedDocumentSPLADEEmbedder( + embedder = FastembedSparseDocumentEmbedder( model="model", meta_fields_to_embed=["meta_field"], embedding_separator="\n", @@ -281,7 +281,7 @@ def test_embed_metadata(self): @pytest.mark.integration def test_run(self): - embedder = FastembedDocumentSPLADEEmbedder( + embedder = FastembedSparseDocumentEmbedder( model="prithvida/SPLADE_PP_en_v1", ) embedder.warm_up() diff --git a/integrations/fastembed/tests/test_fastembed_text_SPLADE_embedder.py b/integrations/fastembed/tests/test_fastembed_sparse_text_embedder.py similarity index 82% rename from integrations/fastembed/tests/test_fastembed_text_SPLADE_embedder.py rename to integrations/fastembed/tests/test_fastembed_sparse_text_embedder.py index daa211561..fd9ce7b73 100644 --- a/integrations/fastembed/tests/test_fastembed_text_SPLADE_embedder.py +++ b/integrations/fastembed/tests/test_fastembed_sparse_text_embedder.py @@ -3,17 +3,17 @@ import numpy as np import pytest from haystack import default_from_dict -from haystack_integrations.components.embedders.fastembed.fastembed_text_SPLADE_embedder import ( - FastembedTextSPLADEEmbedder, +from haystack_integrations.components.embedders.fastembed.fastembed_sparse_text_embedder import ( + FastembedSparseTextEmbedder, ) -class TestFastembedTextSPLADEEmbedder: +class TestFastembedSparseTextEmbedder: def test_init_default(self): """ - Test default initialization parameters for FastembedTextSPLADEEmbedder. + Test default initialization parameters for FastembedSparseTextEmbedder. """ - embedder = FastembedTextSPLADEEmbedder(model="prithvida/SPLADE_PP_en_v1") + embedder = FastembedSparseTextEmbedder(model="prithvida/SPLADE_PP_en_v1") assert embedder.model_name == "prithvida/SPLADE_PP_en_v1" assert embedder.cache_dir is None assert embedder.threads is None @@ -25,9 +25,9 @@ def test_init_default(self): def test_init_with_parameters(self): """ - Test custom initialization parameters for FastembedTextSPLADEEmbedder. + Test custom initialization parameters for FastembedSparseTextEmbedder. """ - embedder = FastembedTextSPLADEEmbedder( + embedder = FastembedSparseTextEmbedder( model="prithvida/SPLADE_PP_en_v1", cache_dir="fake_dir", threads=2, @@ -48,12 +48,12 @@ def test_init_with_parameters(self): def test_to_dict(self): """ - Test serialization of FastembedTextSPLADEEmbedder to a dictionary, using default initialization parameters. + Test serialization of FastembedSparseTextEmbedder to a dictionary, using default initialization parameters. """ - embedder = FastembedTextSPLADEEmbedder(model="prithvida/SPLADE_PP_en_v1") + embedder = FastembedSparseTextEmbedder(model="prithvida/SPLADE_PP_en_v1") embedder_dict = embedder.to_dict() assert embedder_dict == { - "type": "haystack_integrations.components.embedders.fastembed.fastembed_text_SPLADE_embedder.FastembedTextSPLADEEmbedder", # noqa + "type": "haystack_integrations.components.embedders.fastembed.fastembed_sparse_text_embedder.FastembedSparseTextEmbedder", # noqa "init_parameters": { "model": "prithvida/SPLADE_PP_en_v1", "cache_dir": None, @@ -68,9 +68,9 @@ def test_to_dict(self): def test_to_dict_with_custom_init_parameters(self): """ - Test serialization of FastembedTextSPLADEEmbedder to a dictionary, using custom initialization parameters. + Test serialization of FastembedSparseTextEmbedder to a dictionary, using custom initialization parameters. """ - embedder = FastembedTextSPLADEEmbedder( + embedder = FastembedSparseTextEmbedder( model="prithvida/SPLADE_PP_en_v1", cache_dir="fake_dir", threads=2, @@ -82,7 +82,7 @@ def test_to_dict_with_custom_init_parameters(self): ) embedder_dict = embedder.to_dict() assert embedder_dict == { - "type": "haystack_integrations.components.embedders.fastembed.fastembed_text_SPLADE_embedder.FastembedTextSPLADEEmbedder", # noqa + "type": "haystack_integrations.components.embedders.fastembed.fastembed_sparse_text_embedder.FastembedSparseTextEmbedder", # noqa "init_parameters": { "model": "prithvida/SPLADE_PP_en_v1", "cache_dir": "fake_dir", @@ -97,10 +97,10 @@ def test_to_dict_with_custom_init_parameters(self): def test_from_dict(self): """ - Test deserialization of FastembedTextSPLADEEmbedder from a dictionary, using default initialization parameters. + Test deserialization of FastembedSparseTextEmbedder from a dictionary, using default initialization parameters. """ embedder_dict = { - "type": "haystack_integrations.components.embedders.fastembed.fastembed_text_SPLADE_embedder.FastembedTextSPLADEEmbedder", # noqa + "type": "haystack_integrations.components.embedders.fastembed.fastembed_sparse_text_embedder.FastembedSparseTextEmbedder", # noqa "init_parameters": { "model": "prithvida/SPLADE_PP_en_v1", "cache_dir": None, @@ -112,7 +112,7 @@ def test_from_dict(self): "parallel": None, }, } - embedder = default_from_dict(FastembedTextSPLADEEmbedder, embedder_dict) + embedder = default_from_dict(FastembedSparseTextEmbedder, embedder_dict) assert embedder.model_name == "prithvida/SPLADE_PP_en_v1" assert embedder.cache_dir is None assert embedder.threads is None @@ -124,10 +124,10 @@ def test_from_dict(self): def test_from_dict_with_custom_init_parameters(self): """ - Test deserialization of FastembedTextSPLADEEmbedder from a dictionary, using custom initialization parameters. + Test deserialization of FastembedSparseTextEmbedder from a dictionary, using custom initialization parameters. """ embedder_dict = { - "type": "haystack_integrations.components.embedders.fastembed.fastembed_text_SPLADE_embedder.FastembedTextSPLADEEmbedder", # noqa + "type": "haystack_integrations.components.embedders.fastembed.fastembed_sparse_text_embedder.FastembedSparseTextEmbedder", # noqa "init_parameters": { "model": "prithvida/SPLADE_PP_en_v1", "cache_dir": "fake_dir", @@ -139,7 +139,7 @@ def test_from_dict_with_custom_init_parameters(self): "parallel": 1, }, } - embedder = default_from_dict(FastembedTextSPLADEEmbedder, embedder_dict) + embedder = default_from_dict(FastembedSparseTextEmbedder, embedder_dict) assert embedder.model_name == "prithvida/SPLADE_PP_en_v1" assert embedder.cache_dir == "fake_dir" assert embedder.threads == 2 @@ -150,13 +150,13 @@ def test_from_dict_with_custom_init_parameters(self): assert embedder.parallel == 1 @patch( - "haystack_integrations.components.embedders.fastembed.fastembed_text_SPLADE_embedder._FastembedSparseEmbeddingBackendFactory" + "haystack_integrations.components.embedders.fastembed.fastembed_sparse_text_embedder._FastembedSparseEmbeddingBackendFactory" ) def test_warmup(self, mocked_factory): """ Test for checking embedder instances after warm-up. """ - embedder = FastembedTextSPLADEEmbedder(model="prithvida/SPLADE_PP_en_v1") + embedder = FastembedSparseTextEmbedder(model="prithvida/SPLADE_PP_en_v1") mocked_factory.get_embedding_backend.assert_not_called() embedder.warm_up() mocked_factory.get_embedding_backend.assert_called_once_with( @@ -164,13 +164,13 @@ def test_warmup(self, mocked_factory): ) @patch( - "haystack_integrations.components.embedders.fastembed.fastembed_text_SPLADE_embedder._FastembedSparseEmbeddingBackendFactory" + "haystack_integrations.components.embedders.fastembed.fastembed_sparse_text_embedder._FastembedSparseEmbeddingBackendFactory" ) def test_warmup_does_not_reload(self, mocked_factory): """ Test for checking backend instances after multiple warm-ups. """ - embedder = FastembedTextSPLADEEmbedder(model="prithvida/SPLADE_PP_en_v1") + embedder = FastembedSparseTextEmbedder(model="prithvida/SPLADE_PP_en_v1") mocked_factory.get_embedding_backend.assert_not_called() embedder.warm_up() embedder.warm_up() @@ -192,7 +192,7 @@ def test_embed(self): """ Test for checking output dimensions and embedding dimensions. """ - embedder = FastembedTextSPLADEEmbedder(model="BAAI/bge-base-en-v1.5") + embedder = FastembedSparseTextEmbedder(model="BAAI/bge-base-en-v1.5") embedder.embedding_backend = MagicMock() embedder.embedding_backend.embed = lambda x, **kwargs: self._generate_mocked_sparse_embedding( len(x)) # noqa: ARG005 @@ -211,17 +211,17 @@ def test_run_wrong_incorrect_format(self): """ Test for checking incorrect input format when creating embedding. """ - embedder = FastembedTextSPLADEEmbedder(model="BAAI/bge-base-en-v1.5") + embedder = FastembedSparseTextEmbedder(model="BAAI/bge-base-en-v1.5") embedder.embedding_backend = MagicMock() list_integers_input = [1, 2, 3] - with pytest.raises(TypeError, match="FastembedTextSPLADEEmbedder expects a string as input"): + with pytest.raises(TypeError, match="FastembedSparseTextEmbedder expects a string as input"): embedder.run(text=list_integers_input) @pytest.mark.integration def test_run(self): - embedder = FastembedTextSPLADEEmbedder( + embedder = FastembedSparseTextEmbedder( model="prithvida/SPLADE_PP_en_v1", ) embedder.warm_up()