From 1b7cbc8f28cd26f7023240db04ae5ebb26619fee Mon Sep 17 00:00:00 2001 From: ZanSara Date: Wed, 17 Jan 2024 15:08:43 +0100 Subject: [PATCH 1/5] rename model_name_or_path in doc embedder --- .../instructor_document_embedder.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/integrations/instructor_embedders/instructor_embedders_haystack/instructor_document_embedder.py b/integrations/instructor_embedders/instructor_embedders_haystack/instructor_document_embedder.py index 4afe87a3c..7656677aa 100644 --- a/integrations/instructor_embedders/instructor_embedders_haystack/instructor_document_embedder.py +++ b/integrations/instructor_embedders/instructor_embedders_haystack/instructor_document_embedder.py @@ -26,7 +26,7 @@ class InstructorDocumentEmbedder: doc_embedding_instruction = "Represent the Medical Document for retrieval:" doc_embedder = InstructorDocumentEmbedder( - model_name_or_path="hkunlp/instructor-base", + model="hkunlp/instructor-base", instruction=doc_embedding_instruction, batch_size=32, device="cpu", @@ -60,7 +60,7 @@ class InstructorDocumentEmbedder: def __init__( self, - model_name_or_path: str = "hkunlp/instructor-base", + model: str = "hkunlp/instructor-base", device: Optional[str] = None, use_auth_token: Union[bool, str, None] = None, instruction: str = "Represent the document", @@ -73,7 +73,7 @@ def __init__( """ Create an InstructorDocumentEmbedder component. - :param model_name_or_path: Local path or name of the model in Hugging Face's model hub, + :param model: Local path or name of the model in Hugging Face's model hub, such as ``'hkunlp/instructor-base'``. :param device: Device (like 'cuda' / 'cpu') that should be used for computation. If None, checks if a GPU can be used. @@ -95,7 +95,7 @@ def __init__( :param embedding_separator: Separator used to concatenate the meta fields to the Document content. """ - self.model_name_or_path = model_name_or_path + self.model = model # TODO: remove device parameter and use Haystack's device management once migrated self.device = device or "cpu" self.use_auth_token = use_auth_token @@ -112,7 +112,7 @@ def to_dict(self) -> Dict[str, Any]: """ return default_to_dict( self, - model_name_or_path=self.model_name_or_path, + model=self.model, device=self.device, use_auth_token=self.use_auth_token, instruction=self.instruction, @@ -136,7 +136,7 @@ def warm_up(self): """ if not hasattr(self, "embedding_backend"): self.embedding_backend = _InstructorEmbeddingBackendFactory.get_embedding_backend( - model_name_or_path=self.model_name_or_path, device=self.device, use_auth_token=self.use_auth_token + model_name_or_path=self.model, device=self.device, use_auth_token=self.use_auth_token ) @component.output_types(documents=List[Document]) From badfc29274e93e0cb513ca99d21853f6e4861564 Mon Sep 17 00:00:00 2001 From: ZanSara Date: Wed, 17 Jan 2024 15:26:06 +0100 Subject: [PATCH 2/5] fix tests for doc embedder --- .../test_instructor_document_embedder.py | 36 +++++++++---------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/integrations/instructor_embedders/tests/test_instructor_document_embedder.py b/integrations/instructor_embedders/tests/test_instructor_document_embedder.py index e28df930b..c759c6065 100644 --- a/integrations/instructor_embedders/tests/test_instructor_document_embedder.py +++ b/integrations/instructor_embedders/tests/test_instructor_document_embedder.py @@ -12,8 +12,8 @@ def test_init_default(self): """ Test default initialization parameters for InstructorDocumentEmbedder. """ - embedder = InstructorDocumentEmbedder(model_name_or_path="hkunlp/instructor-base") - assert embedder.model_name_or_path == "hkunlp/instructor-base" + embedder = InstructorDocumentEmbedder(model="hkunlp/instructor-base") + assert embedder.model == "hkunlp/instructor-base" assert embedder.device == "cpu" assert embedder.use_auth_token is None assert embedder.instruction == "Represent the document" @@ -28,7 +28,7 @@ def test_init_with_parameters(self): Test custom initialization parameters for InstructorDocumentEmbedder. """ embedder = InstructorDocumentEmbedder( - model_name_or_path="hkunlp/instructor-base", + model="hkunlp/instructor-base", device="cuda", use_auth_token=True, instruction="Represent the 'domain' 'text_type' for 'task_objective'", @@ -38,7 +38,7 @@ def test_init_with_parameters(self): meta_fields_to_embed=["test_field"], embedding_separator=" | ", ) - assert embedder.model_name_or_path == "hkunlp/instructor-base" + assert embedder.model == "hkunlp/instructor-base" assert embedder.device == "cuda" assert embedder.use_auth_token is True assert embedder.instruction == "Represent the 'domain' 'text_type' for 'task_objective'" @@ -52,12 +52,12 @@ def test_to_dict(self): """ Test serialization of InstructorDocumentEmbedder to a dictionary, using default initialization parameters. """ - embedder = InstructorDocumentEmbedder(model_name_or_path="hkunlp/instructor-base") + embedder = InstructorDocumentEmbedder(model="hkunlp/instructor-base") embedder_dict = embedder.to_dict() assert embedder_dict == { "type": "instructor_embedders_haystack.instructor_document_embedder.InstructorDocumentEmbedder", "init_parameters": { - "model_name_or_path": "hkunlp/instructor-base", + "model": "hkunlp/instructor-base", "device": "cpu", "use_auth_token": None, "instruction": "Represent the document", @@ -74,7 +74,7 @@ def test_to_dict_with_custom_init_parameters(self): Test serialization of InstructorDocumentEmbedder to a dictionary, using custom initialization parameters. """ embedder = InstructorDocumentEmbedder( - model_name_or_path="hkunlp/instructor-base", + model="hkunlp/instructor-base", device="cuda", use_auth_token=True, instruction="Represent the financial document for retrieval", @@ -88,7 +88,7 @@ def test_to_dict_with_custom_init_parameters(self): assert embedder_dict == { "type": "instructor_embedders_haystack.instructor_document_embedder.InstructorDocumentEmbedder", "init_parameters": { - "model_name_or_path": "hkunlp/instructor-base", + "model": "hkunlp/instructor-base", "device": "cuda", "use_auth_token": True, "instruction": "Represent the financial document for retrieval", @@ -107,7 +107,7 @@ def test_from_dict(self): embedder_dict = { "type": "instructor_embedders_haystack.instructor_document_embedder.InstructorDocumentEmbedder", "init_parameters": { - "model_name_or_path": "hkunlp/instructor-base", + "model": "hkunlp/instructor-base", "device": "cpu", "use_auth_token": None, "instruction": "Represent the 'domain' 'text_type' for 'task_objective'", @@ -119,7 +119,7 @@ def test_from_dict(self): }, } embedder = InstructorDocumentEmbedder.from_dict(embedder_dict) - assert embedder.model_name_or_path == "hkunlp/instructor-base" + assert embedder.model == "hkunlp/instructor-base" assert embedder.device == "cpu" assert embedder.use_auth_token is None assert embedder.instruction == "Represent the 'domain' 'text_type' for 'task_objective'" @@ -136,7 +136,7 @@ def test_from_dict_with_custom_init_parameters(self): embedder_dict = { "type": "instructor_embedders_haystack.instructor_document_embedder.InstructorDocumentEmbedder", "init_parameters": { - "model_name_or_path": "hkunlp/instructor-base", + "model": "hkunlp/instructor-base", "device": "cuda", "use_auth_token": True, "instruction": "Represent the financial document for retrieval", @@ -148,7 +148,7 @@ def test_from_dict_with_custom_init_parameters(self): }, } embedder = InstructorDocumentEmbedder.from_dict(embedder_dict) - assert embedder.model_name_or_path == "hkunlp/instructor-base" + assert embedder.model == "hkunlp/instructor-base" assert embedder.device == "cuda" assert embedder.use_auth_token is True assert embedder.instruction == "Represent the financial document for retrieval" @@ -163,7 +163,7 @@ def test_warmup(self, mocked_factory): """ Test for checking embedder instances after warm-up. """ - embedder = InstructorDocumentEmbedder(model_name_or_path="hkunlp/instructor-base") + embedder = InstructorDocumentEmbedder(model="hkunlp/instructor-base") mocked_factory.get_embedding_backend.assert_not_called() embedder.warm_up() mocked_factory.get_embedding_backend.assert_called_once_with( @@ -175,7 +175,7 @@ def test_warmup_does_not_reload(self, mocked_factory): """ Test for checking backend instances after multiple warm-ups. """ - embedder = InstructorDocumentEmbedder(model_name_or_path="hkunlp/instructor-base") + embedder = InstructorDocumentEmbedder(model="hkunlp/instructor-base") mocked_factory.get_embedding_backend.assert_not_called() embedder.warm_up() embedder.warm_up() @@ -185,7 +185,7 @@ def test_embed(self): """ Test for checking output dimensions and embedding dimensions. """ - embedder = InstructorDocumentEmbedder(model_name_or_path="hkunlp/instructor-large") + embedder = InstructorDocumentEmbedder(model="hkunlp/instructor-large") embedder.embedding_backend = MagicMock() embedder.embedding_backend.embed = lambda x, **kwargs: np.random.rand(len(x), 16).tolist() # noqa: ARG005 @@ -204,7 +204,7 @@ def test_embed_incorrect_input_format(self): """ Test for checking incorrect input format when creating embedding. """ - embedder = InstructorDocumentEmbedder(model_name_or_path="hkunlp/instructor-base") + embedder = InstructorDocumentEmbedder(model="hkunlp/instructor-base") string_input = "text" list_integers_input = [1, 2, 3] @@ -221,7 +221,7 @@ def test_embed_metadata(self): with a custom instruction and metadata. """ embedder = InstructorDocumentEmbedder( - model_name_or_path="model", + model="model", instruction="Represent the financial document for retrieval", meta_fields_to_embed=["meta_field"], embedding_separator="\n", @@ -248,7 +248,7 @@ def test_embed_metadata(self): @pytest.mark.integration def test_run(self): embedder = InstructorDocumentEmbedder( - model_name_or_path="hkunlp/instructor-base", + model="hkunlp/instructor-base", device="cpu", instruction="Represent the Science document for retrieval", ) From 2c680ae1cf98c1a6ae3b3cb243707bf6ed03e9be Mon Sep 17 00:00:00 2001 From: ZanSara Date: Wed, 17 Jan 2024 15:27:29 +0100 Subject: [PATCH 3/5] rename model_name_or_path to model in text embedder --- .../instructor_text_embedder.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/integrations/instructor_embedders/instructor_embedders_haystack/instructor_text_embedder.py b/integrations/instructor_embedders/instructor_embedders_haystack/instructor_text_embedder.py index 6c665ab3b..49fab2ce4 100644 --- a/integrations/instructor_embedders/instructor_embedders_haystack/instructor_text_embedder.py +++ b/integrations/instructor_embedders/instructor_embedders_haystack/instructor_text_embedder.py @@ -26,7 +26,7 @@ class InstructorTextEmbedder: ) text_embedder = InstructorTextEmbedder( - model_name_or_path="hkunlp/instructor-base", instruction=instruction, + model="hkunlp/instructor-base", instruction=instruction, device="cpu" ) @@ -36,7 +36,7 @@ class InstructorTextEmbedder: def __init__( self, - model_name_or_path: str = "hkunlp/instructor-base", + model: str = "hkunlp/instructor-base", device: Optional[str] = None, use_auth_token: Union[bool, str, None] = None, instruction: str = "Represent the sentence", @@ -47,7 +47,7 @@ def __init__( """ Create an InstructorTextEmbedder component. - :param model_name_or_path: Local path or name of the model in Hugging Face's model hub, + :param model: Local path or name of the model in Hugging Face's model hub, such as ``'hkunlp/instructor-base'``. :param device: Device (like 'cuda' / 'cpu') that should be used for computation. If None, checks if a GPU can be used. @@ -67,7 +67,7 @@ def __init__( :param normalize_embeddings: If set to true, returned vectors will have the length of 1. """ - self.model_name_or_path = model_name_or_path + self.model = model # TODO: remove device parameter and use Haystack's device management once migrated self.device = device or "cpu" self.use_auth_token = use_auth_token @@ -82,7 +82,7 @@ def to_dict(self) -> Dict[str, Any]: """ return default_to_dict( self, - model_name_or_path=self.model_name_or_path, + model=self.model, device=self.device, use_auth_token=self.use_auth_token, instruction=self.instruction, @@ -104,7 +104,7 @@ def warm_up(self): """ if not hasattr(self, "embedding_backend"): self.embedding_backend = _InstructorEmbeddingBackendFactory.get_embedding_backend( - model_name_or_path=self.model_name_or_path, device=self.device, use_auth_token=self.use_auth_token + model_name_or_path=self.model, device=self.device, use_auth_token=self.use_auth_token ) @component.output_types(embedding=List[float]) From d9a5ad4393303b7c6749cd60b8dbf400f26a2514 Mon Sep 17 00:00:00 2001 From: ZanSara Date: Wed, 17 Jan 2024 15:28:07 +0100 Subject: [PATCH 4/5] fix tests for text embedder --- .../tests/test_instructor_text_embedder.py | 34 +++++++++---------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/integrations/instructor_embedders/tests/test_instructor_text_embedder.py b/integrations/instructor_embedders/tests/test_instructor_text_embedder.py index a4adde771..ddbb368f9 100644 --- a/integrations/instructor_embedders/tests/test_instructor_text_embedder.py +++ b/integrations/instructor_embedders/tests/test_instructor_text_embedder.py @@ -11,8 +11,8 @@ def test_init_default(self): """ Test default initialization parameters for InstructorTextEmbedder. """ - embedder = InstructorTextEmbedder(model_name_or_path="hkunlp/instructor-base") - assert embedder.model_name_or_path == "hkunlp/instructor-base" + embedder = InstructorTextEmbedder(model="hkunlp/instructor-base") + assert embedder.model == "hkunlp/instructor-base" assert embedder.device == "cpu" assert embedder.use_auth_token is None assert embedder.instruction == "Represent the sentence" @@ -25,7 +25,7 @@ def test_init_with_parameters(self): Test custom initialization parameters for InstructorTextEmbedder. """ embedder = InstructorTextEmbedder( - model_name_or_path="hkunlp/instructor-base", + model="hkunlp/instructor-base", device="cuda", use_auth_token=True, instruction="Represent the 'domain' 'text_type' for 'task_objective'", @@ -33,7 +33,7 @@ def test_init_with_parameters(self): progress_bar=False, normalize_embeddings=True, ) - assert embedder.model_name_or_path == "hkunlp/instructor-base" + assert embedder.model == "hkunlp/instructor-base" assert embedder.device == "cuda" assert embedder.use_auth_token is True assert embedder.instruction == "Represent the 'domain' 'text_type' for 'task_objective'" @@ -45,12 +45,12 @@ def test_to_dict(self): """ Test serialization of InstructorTextEmbedder to a dictionary, using default initialization parameters. """ - embedder = InstructorTextEmbedder(model_name_or_path="hkunlp/instructor-base") + embedder = InstructorTextEmbedder(model="hkunlp/instructor-base") embedder_dict = embedder.to_dict() assert embedder_dict == { "type": "instructor_embedders_haystack.instructor_text_embedder.InstructorTextEmbedder", "init_parameters": { - "model_name_or_path": "hkunlp/instructor-base", + "model": "hkunlp/instructor-base", "device": "cpu", "use_auth_token": None, "instruction": "Represent the sentence", @@ -65,7 +65,7 @@ def test_to_dict_with_custom_init_parameters(self): Test serialization of InstructorTextEmbedder to a dictionary, using custom initialization parameters. """ embedder = InstructorTextEmbedder( - model_name_or_path="hkunlp/instructor-base", + model="hkunlp/instructor-base", device="cuda", use_auth_token=True, instruction="Represent the financial document for retrieval", @@ -77,7 +77,7 @@ def test_to_dict_with_custom_init_parameters(self): assert embedder_dict == { "type": "instructor_embedders_haystack.instructor_text_embedder.InstructorTextEmbedder", "init_parameters": { - "model_name_or_path": "hkunlp/instructor-base", + "model": "hkunlp/instructor-base", "device": "cuda", "use_auth_token": True, "instruction": "Represent the financial document for retrieval", @@ -94,7 +94,7 @@ def test_from_dict(self): embedder_dict = { "type": "instructor_embedders_haystack.instructor_text_embedder.InstructorTextEmbedder", "init_parameters": { - "model_name_or_path": "hkunlp/instructor-base", + "model": "hkunlp/instructor-base", "device": "cpu", "use_auth_token": None, "instruction": "Represent the 'domain' 'text_type' for 'task_objective'", @@ -104,7 +104,7 @@ def test_from_dict(self): }, } embedder = InstructorTextEmbedder.from_dict(embedder_dict) - assert embedder.model_name_or_path == "hkunlp/instructor-base" + assert embedder.model == "hkunlp/instructor-base" assert embedder.device == "cpu" assert embedder.use_auth_token is None assert embedder.instruction == "Represent the 'domain' 'text_type' for 'task_objective'" @@ -119,7 +119,7 @@ def test_from_dict_with_custom_init_parameters(self): embedder_dict = { "type": "instructor_embedders_haystack.instructor_text_embedder.InstructorTextEmbedder", "init_parameters": { - "model_name_or_path": "hkunlp/instructor-base", + "model": "hkunlp/instructor-base", "device": "cuda", "use_auth_token": True, "instruction": "Represent the financial document for retrieval", @@ -129,7 +129,7 @@ def test_from_dict_with_custom_init_parameters(self): }, } embedder = InstructorTextEmbedder.from_dict(embedder_dict) - assert embedder.model_name_or_path == "hkunlp/instructor-base" + assert embedder.model == "hkunlp/instructor-base" assert embedder.device == "cuda" assert embedder.use_auth_token is True assert embedder.instruction == "Represent the financial document for retrieval" @@ -142,7 +142,7 @@ def test_warmup(self, mocked_factory): """ Test for checking embedder instances after warm-up. """ - embedder = InstructorTextEmbedder(model_name_or_path="hkunlp/instructor-base") + embedder = InstructorTextEmbedder(model="hkunlp/instructor-base") mocked_factory.get_embedding_backend.assert_not_called() embedder.warm_up() mocked_factory.get_embedding_backend.assert_called_once_with( @@ -154,7 +154,7 @@ def test_warmup_does_not_reload(self, mocked_factory): """ Test for checking backend instances after multiple warm-ups. """ - embedder = InstructorTextEmbedder(model_name_or_path="hkunlp/instructor-base") + embedder = InstructorTextEmbedder(model="hkunlp/instructor-base") mocked_factory.get_embedding_backend.assert_not_called() embedder.warm_up() embedder.warm_up() @@ -164,7 +164,7 @@ def test_embed(self): """ Test for checking output dimensions and embedding dimensions. """ - embedder = InstructorTextEmbedder(model_name_or_path="hkunlp/instructor-large") + embedder = InstructorTextEmbedder(model="hkunlp/instructor-large") embedder.embedding_backend = MagicMock() embedder.embedding_backend.embed = lambda x, **kwargs: np.random.rand(len(x), 16).tolist() # noqa: ARG005 @@ -180,7 +180,7 @@ def test_run_wrong_incorrect_format(self): """ Test for checking incorrect input format when creating embedding. """ - embedder = InstructorTextEmbedder(model_name_or_path="hkunlp/instructor-large") + embedder = InstructorTextEmbedder(model="hkunlp/instructor-large") embedder.embedding_backend = MagicMock() list_integers_input = [1, 2, 3] @@ -191,7 +191,7 @@ def test_run_wrong_incorrect_format(self): @pytest.mark.integration def test_run(self): embedder = InstructorTextEmbedder( - model_name_or_path="hkunlp/instructor-base", + model="hkunlp/instructor-base", device="cpu", instruction="Represent the Science sentence for retrieval", ) From 4e0744cc6cb64a02c9f7967bcbe5989999b856b0 Mon Sep 17 00:00:00 2001 From: ZanSara Date: Wed, 17 Jan 2024 15:49:10 +0100 Subject: [PATCH 5/5] feedback --- .../instructor_document_embedder.py | 6 +++--- .../instructor_text_embedder.py | 6 +++--- .../tests/test_instructor_document_embedder.py | 8 ++++---- .../tests/test_instructor_text_embedder.py | 8 ++++---- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/integrations/instructor_embedders/instructor_embedders_haystack/instructor_document_embedder.py b/integrations/instructor_embedders/instructor_embedders_haystack/instructor_document_embedder.py index 7656677aa..7a40f43cd 100644 --- a/integrations/instructor_embedders/instructor_embedders_haystack/instructor_document_embedder.py +++ b/integrations/instructor_embedders/instructor_embedders_haystack/instructor_document_embedder.py @@ -95,7 +95,7 @@ def __init__( :param embedding_separator: Separator used to concatenate the meta fields to the Document content. """ - self.model = model + self.model_name_or_path = model # TODO: remove device parameter and use Haystack's device management once migrated self.device = device or "cpu" self.use_auth_token = use_auth_token @@ -112,7 +112,7 @@ def to_dict(self) -> Dict[str, Any]: """ return default_to_dict( self, - model=self.model, + model=self.model_name_or_path, device=self.device, use_auth_token=self.use_auth_token, instruction=self.instruction, @@ -136,7 +136,7 @@ def warm_up(self): """ if not hasattr(self, "embedding_backend"): self.embedding_backend = _InstructorEmbeddingBackendFactory.get_embedding_backend( - model_name_or_path=self.model, device=self.device, use_auth_token=self.use_auth_token + model_name_or_path=self.model_name_or_path, device=self.device, use_auth_token=self.use_auth_token ) @component.output_types(documents=List[Document]) diff --git a/integrations/instructor_embedders/instructor_embedders_haystack/instructor_text_embedder.py b/integrations/instructor_embedders/instructor_embedders_haystack/instructor_text_embedder.py index 49fab2ce4..5a2c66e65 100644 --- a/integrations/instructor_embedders/instructor_embedders_haystack/instructor_text_embedder.py +++ b/integrations/instructor_embedders/instructor_embedders_haystack/instructor_text_embedder.py @@ -67,7 +67,7 @@ def __init__( :param normalize_embeddings: If set to true, returned vectors will have the length of 1. """ - self.model = model + self.model_name_or_path = model # TODO: remove device parameter and use Haystack's device management once migrated self.device = device or "cpu" self.use_auth_token = use_auth_token @@ -82,7 +82,7 @@ def to_dict(self) -> Dict[str, Any]: """ return default_to_dict( self, - model=self.model, + model=self.model_name_or_path, device=self.device, use_auth_token=self.use_auth_token, instruction=self.instruction, @@ -104,7 +104,7 @@ def warm_up(self): """ if not hasattr(self, "embedding_backend"): self.embedding_backend = _InstructorEmbeddingBackendFactory.get_embedding_backend( - model_name_or_path=self.model, device=self.device, use_auth_token=self.use_auth_token + model_name_or_path=self.model_name_or_path, device=self.device, use_auth_token=self.use_auth_token ) @component.output_types(embedding=List[float]) diff --git a/integrations/instructor_embedders/tests/test_instructor_document_embedder.py b/integrations/instructor_embedders/tests/test_instructor_document_embedder.py index c759c6065..b1d0d8fe6 100644 --- a/integrations/instructor_embedders/tests/test_instructor_document_embedder.py +++ b/integrations/instructor_embedders/tests/test_instructor_document_embedder.py @@ -13,7 +13,7 @@ def test_init_default(self): Test default initialization parameters for InstructorDocumentEmbedder. """ embedder = InstructorDocumentEmbedder(model="hkunlp/instructor-base") - assert embedder.model == "hkunlp/instructor-base" + assert embedder.model_name_or_path == "hkunlp/instructor-base" assert embedder.device == "cpu" assert embedder.use_auth_token is None assert embedder.instruction == "Represent the document" @@ -38,7 +38,7 @@ def test_init_with_parameters(self): meta_fields_to_embed=["test_field"], embedding_separator=" | ", ) - assert embedder.model == "hkunlp/instructor-base" + assert embedder.model_name_or_path == "hkunlp/instructor-base" assert embedder.device == "cuda" assert embedder.use_auth_token is True assert embedder.instruction == "Represent the 'domain' 'text_type' for 'task_objective'" @@ -119,7 +119,7 @@ def test_from_dict(self): }, } embedder = InstructorDocumentEmbedder.from_dict(embedder_dict) - assert embedder.model == "hkunlp/instructor-base" + assert embedder.model_name_or_path == "hkunlp/instructor-base" assert embedder.device == "cpu" assert embedder.use_auth_token is None assert embedder.instruction == "Represent the 'domain' 'text_type' for 'task_objective'" @@ -148,7 +148,7 @@ def test_from_dict_with_custom_init_parameters(self): }, } embedder = InstructorDocumentEmbedder.from_dict(embedder_dict) - assert embedder.model == "hkunlp/instructor-base" + assert embedder.model_name_or_path == "hkunlp/instructor-base" assert embedder.device == "cuda" assert embedder.use_auth_token is True assert embedder.instruction == "Represent the financial document for retrieval" diff --git a/integrations/instructor_embedders/tests/test_instructor_text_embedder.py b/integrations/instructor_embedders/tests/test_instructor_text_embedder.py index ddbb368f9..bc00f7348 100644 --- a/integrations/instructor_embedders/tests/test_instructor_text_embedder.py +++ b/integrations/instructor_embedders/tests/test_instructor_text_embedder.py @@ -12,7 +12,7 @@ def test_init_default(self): Test default initialization parameters for InstructorTextEmbedder. """ embedder = InstructorTextEmbedder(model="hkunlp/instructor-base") - assert embedder.model == "hkunlp/instructor-base" + assert embedder.model_name_or_path == "hkunlp/instructor-base" assert embedder.device == "cpu" assert embedder.use_auth_token is None assert embedder.instruction == "Represent the sentence" @@ -33,7 +33,7 @@ def test_init_with_parameters(self): progress_bar=False, normalize_embeddings=True, ) - assert embedder.model == "hkunlp/instructor-base" + assert embedder.model_name_or_path == "hkunlp/instructor-base" assert embedder.device == "cuda" assert embedder.use_auth_token is True assert embedder.instruction == "Represent the 'domain' 'text_type' for 'task_objective'" @@ -104,7 +104,7 @@ def test_from_dict(self): }, } embedder = InstructorTextEmbedder.from_dict(embedder_dict) - assert embedder.model == "hkunlp/instructor-base" + assert embedder.model_name_or_path == "hkunlp/instructor-base" assert embedder.device == "cpu" assert embedder.use_auth_token is None assert embedder.instruction == "Represent the 'domain' 'text_type' for 'task_objective'" @@ -129,7 +129,7 @@ def test_from_dict_with_custom_init_parameters(self): }, } embedder = InstructorTextEmbedder.from_dict(embedder_dict) - assert embedder.model == "hkunlp/instructor-base" + assert embedder.model_name_or_path == "hkunlp/instructor-base" assert embedder.device == "cuda" assert embedder.use_auth_token is True assert embedder.instruction == "Represent the financial document for retrieval"