deepset-ai · ZanSara · Jan 17, 2024 · Jan 17, 2024 · Jan 17, 2024 · Jan 17, 2024
@@ -26,7 +26,7 @@ class InstructorDocumentEmbedder:
     doc_embedding_instruction = "Represent the Medical Document for retrieval:"
 
     doc_embedder = InstructorDocumentEmbedder(
-        model_name_or_path="hkunlp/instructor-base",
+        model="hkunlp/instructor-base",
         instruction=doc_embedding_instruction,
         batch_size=32,
         device="cpu",
@@ -60,7 +60,7 @@ class InstructorDocumentEmbedder:
 
     def __init__(
         self,
-        model_name_or_path: str = "hkunlp/instructor-base",
+        model: str = "hkunlp/instructor-base",
         device: Optional[str] = None,
         use_auth_token: Union[bool, str, None] = None,
         instruction: str = "Represent the document",
@@ -73,7 +73,7 @@ def __init__(
         """
         Create an InstructorDocumentEmbedder component.
 
-        :param model_name_or_path: Local path or name of the model in Hugging Face's model hub,
+        :param model: Local path or name of the model in Hugging Face's model hub,
             such as ``'hkunlp/instructor-base'``.
         :param device: Device (like 'cuda' / 'cpu') that should be used for computation.
             If None, checks if a GPU can be used.
@@ -95,7 +95,7 @@ def __init__(
         :param embedding_separator: Separator used to concatenate the meta fields to the Document content.
         """
 
-        self.model_name_or_path = model_name_or_path
+        self.model_name_or_path = model
         # TODO: remove device parameter and use Haystack's device management once migrated
         self.device = device or "cpu"
         self.use_auth_token = use_auth_token
@@ -112,7 +112,7 @@ def to_dict(self) -> Dict[str, Any]:
         """
         return default_to_dict(
             self,
-            model_name_or_path=self.model_name_or_path,
+            model=self.model_name_or_path,
             device=self.device,
             use_auth_token=self.use_auth_token,
             instruction=self.instruction,

@@ -26,7 +26,7 @@ class InstructorTextEmbedder:
     )
 
     text_embedder = InstructorTextEmbedder(
-        model_name_or_path="hkunlp/instructor-base", instruction=instruction,
+        model="hkunlp/instructor-base", instruction=instruction,
         device="cpu"
     )
 
@@ -36,7 +36,7 @@ class InstructorTextEmbedder:
 
     def __init__(
         self,
-        model_name_or_path: str = "hkunlp/instructor-base",
+        model: str = "hkunlp/instructor-base",
         device: Optional[str] = None,
         use_auth_token: Union[bool, str, None] = None,
         instruction: str = "Represent the sentence",
@@ -47,7 +47,7 @@ def __init__(
         """
         Create an InstructorTextEmbedder component.
 
-        :param model_name_or_path: Local path or name of the model in Hugging Face's model hub,
+        :param model: Local path or name of the model in Hugging Face's model hub,
             such as ``'hkunlp/instructor-base'``.
         :param device: Device (like 'cuda' / 'cpu') that should be used for computation.
             If None, checks if a GPU can be used.
@@ -67,7 +67,7 @@ def __init__(
         :param normalize_embeddings: If set to true, returned vectors will have the length of 1.
         """
 
-        self.model_name_or_path = model_name_or_path
+        self.model_name_or_path = model
         # TODO: remove device parameter and use Haystack's device management once migrated
         self.device = device or "cpu"
         self.use_auth_token = use_auth_token
@@ -82,7 +82,7 @@ def to_dict(self) -> Dict[str, Any]:
         """
         return default_to_dict(
             self,
-            model_name_or_path=self.model_name_or_path,
+            model=self.model_name_or_path,
             device=self.device,
             use_auth_token=self.use_auth_token,
             instruction=self.instruction,

@@ -12,7 +12,7 @@ def test_init_default(self):
         """
         Test default initialization parameters for InstructorDocumentEmbedder.
         """
-        embedder = InstructorDocumentEmbedder(model_name_or_path="hkunlp/instructor-base")
+        embedder = InstructorDocumentEmbedder(model="hkunlp/instructor-base")
         assert embedder.model_name_or_path == "hkunlp/instructor-base"
         assert embedder.device == "cpu"
         assert embedder.use_auth_token is None
@@ -28,7 +28,7 @@ def test_init_with_parameters(self):
         Test custom initialization parameters for InstructorDocumentEmbedder.
         """
         embedder = InstructorDocumentEmbedder(
-            model_name_or_path="hkunlp/instructor-base",
+            model="hkunlp/instructor-base",
             device="cuda",
             use_auth_token=True,
             instruction="Represent the 'domain' 'text_type' for 'task_objective'",
@@ -52,12 +52,12 @@ def test_to_dict(self):
         """
         Test serialization of InstructorDocumentEmbedder to a dictionary, using default initialization parameters.
         """
-        embedder = InstructorDocumentEmbedder(model_name_or_path="hkunlp/instructor-base")
+        embedder = InstructorDocumentEmbedder(model="hkunlp/instructor-base")
         embedder_dict = embedder.to_dict()
         assert embedder_dict == {
             "type": "instructor_embedders_haystack.instructor_document_embedder.InstructorDocumentEmbedder",
             "init_parameters": {
-                "model_name_or_path": "hkunlp/instructor-base",
+                "model": "hkunlp/instructor-base",
                 "device": "cpu",
                 "use_auth_token": None,
                 "instruction": "Represent the document",
@@ -74,7 +74,7 @@ def test_to_dict_with_custom_init_parameters(self):
         Test serialization of InstructorDocumentEmbedder to a dictionary, using custom initialization parameters.
         """
         embedder = InstructorDocumentEmbedder(
-            model_name_or_path="hkunlp/instructor-base",
+            model="hkunlp/instructor-base",
             device="cuda",
             use_auth_token=True,
             instruction="Represent the financial document for retrieval",
@@ -88,7 +88,7 @@ def test_to_dict_with_custom_init_parameters(self):
         assert embedder_dict == {
             "type": "instructor_embedders_haystack.instructor_document_embedder.InstructorDocumentEmbedder",
             "init_parameters": {
-                "model_name_or_path": "hkunlp/instructor-base",
+                "model": "hkunlp/instructor-base",
                 "device": "cuda",
                 "use_auth_token": True,
                 "instruction": "Represent the financial document for retrieval",
@@ -107,7 +107,7 @@ def test_from_dict(self):
         embedder_dict = {
             "type": "instructor_embedders_haystack.instructor_document_embedder.InstructorDocumentEmbedder",
             "init_parameters": {
-                "model_name_or_path": "hkunlp/instructor-base",
+                "model": "hkunlp/instructor-base",
                 "device": "cpu",
                 "use_auth_token": None,
                 "instruction": "Represent the 'domain' 'text_type' for 'task_objective'",
@@ -136,7 +136,7 @@ def test_from_dict_with_custom_init_parameters(self):
         embedder_dict = {
             "type": "instructor_embedders_haystack.instructor_document_embedder.InstructorDocumentEmbedder",
             "init_parameters": {
-                "model_name_or_path": "hkunlp/instructor-base",
+                "model": "hkunlp/instructor-base",
                 "device": "cuda",
                 "use_auth_token": True,
                 "instruction": "Represent the financial document for retrieval",
@@ -163,7 +163,7 @@ def test_warmup(self, mocked_factory):
         """
         Test for checking embedder instances after warm-up.
         """
-        embedder = InstructorDocumentEmbedder(model_name_or_path="hkunlp/instructor-base")
+        embedder = InstructorDocumentEmbedder(model="hkunlp/instructor-base")
         mocked_factory.get_embedding_backend.assert_not_called()
         embedder.warm_up()
         mocked_factory.get_embedding_backend.assert_called_once_with(
@@ -175,7 +175,7 @@ def test_warmup_does_not_reload(self, mocked_factory):
         """
         Test for checking backend instances after multiple warm-ups.
         """
-        embedder = InstructorDocumentEmbedder(model_name_or_path="hkunlp/instructor-base")
+        embedder = InstructorDocumentEmbedder(model="hkunlp/instructor-base")
         mocked_factory.get_embedding_backend.assert_not_called()
         embedder.warm_up()
         embedder.warm_up()
@@ -185,7 +185,7 @@ def test_embed(self):
         """
         Test for checking output dimensions and embedding dimensions.
         """
-        embedder = InstructorDocumentEmbedder(model_name_or_path="hkunlp/instructor-large")
+        embedder = InstructorDocumentEmbedder(model="hkunlp/instructor-large")
         embedder.embedding_backend = MagicMock()
         embedder.embedding_backend.embed = lambda x, **kwargs: np.random.rand(len(x), 16).tolist()  # noqa: ARG005
 
@@ -204,7 +204,7 @@ def test_embed_incorrect_input_format(self):
         """
         Test for checking incorrect input format when creating embedding.
         """
-        embedder = InstructorDocumentEmbedder(model_name_or_path="hkunlp/instructor-base")
+        embedder = InstructorDocumentEmbedder(model="hkunlp/instructor-base")
 
         string_input = "text"
         list_integers_input = [1, 2, 3]
@@ -221,7 +221,7 @@ def test_embed_metadata(self):
         with a custom instruction and metadata.
         """
         embedder = InstructorDocumentEmbedder(
-            model_name_or_path="model",
+            model="model",
             instruction="Represent the financial document for retrieval",
             meta_fields_to_embed=["meta_field"],
             embedding_separator="\n",
@@ -248,7 +248,7 @@ def test_embed_metadata(self):
     @pytest.mark.integration
     def test_run(self):
         embedder = InstructorDocumentEmbedder(
-            model_name_or_path="hkunlp/instructor-base",
+            model="hkunlp/instructor-base",
             device="cpu",
             instruction="Represent the Science document for retrieval",
         )

@@ -11,7 +11,7 @@ def test_init_default(self):
         """
         Test default initialization parameters for InstructorTextEmbedder.
         """
-        embedder = InstructorTextEmbedder(model_name_or_path="hkunlp/instructor-base")
+        embedder = InstructorTextEmbedder(model="hkunlp/instructor-base")
         assert embedder.model_name_or_path == "hkunlp/instructor-base"
         assert embedder.device == "cpu"
         assert embedder.use_auth_token is None
@@ -25,7 +25,7 @@ def test_init_with_parameters(self):
         Test custom initialization parameters for InstructorTextEmbedder.
         """
         embedder = InstructorTextEmbedder(
-            model_name_or_path="hkunlp/instructor-base",
+            model="hkunlp/instructor-base",
             device="cuda",
             use_auth_token=True,
             instruction="Represent the 'domain' 'text_type' for 'task_objective'",
@@ -45,12 +45,12 @@ def test_to_dict(self):
         """
         Test serialization of InstructorTextEmbedder to a dictionary, using default initialization parameters.
         """
-        embedder = InstructorTextEmbedder(model_name_or_path="hkunlp/instructor-base")
+        embedder = InstructorTextEmbedder(model="hkunlp/instructor-base")
         embedder_dict = embedder.to_dict()
         assert embedder_dict == {
             "type": "instructor_embedders_haystack.instructor_text_embedder.InstructorTextEmbedder",
             "init_parameters": {
-                "model_name_or_path": "hkunlp/instructor-base",
+                "model": "hkunlp/instructor-base",
                 "device": "cpu",
                 "use_auth_token": None,
                 "instruction": "Represent the sentence",
@@ -65,7 +65,7 @@ def test_to_dict_with_custom_init_parameters(self):
         Test serialization of InstructorTextEmbedder to a dictionary, using custom initialization parameters.
         """
         embedder = InstructorTextEmbedder(
-            model_name_or_path="hkunlp/instructor-base",
+            model="hkunlp/instructor-base",
             device="cuda",
             use_auth_token=True,
             instruction="Represent the financial document for retrieval",
@@ -77,7 +77,7 @@ def test_to_dict_with_custom_init_parameters(self):
         assert embedder_dict == {
             "type": "instructor_embedders_haystack.instructor_text_embedder.InstructorTextEmbedder",
             "init_parameters": {
-                "model_name_or_path": "hkunlp/instructor-base",
+                "model": "hkunlp/instructor-base",
                 "device": "cuda",
                 "use_auth_token": True,
                 "instruction": "Represent the financial document for retrieval",
@@ -94,7 +94,7 @@ def test_from_dict(self):
         embedder_dict = {
             "type": "instructor_embedders_haystack.instructor_text_embedder.InstructorTextEmbedder",
             "init_parameters": {
-                "model_name_or_path": "hkunlp/instructor-base",
+                "model": "hkunlp/instructor-base",
                 "device": "cpu",
                 "use_auth_token": None,
                 "instruction": "Represent the 'domain' 'text_type' for 'task_objective'",
@@ -119,7 +119,7 @@ def test_from_dict_with_custom_init_parameters(self):
         embedder_dict = {
             "type": "instructor_embedders_haystack.instructor_text_embedder.InstructorTextEmbedder",
             "init_parameters": {
-                "model_name_or_path": "hkunlp/instructor-base",
+                "model": "hkunlp/instructor-base",
                 "device": "cuda",
                 "use_auth_token": True,
                 "instruction": "Represent the financial document for retrieval",
@@ -142,7 +142,7 @@ def test_warmup(self, mocked_factory):
         """
         Test for checking embedder instances after warm-up.
         """
-        embedder = InstructorTextEmbedder(model_name_or_path="hkunlp/instructor-base")
+        embedder = InstructorTextEmbedder(model="hkunlp/instructor-base")
         mocked_factory.get_embedding_backend.assert_not_called()
         embedder.warm_up()
         mocked_factory.get_embedding_backend.assert_called_once_with(
@@ -154,7 +154,7 @@ def test_warmup_does_not_reload(self, mocked_factory):
         """
         Test for checking backend instances after multiple warm-ups.
         """
-        embedder = InstructorTextEmbedder(model_name_or_path="hkunlp/instructor-base")
+        embedder = InstructorTextEmbedder(model="hkunlp/instructor-base")
         mocked_factory.get_embedding_backend.assert_not_called()
         embedder.warm_up()
         embedder.warm_up()
@@ -164,7 +164,7 @@ def test_embed(self):
         """
         Test for checking output dimensions and embedding dimensions.
         """
-        embedder = InstructorTextEmbedder(model_name_or_path="hkunlp/instructor-large")
+        embedder = InstructorTextEmbedder(model="hkunlp/instructor-large")
         embedder.embedding_backend = MagicMock()
         embedder.embedding_backend.embed = lambda x, **kwargs: np.random.rand(len(x), 16).tolist()  # noqa: ARG005
 
@@ -180,7 +180,7 @@ def test_run_wrong_incorrect_format(self):
         """
         Test for checking incorrect input format when creating embedding.
         """
-        embedder = InstructorTextEmbedder(model_name_or_path="hkunlp/instructor-large")
+        embedder = InstructorTextEmbedder(model="hkunlp/instructor-large")
         embedder.embedding_backend = MagicMock()
 
         list_integers_input = [1, 2, 3]
@@ -191,7 +191,7 @@ def test_run_wrong_incorrect_format(self):
     @pytest.mark.integration
     def test_run(self):
         embedder = InstructorTextEmbedder(
-            model_name_or_path="hkunlp/instructor-base",
+            model="hkunlp/instructor-base",
             device="cpu",
             instruction="Represent the Science sentence for retrieval",
         )