
Commit 694913e
Merge branch 'main' into adopt-secret-pgvector
davidsbatista authored Feb 13, 2024
2 parents 88b4725 + 70ed7e6 commit 694913e
Showing 19 changed files with 275 additions and 1,340 deletions.
1 change: 1 addition & 0 deletions integrations/cohere/tests/test_cohere_chat_generator.py
@@ -169,6 +169,7 @@ def test_from_dict(self, monkeypatch):
     @pytest.mark.unit
     def test_from_dict_fail_wo_env_var(self, monkeypatch):
         monkeypatch.delenv("COHERE_API_KEY", raising=False)
+        monkeypatch.delenv("CO_API_KEY", raising=False)
         data = {
             "type": "haystack_integrations.components.generators.cohere.chat.chat_generator.CohereChatGenerator",
             "init_parameters": {
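
The second `delenv` is needed because the generator's API key can be read from either `COHERE_API_KEY` or `CO_API_KEY`. A minimal sketch of that fallback, assuming the component uses Haystack's `Secret` utility as the adopt-secret work in this branch does elsewhere:

    from haystack.utils import Secret

    # Secret.from_env_var accepts a list of variable names and resolves the
    # first one that is set; resolve_value() raises if none of them are set,
    # which is what test_from_dict_fail_wo_env_var relies on.
    api_key = Secret.from_env_var(["COHERE_API_KEY", "CO_API_KEY"])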
4 changes: 2 additions & 2 deletions integrations/deepeval/pydoc/config.yml
@@ -19,9 +19,9 @@ renderer:
   type: haystack_pydoc_tools.renderers.ReadmePreviewRenderer
   excerpt: DeepEval integration for Haystack
   category_slug: haystack-integrations
-  title: Chroma
+  title: DeepEval
   slug: integrations-deepeval
-  order: 1
+  order: 45
   markdown:
     descriptive_class_title: false
     descriptive_module_title: true
10 changes: 5 additions & 5 deletions integrations/fastembed/pydoc/config.yml
@@ -3,8 +3,8 @@ loaders:
     search_path: [../src]
     modules:
       [
-        "haystack_integrations.components.embedders.fastembed",
-        "haystack_integrations.components.embedders.fastembed.embedding_backend",
+        "haystack_integrations.components.embedders.fastembed.fastembed_document_embedder",
+        "haystack_integrations.components.embedders.fastembed.fastembed_text_embedder",
       ]
     ignore_when_discovered: ["__init__"]
 processors:
@@ -17,11 +17,11 @@ processors:
   - type: crossref
 renderer:
   type: haystack_pydoc_tools.renderers.ReadmePreviewRenderer
-  excerpt: Embedders integration for Haystack
+  excerpt: FastEmbed integration for Haystack
   category_slug: haystack-integrations
-  title: Embedders
+  title: FastEmbed
   slug: fastembed-embedders
-  order: 300
+  order: 55
   markdown:
     descriptive_class_title: false
     descriptive_module_title: true
(file name not shown)

@@ -38,5 +38,6 @@ def __init__(
         self.model = TextEmbedding(model_name=model_name)

     def embed(self, data: List[List[str]], **kwargs) -> List[List[float]]:
-        embeddings = list(self.model.embed(data, **kwargs))
+        # the embed method returns a Iterable[np.ndarray], so we convert it to a list of lists
+        embeddings = [np_array.tolist() for np_array in self.model.embed(data, **kwargs)]
         return embeddings
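
For context, fastembed's `TextEmbedding.embed` yields one NumPy array per input text, so `list(...)` alone produces a list of ndarrays rather than plain Python lists. A small sketch of the conversion this hunk introduces, with a stand-in for the real model:

    import numpy as np

    def fake_embed(texts):
        # stands in for TextEmbedding.embed, which yields one np.ndarray per text
        for _ in texts:
            yield np.zeros(384, dtype=np.float32)

    # ndarray.tolist() converts each vector to a list of plain Python floats,
    # which is what the updated tests below assert
    embeddings = [arr.tolist() for arr in fake_embed(["hello", "world"])]
    assert all(isinstance(value, float) for value in embeddings[0])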
(file name not shown)

@@ -142,6 +142,6 @@ def run(self, documents: List[Document]):
         )

         for doc, emb in zip(documents, embeddings):
-            doc.embedding = list(emb)
+            doc.embedding = emb

         return {"documents": documents}
(file name not shown)

@@ -248,4 +248,4 @@ def test_run(self):

         assert isinstance(embedding, list)
         assert len(embedding) == 384
-        assert all(isinstance(emb.item(), float) for emb in embedding)
+        assert all(isinstance(emb, float) for emb in embedding)
(file name not shown)

@@ -186,4 +186,4 @@ def test_run(self):

         assert isinstance(embedding, list)
         assert len(embedding) == 384
-        assert all(isinstance(emb.item(), float) for emb in embedding)
+        assert all(isinstance(emb, float) for emb in embedding)
8 changes: 4 additions & 4 deletions integrations/instructor_embedders/pydoc/config.yml
@@ -3,8 +3,8 @@ loaders:
     search_path: [../src]
     modules:
       [
-        "haystack_integrations.components.embedders.instructor_embedders",
-        "haystack_integrations.components.embedders.instructor_embedders.embedding_backend",
+        "haystack_integrations.components.embedders.instructor_embedders.instructor_document_embedder",
+        "haystack_integrations.components.embedders.instructor_embedders.instructor_text_embedder",
       ]
     ignore_when_discovered: ["__init__"]
 processors:
@@ -17,9 +17,9 @@ processors:
   - type: crossref
 renderer:
   type: haystack_pydoc_tools.renderers.ReadmePreviewRenderer
-  excerpt: Embedders integration for Haystack
+  excerpt: Instructor embedders integration for Haystack
   category_slug: haystack-integrations
-  title: Embedders
+  title: Instructor Embedders
   slug: integrations-instructor-embedders
   order: 90
   markdown:
6 changes: 5 additions & 1 deletion integrations/jina/pydoc/config.yml
@@ -1,7 +1,11 @@
 loaders:
   - type: haystack_pydoc_tools.loaders.CustomPythonLoader
     search_path: [../src]
-    modules: ["haystack_integrations.components.embedders.jina"]
+    modules:
+      [
+        "haystack_integrations.components.embedders.jina.document_embedder",
+        "haystack_integrations.components.embedders.jina.text_embedder",
+      ]
     ignore_when_discovered: ["__init__"]
 processors:
   - type: filter
(file name not shown)

@@ -1,7 +1,10 @@
-from typing import Any, Dict, List, Optional
+import json
+from typing import Any, Callable, Dict, List, Optional

 import requests
-from haystack import component
+from haystack import component, default_from_dict, default_to_dict
+from haystack.components.generators.utils import deserialize_callback_handler, serialize_callback_handler
+from haystack.dataclasses import StreamingChunk
 from requests import Response


@@ -21,6 +24,7 @@ def __init__(
         template: Optional[str] = None,
         raw: bool = False,
         timeout: int = 120,
+        streaming_callback: Optional[Callable[[StreamingChunk], None]] = None,
     ):
         """
         :param model: The name of the model to use. The model should be available in the running Ollama instance.
@@ -36,6 +40,8 @@ def __init__(
             if you are specifying a full templated prompt in your API request.
         :param timeout: The number of seconds before throwing a timeout error from the Ollama API.
             Default is 120 seconds.
+        :param streaming_callback: A callback function that is called when a new token is received from the stream.
+            The callback function accepts StreamingChunk as an argument.
         """
         self.timeout = timeout
         self.raw = raw
@@ -44,8 +50,40 @@ def __init__(
         self.model = model
         self.url = url
         self.generation_kwargs = generation_kwargs or {}
+        self.streaming_callback = streaming_callback

-    def _create_json_payload(self, prompt: str, generation_kwargs=None) -> Dict[str, Any]:
+    def to_dict(self) -> Dict[str, Any]:
+        """
+        Serialize this component to a dictionary.
+        :return: The serialized component as a dictionary.
+        """
+        callback_name = serialize_callback_handler(self.streaming_callback) if self.streaming_callback else None
+        return default_to_dict(
+            self,
+            timeout=self.timeout,
+            raw=self.raw,
+            template=self.template,
+            system_prompt=self.system_prompt,
+            model=self.model,
+            url=self.url,
+            generation_kwargs=self.generation_kwargs,
+            streaming_callback=callback_name,
+        )
+
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> "OllamaGenerator":
+        """
+        Deserialize this component from a dictionary.
+        :param data: The dictionary representation of this component.
+        :return: The deserialized component instance.
+        """
+        init_params = data.get("init_parameters", {})
+        serialized_callback_handler = init_params.get("streaming_callback")
+        if serialized_callback_handler:
+            data["init_parameters"]["streaming_callback"] = deserialize_callback_handler(serialized_callback_handler)
+        return default_from_dict(cls, data)
+
+    def _create_json_payload(self, prompt: str, stream: bool, generation_kwargs=None) -> Dict[str, Any]:
         """
         Returns a dictionary of JSON arguments for a POST request to an Ollama service.
         :param prompt: The prompt to generate a response for.
@@ -58,26 +96,67 @@ def _create_json_payload(self, prompt: str, generation_kwargs=None) -> Dict[str,
         return {
             "prompt": prompt,
             "model": self.model,
-            "stream": False,
+            "stream": stream,
             "raw": self.raw,
             "template": self.template,
             "system": self.system_prompt,
             "options": generation_kwargs,
         }

-    def _convert_to_haystack_response(self, ollama_response: Response) -> Dict[str, List[Any]]:
+    def _convert_to_response(self, ollama_response: Response) -> Dict[str, List[Any]]:
         """
         Convert a response from the Ollama API to the required Haystack format.
         :param ollama_response: A response (requests library) from the Ollama API.
         :return: A dictionary of the returned responses and metadata.
         """

         resp_dict = ollama_response.json()

         replies = [resp_dict["response"]]
         meta = {key: value for key, value in resp_dict.items() if key != "response"}

         return {"replies": replies, "meta": [meta]}

+    def _convert_to_streaming_response(self, chunks: List[StreamingChunk]) -> Dict[str, List[Any]]:
+        """
+        Convert a list of chunks response required Haystack format.
+        :param chunks: List of StreamingChunks
+        :return: A dictionary of the returned responses and metadata.
+        """
+
+        replies = ["".join([c.content for c in chunks])]
+        meta = {key: value for key, value in chunks[0].meta.items() if key != "response"}
+
+        return {"replies": replies, "meta": [meta]}
+
+    def _handle_streaming_response(self, response) -> List[StreamingChunk]:
+        """Handles Streaming response case
+        :param response: streaming response from ollama api.
+        :return: The List[StreamingChunk].
+        """
+        chunks: List[StreamingChunk] = []
+        for chunk in response.iter_lines():
+            chunk_delta: StreamingChunk = self._build_chunk(chunk)
+            chunks.append(chunk_delta)
+            if self.streaming_callback is not None:
+                self.streaming_callback(chunk_delta)
+        return chunks
+
+    def _build_chunk(self, chunk_response: Any) -> StreamingChunk:
+        """
+        Converts the response from the Ollama API to a StreamingChunk.
+        :param chunk: The chunk returned by the Ollama API.
+        :return: The StreamingChunk.
+        """
+        decoded_chunk = json.loads(chunk_response.decode("utf-8"))
+
+        content = decoded_chunk["response"]
+        meta = {key: value for key, value in decoded_chunk.items() if key != "response"}
+
+        chunk_message = StreamingChunk(content, meta)
+        return chunk_message
+
     @component.output_types(replies=List[str], metadata=List[Dict[str, Any]])
     def run(
         self,
@@ -94,11 +173,17 @@ def run(
         """
         generation_kwargs = {**self.generation_kwargs, **(generation_kwargs or {})}

-        json_payload = self._create_json_payload(prompt, generation_kwargs)
+        stream = self.streaming_callback is not None
+
+        json_payload = self._create_json_payload(prompt, stream, generation_kwargs)

-        response = requests.post(url=self.url, json=json_payload, timeout=self.timeout)
+        response = requests.post(url=self.url, json=json_payload, timeout=self.timeout, stream=stream)

         # throw error on unsuccessful response
         response.raise_for_status()

-        return self._convert_to_haystack_response(response)
+        if stream:
+            chunks: List[StreamingChunk] = self._handle_streaming_response(response)
+            return self._convert_to_streaming_response(chunks)
+
+        return self._convert_to_response(response)
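
Taken together, the Ollama changes let the generator stream tokens through a user-supplied callback and round-trip through `to_dict`/`from_dict`. A hedged usage sketch; the import path and endpoint URL are assumptions not confirmed by this diff, and it needs a running Ollama instance:

    from haystack.dataclasses import StreamingChunk
    from haystack_integrations.components.generators.ollama import OllamaGenerator  # assumed import path

    def print_chunk(chunk: StreamingChunk) -> None:
        # called once per streamed JSON line from the Ollama API
        print(chunk.content, end="", flush=True)

    generator = OllamaGenerator(
        model="orca-mini",
        url="http://localhost:11434/api/generate",  # assumed default endpoint
        streaming_callback=print_chunk,  # run() sets stream=True when this is given
    )

    result = generator.run("Why is the sky blue?")
    print(result["replies"][0])

    # to_dict stores the callback by import path; from_dict re-imports it,
    # so a serialized pipeline keeps streaming after it is reloaded
    restored = OllamaGenerator.from_dict(generator.to_dict())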
(Diffs for the remaining changed files were not loaded.)
