Skip to content

Commit

Permalink
doc: fixing docstrings and API docs for gradient (#507)
Browse files Browse the repository at this point in the history
* initial import

* adding returned Dict

* attending PR comments

* linting

* fixing doc
  • Loading branch information
davidsbatista authored Mar 5, 2024
1 parent d45cf7c commit c6da120
Show file tree
Hide file tree
Showing 3 changed files with 78 additions and 23 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -23,16 +23,32 @@ def _alt_progress_bar(x: Any) -> Any:
class GradientDocumentEmbedder:
"""
A component for computing Document embeddings using Gradient AI API.
The embedding of each Document is stored in the `embedding` field of the Document.
Usage example:
```python
embedder = GradientDocumentEmbedder(model="bge_large")
p = Pipeline()
p.add_component(embedder, name="document_embedder")
p.add_component(instance=GradientDocumentEmbedder(
p.add_component(instance=DocumentWriter(document_store=InMemoryDocumentStore()), name="document_writer")
p.connect("document_embedder", "document_writer")
p.run({"document_embedder": {"documents": documents}})
from haystack import Pipeline
from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack.components.writers import DocumentWriter
from haystack import Document
from haystack_integrations.components.embedders.gradient import GradientDocumentEmbedder
documents = [
Document(content="My name is Jean and I live in Paris."),
Document(content="My name is Mark and I live in Berlin."),
Document(content="My name is Giorgio and I live in Rome."),
]
indexing_pipeline = Pipeline()
indexing_pipeline.add_component(instance=GradientDocumentEmbedder(), name="document_embedder")
indexing_pipeline.add_component(
    instance=DocumentWriter(document_store=InMemoryDocumentStore()), name="document_writer"
)
indexing_pipeline.connect("document_embedder", "document_writer")
indexing_pipeline.run({"document_embedder": {"documents": documents}})
>>> {'document_writer': {'documents_written': 3}}
```
"""

Expand All @@ -53,7 +69,7 @@ def __init__(
:param batch_size: Update cycle for tqdm progress bar, default is to update every 32_768 docs.
:param access_token: The Gradient access token.
:param workspace_id: The Gradient workspace ID.
:param host: The Gradient host. By default it uses https://api.gradient.ai/.
:param host: The Gradient host. By default, it uses [Gradient AI](https://api.gradient.ai/).
:param progress_bar: Whether to show a progress bar while embedding the documents.
"""
self._batch_size = batch_size
Expand All @@ -75,8 +91,12 @@ def _get_telemetry_data(self) -> Dict[str, Any]:

def to_dict(self) -> dict:
"""
Serialize the component to a Python dictionary.
Serialize this component to a dictionary.
:returns:
The serialized component as a dictionary.
"""

return default_to_dict(
self,
model=self._model_name,
Expand All @@ -91,13 +111,17 @@ def to_dict(self) -> dict:
def from_dict(cls, data: Dict[str, Any]) -> "GradientDocumentEmbedder":
"""
Deserialize this component from a dictionary.
:param data: The dictionary representation of this component.
:returns:
The deserialized component instance.
"""
deserialize_secrets_inplace(data["init_parameters"], keys=["access_token", "workspace_id"])
return default_from_dict(cls, data)

def warm_up(self) -> None:
"""
Load the embedding model.
Initializes the component.
"""
if not hasattr(self, "_embedding_model"):
self._embedding_model = self._gradient.get_embeddings_model(slug=self._model_name)
Expand Down Expand Up @@ -125,9 +149,14 @@ def _generate_embeddings(self, documents: List[Document], batch_size: int) -> Li
def run(self, documents: List[Document]):
"""
Embed a list of Documents.
The embedding of each Document is stored in the `embedding` field of the Document.
:param documents: A list of Documents to embed.
:returns:
A dictionary with the following keys:
- `documents`: The embedded Documents.
"""
if not isinstance(documents, list) or documents and any(not isinstance(doc, Document) for doc in documents):
msg = "GradientDocumentEmbedder expects a list of Documents as input.\
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,23 @@
@component
class GradientTextEmbedder:
"""
A component for embedding strings using models hosted on Gradient AI (https://gradient.ai).
A component for embedding strings using models hosted on [Gradient AI](https://gradient.ai).
Usage example:
```python
embedder = GradientTextEmbedder(model="bge_large")
from haystack_integrations.components.embedders.gradient import GradientTextEmbedder
from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever
from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack import Pipeline
embedder =
p = Pipeline()
p.add_component(instance=embedder, name="text_embedder")
p.add_component(instance=InMemoryEmbeddingRetriever(document_store=InMemoryDocumentStore()), name="retriever")
p.add_component("text_embedder", GradientTextEmbedder(model="bge-large"))
p.add_component("retriever", InMemoryEmbeddingRetriever(document_store=InMemoryDocumentStore()))
p.connect("text_embedder", "retriever")
p.run("embed me!!!")
p.run(data={"text_embedder": {"text":"You can embed me but I'll return no matching documents"}})
>>> No Documents found with embeddings. Returning empty list. To generate embeddings, use a DocumentEmbedder.
>>> {'retriever': {'documents': []}}
```
"""

Expand All @@ -34,7 +42,7 @@ def __init__(
:param model: The name of the model to use.
:param access_token: The Gradient access token.
:param workspace_id: The Gradient workspace ID.
:param host: The Gradient host. By default it uses https://api.gradient.ai/.
:param host: The Gradient host. By default, it uses [Gradient AI](https://api.gradient.ai/).
"""
self._host = host
self._model_name = model
Expand All @@ -53,7 +61,10 @@ def _get_telemetry_data(self) -> Dict[str, Any]:

def to_dict(self) -> dict:
"""
Serialize the component to a Python dictionary.
Serialize this component to a dictionary.
:returns:
The serialized component as a dictionary.
"""
return default_to_dict(
self,
Expand All @@ -67,13 +78,17 @@ def to_dict(self) -> dict:
def from_dict(cls, data: Dict[str, Any]) -> "GradientTextEmbedder":
"""
Deserialize this component from a dictionary.
:param data: The dictionary representation of this component.
:returns:
The deserialized component instance.
"""
deserialize_secrets_inplace(data["init_parameters"], keys=["access_token", "workspace_id"])
return default_from_dict(cls, data)

def warm_up(self) -> None:
"""
Load the embedding model.
Initializes the component.
"""
if not hasattr(self, "_embedding_model"):
self._embedding_model = self._gradient.get_embeddings_model(slug=self._model_name)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,10 @@ class GradientGenerator:
Queries the LLM using Gradient AI's SDK ('gradientai' package).
See [Gradient AI API](https://docs.gradient.ai/docs/sdk-quickstart) for more details.
Usage example:
```python
from haystack_integrations.components.generators.gradient import GradientGenerator
llm = GradientGenerator(base_model_slug="llama2-7b-chat")
llm.warm_up()
print(llm.run(prompt="What is the meaning of life?"))
Expand All @@ -40,17 +43,17 @@ def __init__(
"""
Create a GradientGenerator component.
:param access_token: The Gradient access token. If not provided it's read from the environment
variable GRADIENT_ACCESS_TOKEN.
:param access_token: The Gradient access token as a `Secret`. If not provided it's read from the environment
variable `GRADIENT_ACCESS_TOKEN`.
:param base_model_slug: The base model slug to use.
:param host: The Gradient host. By default it uses https://api.gradient.ai/.
:param host: The Gradient host. By default, it uses [Gradient AI](https://api.gradient.ai/).
:param max_generated_token_count: The maximum number of tokens to generate.
:param model_adapter_id: The model adapter ID to use.
:param temperature: The temperature to use.
:param top_k: The top k to use.
:param top_p: The top p to use.
:param workspace_id: The Gradient workspace ID. If not provided it's read from the environment
variable GRADIENT_WORKSPACE_ID.
:param workspace_id: The Gradient workspace ID as a `Secret`. If not provided it's read from the environment
variable `GRADIENT_WORKSPACE_ID`.
"""
self._access_token = access_token
self._base_model_slug = base_model_slug
Expand Down Expand Up @@ -84,6 +87,9 @@ def __init__(
def to_dict(self) -> Dict[str, Any]:
"""
Serialize this component to a dictionary.
:returns:
The serialized component as a dictionary.
"""
return default_to_dict(
self,
Expand All @@ -102,7 +108,12 @@ def to_dict(self) -> Dict[str, Any]:
def from_dict(cls, data: Dict[str, Any]) -> "GradientGenerator":
"""
Deserialize this component from a dictionary.
:param data: The dictionary representation of this component.
:returns:
The deserialized component instance.
"""

deserialize_secrets_inplace(data["init_parameters"], keys=["access_token", "workspace_id"])
return default_from_dict(cls, data)

Expand Down

0 comments on commit c6da120

Please sign in to comment.