Skip to content

Commit

Permalink
doc: fixing docstrings and API docs for gradient (#507)
Browse files Browse the repository at this point in the history
* initial import

* adding returned Dict

* attending PR comments

* linting

* fixing doc
  • Loading branch information
davidsbatista authored Mar 5, 2024
1 parent d45cf7c commit c6da120
Show file tree
Hide file tree
Showing 3 changed files with 78 additions and 23 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -23,16 +23,32 @@ def _alt_progress_bar(x: Any) -> Any:
class GradientDocumentEmbedder:
"""
A component for computing Document embeddings using Gradient AI API.
The embedding of each Document is stored in the `embedding` field of the Document.
Usage example:
```python
embedder = GradientDocumentEmbedder(model="bge_large")
p = Pipeline()
p.add_component(embedder, name="document_embedder")
p.add_component(instance=GradientDocumentEmbedder(
p.add_component(instance=DocumentWriter(document_store=InMemoryDocumentStore()), name="document_writer")
p.connect("document_embedder", "document_writer")
p.run({"document_embedder": {"documents": documents}})
from haystack import Pipeline
from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack.components.writers import DocumentWriter
from haystack import Document
from haystack_integrations.components.embedders.gradient import GradientDocumentEmbedder
documents = [
Document(content="My name is Jean and I live in Paris."),
Document(content="My name is Mark and I live in Berlin."),
Document(content="My name is Giorgio and I live in Rome."),
]
indexing_pipeline = Pipeline()
indexing_pipeline.add_component(instance=GradientDocumentEmbedder(), name="document_embedder")
indexing_pipeline.add_component(
    instance=DocumentWriter(document_store=InMemoryDocumentStore()), name="document_writer"
)
indexing_pipeline.connect("document_embedder", "document_writer")
indexing_pipeline.run({"document_embedder": {"documents": documents}})
>>> {'document_writer': {'documents_written': 3}}
```
"""

Expand All @@ -53,7 +69,7 @@ def __init__(
:param batch_size: Update cycle for tqdm progress bar, default is to update every 32_768 docs.
:param access_token: The Gradient access token.
:param workspace_id: The Gradient workspace ID.
:param host: The Gradient host. By default it uses https://api.gradient.ai/.
:param host: The Gradient host. By default, it uses [Gradient AI](https://api.gradient.ai/).
:param progress_bar: Whether to show a progress bar while embedding the documents.
"""
self._batch_size = batch_size
Expand All @@ -75,8 +91,12 @@ def _get_telemetry_data(self) -> Dict[str, Any]:

def to_dict(self) -> dict:
"""
Serialize the component to a Python dictionary.
Serialize this component to a dictionary.
:returns:
The serialized component as a dictionary.
"""

return default_to_dict(
self,
model=self._model_name,
Expand All @@ -91,13 +111,17 @@ def to_dict(self) -> dict:
def from_dict(cls, data: Dict[str, Any]) -> "GradientDocumentEmbedder":
"""
Deserialize this component from a dictionary.
:param data: The dictionary representation of this component.
:returns:
The deserialized component instance.
"""
deserialize_secrets_inplace(data["init_parameters"], keys=["access_token", "workspace_id"])
return default_from_dict(cls, data)

def warm_up(self) -> None:
"""
Load the embedding model.
Initializes the component.
"""
if not hasattr(self, "_embedding_model"):
self._embedding_model = self._gradient.get_embeddings_model(slug=self._model_name)
Expand Down Expand Up @@ -125,9 +149,14 @@ def _generate_embeddings(self, documents: List[Document], batch_size: int) -> Li
def run(self, documents: List[Document]):
"""
Embed a list of Documents.
The embedding of each Document is stored in the `embedding` field of the Document.
:param documents: A list of Documents to embed.
:returns:
A dictionary with the following keys:
- `documents`: The embedded Documents.
"""
if not isinstance(documents, list) or documents and any(not isinstance(doc, Document) for doc in documents):
msg = "GradientDocumentEmbedder expects a list of Documents as input.\
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,23 @@
@component
class GradientTextEmbedder:
"""
A component for embedding strings using models hosted on Gradient AI (https://gradient.ai).
A component for embedding strings using models hosted on [Gradient AI](https://gradient.ai).
Usage example:
```python
embedder = GradientTextEmbedder(model="bge_large")
from haystack_integrations.components.embedders.gradient import GradientTextEmbedder
from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever
from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack import Pipeline
embedder =
p = Pipeline()
p.add_component(instance=embedder, name="text_embedder")
p.add_component(instance=InMemoryEmbeddingRetriever(document_store=InMemoryDocumentStore()), name="retriever")
p.add_component("text_embedder", GradientTextEmbedder(model="bge-large"))
p.add_component("retriever", InMemoryEmbeddingRetriever(document_store=InMemoryDocumentStore()))
p.connect("text_embedder", "retriever")
p.run("embed me!!!")
p.run(data={"text_embedder": {"text":"You can embed me but I'll return no matching documents"}})
>>> No Documents found with embeddings. Returning empty list. To generate embeddings, use a DocumentEmbedder.
>>> {'retriever': {'documents': []}}
```
"""

Expand All @@ -34,7 +42,7 @@ def __init__(
:param model: The name of the model to use.
:param access_token: The Gradient access token.
:param workspace_id: The Gradient workspace ID.
:param host: The Gradient host. By default it uses https://api.gradient.ai/.
:param host: The Gradient host. By default, it uses [Gradient AI](https://api.gradient.ai/).
"""
self._host = host
self._model_name = model
Expand All @@ -53,7 +61,10 @@ def _get_telemetry_data(self) -> Dict[str, Any]:

def to_dict(self) -> dict:
"""
Serialize the component to a Python dictionary.
Serialize this component to a dictionary.
:returns:
The serialized component as a dictionary.
"""
return default_to_dict(
self,
Expand All @@ -67,13 +78,17 @@ def to_dict(self) -> dict:
def from_dict(cls, data: Dict[str, Any]) -> "GradientTextEmbedder":
"""
Deserialize this component from a dictionary.
:param data: The dictionary representation of this component.
:returns:
The deserialized component instance.
"""
deserialize_secrets_inplace(data["init_parameters"], keys=["access_token", "workspace_id"])
return default_from_dict(cls, data)

def warm_up(self) -> None:
"""
Load the embedding model.
Initializes the component.
"""
if not hasattr(self, "_embedding_model"):
self._embedding_model = self._gradient.get_embeddings_model(slug=self._model_name)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,10 @@ class GradientGenerator:
Queries the LLM using Gradient AI's SDK ('gradientai' package).
See [Gradient AI API](https://docs.gradient.ai/docs/sdk-quickstart) for more details.
Usage example:
```python
from haystack_integrations.components.generators.gradient import GradientGenerator
llm = GradientGenerator(base_model_slug="llama2-7b-chat")
llm.warm_up()
print(llm.run(prompt="What is the meaning of life?"))
Expand All @@ -40,17 +43,17 @@ def __init__(
"""
Create a GradientGenerator component.
:param access_token: The Gradient access token. If not provided it's read from the environment
variable GRADIENT_ACCESS_TOKEN.
:param access_token: The Gradient access token as a `Secret`. If not provided it's read from the environment
variable `GRADIENT_ACCESS_TOKEN`.
:param base_model_slug: The base model slug to use.
:param host: The Gradient host. By default it uses https://api.gradient.ai/.
:param host: The Gradient host. By default, it uses [Gradient AI](https://api.gradient.ai/).
:param max_generated_token_count: The maximum number of tokens to generate.
:param model_adapter_id: The model adapter ID to use.
:param temperature: The temperature to use.
:param top_k: The top k to use.
:param top_p: The top p to use.
:param workspace_id: The Gradient workspace ID. If not provided it's read from the environment
variable GRADIENT_WORKSPACE_ID.
:param workspace_id: The Gradient workspace ID as a `Secret`. If not provided it's read from the environment
variable `GRADIENT_WORKSPACE_ID`.
"""
self._access_token = access_token
self._base_model_slug = base_model_slug
Expand Down Expand Up @@ -84,6 +87,9 @@ def __init__(
def to_dict(self) -> Dict[str, Any]:
"""
Serialize this component to a dictionary.
:returns:
The serialized component as a dictionary.
"""
return default_to_dict(
self,
Expand All @@ -102,7 +108,12 @@ def to_dict(self) -> Dict[str, Any]:
def from_dict(cls, data: Dict[str, Any]) -> "GradientGenerator":
"""
Deserialize this component from a dictionary.
:param data: The dictionary representation of this component.
:returns:
The deserialized component instance.
"""

deserialize_secrets_inplace(data["init_parameters"], keys=["access_token", "workspace_id"])
return default_from_dict(cls, data)

Expand Down

0 comments on commit c6da120

Please sign in to comment.