From 60c4d4a54997c1ee8d0834bd8785e25221be9a7c Mon Sep 17 00:00:00 2001 From: Aaron Ji <127167174+DresAaron@users.noreply.github.com> Date: Wed, 18 Sep 2024 20:50:01 +0800 Subject: [PATCH] docs: update docstrings of JinaDocumentEmbedder and JinaTextEmbedder (#1092) * chore: update function doc * chore: polished function doc * chore: polished function doc * chore: update function doc * Update integrations/jina/src/haystack_integrations/components/embedders/jina/document_embedder.py Co-authored-by: Julian Risch * Update integrations/jina/src/haystack_integrations/components/embedders/jina/text_embedder.py Co-authored-by: Julian Risch * chore: update function doc * fix: lint issues * chore: update function doc --------- Co-authored-by: Julian Risch --- .../components/embedders/jina/document_embedder.py | 10 ++++++++++ .../components/embedders/jina/text_embedder.py | 10 ++++++++++ 2 files changed, 20 insertions(+) diff --git a/integrations/jina/src/haystack_integrations/components/embedders/jina/document_embedder.py b/integrations/jina/src/haystack_integrations/components/embedders/jina/document_embedder.py index bbac547c3..715092b8a 100644 --- a/integrations/jina/src/haystack_integrations/components/embedders/jina/document_embedder.py +++ b/integrations/jina/src/haystack_integrations/components/embedders/jina/document_embedder.py @@ -62,6 +62,16 @@ def __init__( to keep the logs clean. :param meta_fields_to_embed: List of meta fields that should be embedded along with the Document text. :param embedding_separator: Separator used to concatenate the meta fields to the Document text. + :param task: The downstream task for which the embeddings will be used. + The model will return the optimized embeddings for that task. + Check the list of available tasks on [Jina documentation](https://jina.ai/embeddings/). + :param dimensions: Number of desired dimensions. + Smaller dimensions are easier to store and retrieve, with minimal performance impact thanks to MRL. 
+ :param late_chunking: A boolean to enable or disable late chunking. + Apply the late chunking technique to leverage the model's long-context capabilities for + generating contextual chunk embeddings. + + The `task` and `late_chunking` parameters are only supported by jina-embeddings-v3. """ resolved_api_key = api_key.resolve_value() diff --git a/integrations/jina/src/haystack_integrations/components/embedders/jina/text_embedder.py b/integrations/jina/src/haystack_integrations/components/embedders/jina/text_embedder.py index c22f9ea2c..288b9a834 100644 --- a/integrations/jina/src/haystack_integrations/components/embedders/jina/text_embedder.py +++ b/integrations/jina/src/haystack_integrations/components/embedders/jina/text_embedder.py @@ -52,6 +52,16 @@ def __init__( Check the list of available models on [Jina documentation](https://jina.ai/embeddings/). :param prefix: A string to add to the beginning of each text. :param suffix: A string to add to the end of each text. + :param task: The downstream task for which the embeddings will be used. + The model will return the optimized embeddings for that task. + Check the list of available tasks on [Jina documentation](https://jina.ai/embeddings/). + :param dimensions: Number of desired dimensions. + Smaller dimensions are easier to store and retrieve, with minimal performance impact thanks to MRL. + :param late_chunking: A boolean to enable or disable late chunking. + Apply the late chunking technique to leverage the model's long-context capabilities for + generating contextual chunk embeddings. + + The `task` and `late_chunking` parameters are only supported by jina-embeddings-v3. """ resolved_api_key = api_key.resolve_value()