From 7e2dda568dcad3ae71810a77e2202a330946ff8e Mon Sep 17 00:00:00 2001 From: MARTRENCHAR Date: Thu, 14 Nov 2024 12:16:29 +0100 Subject: [PATCH] README.md: add new example with the ranker and fix the old ones ranker_example.py: fix the example --- integrations/fastembed/README.md | 37 ++++++++++++++++--- .../fastembed/examples/ranker_example.py | 2 +- 2 files changed, 33 insertions(+), 6 deletions(-) diff --git a/integrations/fastembed/README.md b/integrations/fastembed/README.md index c021dec3b..f3c2bb135 100644 --- a/integrations/fastembed/README.md +++ b/integrations/fastembed/README.md @@ -8,6 +8,7 @@ **Table of Contents** - [Installation](#installation) +- [Usage](#usage) - [License](#license) ## Installation @@ -33,7 +34,7 @@ embedding = text_embedder.run(text)["embedding"] ```python from haystack_integrations.components.embedders.fastembed import FastembedDocumentEmbedder -from haystack.dataclasses import Document +from haystack import Document embedder = FastembedDocumentEmbedder( model="BAAI/bge-small-en-v1.5", @@ -50,24 +51,50 @@ from haystack_integrations.components.embedders.fastembed import FastembedSparse text = "fastembed is supported by and maintained by Qdrant."
text_embedder = FastembedSparseTextEmbedder( - model="prithvida/Splade_PP_en_v1" + model="prithivida/Splade_PP_en_v1" ) text_embedder.warm_up() -embedding = text_embedder.run(text)["embedding"] +embedding = text_embedder.run(text)["sparse_embedding"] ``` ```python from haystack_integrations.components.embedders.fastembed import FastembedSparseDocumentEmbedder -from haystack.dataclasses import Document +from haystack import Document embedder = FastembedSparseDocumentEmbedder( - model="prithvida/Splade_PP_en_v1", + model="prithivida/Splade_PP_en_v1", ) embedder.warm_up() doc = Document(content="fastembed is supported by and maintained by Qdrant.", meta={"long_answer": "no",}) result = embedder.run(documents=[doc]) ``` +You can use `FastembedRanker` by importing as: + +```python +from haystack import Document + +from haystack_integrations.components.rankers.fastembed import FastembedRanker + +query = "Who is maintaining Qdrant?" +documents = [ + Document( + content="This is built to be faster and lighter than other embedding libraries e.g. Transformers, Sentence-Transformers, etc." + ), + Document(content="fastembed is supported by and maintained by Qdrant."), +] + +ranker = FastembedRanker(model_name="Xenova/ms-marco-MiniLM-L-6-v2") +ranker.warm_up() +reranked_documents = ranker.run(query=query, documents=documents)["documents"] + +print(reranked_documents[0]) + +# Document(id=..., +# content: 'fastembed is supported by and maintained by Qdrant.', +# score: 5.472434997558594..) +``` + ## License `fastembed-haystack` is distributed under the terms of the [Apache-2.0](https://spdx.org/licenses/Apache-2.0.html) license. 
diff --git a/integrations/fastembed/examples/ranker_example.py b/integrations/fastembed/examples/ranker_example.py index 7a31e4646..593334e90 100644 --- a/integrations/fastembed/examples/ranker_example.py +++ b/integrations/fastembed/examples/ranker_example.py @@ -15,7 +15,7 @@ reranked_documents = ranker.run(query=query, documents=documents)["documents"] -print(reranked_documents["documents"][0]) +print(reranked_documents[0]) # Document(id=..., # content: 'fastembed is supported by and maintained by Qdrant.',