From 4f1b5f89bcc2da29febb3f88c75a2692fb35e70c Mon Sep 17 00:00:00 2001
From: Nicola Procopio
Date: Thu, 15 Feb 2024 09:34:41 +0100
Subject: [PATCH] fastembed integration: add example (#401)

* created project
* added parallel param
* updated test
* version 0.0.1
* renamed folder
* removed print
* updated readme
* added fastembed.yml
* fix typos
* python version to 3.9 for lint
* updated file
* force install black
* return to original file
* try to fix workflow
* retry
* add missing info to pyproject
* add hatch-vcs to check version
* Update pyproject.toml
* fixed typos
* removed python 3.9
* Update fastembed.yml
* Update fastembed_document_embedder.py
* Update fastembed_text_embedder.py
* ignore errors for bool arguments
* fix
* try moving noqa
* move noqa
* formatted with black
* added numpy dependency
* removed numpy
* removed numpy
* make mypy happy
* Update fastembed_backend.py
* removed classvar
* fix
* Update pyproject.toml
* added import numpy lint
* skip docs generation for the time being
* Update README.md
* added config.yml
* generate docs
* Update fastembed.yml
* Update config.yml
* rm unnecessary from_dict
* final touch
* updated labeler.yml
* updated library readme
* fix typos
* fix docstrings/README
* added example.py
* formatted with black
* fixed formatting
* fix

---------

Co-authored-by: Stefano Fiorucci
---
 integrations/fastembed/example/example.py | 33 +++++++++++++++++++++++
 1 file changed, 33 insertions(+)
 create mode 100644 integrations/fastembed/example/example.py

diff --git a/integrations/fastembed/example/example.py b/integrations/fastembed/example/example.py
new file mode 100644
index 000000000..3e8077a79
--- /dev/null
+++ b/integrations/fastembed/example/example.py
@@ -0,0 +1,33 @@
+from haystack import Document, Pipeline
+from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever
+from haystack.document_stores.in_memory import InMemoryDocumentStore
+from haystack_integrations.components.embedders.fastembed import FastembedDocumentEmbedder, FastembedTextEmbedder
+
+document_store = InMemoryDocumentStore(embedding_similarity_function="cosine")
+
+documents = [
+    Document(content="My name is Wolfgang and I live in Berlin"),
+    Document(content="I saw a black horse running"),
+    Document(content="Germany has many big cities"),
+    Document(content="fastembed is supported by and maintained by Qdrant."),
+]
+
+document_embedder = FastembedDocumentEmbedder()
+document_embedder.warm_up()
+documents_with_embeddings = document_embedder.run(documents)["documents"]
+document_store.write_documents(documents_with_embeddings)
+
+query_pipeline = Pipeline()
+query_pipeline.add_component("text_embedder", FastembedTextEmbedder())
+query_pipeline.add_component("retriever", InMemoryEmbeddingRetriever(document_store=document_store))
+query_pipeline.connect("text_embedder.embedding", "retriever.query_embedding")
+
+query = "Who supports fastembed?"
+
+result = query_pipeline.run({"text_embedder": {"text": query}})
+
+print(result["retriever"]["documents"][0])  # noqa: T201
+
+# Document(id=...,
+# content: 'fastembed is supported by and maintained by Qdrant.',
+# score: 0.758..)