Skip to content

Commit

Permalink
Qdrant: add embedding retrieval example
Browse files Browse the repository at this point in the history
  • Loading branch information
anakin87 committed Apr 16, 2024
1 parent 0307c49 commit ab3d1a3
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 0 deletions.
52 changes: 52 additions & 0 deletions integrations/qdrant/examples/embedding_retrieval.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# Install required packages for this example, including qdrant-haystack and other libraries needed
# for Markdown conversion and embeddings generation. Use the following command:
# pip install qdrant-haystack markdown-it-py mdit_plain sentence-transformers

# Download some Markdown files to index.
# git clone https://github.com/anakin87/neural-search-pills

import glob

from haystack import Pipeline
from haystack.components.converters import MarkdownToDocument
from haystack.components.embedders import SentenceTransformersDocumentEmbedder, SentenceTransformersTextEmbedder
from haystack.components.preprocessors import DocumentSplitter
from haystack.components.writers import DocumentWriter
from haystack_integrations.components.retrievers.qdrant import QdrantEmbeddingRetriever
from haystack_integrations.document_stores.qdrant import QdrantDocumentStore

# Set up the QdrantDocumentStore. An in-memory instance keeps this example
# self-contained; a Qdrant instance running in Docker or Qdrant Cloud can be
# used instead.
document_store = QdrantDocumentStore(
    location=":memory:",
    index="Document",
    # Vector size configured for the store; the embedders used below are
    # expected to produce embeddings of this size.
    embedding_dim=768,
    recreate_index=True,
)

# Create the indexing Pipeline and index some documents.
# The glob pattern is relative to the current working directory, so the
# neural-search-pills repository must have been cloned there beforehand.
# Sorting makes the indexing order deterministic (glob order is arbitrary).
file_paths = sorted(glob.glob("neural-search-pills/pills/*.md"))
if not file_paths:
    # Fail fast: with no sources the pipeline would index nothing and the
    # query at the end of this script would have nothing to retrieve.
    msg = (
        "No Markdown files found under 'neural-search-pills/pills/'. "
        "Run 'git clone https://github.com/anakin87/neural-search-pills' in the current directory first."
    )
    raise FileNotFoundError(msg)

# Pipeline stages: Markdown files -> Documents -> 2-sentence chunks -> embeddings -> document store.
indexing = Pipeline()
indexing.add_component("converter", MarkdownToDocument())
indexing.add_component("splitter", DocumentSplitter(split_by="sentence", split_length=2))
# The embedder is created with its default settings.
indexing.add_component("embedder", SentenceTransformersDocumentEmbedder())
indexing.add_component("writer", DocumentWriter(document_store))
indexing.connect("converter", "splitter")
indexing.connect("splitter", "embedder")
indexing.connect("embedder", "writer")

indexing.run({"converter": {"sources": file_paths}})

# Create the querying Pipeline and try a query.
# The query text is embedded first; the retriever then looks up the
# top_k=3 documents in the document store for that embedding.
querying = Pipeline()
querying.add_component("embedder", SentenceTransformersTextEmbedder())
querying.add_component("retriever", QdrantEmbeddingRetriever(document_store=document_store, top_k=3))
querying.connect("embedder", "retriever")

results = querying.run({"embedder": {"text": "What is a cross-encoder?"}})

# Print every retrieved document, each followed by a separator line.
for doc in results["retriever"]["documents"]:
    print(doc)
    print("-" * 10)
2 changes: 2 additions & 0 deletions integrations/qdrant/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,8 @@ ban-relative-imports = "parents"
[tool.ruff.per-file-ignores]
# Tests can use magic values, assertions, and relative imports
"tests/**/*" = ["PLR2004", "S101", "TID252"]
# Examples can contain "print" calls
"examples/**/*" = ["T201"]


[tool.coverage.run]
Expand Down

0 comments on commit ab3d1a3

Please sign in to comment.