Skip to content

Commit

Permalink
Pinecone: rename retriever (#396)
Browse files Browse the repository at this point in the history
* rename retriever

* fix linting and test

* fix pydoc config
  • Loading branch information
anakin87 authored Feb 12, 2024
1 parent d02286c commit 1167b8f
Show file tree
Hide file tree
Showing 7 changed files with 65 additions and 1,311 deletions.
50 changes: 50 additions & 0 deletions integrations/pinecone/examples/example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# Install the Pinecone integration; Haystack will come as a dependency.
# Also install some optional dependencies needed for Markdown conversion and text embedding:
# pip install -U pinecone-haystack markdown-it-py mdit_plain "sentence-transformers>=2.2.0"

# Download some markdown files to index
# git clone https://github.com/anakin87/neural-search-pills


# Create the indexing Pipeline and index some documents

import glob

from haystack import Pipeline
from haystack.components.converters import MarkdownToDocument
from haystack.components.embedders import SentenceTransformersDocumentEmbedder, SentenceTransformersTextEmbedder
from haystack.components.preprocessors import DocumentSplitter
from haystack.components.writers import DocumentWriter
from pinecone_haystack import PineconeDocumentStore
from pinecone_haystack.dense_retriever import PineconeEmbeddingRetriever

# Collect the Markdown files from the cloned repository that will be indexed.
markdown_files = glob.glob("neural-search-pills/pills/*.md")

# Replace the placeholder API key with your own before running.
document_store = PineconeDocumentStore(
    api_key="YOUR-PINECONE-API-KEY",
    environment="gcp-starter",
    index="default",
    namespace="default",
    dimension=768,
)

# Indexing pipeline: convert Markdown, split by sentence, embed, then write to the store.
indexing = Pipeline()
indexing.add_component("converter", MarkdownToDocument())
sentence_splitter = DocumentSplitter(split_by="sentence", split_length=2)
indexing.add_component("splitter", sentence_splitter)
indexing.add_component("embedder", SentenceTransformersDocumentEmbedder())
store_writer = DocumentWriter(document_store)
indexing.add_component("writer", store_writer)

# Wire each stage to the one that follows it, in order.
indexing_stages = ("converter", "splitter", "embedder", "writer")
for upstream, downstream in zip(indexing_stages, indexing_stages[1:]):
    indexing.connect(upstream, downstream)

indexing_inputs = {"converter": {"sources": markdown_files}}
indexing.run(indexing_inputs)


# Create the querying Pipeline and try a query

querying = Pipeline()
querying.add_component("embedder", SentenceTransformersTextEmbedder())
embedding_retriever = PineconeEmbeddingRetriever(document_store=document_store, top_k=3)
querying.add_component("retriever", embedding_retriever)
querying.connect("embedder", "retriever")

question = "What is Question Answering?"
results = querying.run({"embedder": {"text": question}})

# Print every retrieved document followed by a separator line.
separator = "-" * 10
for hit in results["retriever"]["documents"]:
    print(hit, separator, sep="\n")
Loading

0 comments on commit 1167b8f

Please sign in to comment.