From dd2613b82804d5a8ca8b3c50ae8d763f39680b40 Mon Sep 17 00:00:00 2001
From: Ashwin Mathur <97467100+awinml@users.noreply.github.com>
Date: Wed, 20 Mar 2024 02:56:48 +0530
Subject: [PATCH] Update example and description (#210)

---
 integrations/voyage.md | 50 ++++++++++++++++++++++--------------------
 1 file changed, 26 insertions(+), 24 deletions(-)

diff --git a/integrations/voyage.md b/integrations/voyage.md
index 6ee1c13d..c8ec3cee 100644
--- a/integrations/voyage.md
+++ b/integrations/voyage.md
@@ -3,11 +3,11 @@ layout: integration
 name: Voyage AI
 description: A component for computing embeddings using Voyage AI embedding models - built for Haystack 2.0.
 authors:
-  - name: Ashwin Mathur
-    socials:
-      github: awinml
-      twitter: awinml
-      linkedin: ashwin-mathur-ds
+    - name: Ashwin Mathur
+      socials:
+        github: awinml
+        twitter: awinml
+        linkedin: ashwin-mathur-ds
 pypi: https://pypi.org/project/voyage-embedders-haystack/
 repo: https://github.com/awinml/voyage-embedders-haystack/tree/main
 type: Model Provider
@@ -17,8 +17,9 @@ version: Haystack 2.0
 toc: true
 ---
 
-[![PyPI](https://img.shields.io/pypi/v/voyage-embedders-haystack)](https://pypi.org/project/voyage-embedders-haystack/)
-![PyPI - Python Version](https://img.shields.io/pypi/pyversions/voyage-embedders-haystack?logo=python&logoColor=gold)
+[![PyPI](https://img.shields.io/pypi/v/voyage-embedders-haystack)](https://pypi.org/project/voyage-embedders-haystack/)
+![PyPI - Python Version](https://img.shields.io/pypi/pyversions/voyage-embedders-haystack?logo=python&logoColor=gold)
+
 
 ### **Table of Contents**
 
 - [Installation](#installation)
@@ -27,8 +28,7 @@ toc: true
 
 Custom component for [Haystack](https://github.com/deepset-ai/haystack) (2.x) for creating embeddings using the [VoyageAI Embedding Models](https://voyageai.com/).
 
-Voyage’s embedding models, `voyage-01` and `voyage-lite-01`, are state-of-the-art in retrieval accuracy. These models outperform top performing embedding models like `BAAI-bge` and `OpenAI text-embedding-ada-002` on the [MTEB Benchmark](https://github.com/embeddings-benchmark/mteb).
-
+Voyage’s embedding models, `voyage-2` and `voyage-code-2`, are state-of-the-art in retrieval accuracy. These models outperform top-performing embedding models like `intfloat/e5-mistral-7b-instruct` and `OpenAI/text-embedding-3-large` on the [MTEB Benchmark](https://github.com/embeddings-benchmark/mteb). `voyage-2` is currently ranked second on the [MTEB Leaderboard](https://huggingface.co/spaces/mteb/leaderboard).
 
 ## Installation
 
@@ -40,17 +40,18 @@
 pip install voyage-embedders-haystack
 ```
 
 You can use Voyage Embedding models with two components: [VoyageTextEmbedder](https://github.com/awinml/voyage-embedders-haystack/blob/main/src/voyage_embedders/voyage_text_embedder.py) and [VoyageDocumentEmbedder](https://github.com/awinml/voyage-embedders-haystack/blob/main/src/voyage_embedders/voyage_document_embedder.py).
 
-To create semantic embeddings for documents, use `VoyageDocumentEmbedder` in your indexing pipeline. For generating embeddings for queries, use `VoyageTextEmbedder`. Once you've selected the suitable component for your specific use case, initialize the component with the model name and Voyage AI API key. You can also
-set the environment variable "VOYAGE_API_KEY" instead of passing the api key as an argument.
+To create semantic embeddings for documents, use `VoyageDocumentEmbedder` in your indexing pipeline. For generating embeddings for queries, use `VoyageTextEmbedder`.
+
+Once you've selected the suitable component for your specific use case, initialize the component with the model name and your VoyageAI API key. You can also
+set the environment variable `VOYAGE_API_KEY` instead of passing the API key as an argument.
 
 Information about the supported models, can be found on the [Embeddings Documentation.](https://docs.voyageai.com/embeddings/)
 
 To get an API key, please see the [Voyage AI website.](https://www.voyageai.com/)
 
-
 ## Example
 
-Below is the example Semantic Search pipeline that uses the [Simple Wikipedia](https://huggingface.co/datasets/pszemraj/simple_wikipedia) Dataset from HuggingFace. You can find more examples in the [`examples`](https://github.com/awinml/voyage-embedders-haystack/tree/main/examples) folder.
+Below is an example Semantic Search pipeline that uses the [Simple Wikipedia](https://huggingface.co/datasets/pszemraj/simple_wikipedia) Dataset from HuggingFace. You can find more examples in the [`examples`](https://github.com/awinml/voyage-embedders-haystack/tree/main/examples) folder.
 
 Load the dataset:
 
@@ -64,8 +65,7 @@ from haystack.dataclasses import Document
 from haystack.document_stores.in_memory import InMemoryDocumentStore
 
 # Import Voyage Embedders
-from voyage_embedders.voyage_document_embedder import VoyageDocumentEmbedder
-from voyage_embedders.voyage_text_embedder import VoyageTextEmbedder
+from haystack_integrations.components.embedders.voyage_embedders import VoyageDocumentEmbedder, VoyageTextEmbedder
 
 # Load first 100 rows of the Simple Wikipedia Dataset from HuggingFace
 dataset = load_dataset("pszemraj/simple_wikipedia", split="validation[:100]")
 
@@ -86,17 +86,19 @@ Index the documents to the `InMemoryDocumentStore` using the `VoyageDocumentEmbe
 ```python
 doc_store = InMemoryDocumentStore(embedding_similarity_function="cosine")
+retriever = InMemoryEmbeddingRetriever(document_store=doc_store)
+doc_writer = DocumentWriter(document_store=doc_store)
+
 doc_embedder = VoyageDocumentEmbedder(
-    model_name="voyage-01",
+    model="voyage-2",
     input_type="document",
-    batch_size=8,
-    api_key="VOYAGE_API_KEY",
 )
+text_embedder = VoyageTextEmbedder(model="voyage-2", input_type="query")
 
 # Indexing Pipeline
 indexing_pipeline = Pipeline()
 indexing_pipeline.add_component(instance=doc_embedder, name="DocEmbedder")
-indexing_pipeline.add_component(instance=DocumentWriter(document_store=doc_store), name="DocWriter")
+indexing_pipeline.add_component(instance=doc_writer, name="DocWriter")
 indexing_pipeline.connect("DocEmbedder", "DocWriter")
 
 indexing_pipeline.run({"DocEmbedder": {"documents": docs}})
 
@@ -107,15 +109,15 @@ print(f"Embedding of first Document: {doc_store.filter_documents()[0].embedding}
 ```
 
 Query the Semantic Search Pipeline using the `InMemoryEmbeddingRetriever` and `VoyageTextEmbedder`:
+
 ```python
-text_embedder = VoyageTextEmbedder(model_name="voyage-01", input_type="query", api_key="VOYAGE_API_KEY")
+text_embedder = VoyageTextEmbedder(model="voyage-2", input_type="query")
 
 # Query Pipeline
 query_pipeline = Pipeline()
-query_pipeline.add_component("TextEmbedder", text_embedder)
-query_pipeline.add_component("Retriever", InMemoryEmbeddingRetriever(document_store=doc_store))
-query_pipeline.connect("TextEmbedder", "Retriever")
-
+query_pipeline.add_component(instance=text_embedder, name="TextEmbedder")
+query_pipeline.add_component(instance=retriever, name="Retriever")
+query_pipeline.connect("TextEmbedder.embedding", "Retriever.query_embedding")
 
 # Search
 results = query_pipeline.run({"TextEmbedder": {"text": "Which year did the Joker movie release?"}})
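
After the query pipeline above has run, the retrieved documents can be read back from the `results` dictionary returned by `Pipeline.run()`. Below is a minimal sketch of inspecting that output; it assumes the `"Retriever"` component name used in the example and the `documents` output key that Haystack 2.x retrievers expose.

```python
# Minimal sketch: read back the documents retrieved by the query pipeline above.
# Assumes `results` is the dictionary returned by query_pipeline.run(...) and that
# the retriever was registered under the name "Retriever".
retrieved_docs = results["Retriever"]["documents"]

for doc in retrieved_docs[:3]:
    # Each retrieved Document carries its text content and a similarity score.
    print(f"score={doc.score:.4f}  content={doc.content[:120]}")
```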