From ebf53b8e4f8c0fc003c3eaa458c3a2826451a3a6 Mon Sep 17 00:00:00 2001 From: Anush008 Date: Sun, 17 Dec 2023 15:01:38 +0530 Subject: [PATCH 1/8] feat: qdrant sparse vector retriever --- .../retrievers/__init__.py | 4 + .../qdrant_sparse_vector_retriever.py | 206 ++++++++++++++++++ .../test_qdrant_sparse_vector_retriever.py | 169 ++++++++++++++ 3 files changed, 379 insertions(+) create mode 100644 libs/community/langchain_community/retrievers/qdrant_sparse_vector_retriever.py create mode 100644 libs/community/tests/integration_tests/retrievers/test_qdrant_sparse_vector_retriever.py diff --git a/libs/community/langchain_community/retrievers/__init__.py b/libs/community/langchain_community/retrievers/__init__.py index 3eaed0a31ecb8..5785069cc0e4e 100644 --- a/libs/community/langchain_community/retrievers/__init__.py +++ b/libs/community/langchain_community/retrievers/__init__.py @@ -57,6 +57,9 @@ PineconeHybridSearchRetriever, ) from langchain_community.retrievers.pubmed import PubMedRetriever +from langchain_community.retrievers.qdrant_sparse_vector_retriever import ( + QdrantSparseVectorRetriever, +) from langchain_community.retrievers.remote_retriever import RemoteLangChainRetriever from langchain_community.retrievers.svm import SVMRetriever from langchain_community.retrievers.tavily_search_api import TavilySearchAPIRetriever @@ -93,6 +96,7 @@ "OutlineRetriever", "PineconeHybridSearchRetriever", "PubMedRetriever", + "QdrantSparseVectorRetriever", "RemoteLangChainRetriever", "SVMRetriever", "TavilySearchAPIRetriever", diff --git a/libs/community/langchain_community/retrievers/qdrant_sparse_vector_retriever.py b/libs/community/langchain_community/retrievers/qdrant_sparse_vector_retriever.py new file mode 100644 index 0000000000000..1b0c8292bd463 --- /dev/null +++ b/libs/community/langchain_community/retrievers/qdrant_sparse_vector_retriever.py @@ -0,0 +1,206 @@ +import uuid +from itertools import islice +from typing import ( + Any, + Callable, + Dict, + Generator, + Iterable, + List, + Optional, + Sequence, + Tuple, + cast, +) + +from langchain_core.callbacks import CallbackManagerForRetrieverRun +from langchain_core.documents import Document +from langchain_core.pydantic_v1 import Extra, root_validator +from langchain_core.retrievers import BaseRetriever + +from langchain_community.vectorstores.qdrant import Qdrant, QdrantException + + +class QdrantSparseVectorRetriever(BaseRetriever): + """Qdrant sparse vector retriever.""" + + client: Any + """'qdrant_client' instance to use.""" + collection_name: str + """Qdrant collection name.""" + sparse_vector_name: str + """Name of the sparse vector to use.""" + sparse_encoder: Callable[[str], Tuple[List[int], List[float]]] + """Sparse encoder function to use.""" + k: int = 4 + """Number of documents to return per query. Defaults to 4.""" + filter: Optional[Any] = None + """Qdrant qdrant_client.models.Filter to use for queries. Defaults to None.""" + content_payload_key: str = "content" + """Payload field containing the document content. Defaults to 'content'""" + metadata_payload_key: str = "metadata" + """Payload field containing the document metadata. Defaults to 'metadata'.""" + search_options: Dict[str, Any] = {} + """Additional search options to pass to qdrant_client.QdrantClient.search().""" + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + arbitrary_types_allowed = True + + @root_validator() + def validate_environment(cls, values: Dict) -> Dict: + """Validate that 'qdrant_client' python package exists in environment.""" + try: + from grpc import RpcError + from qdrant_client import QdrantClient, models + from qdrant_client.http.exceptions import UnexpectedResponse + except ImportError: + raise ImportError( + "Could not import qdrant-client python package. " + "Please install it with `pip install qdrant-client`." + ) + + client = values["client"] + if not isinstance(client, QdrantClient): + raise ValueError( + f"client should be an instance of qdrant_client.QdrantClient, " + f"got {type(client)}" + ) + + filter = values["filter"] + if filter is not None and not isinstance(filter, models.Filter): + raise ValueError( + f"filter should be an instance of qdrant_client.models.Filter, " + f"got {type(filter)}" + ) + + client = cast(QdrantClient, client) + + collection_name = values["collection_name"] + sparse_vector_name = values["sparse_vector_name"] + + try: + collection_info = client.get_collection(collection_name) + sparse_vectors_config = collection_info.config.params.sparse_vectors + + if sparse_vector_name not in sparse_vectors_config: + raise QdrantException( + f"Existing Qdrant collection {collection_name} does not " + f"contain sparse vector named {sparse_vector_name}." + f"Did you mean one of {', '.join(sparse_vectors_config.keys())}?" + ) + except (UnexpectedResponse, RpcError, ValueError): + raise QdrantException( + f"Qdrant collection {collection_name} does not exist." + ) + return values + + def _get_relevant_documents( + self, query: str, *, run_manager: CallbackManagerForRetrieverRun + ) -> List[Document]: + from qdrant_client import QdrantClient, models + + client = cast(QdrantClient, self.client) + query_indices, query_values = self.sparse_encoder(query) + results = client.search( + self.collection_name, + query_filter=self.filter, + query_vector=models.NamedSparseVector( + name=self.sparse_vector_name, + vector=models.SparseVector( + indices=query_indices, + values=query_values, + ), + ), + limit=self.k, + with_vectors=False, + **self.search_options, + ) + return [ + Qdrant._document_from_scored_point( + point, self.content_payload_key, self.metadata_payload_key + ) + for point in results + ] + + def add_documents(self, documents: List[Document], **kwargs: Any) -> List[str]: + """Run more documents through the embeddings and add to the vectorstore. + + Args: + documents (List[Document]: Documents to add to the vectorstore. + + Returns: + List[str]: List of IDs of the added texts. + """ + texts = [doc.page_content for doc in documents] + metadatas = [doc.metadata for doc in documents] + return self.add_texts(texts, metadatas, **kwargs) + + def add_texts( + self, + texts: Iterable[str], + metadatas: Optional[List[dict]] = None, + ids: Optional[Sequence[str]] = None, + batch_size: int = 64, + **kwargs: Any, + ) -> List[str]: + from qdrant_client import QdrantClient + + added_ids = [] + client = cast(QdrantClient, self.client) + for batch_ids, points in self._generate_rest_batches( + texts, metadatas, ids, batch_size + ): + client.upsert(self.collection_name, points=points, **kwargs) + added_ids.extend(batch_ids) + + return added_ids + + def _generate_rest_batches( + self, + texts: Iterable[str], + metadatas: Optional[List[dict]] = None, + ids: Optional[Sequence[str]] = None, + batch_size: int = 64, + ) -> Generator[Tuple[List[str], List[Any]], None, None]: + from qdrant_client import models as rest + + texts_iterator = iter(texts) + metadatas_iterator = iter(metadatas or []) + ids_iterator = iter(ids or [uuid.uuid4().hex for _ in iter(texts)]) + while batch_texts := list(islice(texts_iterator, batch_size)): + # Take the corresponding metadata and id for each text in a batch + batch_metadatas = list(islice(metadatas_iterator, batch_size)) or None + batch_ids = list(islice(ids_iterator, batch_size)) + + # Generate the sparse embeddings for all the texts in a batch + batch_embeddings: List[Tuple[List[int], List[float]]] = [ + self.sparse_encoder(text) for text in batch_texts + ] + + points = [ + rest.PointStruct( + id=point_id, + vector={ + self.sparse_vector_name: rest.SparseVector( + indices=sparse_vector[0], + values=sparse_vector[1], + ) + }, + payload=payload, + ) + for point_id, sparse_vector, payload in zip( + batch_ids, + batch_embeddings, + Qdrant._build_payloads( + batch_texts, + batch_metadatas, + self.content_payload_key, + self.metadata_payload_key, + ), + ) + ] + + yield batch_ids, points diff --git a/libs/community/tests/integration_tests/retrievers/test_qdrant_sparse_vector_retriever.py b/libs/community/tests/integration_tests/retrievers/test_qdrant_sparse_vector_retriever.py new file mode 100644 index 0000000000000..ad7466b235855 --- /dev/null +++ b/libs/community/tests/integration_tests/retrievers/test_qdrant_sparse_vector_retriever.py @@ -0,0 +1,169 @@ +import random +import uuid +from typing import List, Tuple + +import pytest +from langchain_core.documents import Document + +from langchain_community.retrievers import QdrantSparseVectorRetriever +from langchain_community.vectorstores.qdrant import QdrantException + + +def consistent_fake_sparse_encoder( + query: str, size: int = 100, density: int = 0.7 +) -> Tuple[List[int], List[float]]: + """ + Generates a consistent fake sparse vector. + + Parameters: + - query (str): The query string to make the function deterministic. + - size (int): The size of the vector to generate. + - density (float): The density of the vector to generate. + + Returns: + - indices (list): List of indices where the non-zero elements are located. + - values (list): List of corresponding float values at the non-zero indices. + """ + # Ensure density is within the valid range [0, 1] + density = max(0.0, min(1.0, density)) + + # Use a deterministic seed based on the query + seed = hash(query) + random.seed(seed) + + # Calculate the number of non-zero elements based on density + num_non_zero_elements = int(size * density) + + # Generate random indices without replacement + indices = random.sample(range(size), num_non_zero_elements) + + # Generate random float values for the non-zero elements + values = [random.uniform(0.0, 1.0) for _ in range(num_non_zero_elements)] + + indices.sort() + return indices, values + + +@pytest.fixture +def retriever() -> QdrantSparseVectorRetriever: + from qdrant_client import QdrantClient, models + + client = QdrantClient() + + collection_name = uuid.uuid4().hex + vector_name = uuid.uuid4().hex + + client.recreate_collection( + collection_name, + vectors_config={}, + sparse_vectors_config={ + vector_name: models.SparseVectorParams( + index=models.SparseIndexParams( + on_disk=False, + ) + ) + }, + ) + + return QdrantSparseVectorRetriever( + client=client, + collection_name=collection_name, + sparse_vector_name=vector_name, + sparse_encoder=consistent_fake_sparse_encoder, + ) + + +def test_invalid_collection_name(retriever: QdrantSparseVectorRetriever) -> None: + with pytest.raises(QdrantException) as e: + QdrantSparseVectorRetriever( + client=retriever.client, + collection_name="invalid collection", + sparse_vector_name=retriever.sparse_vector_name, + sparse_encoder=consistent_fake_sparse_encoder, + ) + assert "does not exist" in str(e.value) + + +def test_invalid_sparse_vector_name(retriever: QdrantSparseVectorRetriever) -> None: + with pytest.raises(QdrantException) as e: + QdrantSparseVectorRetriever( + client=retriever.client, + collection_name=retriever.collection_name, + sparse_vector_name="invalid sparse vector", + sparse_encoder=consistent_fake_sparse_encoder, + ) + + assert "does not contain sparse vector" in str(e.value) + + +def test_add_documents(retriever: QdrantSparseVectorRetriever) -> None: + documents = [ + Document(page_content="hello world", metadata={"a": 1}), + Document(page_content="foo bar", metadata={"b": 2}), + Document(page_content="baz qux", metadata={"c": 3}), + ] + + ids = retriever.add_documents(documents) + + assert retriever.client.count(retriever.collection_name, exact=True).count == 3 + + documents = [ + Document(page_content="hello world"), + Document(page_content="foo bar"), + Document(page_content="baz qux"), + ] + + ids = retriever.add_documents(documents) + + assert len(ids) == 3 + + assert retriever.client.count(retriever.collection_name, exact=True).count == 6 + + +def test_add_texts(retriever: QdrantSparseVectorRetriever) -> None: + retriever.add_texts( + ["hello world", "foo bar", "baz qux"], [{"a": 1}, {"b": 2}, {"c": 3}] + ) + + assert retriever.client.count(retriever.collection_name, exact=True).count == 3 + + retriever.add_texts(["hello world", "foo bar", "baz qux"]) + + assert retriever.client.count(retriever.collection_name, exact=True).count == 6 + + +def test_get_relevant_documents(retriever: QdrantSparseVectorRetriever): + retriever.add_texts(["Hai there!", "Hello world!", "Foo bar baz!"]) + + expected = [Document(page_content="Hai there!")] + + retriever.k = 1 + results = retriever.get_relevant_documents("Hai there!") + + assert len(results) == retriever.k + assert results == expected + assert retriever.get_relevant_documents("Hai there!") == expected + + +def test_get_relevant_documents_with_filter(retriever: QdrantSparseVectorRetriever): + from qdrant_client import models + + retriever.add_texts( + ["Hai there!", "Hello world!", "Foo bar baz!"], + [ + {"value": 1}, + {"value": 2}, + {"value": 3}, + ], + ) + + retriever.filter = models.Filter( + must=[ + models.FieldCondition( + key="metadata.value", match=models.MatchValue(value=2) + ) + ] + ) + results = retriever.get_relevant_documents("Some query") + + assert results[0] == Document(page_content="Hello world!", metadata={"value": 2}) From 37fd9a4a145b52a82f540d9d1d81029509dfb313 Mon Sep 17 00:00:00 2001 From: Anush008 Date: Sun, 17 Dec 2023 17:46:17 +0530 Subject: [PATCH 2/8] docs: Added docs notebook --- .../retrievers/qdrant-sparse.ipynb | 238 ++++++++++++++++++ .../test_qdrant_sparse_vector_retriever.py | 3 +- .../qdrant_sparse_vector_retriever.py | 5 + 3 files changed, 244 insertions(+), 2 deletions(-) create mode 100644 docs/docs/integrations/retrievers/qdrant-sparse.ipynb create mode 100644 libs/langchain/langchain/retrievers/qdrant_sparse_vector_retriever.py diff --git a/docs/docs/integrations/retrievers/qdrant-sparse.ipynb b/docs/docs/integrations/retrievers/qdrant-sparse.ipynb new file mode 100644 index 0000000000000..6b36a2cf77de6 --- /dev/null +++ b/docs/docs/integrations/retrievers/qdrant-sparse.ipynb @@ -0,0 +1,238 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "ce0f17b9", + "metadata": {}, + "source": [ + "# Qdrant Sparse Vector Retriever\n", + "\n", + ">[Qdrant](https://qdrant.tech/) is an open-source, high-performance vector search engine/database.\n", + "\n", + "\n", + ">`QdrantSparseVectorRetriever` uses [sparse vectors](https://qdrant.tech/articles/sparse-vectors/) introduced in Qdrant [v1.7.0](https://qdrant.tech/articles/qdrant-1.7.x/) for document retrieval.\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "c307b082", + "metadata": {}, + "source": [ + "Set up the retriever:" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "bba863a2-977c-4add-b5f4-bfc33a80eae5", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "#!pip install qdrant_client" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "c10dd962", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from qdrant_client import QdrantClient, models\n", + "\n", + "client = QdrantClient(location=\":memory:\")\n", + "collection_name = \"sparse_collection\"\n", + "vector_name = \"sparse_vector\"\n", + "\n", + "client.create_collection(\n", + " collection_name,\n", + " vectors_config={},\n", + " sparse_vectors_config={\n", + " vector_name: models.SparseVectorParams(\n", + " index=models.SparseIndexParams(\n", + " on_disk=False,\n", + " )\n", + " )\n", + " },\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "f47a2bfe", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.retrievers.qdrant_sparse_vector_retriever import (\n", + " QdrantSparseVectorRetriever,\n", + ")\n", + "from langchain_core.documents import Document" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "f2eff08e", + "metadata": {}, + "outputs": [], + "source": [ + "import random\n", + "\n", + "def demo_encoder(_: str) -> tuple[list[int], list[float]]:\n", + " return (\n", + " sorted(random.sample(range(100), 100)),\n", + " [random.uniform(0.1, 1.0) for _ in range(100)],\n", + " )\n", + "# Create a retriever with a demo encoder\n", + "retriever = QdrantSparseVectorRetriever(\n", + " client=client,\n", + " collection_name=collection_name,\n", + " sparse_vector_name=vector_name,\n", + " sparse_encoder=demo_encoder,\n", + ")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "b68debff", + "metadata": {}, + "source": [ + "Add some data:" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "cd8a7b17", + "metadata": {}, + "outputs": [], + "source": [ + "docs = [\n", + " Document(\n", + " metadata={\n", + " \"title\": \"Beyond Horizons: AI Chronicles\",\n", + " \"author\": \"Dr. Cassandra Mitchell\",\n", + " },\n", + " page_content=\"An in-depth exploration of the fascinating journey of artificial intelligence, narrated by Dr. Mitchell. This captivating account spans the historical roots, current advancements, and speculative futures of AI, offering a gripping narrative that intertwines technology, ethics, and societal implications.\",\n", + " ),\n", + " Document(\n", + " metadata={\n", + " \"title\": \"Synergy Nexus: Merging Minds with Machines\",\n", + " \"author\": \"Prof. Benjamin S. Anderson\",\n", + " },\n", + " page_content=\"Professor Anderson delves into the synergistic possibilities of human-machine collaboration in 'Synergy Nexus.' The book articulates a vision where humans and AI seamlessly coalesce, creating new dimensions of productivity, creativity, and shared intelligence.\",\n", + " ),\n", + " Document(\n", + " metadata={\n", + " \"title\": \"AI Dilemmas: Navigating the Unknown\",\n", + " \"author\": \"Dr. Elena Rodriguez\",\n", + " },\n", + " page_content=\"Dr. Rodriguez pens an intriguing narrative in 'AI Dilemmas,' probing the uncharted territories of ethical quandaries arising from AI advancements. The book serves as a compass, guiding readers through the complex terrain of moral decisions confronting developers, policymakers, and society as AI evolves.\",\n", + " ),\n", + " Document(\n", + " metadata={\n", + " \"title\": \"Sentient Threads: Weaving AI Consciousness\",\n", + " \"author\": \"Prof. Alexander J. Bennett\",\n", + " },\n", + " page_content=\"In 'Sentient Threads,' Professor Bennett unravels the enigma of AI consciousness, presenting a tapestry of arguments that scrutinize the very essence of machine sentience. The book ignites contemplation on the ethical and philosophical dimensions surrounding the quest for true AI awareness.\",\n", + " ),\n", + " Document(\n", + " metadata={\n", + " \"title\": \"Silent Alchemy: Unseen AI Alleviations\",\n", + " \"author\": \"Dr. Emily Foster\",\n", + " },\n", + " page_content=\"Building upon her previous work, Dr. Foster unveils 'Silent Alchemy,' a profound examination of the covert presence of AI in our daily lives. This illuminating piece reveals the subtle yet impactful ways in which AI invisibly shapes our routines, emphasizing the need for heightened awareness in our technology-driven world.\",\n", + " ),\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "3c5970db", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['840573bcc59345628dd273be58478f82',\n", + " 'b5301f9b997343f197fbbc52f754477a',\n", + " '9c82735a2d49442ba6540948ef098890',\n", + " '331cf696182b49c4a997485fa82ebd04',\n", + " '06ee682d149a4f9ebbc3afe423fd72d3']" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "retriever.add_documents(docs)" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "4fffd0af", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[Document(page_content=\"Building upon her previous work, Dr. Foster unveils 'Silent Alchemy,' a profound examination of the covert presence of AI in our daily lives. This illuminating piece reveals the subtle yet impactful ways in which AI invisibly shapes our routines, emphasizing the need for heightened awareness in our technology-driven world.\", metadata={'title': 'Silent Alchemy: Unseen AI Alleviations', 'author': 'Dr. Emily Foster'}),\n", + " Document(page_content=\"In 'Sentient Threads,' Professor Bennett unravels the enigma of AI consciousness, presenting a tapestry of arguments that scrutinize the very essence of machine sentience. The book ignites contemplation on the ethical and philosophical dimensions surrounding the quest for true AI awareness.\", metadata={'title': 'Sentient Threads: Weaving AI Consciousness', 'author': 'Prof. Alexander J. Bennett'}),\n", + " Document(page_content='An in-depth exploration of the fascinating journey of artificial intelligence, narrated by Dr. Mitchell. This captivating account spans the historical roots, current advancements, and speculative futures of AI, offering a gripping narrative that intertwines technology, ethics, and societal implications.', metadata={'title': 'Beyond Horizons: AI Chronicles', 'author': 'Dr. Cassandra Mitchell'}),\n", + " Document(page_content=\"Building upon her previous work, Dr. Foster unveils 'Silent Alchemy,' a profound examination of the covert presence of AI in our daily lives. This illuminating piece reveals the subtle yet impactful ways in which AI invisibly shapes our routines, emphasizing the need for heightened awareness in our technology-driven world.\", metadata={'title': 'Silent Alchemy: Unseen AI Alleviations', 'author': 'Dr. Emily Foster'})]" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "retriever.get_relevant_documents(\n", + " \"Life and ethical dilemmas of AI\",\n", + ")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/libs/community/tests/integration_tests/retrievers/test_qdrant_sparse_vector_retriever.py b/libs/community/tests/integration_tests/retrievers/test_qdrant_sparse_vector_retriever.py index ad7466b235855..505519a1b90ce 100644 --- a/libs/community/tests/integration_tests/retrievers/test_qdrant_sparse_vector_retriever.py +++ b/libs/community/tests/integration_tests/retrievers/test_qdrant_sparse_vector_retriever.py @@ -35,12 +35,11 @@ def consistent_fake_sparse_encoder( num_non_zero_elements = int(size * density) # Generate random indices without replacement - indices = random.sample(range(size), num_non_zero_elements) + indices = sorted(random.sample(range(size), num_non_zero_elements)) # Generate random float values for the non-zero elements values = [random.uniform(0.0, 1.0) for _ in range(num_non_zero_elements)] - indices.sort() return indices, values diff --git a/libs/langchain/langchain/retrievers/qdrant_sparse_vector_retriever.py b/libs/langchain/langchain/retrievers/qdrant_sparse_vector_retriever.py new file mode 100644 index 0000000000000..9c701fc1817cb --- /dev/null +++ b/libs/langchain/langchain/retrievers/qdrant_sparse_vector_retriever.py @@ -0,0 +1,5 @@ +from langchain_community.retrievers.qdrant_sparse_vector_retriever import ( + QdrantSparseVectorRetriever, +) + +__all__ = ["QdrantSparseVectorRetriever"] From 57a880030c84e9f63d973f1d35d7df544ef0dfb8 Mon Sep 17 00:00:00 2001 From: Anush008 Date: Sun, 17 Dec 2023 17:53:57 +0530 Subject: [PATCH 3/8] chore: lint updates --- .../retrievers/qdrant-sparse.ipynb | 23 +++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/docs/docs/integrations/retrievers/qdrant-sparse.ipynb b/docs/docs/integrations/retrievers/qdrant-sparse.ipynb index 6b36a2cf77de6..8ae2471729e1c 100644 --- a/docs/docs/integrations/retrievers/qdrant-sparse.ipynb +++ b/docs/docs/integrations/retrievers/qdrant-sparse.ipynb @@ -19,7 +19,7 @@ "id": "c307b082", "metadata": {}, "source": [ - "Set up the retriever:" + "Install the 'qdrant_client' package:" ] }, { @@ -84,6 +84,14 @@ "from langchain_core.documents import Document" ] }, + { + "cell_type": "markdown", + "id": "41baa0d1", + "metadata": {}, + "source": [ + "Create a demo encoder function:" + ] + }, { "cell_type": "code", "execution_count": 24, @@ -93,11 +101,14 @@ "source": [ "import random\n", "\n", + "\n", "def demo_encoder(_: str) -> tuple[list[int], list[float]]:\n", " return (\n", " sorted(random.sample(range(100), 100)),\n", " [random.uniform(0.1, 1.0) for _ in range(100)],\n", " )\n", + "\n", + "\n", "# Create a retriever with a demo encoder\n", "retriever = QdrantSparseVectorRetriever(\n", " client=client,\n", @@ -113,7 +124,7 @@ "id": "b68debff", "metadata": {}, "source": [ - "Add some data:" + "Add some documents:" ] }, { @@ -162,6 +173,14 @@ "]" ] }, + { + "cell_type": "markdown", + "id": "a5e673fa", + "metadata": {}, + "source": [ + "Perform a retrieval:" + ] + }, { "cell_type": "code", "execution_count": 26, From 02cf706cc3a40c6d1fb59d4e9dd3e2e018323b14 Mon Sep 17 00:00:00 2001 From: Anush008 Date: Sun, 17 Dec 2023 18:06:32 +0530 Subject: [PATCH 4/8] test: "QdrantSparseVectorRetriever" in exports --- libs/community/tests/unit_tests/retrievers/test_imports.py | 1 + 1 file changed, 1 insertion(+) diff --git a/libs/community/tests/unit_tests/retrievers/test_imports.py b/libs/community/tests/unit_tests/retrievers/test_imports.py index 04ebf72d5eae4..2f7875979826c 100644 --- a/libs/community/tests/unit_tests/retrievers/test_imports.py +++ b/libs/community/tests/unit_tests/retrievers/test_imports.py @@ -24,6 +24,7 @@ "OutlineRetriever", "PineconeHybridSearchRetriever", "PubMedRetriever", + "QdrantSparseVectorRetriever", "RemoteLangChainRetriever", "SVMRetriever", "TavilySearchAPIRetriever", From 82d007c6d2f8cf229b47254b360b1942cf9887a1 Mon Sep 17 00:00:00 2001 From: Anush008 Date: Mon, 18 Dec 2023 13:05:51 +0530 Subject: [PATCH 5/8] chore: update test return types --- .../retrievers/test_qdrant_sparse_vector_retriever.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/libs/community/tests/integration_tests/retrievers/test_qdrant_sparse_vector_retriever.py b/libs/community/tests/integration_tests/retrievers/test_qdrant_sparse_vector_retriever.py index 505519a1b90ce..bba6a17041af6 100644 --- a/libs/community/tests/integration_tests/retrievers/test_qdrant_sparse_vector_retriever.py +++ b/libs/community/tests/integration_tests/retrievers/test_qdrant_sparse_vector_retriever.py @@ -10,7 +10,7 @@ def consistent_fake_sparse_encoder( - query: str, size: int = 100, density: int = 0.7 + query: str, size: int = 100, density: float = 0.7 ) -> Tuple[List[int], List[float]]: """ Generates a consistent fake sparse vector. @@ -47,7 +47,7 @@ def consistent_fake_sparse_encoder( def retriever() -> QdrantSparseVectorRetriever: from qdrant_client import QdrantClient, models - client = QdrantClient() + client = QdrantClient(location=":memory:") collection_name = uuid.uuid4().hex vector_name = uuid.uuid4().hex @@ -131,7 +131,7 @@ def test_add_texts(retriever: QdrantSparseVectorRetriever) -> None: assert retriever.client.count(retriever.collection_name, exact=True).count == 6 -def test_get_relevant_documents(retriever: QdrantSparseVectorRetriever): +def test_get_relevant_documents(retriever: QdrantSparseVectorRetriever) -> None: retriever.add_texts(["Hai there!", "Hello world!", "Foo bar baz!"]) expected = [Document(page_content="Hai there!")] @@ -144,7 +144,9 @@ def test_get_relevant_documents(retriever: QdrantSparseVectorRetriever): assert retriever.get_relevant_documents("Hai there!") == expected -def test_get_relevant_documents_with_filter(retriever: QdrantSparseVectorRetriever): +def test_get_relevant_documents_with_filter( + retriever: QdrantSparseVectorRetriever, +) -> None: from qdrant_client import models retriever.add_texts( From 5ae1e4705150fa2dce9f048f61070933b43a07dd Mon Sep 17 00:00:00 2001 From: Anush008 Date: Tue, 19 Dec 2023 09:15:26 +0530 Subject: [PATCH 6/8] chore: removed re-export --- .../retrievers/qdrant-sparse.ipynb | 34 +++++++++++++++---- .../qdrant_sparse_vector_retriever.py | 5 --- 2 files changed, 27 insertions(+), 12 deletions(-) delete mode 100644 libs/langchain/langchain/retrievers/qdrant_sparse_vector_retriever.py diff --git a/docs/docs/integrations/retrievers/qdrant-sparse.ipynb b/docs/docs/integrations/retrievers/qdrant-sparse.ipynb index 8ae2471729e1c..4e0356d3dc092 100644 --- a/docs/docs/integrations/retrievers/qdrant-sparse.ipynb +++ b/docs/docs/integrations/retrievers/qdrant-sparse.ipynb @@ -24,12 +24,23 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": null, "id": "bba863a2-977c-4add-b5f4-bfc33a80eae5", "metadata": { "tags": [] }, - "outputs": [], + "outputs": [ + { + "ename": "", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[1;31mRunning cells with '.venv' requires the ipykernel package.\n", + "\u001b[1;31mRun the following command to install 'ipykernel' into the Python environment. \n", + "\u001b[1;31mCommand: '/Users/anush/Desktop/langchain/libs/community/.venv/bin/python -m pip install ipykernel -U --force-reinstall'" + ] + } + ], "source": [ "#!pip install qdrant_client" ] @@ -73,14 +84,23 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "f47a2bfe", "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[1;31mRunning cells with '.venv' requires the ipykernel package.\n", + "\u001b[1;31mRun the following command to install 'ipykernel' into the Python environment. \n", + "\u001b[1;31mCommand: '/Users/anush/Desktop/langchain/libs/community/.venv/bin/python -m pip install ipykernel -U --force-reinstall'" + ] + } + ], "source": [ - "from langchain.retrievers.qdrant_sparse_vector_retriever import (\n", - " QdrantSparseVectorRetriever,\n", - ")\n", + "from langchain_community.retrievers import QdrantSparseVectorRetriever\n", "from langchain_core.documents import Document" ] }, diff --git a/libs/langchain/langchain/retrievers/qdrant_sparse_vector_retriever.py b/libs/langchain/langchain/retrievers/qdrant_sparse_vector_retriever.py deleted file mode 100644 index 9c701fc1817cb..0000000000000 --- a/libs/langchain/langchain/retrievers/qdrant_sparse_vector_retriever.py +++ /dev/null @@ -1,5 +0,0 @@ -from langchain_community.retrievers.qdrant_sparse_vector_retriever import ( - QdrantSparseVectorRetriever, -) - -__all__ = ["QdrantSparseVectorRetriever"] From 02f62794ce953022c426263c0a2b89492200c608 Mon Sep 17 00:00:00 2001 From: Anush008 Date: Tue, 19 Dec 2023 09:18:09 +0530 Subject: [PATCH 7/8] docs: updated notebook --- .../retrievers/qdrant-sparse.ipynb | 41 ++----------------- 1 file changed, 4 insertions(+), 37 deletions(-) diff --git a/docs/docs/integrations/retrievers/qdrant-sparse.ipynb b/docs/docs/integrations/retrievers/qdrant-sparse.ipynb index 4e0356d3dc092..da2087bb80e0a 100644 --- a/docs/docs/integrations/retrievers/qdrant-sparse.ipynb +++ b/docs/docs/integrations/retrievers/qdrant-sparse.ipynb @@ -29,39 +29,17 @@ "metadata": { "tags": [] }, - "outputs": [ - { - "ename": "", - "evalue": "", - "output_type": "error", - "traceback": [ - "\u001b[1;31mRunning cells with '.venv' requires the ipykernel package.\n", - "\u001b[1;31mRun the following command to install 'ipykernel' into the Python environment. \n", - "\u001b[1;31mCommand: '/Users/anush/Desktop/langchain/libs/community/.venv/bin/python -m pip install ipykernel -U --force-reinstall'" - ] - } - ], + "outputs": [], "source": [ "#!pip install qdrant_client" ] }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "c10dd962", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 1, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "from qdrant_client import QdrantClient, models\n", "\n", @@ -87,18 +65,7 @@ "execution_count": null, "id": "f47a2bfe", "metadata": {}, - "outputs": [ - { - "ename": "", - "evalue": "", - "output_type": "error", - "traceback": [ - "\u001b[1;31mRunning cells with '.venv' requires the ipykernel package.\n", - "\u001b[1;31mRun the following command to install 'ipykernel' into the Python environment. \n", - "\u001b[1;31mCommand: '/Users/anush/Desktop/langchain/libs/community/.venv/bin/python -m pip install ipykernel -U --force-reinstall'" - ] - } - ], + "outputs": [], "source": [ "from langchain_community.retrievers import QdrantSparseVectorRetriever\n", "from langchain_core.documents import Document" From 9d0a0e71a2c691c06a97362b356608530ce68350 Mon Sep 17 00:00:00 2001 From: Anush008 Date: Tue, 19 Dec 2023 09:23:02 +0530 Subject: [PATCH 8/8] docs: re-run notebook usage example --- .../retrievers/qdrant-sparse.ipynb | 49 ++++++++++++------- 1 file changed, 30 insertions(+), 19 deletions(-) diff --git a/docs/docs/integrations/retrievers/qdrant-sparse.ipynb b/docs/docs/integrations/retrievers/qdrant-sparse.ipynb index da2087bb80e0a..f5241f7f551a5 100644 --- a/docs/docs/integrations/retrievers/qdrant-sparse.ipynb +++ b/docs/docs/integrations/retrievers/qdrant-sparse.ipynb @@ -31,15 +31,26 @@ }, "outputs": [], "source": [ - "#!pip install qdrant_client" + "%pip install qdrant_client" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "c10dd962", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "from qdrant_client import QdrantClient, models\n", "\n", @@ -62,7 +73,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "id": "f47a2bfe", "metadata": {}, "outputs": [], @@ -81,7 +92,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 7, "id": "f2eff08e", "metadata": {}, "outputs": [], @@ -116,7 +127,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 8, "id": "cd8a7b17", "metadata": {}, "outputs": [], @@ -170,21 +181,21 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 9, "id": "3c5970db", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "['840573bcc59345628dd273be58478f82',\n", - " 'b5301f9b997343f197fbbc52f754477a',\n", - " '9c82735a2d49442ba6540948ef098890',\n", - " '331cf696182b49c4a997485fa82ebd04',\n", - " '06ee682d149a4f9ebbc3afe423fd72d3']" + "['1a3e0d292e6444d39451d0588ce746dc',\n", + " '19b180dd31e749359d49967e5d5dcab7',\n", + " '8de69e56086f47748e32c9e379e6865b',\n", + " 'f528fac385954e46b89cf8607bf0ee5a',\n", + " 'c1a6249d005d4abd9192b1d0b829cebe']" ] }, - "execution_count": 26, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -195,20 +206,20 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 10, "id": "4fffd0af", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "[Document(page_content=\"Building upon her previous work, Dr. Foster unveils 'Silent Alchemy,' a profound examination of the covert presence of AI in our daily lives. This illuminating piece reveals the subtle yet impactful ways in which AI invisibly shapes our routines, emphasizing the need for heightened awareness in our technology-driven world.\", metadata={'title': 'Silent Alchemy: Unseen AI Alleviations', 'author': 'Dr. Emily Foster'}),\n", - " Document(page_content=\"In 'Sentient Threads,' Professor Bennett unravels the enigma of AI consciousness, presenting a tapestry of arguments that scrutinize the very essence of machine sentience. The book ignites contemplation on the ethical and philosophical dimensions surrounding the quest for true AI awareness.\", metadata={'title': 'Sentient Threads: Weaving AI Consciousness', 'author': 'Prof. Alexander J. Bennett'}),\n", - " Document(page_content='An in-depth exploration of the fascinating journey of artificial intelligence, narrated by Dr. Mitchell. This captivating account spans the historical roots, current advancements, and speculative futures of AI, offering a gripping narrative that intertwines technology, ethics, and societal implications.', metadata={'title': 'Beyond Horizons: AI Chronicles', 'author': 'Dr. Cassandra Mitchell'}),\n", - " Document(page_content=\"Building upon her previous work, Dr. Foster unveils 'Silent Alchemy,' a profound examination of the covert presence of AI in our daily lives. This illuminating piece reveals the subtle yet impactful ways in which AI invisibly shapes our routines, emphasizing the need for heightened awareness in our technology-driven world.\", metadata={'title': 'Silent Alchemy: Unseen AI Alleviations', 'author': 'Dr. Emily Foster'})]" + "[Document(page_content=\"In 'Sentient Threads,' Professor Bennett unravels the enigma of AI consciousness, presenting a tapestry of arguments that scrutinize the very essence of machine sentience. The book ignites contemplation on the ethical and philosophical dimensions surrounding the quest for true AI awareness.\", metadata={'title': 'Sentient Threads: Weaving AI Consciousness', 'author': 'Prof. Alexander J. Bennett'}),\n", + " Document(page_content=\"Dr. Rodriguez pens an intriguing narrative in 'AI Dilemmas,' probing the uncharted territories of ethical quandaries arising from AI advancements. The book serves as a compass, guiding readers through the complex terrain of moral decisions confronting developers, policymakers, and society as AI evolves.\", metadata={'title': 'AI Dilemmas: Navigating the Unknown', 'author': 'Dr. Elena Rodriguez'}),\n", + " Document(page_content=\"Professor Anderson delves into the synergistic possibilities of human-machine collaboration in 'Synergy Nexus.' The book articulates a vision where humans and AI seamlessly coalesce, creating new dimensions of productivity, creativity, and shared intelligence.\", metadata={'title': 'Synergy Nexus: Merging Minds with Machines', 'author': 'Prof. Benjamin S. Anderson'}),\n", + " Document(page_content='An in-depth exploration of the fascinating journey of artificial intelligence, narrated by Dr. Mitchell. This captivating account spans the historical roots, current advancements, and speculative futures of AI, offering a gripping narrative that intertwines technology, ethics, and societal implications.', metadata={'title': 'Beyond Horizons: AI Chronicles', 'author': 'Dr. Cassandra Mitchell'})]" ] }, - "execution_count": 32, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" }