community[minor]: feat: QdrantTranslator for self-query retrieval (#5163)

* feat: Qdrant self-query retriever * docs: Qdrant self-query retriever * Update lock, fix type * Fix deps * Move to community * Revert * Move * Bump dep --------- Co-authored-by: jacoblee93 <[email protected]>
langchain-ai · Apr 26, 2024 · 916114b · 916114b
1 parent dd7f528
commit 916114b
Show file tree

Hide file tree

Showing 10 changed files with 837 additions and 17 deletions.
diff --git a/...e_docs/docs/modules/data_connection/retrievers/self_query/qdrant-self-query.mdx b/...e_docs/docs/modules/data_connection/retrievers/self_query/qdrant-self-query.mdx
@@ -0,0 +1,53 @@
+# Qdrant Self Query Retriever
+
+This example shows how to use a self query retriever with a Qdrant vector store.
+
+## Usage
+
+import IntegrationInstallTooltip from "@mdx_components/integration_install_tooltip.mdx";
+
+<IntegrationInstallTooltip></IntegrationInstallTooltip>
+
+```bash npm2yarn
+npm install @langchain/openai @langchain/community @qdrant/js-client-rest
+```
+
+import CodeBlock from "@theme/CodeBlock";
+import Example from "@examples/retrievers/qdrant_self_query.ts";
+
+<CodeBlock language="typescript">{Example}</CodeBlock>
+
+You can also initialize the retriever with default search parameters that apply in
+addition to the generated query:
+
+```typescript
+const selfQueryRetriever = SelfQueryRetriever.fromLLM({
+  llm,
+  vectorStore,
+  documentContents,
+  attributeInfo,
+  /**
+   * We need to create a basic translator that translates the queries into a
+   * filter format that the vector store can understand. We provide a basic translator here.
+   * You can create your own translator by extending the BaseTranslator
+   * abstract class. Note that the vector store needs to support filtering on the metadata
+   * attributes you want to query on.
+   */
+  structuredQueryTranslator: new QdrantTranslator(),
+  searchParams: {
+    filter: {
+      must: [
+        {
+          key: "metadata.rating",
+          range: {
+            gt: 8.5,
+          },
+        },
+      ],
+    },
+    mergeFiltersOperator: "and",
+  },
+});
+```
+
+See the [official docs](https://qdrant.tech/documentation/concepts/filtering/) for more on how to construct metadata filters.
diff --git a/examples/package.json b/examples/package.json
@@ -55,6 +55,7 @@
     "@pinecone-database/pinecone": "^2.2.0",
     "@planetscale/database": "^1.8.0",
     "@prisma/client": "^4.11.0",
+    "@qdrant/js-client-rest": "^1.8.2",
     "@raycast/api": "^1.55.2",
     "@rockset/client": "^0.9.1",
     "@supabase/supabase-js": "^2.10.0",

diff --git a/examples/src/retrievers/qdrant_self_query.ts b/examples/src/retrievers/qdrant_self_query.ts
@@ -0,0 +1,134 @@
+import { AttributeInfo } from "langchain/schema/query_constructor";
+import { OpenAIEmbeddings, OpenAI } from "@langchain/openai";
+import { SelfQueryRetriever } from "langchain/retrievers/self_query";
+import { QdrantVectorStore } from "@langchain/community/vectorstores/qdrant";
+import { QdrantTranslator } from "@langchain/community/retrievers/self_query/qdrant";
+import { Document } from "@langchain/core/documents";
+
+import { QdrantClient } from "@qdrant/js-client-rest";
+
+/**
+ * First, we create a bunch of documents. You can load your own documents here instead.
+ * Each document has a pageContent and a metadata field. Make sure your metadata matches the AttributeInfo below.
+ */
+const docs = [
+  new Document({
+    pageContent:
+      "A bunch of scientists bring back dinosaurs and mayhem breaks loose",
+    metadata: { year: 1993, rating: 7.7, genre: "science fiction" },
+  }),
+  new Document({
+    pageContent:
+      "Leo DiCaprio gets lost in a dream within a dream within a dream within a ...",
+    metadata: { year: 2010, director: "Christopher Nolan", rating: 8.2 },
+  }),
+  new Document({
+    pageContent:
+      "A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea",
+    metadata: { year: 2006, director: "Satoshi Kon", rating: 8.6 },
+  }),
+  new Document({
+    pageContent:
+      "A bunch of normal-sized women are supremely wholesome and some men pine after them",
+    metadata: { year: 2019, director: "Greta Gerwig", rating: 8.3 },
+  }),
+  new Document({
+    pageContent: "Toys come alive and have a blast doing so",
+    metadata: { year: 1995, genre: "animated" },
+  }),
+  new Document({
+    pageContent: "Three men walk into the Zone, three men walk out of the Zone",
+    metadata: {
+      year: 1979,
+      director: "Andrei Tarkovsky",
+      genre: "science fiction",
+      rating: 9.9,
+    },
+  }),
+];
+
+/**
+ * Next, we define the attributes we want to be able to query on.
+ * In this case, we want to be able to query on the genre, year, director, rating, and length of the movie.
+ * We also provide a description of each attribute and the type of the attribute.
+ * This is used to generate the query prompts.
+ */
+const attributeInfo: AttributeInfo[] = [
+  {
+    name: "genre",
+    description: "The genre of the movie",
+    type: "string or array of strings",
+  },
+  {
+    name: "year",
+    description: "The year the movie was released",
+    type: "number",
+  },
+  {
+    name: "director",
+    description: "The director of the movie",
+    type: "string",
+  },
+  {
+    name: "rating",
+    description: "The rating of the movie (1-10)",
+    type: "number",
+  },
+  {
+    name: "length",
+    description: "The length of the movie in minutes",
+    type: "number",
+  },
+];
+
+/**
+ * Next, we instantiate a vector store. This is where we store the embeddings of the documents.
+ * We also need to provide an embeddings object. This is used to embed the documents.
+ */
+
+const QDRANT_URL = "http://127.0.0.1:6333";
+const QDRANT_COLLECTION_NAME = "some-collection-name";
+
+const client = new QdrantClient({ url: QDRANT_URL });
+
+const embeddings = new OpenAIEmbeddings();
+const llm = new OpenAI();
+const documentContents = "Brief summary of a movie";
+const vectorStore = await QdrantVectorStore.fromDocuments(docs, embeddings, {
+  client,
+  collectionName: QDRANT_COLLECTION_NAME,
+});
+const selfQueryRetriever = SelfQueryRetriever.fromLLM({
+  llm,
+  vectorStore,
+  documentContents,
+  attributeInfo,
+  /**
+   * We need to create a basic translator that translates the queries into a
+   * filter format that the vector store can understand. We provide a basic
+   * translator here, but you can create your own translator by extending the BaseTranslator
+   * abstract class. Note that the vector store needs to support filtering on the metadata
+   * attributes you want to query on.
+   */
+  structuredQueryTranslator: new QdrantTranslator(),
+});
+
+/**
+ * Now we can query the vector store.
+ * We can ask questions like "Which movies are less than 90 minutes?" or "Which movies are rated higher than 8.5?".
+ * We can also ask questions like "Which movies are either comedy or drama and are less than 90 minutes?".
+ * The retriever will automatically convert these questions into queries that can be used to retrieve documents.
+ */
+const query1 = await selfQueryRetriever.getRelevantDocuments(
+  "Which movies are less than 90 minutes?"
+);
+const query2 = await selfQueryRetriever.getRelevantDocuments(
+  "Which movies are rated higher than 8.5?"
+);
+const query3 = await selfQueryRetriever.getRelevantDocuments(
+  "Which cool movies are directed by Greta Gerwig?"
+);
+const query4 = await selfQueryRetriever.getRelevantDocuments(
+  "Which movies are either comedy or drama and are less than 90 minutes?"
+);
+console.log(query1, query2, query3, query4);
diff --git a/libs/langchain-community/.gitignore b/libs/langchain-community/.gitignore
@@ -598,6 +598,10 @@ retrievers/zep.cjs
 retrievers/zep.js
 retrievers/zep.d.ts
 retrievers/zep.d.cts
+retrievers/self_query/qdrant.cjs
+retrievers/self_query/qdrant.js
+retrievers/self_query/qdrant.d.ts
+retrievers/self_query/qdrant.d.cts
 caches/cloudflare_kv.cjs
 caches/cloudflare_kv.js
 caches/cloudflare_kv.d.ts

diff --git a/libs/langchain-community/langchain.config.js b/libs/langchain-community/langchain.config.js
@@ -184,6 +184,7 @@ export const config = {
     "retrievers/vectara_summary": "retrievers/vectara_summary",
     "retrievers/vespa": "retrievers/vespa",
     "retrievers/zep": "retrievers/zep",
+    "retrievers/self_query/qdrant": "retrievers/self_query/qdrant",
     // cache
     "caches/cloudflare_kv": "caches/cloudflare_kv",
     "caches/ioredis": "caches/ioredis",
@@ -335,6 +336,7 @@ export const config = {
     "retrievers/supabase",
     "retrievers/vectara_summary",
     "retrievers/zep",
+    "retrievers/self_query/qdrant",
     "cache/cloudflare_kv",
     "cache/momento",
     "cache/upstash_redis",

diff --git a/libs/langchain-community/package.json b/libs/langchain-community/package.json
@@ -81,7 +81,7 @@
     "@pinecone-database/pinecone": "^1.1.0",
     "@planetscale/database": "^1.8.0",
     "@premai/prem-sdk": "^0.3.25",
-    "@qdrant/js-client-rest": "^1.2.0",
+    "@qdrant/js-client-rest": "^1.8.2",
     "@raycast/api": "^1.55.2",
     "@rockset/client": "^0.9.1",
     "@smithy/eventstream-codec": "^2.0.5",
@@ -210,7 +210,7 @@
     "@pinecone-database/pinecone": "*",
     "@planetscale/database": "^1.8.0",
     "@premai/prem-sdk": "^0.3.25",
-    "@qdrant/js-client-rest": "^1.2.0",
+    "@qdrant/js-client-rest": "^1.8.2",
     "@raycast/api": "^1.55.2",
     "@rockset/client": "^0.9.1",
     "@smithy/eventstream-codec": "^2.0.5",
@@ -1908,6 +1908,15 @@
       "import": "./retrievers/zep.js",
       "require": "./retrievers/zep.cjs"
     },
+    "./retrievers/self_query/qdrant": {
+      "types": {
+        "import": "./retrievers/self_query/qdrant.d.ts",
+        "require": "./retrievers/self_query/qdrant.d.cts",
+        "default": "./retrievers/self_query/qdrant.d.ts"
+      },
+      "import": "./retrievers/self_query/qdrant.js",
+      "require": "./retrievers/self_query/qdrant.cjs"
+    },
     "./caches/cloudflare_kv": {
       "types": {
         "import": "./caches/cloudflare_kv.d.ts",
@@ -2890,6 +2899,10 @@
     "retrievers/zep.js",
     "retrievers/zep.d.ts",
     "retrievers/zep.d.cts",
+    "retrievers/self_query/qdrant.cjs",
+    "retrievers/self_query/qdrant.js",
+    "retrievers/self_query/qdrant.d.ts",
+    "retrievers/self_query/qdrant.d.cts",
     "caches/cloudflare_kv.cjs",
     "caches/cloudflare_kv.js",
     "caches/cloudflare_kv.d.ts",