diff --git a/docs/core_docs/docs/modules/data_connection/retrievers/self_query/qdrant-self-query.mdx b/docs/core_docs/docs/modules/data_connection/retrievers/self_query/qdrant-self-query.mdx new file mode 100644 index 000000000000..9ac9c20d5785 --- /dev/null +++ b/docs/core_docs/docs/modules/data_connection/retrievers/self_query/qdrant-self-query.mdx @@ -0,0 +1,53 @@ +# Qdrant Self Query Retriever + +This example shows how to use a self query retriever with a Qdrant vector store. + +## Usage + +import IntegrationInstallTooltip from "@mdx_components/integration_install_tooltip.mdx"; + +<IntegrationInstallTooltip></IntegrationInstallTooltip> + +```bash npm2yarn +npm install @langchain/openai @langchain/community @qdrant/js-client-rest +``` + +import CodeBlock from "@theme/CodeBlock"; +import Example from "@examples/retrievers/qdrant_self_query.ts"; + +<CodeBlock language="typescript">{Example}</CodeBlock> + +You can also initialize the retriever with default search parameters that apply in +addition to the generated query: + +```typescript +const selfQueryRetriever = SelfQueryRetriever.fromLLM({ + llm, + vectorStore, + documentContents, + attributeInfo, + /** + * We need to create a basic translator that translates the queries into a + * filter format that the vector store can understand. We provide a basic translator here. + * You can create your own translator by extending the BaseTranslator + * abstract class. Note that the vector store needs to support filtering on the metadata + * attributes you want to query on. + */ + structuredQueryTranslator: new QdrantTranslator(), + searchParams: { + filter: { + must: [ + { + key: "metadata.rating", + range: { + gt: 8.5, + }, + }, + ], + }, + mergeFiltersOperator: "and", + }, +}); +``` + +See the [official docs](https://qdrant.tech/documentation/concepts/filtering/) for more on how to construct metadata filters. 
diff --git a/examples/package.json b/examples/package.json index 62a828548b2a..f5350f36b751 100644 --- a/examples/package.json +++ b/examples/package.json @@ -55,6 +55,7 @@ "@pinecone-database/pinecone": "^2.2.0", "@planetscale/database": "^1.8.0", "@prisma/client": "^4.11.0", + "@qdrant/js-client-rest": "^1.8.2", "@raycast/api": "^1.55.2", "@rockset/client": "^0.9.1", "@supabase/supabase-js": "^2.10.0", diff --git a/examples/src/retrievers/qdrant_self_query.ts b/examples/src/retrievers/qdrant_self_query.ts new file mode 100644 index 000000000000..447ce4c318e3 --- /dev/null +++ b/examples/src/retrievers/qdrant_self_query.ts @@ -0,0 +1,134 @@ +import { AttributeInfo } from "langchain/schema/query_constructor"; +import { OpenAIEmbeddings, OpenAI } from "@langchain/openai"; +import { SelfQueryRetriever } from "langchain/retrievers/self_query"; +import { QdrantVectorStore } from "@langchain/community/vectorstores/qdrant"; +import { QdrantTranslator } from "@langchain/community/retrievers/self_query/qdrant"; +import { Document } from "@langchain/core/documents"; + +import { QdrantClient } from "@qdrant/js-client-rest"; + +/** + * First, we create a bunch of documents. You can load your own documents here instead. + * Each document has a pageContent and a metadata field. Make sure your metadata matches the AttributeInfo below. 
+ */ +const docs = [ + new Document({ + pageContent: + "A bunch of scientists bring back dinosaurs and mayhem breaks loose", + metadata: { year: 1993, rating: 7.7, genre: "science fiction" }, + }), + new Document({ + pageContent: + "Leo DiCaprio gets lost in a dream within a dream within a dream within a ...", + metadata: { year: 2010, director: "Christopher Nolan", rating: 8.2 }, + }), + new Document({ + pageContent: + "A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea", + metadata: { year: 2006, director: "Satoshi Kon", rating: 8.6 }, + }), + new Document({ + pageContent: + "A bunch of normal-sized women are supremely wholesome and some men pine after them", + metadata: { year: 2019, director: "Greta Gerwig", rating: 8.3 }, + }), + new Document({ + pageContent: "Toys come alive and have a blast doing so", + metadata: { year: 1995, genre: "animated" }, + }), + new Document({ + pageContent: "Three men walk into the Zone, three men walk out of the Zone", + metadata: { + year: 1979, + director: "Andrei Tarkovsky", + genre: "science fiction", + rating: 9.9, + }, + }), +]; + +/** + * Next, we define the attributes we want to be able to query on. + * in this case, we want to be able to query on the genre, year, director, rating, and length of the movie. + * We also provide a description of each attribute and the type of the attribute. + * This is used to generate the query prompts. 
+ */ +const attributeInfo: AttributeInfo[] = [ + { + name: "genre", + description: "The genre of the movie", + type: "string or array of strings", + }, + { + name: "year", + description: "The year the movie was released", + type: "number", + }, + { + name: "director", + description: "The director of the movie", + type: "string", + }, + { + name: "rating", + description: "The rating of the movie (1-10)", + type: "number", + }, + { + name: "length", + description: "The length of the movie in minutes", + type: "number", + }, +]; + +/** + * Next, we instantiate a vector store. This is where we store the embeddings of the documents. + * We also need to provide an embeddings object. This is used to embed the documents. The client below expects a Qdrant server reachable at QDRANT_URL. + */ + +const QDRANT_URL = "http://127.0.0.1:6333"; +const QDRANT_COLLECTION_NAME = "some-collection-name"; + +const client = new QdrantClient({ url: QDRANT_URL }); + +const embeddings = new OpenAIEmbeddings(); +const llm = new OpenAI(); +const documentContents = "Brief summary of a movie"; +const vectorStore = await QdrantVectorStore.fromDocuments(docs, embeddings, { + client, + collectionName: QDRANT_COLLECTION_NAME, +}); +const selfQueryRetriever = SelfQueryRetriever.fromLLM({ + llm, + vectorStore, + documentContents, + attributeInfo, + /** + * We need to create a basic translator that translates the queries into a + * filter format that the vector store can understand. We provide a basic translator + * here, but you can create your own translator by extending the BaseTranslator + * abstract class. Note that the vector store needs to support filtering on the metadata + * attributes you want to query on. + */ + structuredQueryTranslator: new QdrantTranslator(), +}); + +/** + * Now we can query the vector store. + * We can ask questions like "Which movies are less than 90 minutes?" or "Which movies are rated higher than 8.5?". + * We can also ask questions like "Which movies are either comedy or drama and are less than 90 minutes?". 
+ * The retriever will automatically convert these questions into queries that can be used to retrieve documents. + */ +const query1 = await selfQueryRetriever.getRelevantDocuments( + "Which movies are less than 90 minutes?" +); +const query2 = await selfQueryRetriever.getRelevantDocuments( + "Which movies are rated higher than 8.5?" +); +const query3 = await selfQueryRetriever.getRelevantDocuments( + "Which cool movies are directed by Greta Gerwig?" +); +const query4 = await selfQueryRetriever.getRelevantDocuments( + "Which movies are either comedy or drama and are less than 90 minutes?" +); +console.log(query1, query2, query3, query4); diff --git a/libs/langchain-community/.gitignore b/libs/langchain-community/.gitignore index f67257823787..ceaabbe7c7d2 100644 --- a/libs/langchain-community/.gitignore +++ b/libs/langchain-community/.gitignore @@ -598,6 +598,10 @@ retrievers/zep.cjs retrievers/zep.js retrievers/zep.d.ts retrievers/zep.d.cts +retrievers/self_query/qdrant.cjs +retrievers/self_query/qdrant.js +retrievers/self_query/qdrant.d.ts +retrievers/self_query/qdrant.d.cts caches/cloudflare_kv.cjs caches/cloudflare_kv.js caches/cloudflare_kv.d.ts diff --git a/libs/langchain-community/langchain.config.js b/libs/langchain-community/langchain.config.js index c73fc75a6e09..be25cdaf15b9 100644 --- a/libs/langchain-community/langchain.config.js +++ b/libs/langchain-community/langchain.config.js @@ -184,6 +184,7 @@ export const config = { "retrievers/vectara_summary": "retrievers/vectara_summary", "retrievers/vespa": "retrievers/vespa", "retrievers/zep": "retrievers/zep", + "retrievers/self_query/qdrant": "retrievers/self_query/qdrant", // cache "caches/cloudflare_kv": "caches/cloudflare_kv", "caches/ioredis": "caches/ioredis", @@ -335,6 +336,7 @@ export const config = { "retrievers/supabase", "retrievers/vectara_summary", "retrievers/zep", + "retrievers/self_query/qdrant", "cache/cloudflare_kv", "cache/momento", "cache/upstash_redis", diff --git 
a/libs/langchain-community/package.json b/libs/langchain-community/package.json index 9b4568a8f020..60d9ffb72efa 100644 --- a/libs/langchain-community/package.json +++ b/libs/langchain-community/package.json @@ -81,7 +81,7 @@ "@pinecone-database/pinecone": "^1.1.0", "@planetscale/database": "^1.8.0", "@premai/prem-sdk": "^0.3.25", - "@qdrant/js-client-rest": "^1.2.0", + "@qdrant/js-client-rest": "^1.8.2", "@raycast/api": "^1.55.2", "@rockset/client": "^0.9.1", "@smithy/eventstream-codec": "^2.0.5", @@ -210,7 +210,7 @@ "@pinecone-database/pinecone": "*", "@planetscale/database": "^1.8.0", "@premai/prem-sdk": "^0.3.25", - "@qdrant/js-client-rest": "^1.2.0", + "@qdrant/js-client-rest": "^1.8.2", "@raycast/api": "^1.55.2", "@rockset/client": "^0.9.1", "@smithy/eventstream-codec": "^2.0.5", @@ -1908,6 +1908,15 @@ "import": "./retrievers/zep.js", "require": "./retrievers/zep.cjs" }, + "./retrievers/self_query/qdrant": { + "types": { + "import": "./retrievers/self_query/qdrant.d.ts", + "require": "./retrievers/self_query/qdrant.d.cts", + "default": "./retrievers/self_query/qdrant.d.ts" + }, + "import": "./retrievers/self_query/qdrant.js", + "require": "./retrievers/self_query/qdrant.cjs" + }, "./caches/cloudflare_kv": { "types": { "import": "./caches/cloudflare_kv.d.ts", @@ -2890,6 +2899,10 @@ "retrievers/zep.js", "retrievers/zep.d.ts", "retrievers/zep.d.cts", + "retrievers/self_query/qdrant.cjs", + "retrievers/self_query/qdrant.js", + "retrievers/self_query/qdrant.d.ts", + "retrievers/self_query/qdrant.d.cts", "caches/cloudflare_kv.cjs", "caches/cloudflare_kv.js", "caches/cloudflare_kv.d.ts", diff --git a/libs/langchain-community/src/retrievers/self_query/qdrant.ts b/libs/langchain-community/src/retrievers/self_query/qdrant.ts new file mode 100644 index 000000000000..109c245138eb --- /dev/null +++ b/libs/langchain-community/src/retrievers/self_query/qdrant.ts @@ -0,0 +1,188 @@ +import { + isFilterEmpty, + castValue, + isInt, + isFloat, + BaseTranslator, + Comparator, + 
Comparators, + Comparison, + Operation, + Operator, + Operators, + StructuredQuery, + Visitor, +} from "@langchain/core/structured_query"; + +import { + QdrantVectorStore, + QdrantFilter, + QdrantCondition, +} from "../../vectorstores/qdrant.js"; + +/** + * A class that translates or converts `StructuredQuery` to equivalent Qdrant filters. + * @example + * ```typescript + * const selfQueryRetriever = SelfQueryRetriever.fromLLM({ + * llm: new ChatOpenAI(), + * vectorStore: new QdrantVectorStore(...), + * documentContents: "Brief summary of a movie", + * attributeInfo: [], + * structuredQueryTranslator: new QdrantTranslator(), + * }); + * + * const relevantDocuments = await selfQueryRetriever.getRelevantDocuments( + * "Which movies are rated higher than 8.5?", + * ); + * ``` + */ +export class QdrantTranslator< + T extends QdrantVectorStore +> extends BaseTranslator<T> { + declare VisitOperationOutput: QdrantFilter; + + declare VisitComparisonOutput: QdrantCondition; + + allowedOperators: Operator[] = [Operators.and, Operators.or, Operators.not]; + + allowedComparators: Comparator[] = [ + Comparators.eq, + Comparators.ne, + Comparators.lt, + Comparators.lte, + Comparators.gt, + Comparators.gte, + ]; + + /** + * Visits an operation and returns a QdrantFilter keyed by `must` (and), `should` (or) or `must_not` (not). + * @param operation The operation to visit. + * @returns A QdrantFilter. + */ + visitOperation(operation: Operation): this["VisitOperationOutput"] { + const args = operation.args?.map((arg) => arg.accept(this as Visitor)); + + const operator = { + [Operators.and]: "must", + [Operators.or]: "should", + [Operators.not]: "must_not", + }[operation.operator]; + + return { + [operator]: args, + }; + } + + /** + * Visits a comparison and returns a QdrantCondition. + * The value is cast to the correct type. + * The attribute is prefixed with "metadata.", + * since metadata is nested in the Qdrant payload. + * @param comparison The comparison to visit. + * @returns A QdrantCondition. 
+ */ + visitComparison(comparison: Comparison): this["VisitComparisonOutput"] { + const attribute = `metadata.${comparison.attribute}`; + const value = castValue(comparison.value); + + if (comparison.comparator === "eq") { + return { + key: attribute, + match: { + value, + }, + }; + } else if (comparison.comparator === "ne") { + return { + key: attribute, + match: { + except: [value], + }, + }; + } + + if (!isInt(value) && !isFloat(value)) { + throw new Error("Value for gt, gte, lt, lte must be a number"); + } + + // For gt, gte, lt, lte, we need to use the range filter + return { + key: attribute, + range: { + [comparison.comparator]: value, + }, + }; + } + + /** + * Visits a structured query and returns a VisitStructuredQueryOutput. + * If the query has a filter, it is visited and wrapped in a `must` clause; otherwise an empty object is returned. + * @param query The structured query to visit. + * @returns An instance of VisitStructuredQueryOutput. + */ + visitStructuredQuery( + query: StructuredQuery + ): this["VisitStructuredQueryOutput"] { + let nextArg = {}; + if (query.filter) { + nextArg = { + filter: { must: [query.filter.accept(this as Visitor)] }, + }; + } + return nextArg; + } + + /** + * Merges two filters into one, checking these cases in order. If both filters are empty, returns + * undefined. If the default filter is empty or the merge type is 'replace', returns the generated + * filter (undefined if it is empty). If the generated filter is empty, returns the default filter, + * unless the merge type is 'and' and forceDefaultFilter is false, in which case it returns undefined. + * Otherwise 'and' / 'or' combine both filters under `must` / `should`; any other merge type throws. + * @param defaultFilter The default filter to merge. + * @param generatedFilter The generated filter to merge. + * @param mergeType The type of merge to perform. Can be 'and', 'or', or 'replace'. Defaults to 'and'. + * @param forceDefaultFilter If true, the default filter is returned when only the generated filter is empty, even for merge type 'and' (has no effect for 'replace'). Defaults to false. + * @returns A merged QdrantFilter, or undefined if no filter should be applied. 
+ */ + mergeFilters( + defaultFilter: QdrantFilter | undefined, + generatedFilter: QdrantFilter | undefined, + mergeType = "and", + forceDefaultFilter = false + ): QdrantFilter | undefined { + if (isFilterEmpty(defaultFilter) && isFilterEmpty(generatedFilter)) { + return undefined; + } + if (isFilterEmpty(defaultFilter) || mergeType === "replace") { + if (isFilterEmpty(generatedFilter)) { + return undefined; + } + return generatedFilter; + } + if (isFilterEmpty(generatedFilter)) { + if (forceDefaultFilter) { + return defaultFilter; + } + if (mergeType === "and") { + return undefined; + } + return defaultFilter; + } + if (mergeType === "and") { + return { + must: [defaultFilter, generatedFilter], + }; + } else if (mergeType === "or") { + return { + should: [defaultFilter, generatedFilter], + }; + } else { + throw new Error("Unknown merge type"); + } + } + + formatFunction(): string { + throw new Error("Not implemented"); + } +} diff --git a/libs/langchain-community/src/retrievers/self_query/tests/qdrant_self_query.int.test.ts b/libs/langchain-community/src/retrievers/self_query/tests/qdrant_self_query.int.test.ts new file mode 100644 index 000000000000..64d3e0676d8a --- /dev/null +++ b/libs/langchain-community/src/retrievers/self_query/tests/qdrant_self_query.int.test.ts @@ -0,0 +1,408 @@ +// TODO: Add back in in 0.2.0 + +// import { test } from "@jest/globals"; +// import { Document } from "@langchain/core/documents"; +// import { OpenAIEmbeddings, OpenAI } from "@langchain/openai"; +// import { QdrantVectorStore } from "@langchain/community/vectorstores/qdrant"; +// import { AttributeInfo } from "@langchain/core/structured_query"; +// import { QdrantClient } from "@qdrant/js-client-rest"; +// import { SelfQueryRetriever } from "../index.js"; +// import { QdrantTranslator } from "../qdrant.js"; + +// test("Qdrant Vector Store Self Query Retriever Test", async () => { +// const docs = [ +// new Document({ +// pageContent: +// "A bunch of scientists bring back 
dinosaurs and mayhem breaks loose", +// metadata: { year: 1993, rating: 7.7, genre: "science fiction" }, +// }), +// new Document({ +// pageContent: +// "Leo DiCaprio gets lost in a dream within a dream within a dream within a ...", +// metadata: { year: 2010, director: "Christopher Nolan", rating: 8.2 }, +// }), +// new Document({ +// pageContent: +// "A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea", +// metadata: { year: 2006, director: "Satoshi Kon", rating: 8.6 }, +// }), +// new Document({ +// pageContent: +// "A bunch of normal-sized women are supremely wholesome and some men pine after them", +// metadata: { year: 2019, director: "Greta Gerwig", rating: 8.3 }, +// }), +// new Document({ +// pageContent: "Toys come alive and have a blast doing so", +// metadata: { year: 1995, genre: "animated" }, +// }), +// new Document({ +// pageContent: +// "Three men walk into the Zone, three men walk out of the Zone", +// metadata: { +// year: 1979, +// director: "Andrei Tarkovsky", +// genre: "science fiction", +// rating: 9.9, +// }, +// }), +// ]; + +// const attributeInfo: AttributeInfo[] = [ +// { +// name: "genre", +// description: "The genre of the movie", +// type: "string or array of strings", +// }, +// { +// name: "year", +// description: "The year the movie was released", +// type: "number", +// }, +// { +// name: "director", +// description: "The director of the movie", +// type: "string", +// }, +// { +// name: "rating", +// description: "The rating of the movie (1-10)", +// type: "number", +// }, +// { +// name: "length", +// description: "The length of the movie in minutes", +// type: "number", +// }, +// ]; + +// const embeddings = new OpenAIEmbeddings(); +// const llm = new OpenAI({ +// modelName: "gpt-3.5-turbo", +// temperature: 0, +// }); +// const documentContents = "Brief summary of a movie"; +// const client = new QdrantClient({ url: "http://127.0.0.1:6333" }); +// const 
vectorStore = await QdrantVectorStore.fromDocuments(docs, embeddings, { +// client, +// collectionName: crypto.randomUUID(), +// }); +// const selfQueryRetriever = SelfQueryRetriever.fromLLM({ +// llm, +// vectorStore, +// documentContents, +// attributeInfo, +// structuredQueryTranslator: new QdrantTranslator(), +// }); + +// const query1 = await selfQueryRetriever.getRelevantDocuments( +// "Which movies are less than 90 minutes?" +// ); + +// expect(query1.length).toEqual(0); + +// const query2 = await selfQueryRetriever.getRelevantDocuments( +// "Which movies are rated higher than 8.5?" +// ); + +// expect(query2.length).toEqual(2); + +// const query3 = await selfQueryRetriever.getRelevantDocuments( +// "Which cool movies are directed by Greta Gerwig?" +// ); + +// expect(query3.length).toEqual(1); +// }); + +// test("Qdrant Vector Store Self Query Retriever Test With Default Filter Or Merge Operator", async () => { +// const docs = [ +// new Document({ +// pageContent: +// "A bunch of scientists bring back dinosaurs and mayhem breaks loose", +// metadata: { +// year: 1993, +// rating: 7.7, +// genre: "science fiction", +// type: "movie", +// }, +// }), +// new Document({ +// pageContent: +// "Leo DiCaprio gets lost in a dream within a dream within a dream within a ...", +// metadata: { +// year: 2010, +// director: "Christopher Nolan", +// rating: 8.2, +// type: "movie", +// }, +// }), +// new Document({ +// pageContent: +// "A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea", +// metadata: { +// year: 2006, +// director: "Satoshi Kon", +// rating: 8.6, +// type: "movie", +// }, +// }), +// new Document({ +// pageContent: +// "A bunch of normal-sized women are supremely wholesome and some men pine after them", +// metadata: { +// year: 2019, +// director: "Greta Gerwig", +// rating: 8.3, +// type: "movie", +// }, +// }), +// new Document({ +// pageContent: "Toys come alive and have a blast doing 
so", +// metadata: { year: 1995, genre: "animated", type: "movie" }, +// }), +// new Document({ +// pageContent: +// "Three men walk into the Zone, three men walk out of the Zone", +// metadata: { +// year: 1979, +// director: "Andrei Tarkovsky", +// genre: "science fiction", +// rating: 9.9, +// type: "movie", +// }, +// }), +// new Document({ +// pageContent: "10x the previous gecs", +// metadata: { +// year: 2023, +// title: "10000 gecs", +// artist: "100 gecs", +// rating: 9.9, +// type: "album", +// }, +// }), +// ]; + +// const attributeInfo: AttributeInfo[] = [ +// { +// name: "genre", +// description: "The genre of the movie", +// type: "string or array of strings", +// }, +// { +// name: "year", +// description: "The year the movie was released", +// type: "number", +// }, +// { +// name: "director", +// description: "The director of the movie", +// type: "string", +// }, +// { +// name: "rating", +// description: "The rating of the movie (1-10)", +// type: "number", +// }, +// { +// name: "length", +// description: "The length of the movie in minutes", +// type: "number", +// }, +// ]; + +// const embeddings = new OpenAIEmbeddings(); +// const llm = new OpenAI({ +// modelName: "gpt-3.5-turbo", +// }); +// const documentContents = "Brief summary of a movie"; +// const client = new QdrantClient({ url: "http://127.0.0.1:6333" }); +// const vectorStore = await QdrantVectorStore.fromDocuments(docs, embeddings, { +// client, +// collectionName: crypto.randomUUID(), +// }); +// const selfQueryRetriever = SelfQueryRetriever.fromLLM({ +// llm, +// vectorStore, +// documentContents, +// attributeInfo, +// structuredQueryTranslator: new QdrantTranslator(), +// searchParams: { +// filter: { +// must: [{ key: "metadata.type", match: { value: "movie" } }], +// }, +// mergeFiltersOperator: "or", +// k: docs.length, +// }, +// }); + +// const query1 = await selfQueryRetriever.getRelevantDocuments( +// "Which movies are less than 90 minutes?" 
+// ); + +// expect(query1.length).toEqual(6); + +// const query2 = await selfQueryRetriever.getRelevantDocuments( +// "Which movies are rated higher than 8.5?" +// ); + +// expect(query2.length).toEqual(7); + +// const query3 = await selfQueryRetriever.getRelevantDocuments( +// "Which movies are directed by Greta Gerwig?" +// ); + +// expect(query3.length).toEqual(6); + +// const query4 = await selfQueryRetriever.getRelevantDocuments( +// "Awawawa au au au wawawawa hello?" +// ); + +// expect(query4.length).toEqual(6); // this one should return documents since default filter takes over +// }); + +// test("Qdrant Vector Store Self Query Retriever Test With Default Filter And Merge Operator", async () => { +// const docs = [ +// new Document({ +// pageContent: +// "A bunch of scientists bring back dinosaurs and mayhem breaks loose", +// metadata: { +// year: 1993, +// rating: 7.7, +// genre: "science fiction", +// type: "movie", +// }, +// }), +// new Document({ +// pageContent: +// "Leo DiCaprio gets lost in a dream within a dream within a dream within a ...", +// metadata: { +// year: 2010, +// director: "Christopher Nolan", +// rating: 8.2, +// type: "movie", +// }, +// }), +// new Document({ +// pageContent: +// "A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea", +// metadata: { +// year: 2006, +// director: "Satoshi Kon", +// rating: 8.6, +// type: "movie", +// }, +// }), +// new Document({ +// pageContent: +// "A bunch of normal-sized women are supremely wholesome and some men pine after them", +// metadata: { +// year: 2019, +// director: "Greta Gerwig", +// rating: 8.3, +// type: "movie", +// }, +// }), +// new Document({ +// pageContent: "Toys come alive and have a blast doing so", +// metadata: { year: 1995, genre: "animated", type: "movie" }, +// }), +// new Document({ +// pageContent: +// "Three men walk into the Zone, three men walk out of the Zone", +// metadata: { +// year: 1979, +// 
director: "Andrei Tarkovsky", +// genre: "science fiction", +// rating: 9.9, +// type: "movie", +// }, +// }), +// new Document({ +// pageContent: "10x the previous gecs", +// metadata: { +// year: 2023, +// title: "10000 gecs", +// artist: "100 gecs", +// rating: 9.9, +// type: "album", +// }, +// }), +// ]; + +// const attributeInfo: AttributeInfo[] = [ +// { +// name: "genre", +// description: "The genre of the movie", +// type: "string or array of strings", +// }, +// { +// name: "year", +// description: "The year the movie was released", +// type: "number", +// }, +// { +// name: "director", +// description: "The director of the movie", +// type: "string", +// }, +// { +// name: "rating", +// description: "The rating of the movie (1-10)", +// type: "number", +// }, +// { +// name: "length", +// description: "The length of the movie in minutes", +// type: "number", +// }, +// ]; + +// const embeddings = new OpenAIEmbeddings(); +// const llm = new OpenAI({ +// modelName: "gpt-3.5-turbo", +// }); +// const documentContents = "Brief summary of a movie"; +// const client = new QdrantClient({ url: "http://127.0.0.1:6333" }); +// const vectorStore = await QdrantVectorStore.fromDocuments(docs, embeddings, { +// client, +// collectionName: crypto.randomUUID(), +// }); +// const selfQueryRetriever = SelfQueryRetriever.fromLLM({ +// llm, +// vectorStore, +// documentContents, +// attributeInfo, +// structuredQueryTranslator: new QdrantTranslator(), +// searchParams: { +// filter: { +// must: [{ key: "metadata.type", match: { value: "movie" } }], +// }, +// mergeFiltersOperator: "and", +// k: docs.length, +// }, +// }); + +// const query1 = await selfQueryRetriever.getRelevantDocuments( +// "Which movies are less than 90 minutes?" +// ); + +// expect(query1.length).toEqual(0); + +// const query2 = await selfQueryRetriever.getRelevantDocuments( +// "Which movies are rated higher than 8.5?" 
+// ); + +// expect(query2.length).toEqual(2); + +// const query3 = await selfQueryRetriever.getRelevantDocuments( +// "Which cool movies are directed by Greta Gerwig?" +// ); + +// expect(query3.length).toEqual(1); + +// const query4 = await selfQueryRetriever.getRelevantDocuments( +// "Awawawa au au au wawawawa hello?" +// ); + +// expect(query4.length).toBeGreaterThan(0); // this one should return documents since default filter takes over +// }); diff --git a/libs/langchain-community/src/vectorstores/qdrant.ts b/libs/langchain-community/src/vectorstores/qdrant.ts index e2b6d5bc8024..e1b3e0919978 100644 --- a/libs/langchain-community/src/vectorstores/qdrant.ts +++ b/libs/langchain-community/src/vectorstores/qdrant.ts @@ -32,6 +32,10 @@ export type QdrantAddDocumentOptions = { customPayload: Record[]; }; +export type QdrantFilter = QdrantSchemas["Filter"]; + +export type QdrantCondition = QdrantSchemas["FieldCondition"]; + /** * Type for the response returned by a search operation in the Qdrant * database. It includes the score and payload (metadata and content) for @@ -51,6 +55,8 @@ type QdrantSearchResponse = QdrantSchemas["ScoredPoint"] & { * existence of a collection in the database. 
*/ export class QdrantVectorStore extends VectorStore { + declare FilterType: QdrantFilter; + get lc_secrets(): { [key: string]: string } { return { apiKey: "QDRANT_API_KEY", @@ -176,7 +182,7 @@ export class QdrantVectorStore extends VectorStore { async similaritySearchVectorWithScore( query: number[], k?: number, - filter?: QdrantSchemas["Filter"] + filter?: this["FilterType"] ): Promise<[Document, number][]> { if (!query) { return []; diff --git a/yarn.lock b/yarn.lock index 6dfb086f7690..9b39a7732595 100644 --- a/yarn.lock +++ b/yarn.lock @@ -8959,7 +8959,7 @@ __metadata: "@pinecone-database/pinecone": ^1.1.0 "@planetscale/database": ^1.8.0 "@premai/prem-sdk": ^0.3.25 - "@qdrant/js-client-rest": ^1.2.0 + "@qdrant/js-client-rest": ^1.8.2 "@raycast/api": ^1.55.2 "@rockset/client": ^0.9.1 "@smithy/eventstream-codec": ^2.0.5 @@ -9093,7 +9093,7 @@ __metadata: "@pinecone-database/pinecone": "*" "@planetscale/database": ^1.8.0 "@premai/prem-sdk": ^0.3.25 - "@qdrant/js-client-rest": ^1.2.0 + "@qdrant/js-client-rest": ^1.8.2 "@raycast/api": ^1.55.2 "@rockset/client": ^0.9.1 "@smithy/eventstream-codec": ^2.0.5 @@ -10975,22 +10975,23 @@ __metadata: languageName: node linkType: hard -"@qdrant/js-client-rest@npm:^1.2.0": - version: 1.2.0 - resolution: "@qdrant/js-client-rest@npm:1.2.0" +"@qdrant/js-client-rest@npm:^1.8.2": + version: 1.9.0 + resolution: "@qdrant/js-client-rest@npm:1.9.0" dependencies: - "@qdrant/openapi-typescript-fetch": ^1.2.0 - "@sevinf/maybe": ^0.5.0 + "@qdrant/openapi-typescript-fetch": 1.2.6 + "@sevinf/maybe": 0.5.0 + undici: ~5.28.4 peerDependencies: - typescript: ">=4.1" - checksum: 2cf7b33cbd3c912b5e063542e320b853486277afd847761abfe76c3917f02f1f80386e542e1b616ed04835af1cea7a0f5fbb4d0d56ab7213ad0e9ae44959cedf + typescript: ">=4.7" + checksum: 3ac6526a961f85e96eaa0cd17059cafcca208c7aaa6df96fa99caab75c53c79903eafa611be99b52732970f4bef65d93922537916194fe0b9edf0f256e42fa9a languageName: node linkType: hard -"@qdrant/openapi-typescript-fetch@npm:^1.2.0": 
- version: 1.2.0 - resolution: "@qdrant/openapi-typescript-fetch@npm:1.2.0" - checksum: dfddfe29bb843d957dc043b111618c7ae9ca7163fff9eca04c2d9ba2cd2fc00003548a2623e3ac0a1c79f2c9aae164eab0739ccb7d4d956d744d2263692a8d31 +"@qdrant/openapi-typescript-fetch@npm:1.2.6": + version: 1.2.6 + resolution: "@qdrant/openapi-typescript-fetch@npm:1.2.6" + checksum: 038c26b64da656ec6c16a92c2d90ad13bb154312ea0442f49c476d31ba66e7ac43344dcc3e580980c0bfba9ad37266f4341c254fb648197f92872e694b9205d7 languageName: node linkType: hard @@ -11263,7 +11264,7 @@ __metadata: languageName: node linkType: hard -"@sevinf/maybe@npm:^0.5.0": +"@sevinf/maybe@npm:0.5.0": version: 0.5.0 resolution: "@sevinf/maybe@npm:0.5.0" checksum: 406151dde7af0e05c51f2650eb0ccff429afe776f88e7c3cecb72197f28eb96f906eacace17d780052ce5aa6d2cd4b04a045a5ffabe3364ad6cdb9dfe42af3eb @@ -21783,6 +21784,7 @@ __metadata: "@pinecone-database/pinecone": ^2.2.0 "@planetscale/database": ^1.8.0 "@prisma/client": ^4.11.0 + "@qdrant/js-client-rest": ^1.8.2 "@raycast/api": ^1.55.2 "@rockset/client": ^0.9.1 "@supabase/supabase-js": ^2.10.0 @@ -35459,6 +35461,15 @@ __metadata: languageName: node linkType: hard +"undici@npm:~5.28.4": + version: 5.28.4 + resolution: "undici@npm:5.28.4" + dependencies: + "@fastify/busboy": ^2.0.0 + checksum: a8193132d84540e4dc1895ecc8dbaa176e8a49d26084d6fbe48a292e28397cd19ec5d13bc13e604484e76f94f6e334b2bdc740d5f06a6e50c44072818d0c19f9 + languageName: node + linkType: hard + "unherit@npm:^1.0.4": version: 1.1.3 resolution: "unherit@npm:1.1.3"