Skip to content

Commit

Permalink
community[minor]: feat: QdrantTranslator for self-query retrieval (#5163
Browse files Browse the repository at this point in the history
)

* feat: Qdrant self-query retriever

* docs: Qdrant self-query retriever

* Update lock, fix type

* Fix deps

* Move to community

* Revert

* Move

* Bump dep

---------

Co-authored-by: jacoblee93 <[email protected]>
  • Loading branch information
Anush008 and jacoblee93 authored Apr 26, 2024
1 parent dd7f528 commit 916114b
Show file tree
Hide file tree
Showing 10 changed files with 837 additions and 17 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# Qdrant Self Query Retriever

This example shows how to use a self query retriever with a Qdrant vector store.

## Usage

import IntegrationInstallTooltip from "@mdx_components/integration_install_tooltip.mdx";

<IntegrationInstallTooltip></IntegrationInstallTooltip>

```bash npm2yarn
npm install @langchain/openai @langchain/community @qdrant/js-client-rest
```

import CodeBlock from "@theme/CodeBlock";
import Example from "@examples/retrievers/qdrant_self_query.ts";

<CodeBlock language="typescript">{Example}</CodeBlock>

You can also initialize the retriever with default search parameters that apply in
addition to the generated query:

```typescript
const selfQueryRetriever = SelfQueryRetriever.fromLLM({
llm,
vectorStore,
documentContents,
attributeInfo,
/**
* We need to create a basic translator that translates the queries into a
* filter format that the vector store can understand. We provide a basic translator here.
* You can create your own translator by extending the BaseTranslator
* abstract class. Note that the vector store needs to support filtering on the metadata
* attributes you want to query on.
*/
structuredQueryTranslator: new QdrantTranslator(),
searchParams: {
filter: {
must: [
{
key: "metadata.rating",
range: {
gt: 8.5,
},
},
],
},
mergeFiltersOperator: "and",
},
});
```

See the [official docs](https://qdrant.tech/documentation/concepts/filtering/) for more on how to construct metadata filters.
1 change: 1 addition & 0 deletions examples/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
"@pinecone-database/pinecone": "^2.2.0",
"@planetscale/database": "^1.8.0",
"@prisma/client": "^4.11.0",
"@qdrant/js-client-rest": "^1.8.2",
"@raycast/api": "^1.55.2",
"@rockset/client": "^0.9.1",
"@supabase/supabase-js": "^2.10.0",
Expand Down
134 changes: 134 additions & 0 deletions examples/src/retrievers/qdrant_self_query.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
import { AttributeInfo } from "langchain/schema/query_constructor";
import { OpenAIEmbeddings, OpenAI } from "@langchain/openai";
import { SelfQueryRetriever } from "langchain/retrievers/self_query";
import { QdrantVectorStore } from "@langchain/community/vectorstores/qdrant";
import { QdrantTranslator } from "@langchain/community/retrievers/self_query/qdrant";
import { Document } from "@langchain/core/documents";

import { QdrantClient } from "@qdrant/js-client-rest";

/**
 * Sample corpus for the example. Every entry pairs a one-line plot summary
 * (`pageContent`) with the `metadata` that queries can filter on.
 * Replace these with your own documents if you like, but keep the metadata
 * keys in sync with the AttributeInfo list declared below.
 */
const docs = [
  {
    pageContent:
      "A bunch of scientists bring back dinosaurs and mayhem breaks loose",
    metadata: { year: 1993, rating: 7.7, genre: "science fiction" },
  },
  {
    pageContent:
      "Leo DiCaprio gets lost in a dream within a dream within a dream within a ...",
    metadata: { year: 2010, director: "Christopher Nolan", rating: 8.2 },
  },
  {
    pageContent:
      "A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea",
    metadata: { year: 2006, director: "Satoshi Kon", rating: 8.6 },
  },
  {
    pageContent:
      "A bunch of normal-sized women are supremely wholesome and some men pine after them",
    metadata: { year: 2019, director: "Greta Gerwig", rating: 8.3 },
  },
  {
    pageContent: "Toys come alive and have a blast doing so",
    metadata: { year: 1995, genre: "animated" },
  },
  {
    pageContent: "Three men walk into the Zone, three men walk out of the Zone",
    metadata: {
      year: 1979,
      director: "Andrei Tarkovsky",
      genre: "science fiction",
      rating: 9.9,
    },
  },
  // Wrap each plain record in a Document, preserving the order above.
].map((fields) => new Document(fields));

/**
 * Attributes that queries are allowed to filter on. In this case: genre,
 * year, director, rating, and length of the movie.
 * Each row is `[name, description, type]`; the description and type are
 * used to generate the query prompts.
 */
const attributeRows: [string, string, string][] = [
  ["genre", "The genre of the movie", "string or array of strings"],
  ["year", "The year the movie was released", "number"],
  ["director", "The director of the movie", "string"],
  ["rating", "The rating of the movie (1-10)", "number"],
  ["length", "The length of the movie in minutes", "number"],
];

// Expand each row into the { name, description, type } shape.
const attributeInfo: AttributeInfo[] = attributeRows.map(
  ([name, description, type]) => ({ name, description, type })
);

/**
 * Next, we instantiate a vector store. This is where we store the embeddings of the documents.
 * We also need to provide an embeddings object, which is used to embed the documents.
 * Finally, we build the self-query retriever on top of that vector store.
 */

// Qdrant endpoint and target collection. The URL below points at a local
// instance on port 6333; change both values to match your deployment.
const QDRANT_URL = "http://127.0.0.1:6333";
const QDRANT_COLLECTION_NAME = "some-collection-name";

const client = new QdrantClient({ url: QDRANT_URL });

const embeddings = new OpenAIEmbeddings();
const llm = new OpenAI();
// Short description of what the documents contain; handed to the retriever
// as `documentContents` below.
const documentContents = "Brief summary of a movie";
const vectorStore = await QdrantVectorStore.fromDocuments(docs, embeddings, {
client,
collectionName: QDRANT_COLLECTION_NAME,
});
const selfQueryRetriever = SelfQueryRetriever.fromLLM({
llm,
vectorStore,
documentContents,
attributeInfo,
/**
 * We need a translator that converts the generated queries into a
 * filter format that the vector store can understand. We provide a basic
 * translator here, but you can create your own by extending the BaseTranslator
 * abstract class. Note that the vector store needs to support filtering on the metadata
 * attributes you want to query on.
 */
structuredQueryTranslator: new QdrantTranslator(),
});

/**
 * Now we can query the vector store with natural-language questions such as
 * "Which movies are less than 90 minutes?" or "Which movies are rated higher
 * than 8.5?", including compound ones like "Which movies are either comedy
 * or drama and are less than 90 minutes?".
 * The retriever automatically converts each question into a query that can
 * be used to retrieve documents.
 */
const questions = [
  "Which movies are less than 90 minutes?",
  "Which movies are rated higher than 8.5?",
  "Which cool movies are directed by Greta Gerwig?",
  "Which movies are either comedy or drama and are less than 90 minutes?",
];

const answers = [];
for (const question of questions) {
  // Run the questions one at a time, in the order listed above.
  answers.push(await selfQueryRetriever.getRelevantDocuments(question));
}
console.log(...answers);
4 changes: 4 additions & 0 deletions libs/langchain-community/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -598,6 +598,10 @@ retrievers/zep.cjs
retrievers/zep.js
retrievers/zep.d.ts
retrievers/zep.d.cts
retrievers/self_query/qdrant.cjs
retrievers/self_query/qdrant.js
retrievers/self_query/qdrant.d.ts
retrievers/self_query/qdrant.d.cts
caches/cloudflare_kv.cjs
caches/cloudflare_kv.js
caches/cloudflare_kv.d.ts
Expand Down
2 changes: 2 additions & 0 deletions libs/langchain-community/langchain.config.js
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,7 @@ export const config = {
"retrievers/vectara_summary": "retrievers/vectara_summary",
"retrievers/vespa": "retrievers/vespa",
"retrievers/zep": "retrievers/zep",
"retrievers/self_query/qdrant": "retrievers/self_query/qdrant",
// cache
"caches/cloudflare_kv": "caches/cloudflare_kv",
"caches/ioredis": "caches/ioredis",
Expand Down Expand Up @@ -335,6 +336,7 @@ export const config = {
"retrievers/supabase",
"retrievers/vectara_summary",
"retrievers/zep",
"retrievers/self_query/qdrant",
"cache/cloudflare_kv",
"cache/momento",
"cache/upstash_redis",
Expand Down
17 changes: 15 additions & 2 deletions libs/langchain-community/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@
"@pinecone-database/pinecone": "^1.1.0",
"@planetscale/database": "^1.8.0",
"@premai/prem-sdk": "^0.3.25",
"@qdrant/js-client-rest": "^1.2.0",
"@qdrant/js-client-rest": "^1.8.2",
"@raycast/api": "^1.55.2",
"@rockset/client": "^0.9.1",
"@smithy/eventstream-codec": "^2.0.5",
Expand Down Expand Up @@ -210,7 +210,7 @@
"@pinecone-database/pinecone": "*",
"@planetscale/database": "^1.8.0",
"@premai/prem-sdk": "^0.3.25",
"@qdrant/js-client-rest": "^1.2.0",
"@qdrant/js-client-rest": "^1.8.2",
"@raycast/api": "^1.55.2",
"@rockset/client": "^0.9.1",
"@smithy/eventstream-codec": "^2.0.5",
Expand Down Expand Up @@ -1908,6 +1908,15 @@
"import": "./retrievers/zep.js",
"require": "./retrievers/zep.cjs"
},
"./retrievers/self_query/qdrant": {
"types": {
"import": "./retrievers/self_query/qdrant.d.ts",
"require": "./retrievers/self_query/qdrant.d.cts",
"default": "./retrievers/self_query/qdrant.d.ts"
},
"import": "./retrievers/self_query/qdrant.js",
"require": "./retrievers/self_query/qdrant.cjs"
},
"./caches/cloudflare_kv": {
"types": {
"import": "./caches/cloudflare_kv.d.ts",
Expand Down Expand Up @@ -2890,6 +2899,10 @@
"retrievers/zep.js",
"retrievers/zep.d.ts",
"retrievers/zep.d.cts",
"retrievers/self_query/qdrant.cjs",
"retrievers/self_query/qdrant.js",
"retrievers/self_query/qdrant.d.ts",
"retrievers/self_query/qdrant.d.cts",
"caches/cloudflare_kv.cjs",
"caches/cloudflare_kv.js",
"caches/cloudflare_kv.d.ts",
Expand Down
Loading

0 comments on commit 916114b

Please sign in to comment.