Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(community): add mmr search to pgvector #7438

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 53 additions & 2 deletions libs/langchain-community/src/vectorstores/pgvector.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
import pg, { type Pool, type PoolClient, type PoolConfig } from "pg";
import { VectorStore } from "@langchain/core/vectorstores";
import {
MaxMarginalRelevanceSearchOptions,
VectorStore,
} from "@langchain/core/vectorstores";
import type { EmbeddingsInterface } from "@langchain/core/embeddings";
import { Document } from "@langchain/core/documents";
import { getEnvironmentVariable } from "@langchain/core/utils/env";
import { maximalMarginalRelevance } from "@langchain/core/utils/math";

type Metadata = Record<string, unknown>;

Expand Down Expand Up @@ -609,12 +613,14 @@ export class PGVectorStore extends VectorStore {
* @param query - Query vector.
* @param k - Number of most similar documents to return.
* @param filter - Optional filter to apply to the search.
* @param includeEmbedding - Whether to attach each row's stored embedding to the returned document's metadata, keyed by the vector column name.
* @returns Promise that resolves with an array of tuples, each containing a `Document` and its similarity score.
*/
async similaritySearchVectorWithScore(
query: number[],
k: number,
filter?: this["FilterType"]
filter?: this["FilterType"],
includeEmbedding?: boolean
): Promise<[Document, number][]> {
const embeddingString = `[${query.join(",")}]`;
const _filter: this["FilterType"] = filter ?? {};
Expand Down Expand Up @@ -694,6 +700,9 @@ export class PGVectorStore extends VectorStore {
metadata: doc[this.metadataColumnName],
id: doc[this.idColumnName],
});
if (includeEmbedding) {
document.metadata[this.vectorColumnName] = doc[this.vectorColumnName];
}
results.push([document, doc._distance]);
}
}
Expand Down Expand Up @@ -885,4 +894,46 @@ export class PGVectorStore extends VectorStore {
);
}
}

/**
 * Return documents selected using maximal marginal relevance (MMR).
 * MMR optimizes for similarity to the query AND diversity among the
 * selected documents.
 *
 * The method embeds the query, fetches `fetchK` nearest candidates together
 * with their stored embeddings, runs the MMR selection over those
 * embeddings, and returns the selected documents. The embedding that is
 * temporarily carried in each candidate's metadata (under the vector column
 * name) is removed before the documents are returned.
 *
 * @param query - Text to look up documents similar to.
 * @param options.k - Number of documents to return. Defaults to 4.
 * @param options.fetchK - Number of documents to fetch before passing them
 * to the MMR algorithm. Defaults to 20.
 * @param options.lambda - Number between 0 and 1 that determines the degree
 * of diversity among the results, where 0 corresponds to maximum diversity
 * and 1 to minimum diversity. Defaults to 0.5.
 * @param options.filter - Optional filter forwarded to the similarity search.
 * @returns List of documents selected by maximal marginal relevance.
 */
async maxMarginalRelevanceSearch(
  query: string,
  options: MaxMarginalRelevanceSearchOptions<this["FilterType"]>
): Promise<Document[]> {
  const { k = 4, fetchK = 20, lambda = 0.5, filter } = options;
  const queryEmbedding = await this.embeddings.embedQuery(query);

  // The trailing `true` asks the similarity search to stash each row's
  // embedding in `metadata[this.vectorColumnName]` so MMR can use it.
  const candidates = await this.similaritySearchVectorWithScore(
    queryEmbedding,
    fetchK,
    filter,
    true
  );

  const embeddingList: number[][] = candidates.map(([candidate]) => {
    const rawEmbedding = candidate.metadata[this.vectorColumnName];
    // The driver hands the vector back as text (e.g. "[1,2,3]") unless a
    // type parser is registered, in which case it is already an array and
    // must not be passed through JSON.parse.
    return typeof rawEmbedding === "string"
      ? JSON.parse(rawEmbedding)
      : rawEmbedding;
  });

  const mmrIndexes = maximalMarginalRelevance(
    queryEmbedding,
    embeddingList,
    lambda,
    k
  );

  return mmrIndexes.map((index) => {
    const [selected] = candidates[index];
    // Drop the helper embedding so callers get the same metadata shape as
    // from a regular similarity search instead of a large raw vector.
    delete selected.metadata[this.vectorColumnName];
    return selected;
  });
}
}