From dd376f7127a41a1947ec578b2417d66215769bb5 Mon Sep 17 00:00:00 2001 From: anadi45 Date: Fri, 27 Dec 2024 20:08:57 +0530 Subject: [PATCH 1/3] feat: add mmr search to pgvector --- .../src/vectorstores/pgvector.ts | 67 +++++++++++++++++-- 1 file changed, 62 insertions(+), 5 deletions(-) diff --git a/libs/langchain-community/src/vectorstores/pgvector.ts b/libs/langchain-community/src/vectorstores/pgvector.ts index 4b5e39b95827..0557274998bb 100644 --- a/libs/langchain-community/src/vectorstores/pgvector.ts +++ b/libs/langchain-community/src/vectorstores/pgvector.ts @@ -1,8 +1,12 @@ import pg, { type Pool, type PoolClient, type PoolConfig } from "pg"; -import { VectorStore } from "@langchain/core/vectorstores"; +import { + MaxMarginalRelevanceSearchOptions, + VectorStore, +} from "@langchain/core/vectorstores"; import type { EmbeddingsInterface } from "@langchain/core/embeddings"; import { Document } from "@langchain/core/documents"; import { getEnvironmentVariable } from "@langchain/core/utils/env"; +import { maximalMarginalRelevance } from "@langchain/core/utils/math"; type Metadata = Record; @@ -261,9 +265,15 @@ export class PGVectorStore extends VectorStore { this.chunkSize = config.chunkSize ?? 500; this.distanceStrategy = config.distanceStrategy ?? this.distanceStrategy; - this._verbose = - getEnvironmentVariable("LANGCHAIN_VERBOSE") === "true" ?? - !!config.verbose; + const langchainVerbose = getEnvironmentVariable("LANGCHAIN_VERBOSE"); + + if (langchainVerbose === "true") { + this._verbose = true; + } else if (langchainVerbose === "false") { + this._verbose = false; + } else { + this._verbose = config.verbose; + } } get computedTableName() { @@ -603,12 +613,14 @@ export class PGVectorStore extends VectorStore { * @param query - Query vector. * @param k - Number of most similar documents to return. * @param filter - Optional filter to apply to the search. + * @param includeEmbedding Whether to include the embedding vectors in the results. 
* @returns Promise that resolves with an array of tuples, each containing a `Document` and its similarity score. */ async similaritySearchVectorWithScore( query: number[], k: number, - filter?: this["FilterType"] + filter?: this["FilterType"], + includeEmbedding?: boolean ): Promise<[Document, number][]> { const embeddingString = `[${query.join(",")}]`; const _filter: this["FilterType"] = filter ?? {}; @@ -688,6 +700,9 @@ export class PGVectorStore extends VectorStore { metadata: doc[this.metadataColumnName], id: doc[this.idColumnName], }); + if (includeEmbedding) { + document.metadata[this.vectorColumnName] = doc[this.vectorColumnName]; + } results.push([document, doc._distance]); } } @@ -879,4 +894,46 @@ export class PGVectorStore extends VectorStore { ); } } + + /** + * Return documents selected using the maximal marginal relevance. + * Maximal marginal relevance optimizes for similarity to the query AND + * diversity among selected documents. + * @param query Text to look up documents similar to. + * @param options.k=4 Number of documents to return. + * @param options.fetchK=20 Number of documents to fetch before passing to + * the MMR algorithm. + * @param options.lambda=0.5 Number between 0 and 1 that determines the + * degree of diversity among the results, where 0 corresponds to maximum + * diversity and 1 to minimum diversity. + * @returns List of documents selected by maximal marginal relevance. 
+ */ + async maxMarginalRelevanceSearch( + query: string, + options: MaxMarginalRelevanceSearchOptions + ): Promise { + const { k = 4, fetchK = 20, lambda = 0.5, filter } = options; + const queryEmbedding = await this.embeddings.embedQuery(query); + + const docs = await this.similaritySearchVectorWithScore( + queryEmbedding, + fetchK, + filter, + true + ); + + const embeddingList = docs.map( + (doc) => doc[0].metadata[this.vectorColumnName] + ); + + const mmrIndexes = maximalMarginalRelevance( + queryEmbedding, + embeddingList, + lambda, + k + ); + + const mmrDocs = mmrIndexes.map((index) => docs[index][0]); + return mmrDocs; + } } From 5edb746eb321a085062f67365f443c65202939b4 Mon Sep 17 00:00:00 2001 From: anadi45 Date: Fri, 27 Dec 2024 20:13:36 +0530 Subject: [PATCH 2/3] fix: parse array from string --- libs/langchain-community/src/vectorstores/pgvector.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libs/langchain-community/src/vectorstores/pgvector.ts b/libs/langchain-community/src/vectorstores/pgvector.ts index 0557274998bb..de74f4f09163 100644 --- a/libs/langchain-community/src/vectorstores/pgvector.ts +++ b/libs/langchain-community/src/vectorstores/pgvector.ts @@ -922,8 +922,8 @@ export class PGVectorStore extends VectorStore { true ); - const embeddingList = docs.map( - (doc) => doc[0].metadata[this.vectorColumnName] + const embeddingList = docs.map((doc) => + JSON.parse(doc[0].metadata[this.vectorColumnName]) ); const mmrIndexes = maximalMarginalRelevance( From 0827785a3c6b8f36ea5ca9cfe350017d50a023e6 Mon Sep 17 00:00:00 2001 From: anadi45 Date: Tue, 31 Dec 2024 23:11:50 +0530 Subject: [PATCH 3/3] refactor: private method for similarity search --- .../src/vectorstores/pgvector.ts | 26 ++++++++++++++----- 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/libs/langchain-community/src/vectorstores/pgvector.ts b/libs/langchain-community/src/vectorstores/pgvector.ts index de74f4f09163..b41b714038b6 100644 --- 
a/libs/langchain-community/src/vectorstores/pgvector.ts +++ b/libs/langchain-community/src/vectorstores/pgvector.ts @@ -606,17 +606,14 @@ export class PGVectorStore extends VectorStore { } /** - * Method to perform a similarity search in the vector store. It returns - * the `k` most similar documents to the query vector, along with their - * similarity scores. - * + * Method to perform a similarity search in the vector store. It returns the `k` most similar documents to the query vector, along with their similarity scores. * @param query - Query vector. * @param k - Number of most similar documents to return. * @param filter - Optional filter to apply to the search. * @param includeEmbedding Whether to include the embedding vectors in the results. * @returns Promise that resolves with an array of tuples, each containing a `Document` and its similarity score. */ - async similaritySearchVectorWithScore( + private async searchPostgres( query: number[], k: number, filter?: this["FilterType"], @@ -709,6 +706,23 @@ export class PGVectorStore extends VectorStore { return results; } + /** + * Method to perform a similarity search in the vector store. It returns + * the `k` most similar documents to the query vector, along with their + * similarity scores. + * @param query - Query vector. + * @param k - Number of most similar documents to return. + * @param filter - Optional filter to apply to the search. + * @returns Promise that resolves with an array of tuples, each containing a `Document` and its similarity score. + */ + async similaritySearchVectorWithScore( + query: number[], + k: number, + filter?: this["FilterType"] + ): Promise<[Document, number][]> { + return this.searchPostgres(query, k, filter, false); + } + /** * Method to ensure the existence of the table in the database. It creates * the table if it does not already exist. 
@@ -915,7 +929,7 @@ export class PGVectorStore extends VectorStore { const { k = 4, fetchK = 20, lambda = 0.5, filter } = options; const queryEmbedding = await this.embeddings.embedQuery(query); - const docs = await this.similaritySearchVectorWithScore( + const docs = await this.searchPostgres( queryEmbedding, fetchK, filter,