From d625ddd5fd55577f85c41e31687e1ed1a00d775b Mon Sep 17 00:00:00 2001 From: Kshitij Wadhwa <kw@rockset.com> Date: Thu, 16 Nov 2023 17:44:38 -0800 Subject: [PATCH] Integrate Rockset as a vector store (#3231) * Integrate Rockset as a vector store * address comments * Mark Rockset as requiring optional dep * Fix lint * Fix build * Format --------- Co-authored-by: jacoblee93 <jacoblee93@gmail.com> --- docs/api_refs/typedoc.json | 1 + .../integrations/vectorstores/rockset.mdx | 26 + examples/package.json | 1 + examples/src/indexes/vector_stores/rockset.ts | 31 ++ langchain/.env.example | 3 + langchain/.gitignore | 3 + langchain/package.json | 13 + langchain/scripts/check-tree-shaking.js | 1 + langchain/scripts/create-entrypoints.js | 2 + langchain/src/load/import_constants.ts | 1 + langchain/src/load/import_type.d.ts | 3 + langchain/src/vectorstores/rockset.ts | 453 ++++++++++++++++++ .../vectorstores/tests/rockset.int.test.ts | 89 ++++ yarn.lock | 50 +- 14 files changed, 675 insertions(+), 2 deletions(-) create mode 100644 docs/core_docs/docs/integrations/vectorstores/rockset.mdx create mode 100644 examples/src/indexes/vector_stores/rockset.ts create mode 100644 langchain/src/vectorstores/rockset.ts create mode 100644 langchain/src/vectorstores/tests/rockset.int.test.ts diff --git a/docs/api_refs/typedoc.json b/docs/api_refs/typedoc.json index 1709b7ea552f..0b191f95bb14 100644 --- a/docs/api_refs/typedoc.json +++ b/docs/api_refs/typedoc.json @@ -120,6 +120,7 @@ "./langchain/src/vectorstores/typeorm.ts", "./langchain/src/vectorstores/myscale.ts", "./langchain/src/vectorstores/redis.ts", + "./langchain/src/vectorstores/rockset.ts", "./langchain/src/vectorstores/typesense.ts", "./langchain/src/vectorstores/singlestore.ts", "./langchain/src/vectorstores/tigris.ts", diff --git a/docs/core_docs/docs/integrations/vectorstores/rockset.mdx b/docs/core_docs/docs/integrations/vectorstores/rockset.mdx new file mode 100644 index 000000000000..a59f60dd4610 --- /dev/null +++ 
b/docs/core_docs/docs/integrations/vectorstores/rockset.mdx @@ -0,0 +1,26 @@ +--- +sidebar_class_name: node-only +--- + +import CodeBlock from "@theme/CodeBlock"; + +# Rockset + +[Rockset](https://rockset.com) is a real-time analytics SQL database that runs in the cloud. +Rockset provides vector search capabilities, in the form of [SQL functions](https://rockset.com/docs/vector-functions/#vector-distance-functions), to support AI applications that rely on text similarity. + +## Setup + +Install the rockset client. + +```bash +yarn add @rockset/client +``` + +### Usage + +import UsageExample from "@examples/indexes/vector_stores/rockset.ts"; + +Below is an example showcasing how to use OpenAI and Rockset to answer questions about a text file: + +<CodeBlock language="typescript">{UsageExample}</CodeBlock> diff --git a/examples/package.json b/examples/package.json index 41045cc37526..6f60d363cf5e 100644 --- a/examples/package.json +++ b/examples/package.json @@ -32,6 +32,7 @@ "@planetscale/database": "^1.8.0", "@prisma/client": "^4.11.0", "@raycast/api": "^1.55.2", + "@rockset/client": "^0.9.1", "@supabase/supabase-js": "^2.10.0", "@tensorflow/tfjs-backend-cpu": "^4.4.0", "@upstash/redis": "^1.20.6", diff --git a/examples/src/indexes/vector_stores/rockset.ts b/examples/src/indexes/vector_stores/rockset.ts new file mode 100644 index 000000000000..037868ff67a4 --- /dev/null +++ b/examples/src/indexes/vector_stores/rockset.ts @@ -0,0 +1,31 @@ +import * as rockset from "@rockset/client"; +import { ChatOpenAI } from "langchain/chat_models/openai"; +import { RetrievalQAChain } from "langchain/chains"; +import { OpenAIEmbeddings } from "langchain/embeddings/openai"; +import { RocksetStore } from "langchain/vectorstores/rockset"; +import { RecursiveCharacterTextSplitter } from "langchain/text_splitter"; +import { readFileSync } from "fs"; + +export const run = async () => { + const store = await RocksetStore.withNewCollection(new OpenAIEmbeddings(), { + client: 
rockset.default.default( + process.env.ROCKSET_API_KEY ?? "", + `https://api.${process.env.ROCKSET_API_REGION ?? "usw2a1"}.rockset.com` + ), + collectionName: "langchain_demo", + }); + + const model = new ChatOpenAI({ modelName: "gpt-3.5-turbo" }); + const chain = RetrievalQAChain.fromLLM(model, store.asRetriever()); + const text = readFileSync("state_of_the_union.txt", "utf8"); + const docs = await new RecursiveCharacterTextSplitter().createDocuments([ + text, + ]); + + await store.addDocuments(docs); + const response = await chain.call({ + query: "What is America's role in Ukraine?", + }); + console.log(response.text); + await store.destroy(); +}; diff --git a/langchain/.env.example b/langchain/.env.example index 1a3a21e60da0..5c9c2524bcd1 100644 --- a/langchain/.env.example +++ b/langchain/.env.example @@ -40,6 +40,9 @@ MYSCALE_USERNAME=ADD_YOURS_HERE MYSCALE_PASSWORD=ADD_YOURS_HERE FIGMA_ACCESS_TOKEN=ADD_YOURS_HERE REDIS_URL=ADD_YOURS_HERE +ROCKSET_API_KEY=ADD_YOURS_HERE +# defaults to "usw2a1" (oregon) +ROCKSET_REGION=ADD_YOURS_HERE SINGLESTORE_HOST=ADD_YOURS_HERE SINGLESTORE_PORT=ADD_YOURS_HERE SINGLESTORE_USERNAME=ADD_YOURS_HERE diff --git a/langchain/.gitignore b/langchain/.gitignore index 648193b1d593..16476fc15bff 100644 --- a/langchain/.gitignore +++ b/langchain/.gitignore @@ -304,6 +304,9 @@ vectorstores/myscale.d.ts vectorstores/redis.cjs vectorstores/redis.js vectorstores/redis.d.ts +vectorstores/rockset.cjs +vectorstores/rockset.js +vectorstores/rockset.d.ts vectorstores/typesense.cjs vectorstores/typesense.js vectorstores/typesense.d.ts diff --git a/langchain/package.json b/langchain/package.json index 653d8888f6c3..a151813e0fb5 100644 --- a/langchain/package.json +++ b/langchain/package.json @@ -316,6 +316,9 @@ "vectorstores/redis.cjs", "vectorstores/redis.js", "vectorstores/redis.d.ts", + "vectorstores/rockset.cjs", + "vectorstores/rockset.js", + "vectorstores/rockset.d.ts", "vectorstores/typesense.cjs", "vectorstores/typesense.js", 
"vectorstores/typesense.d.ts", @@ -857,6 +860,7 @@ "@planetscale/database": "^1.8.0", "@qdrant/js-client-rest": "^1.2.0", "@raycast/api": "^1.55.2", + "@rockset/client": "^0.9.1", "@smithy/eventstream-codec": "^2.0.5", "@smithy/protocol-http": "^3.0.6", "@smithy/signature-v4": "^2.0.10", @@ -993,6 +997,7 @@ "@planetscale/database": "^1.8.0", "@qdrant/js-client-rest": "^1.2.0", "@raycast/api": "^1.55.2", + "@rockset/client": "^0.9.1", "@smithy/eventstream-codec": "^2.0.5", "@smithy/protocol-http": "^3.0.6", "@smithy/signature-v4": "^2.0.10", @@ -1148,6 +1153,9 @@ "@raycast/api": { "optional": true }, + "@rockset/client": { + "optional": true + }, "@smithy/eventstream-codec": { "optional": true }, @@ -1907,6 +1915,11 @@ "import": "./vectorstores/redis.js", "require": "./vectorstores/redis.cjs" }, + "./vectorstores/rockset": { + "types": "./vectorstores/rockset.d.ts", + "import": "./vectorstores/rockset.js", + "require": "./vectorstores/rockset.cjs" + }, "./vectorstores/typesense": { "types": "./vectorstores/typesense.d.ts", "import": "./vectorstores/typesense.js", diff --git a/langchain/scripts/check-tree-shaking.js b/langchain/scripts/check-tree-shaking.js index 5fa95b186b99..106a43421af4 100644 --- a/langchain/scripts/check-tree-shaking.js +++ b/langchain/scripts/check-tree-shaking.js @@ -31,6 +31,7 @@ export function listExternals() { "convex", "convex/server", "convex/values", + "@rockset/client/dist/codegen/api.js", "mysql2/promise", "pdf-parse/lib/pdf.js/v1.10.100/build/pdf.js", "@zilliz/milvus2-sdk-node/dist/milvus/const/Milvus.js", diff --git a/langchain/scripts/create-entrypoints.js b/langchain/scripts/create-entrypoints.js index 8d680b5097c4..2e098a0e9e81 100644 --- a/langchain/scripts/create-entrypoints.js +++ b/langchain/scripts/create-entrypoints.js @@ -119,6 +119,7 @@ const entrypoints = { "vectorstores/typeorm": "vectorstores/typeorm", "vectorstores/myscale": "vectorstores/myscale", "vectorstores/redis": "vectorstores/redis", + "vectorstores/rockset": 
"vectorstores/rockset", "vectorstores/typesense": "vectorstores/typesense", "vectorstores/singlestore": "vectorstores/singlestore", "vectorstores/tigris": "vectorstores/tigris", @@ -393,6 +394,7 @@ const requiresOptionalDependency = [ "vectorstores/myscale", "vectorstores/neo4j_vector", "vectorstores/redis", + "vectorstores/rockset", "vectorstores/singlestore", "vectorstores/typesense", "vectorstores/tigris", diff --git a/langchain/src/load/import_constants.ts b/langchain/src/load/import_constants.ts index d49c73496311..0663a2e468f2 100644 --- a/langchain/src/load/import_constants.ts +++ b/langchain/src/load/import_constants.ts @@ -65,6 +65,7 @@ export const optionalImportEntrypoints = [ "langchain/vectorstores/typeorm", "langchain/vectorstores/myscale", "langchain/vectorstores/redis", + "langchain/vectorstores/rockset", "langchain/vectorstores/typesense", "langchain/vectorstores/singlestore", "langchain/vectorstores/tigris", diff --git a/langchain/src/load/import_type.d.ts b/langchain/src/load/import_type.d.ts index 04db2e05870f..8afff8e158cc 100644 --- a/langchain/src/load/import_type.d.ts +++ b/langchain/src/load/import_type.d.ts @@ -193,6 +193,9 @@ export interface OptionalImportMap { "langchain/vectorstores/redis"?: | typeof import("../vectorstores/redis.js") | Promise<typeof import("../vectorstores/redis.js")>; + "langchain/vectorstores/rockset"?: + | typeof import("../vectorstores/rockset.js") + | Promise<typeof import("../vectorstores/rockset.js")>; "langchain/vectorstores/typesense"?: | typeof import("../vectorstores/typesense.js") | Promise<typeof import("../vectorstores/typesense.js")>; diff --git a/langchain/src/vectorstores/rockset.ts b/langchain/src/vectorstores/rockset.ts new file mode 100644 index 000000000000..38a4f21dc5e3 --- /dev/null +++ b/langchain/src/vectorstores/rockset.ts @@ -0,0 +1,453 @@ +import { MainApi } from "@rockset/client"; +import type { CreateCollectionRequest } from "@rockset/client/dist/codegen/api.d.ts"; +import { Collection } 
from "@rockset/client/dist/codegen/api.js"; + +import { VectorStore } from "./base.js"; +import { Embeddings } from "../embeddings/base.js"; +import { Document } from "../document.js"; + +/** + * Generic Rockset vector storage error + */ +export class RocksetStoreError extends Error { + /** + * Constructs a RocksetStoreError + * @param message The error message + */ + constructor(message: string) { + super(message); + this.name = this.constructor.name; + } +} + +/** + * Error that is thrown when a RocksetStore function is called + * after `destroy()` is called (meaning the collection would be + * deleted). + */ +export class RocksetStoreDestroyedError extends RocksetStoreError { + constructor() { + super("The Rockset store has been destroyed"); + this.name = this.constructor.name; + } +} + +/** + * Functions to measure vector distance/similarity by. + * See https://rockset.com/docs/vector-functions/#vector-distance-functions + * @enum SimilarityMetric + */ +export const SimilarityMetric = { + CosineSimilarity: "COSINE_SIM", + EuclideanDistance: "EUCLIDEAN_DIST", + DotProduct: "DOT_PRODUCT", +} as const; + +export type SimilarityMetric = + (typeof SimilarityMetric)[keyof typeof SimilarityMetric]; + +interface CollectionNotFoundError { + message_key: string; +} + +/** + * Vector store arguments + * @interface RocksetLibArgs + */ +export interface RocksetLibArgs { + /** + * The rockset client object constructed with `rocksetConfigure` + * @type {MainApi} + */ + client: MainApi; + /** + * The name of the Rockset collection to store vectors + * @type {string} + */ + collectionName: string; + /** + * The name of the Rockset workspace that holds @member collectionName + * @type {string} + */ + workspaceName?: string; + /** + * The name of the collection column to contain page content of documents + * @type {string} + */ + textKey?: string; + /** + * The name of the collection column to contain vectors + * @type {string} + */ + embeddingKey?: string; + /** + * The SQL 
`WHERE` clause to filter by + * @type {string} + */ + filter?: string; + /** + * The metric used to measure vector relationship + * @type {SimilarityMetric} + */ + similarityMetric?: SimilarityMetric; +} + +/** + * Exposes Rockset's vector store/search functionality + */ +export class RocksetStore extends VectorStore { + declare FilterType: string; + + client: MainApi; + + collectionName: string; + + workspaceName: string; + + textKey: string; + + embeddingKey: string; + + filter?: string; + + private _similarityMetric: SimilarityMetric; + + private similarityOrder: "ASC" | "DESC"; + + private destroyed: boolean; + + /** + * Gets a string representation of the type of this VectorStore + * @returns {"rockset"} + */ + _vectorstoreType(): "rockset" { + return "rockset"; + } + + /** + * Constructs a new RocksetStore + * @param {Embeddings} embeddings Object used to embed queries and + * page content + * @param {RocksetLibArgs} args + */ + constructor(embeddings: Embeddings, args: RocksetLibArgs) { + super(embeddings, args); + + this.embeddings = embeddings; + this.client = args.client; + this.collectionName = args.collectionName; + this.workspaceName = args.workspaceName ?? "commons"; + this.textKey = args.textKey ?? "text"; + this.embeddingKey = args.embeddingKey ?? "embedding"; + this.filter = args.filter; + this.similarityMetric = + args.similarityMetric ?? SimilarityMetric.CosineSimilarity; + this.setSimilarityOrder(); + } + + /** + * Sets the object's similarity order based on what + * SimilarityMetric is being used + */ + private setSimilarityOrder() { + this.checkIfDestroyed(); + this.similarityOrder = + this.similarityMetric === SimilarityMetric.EuclideanDistance + ? "ASC" + : "DESC"; + } + + /** + * Embeds and adds Documents to the store. 
+ * @param {Document[]} documents The documents to store + * @returns {Promise<string[]?>} The _id's of the documents added + */ + async addDocuments(documents: Document[]): Promise<string[] | undefined> { + const texts = documents.map(({ pageContent }) => pageContent); + return await this.addVectors( + await this.embeddings.embedDocuments(texts), + documents + ); + } + + /** + * Adds vectors to the store given their corresponding Documents + * @param {number[][]} vectors The vectors to store + * @param {Document[]} documents The Documents they represent + * @return {Promise<string[]?>} The _id's of the added documents + */ + async addVectors(vectors: number[][], documents: Document[]) { + this.checkIfDestroyed(); + const rocksetDocs = []; + for (let i = 0; i < documents.length; i += 1) { + const currDoc = documents[i]; + const currVector = vectors[i]; + rocksetDocs.push({ + [this.textKey]: currDoc.pageContent, + [this.embeddingKey]: currVector, + ...currDoc.metadata, + }); + } + + return ( + await this.client.documents.addDocuments( + this.workspaceName, + this.collectionName, + { + data: rocksetDocs, + } + ) + ).data?.map((docStatus) => docStatus._id || ""); + } + + /** + * Deletes Rockset documents given their _id's + * @param {string[]} ids The IDs to remove documents with + */ + async delete(ids: string[]): Promise<void> { + this.checkIfDestroyed(); + await this.client.documents.deleteDocuments( + this.workspaceName, + this.collectionName, + { + data: ids.map((id) => ({ _id: id })), + } + ); + } + + /** + * Gets the most relevant documents to a query along + * with their similarity score. 
The returned documents + * are ordered by similarity (most similar at the first + * index) + * @param {number[]} query The embedded query to search + * the store by + * @param {number} k The number of documents to retrieve + * @param {string?} filter The SQL `WHERE` clause to filter by + */ + async similaritySearchVectorWithScore( + query: number[], + k: number, + filter?: string + ): Promise<[Document, number][]> { + this.checkIfDestroyed(); + if (filter && this.filter) { + throw new RocksetStoreError( + "cannot provide both `filter` and `this.filter`" + ); + } + const similarityKey = "similarity"; + const _filter = filter ?? this.filter; + return ( + ( + await this.client.queries.query({ + sql: { + query: ` + SELECT + * EXCEPT("${this.embeddingKey}"), + "${this.textKey}", + ${this.similarityMetric}(:query, "${ + this.embeddingKey + }") AS "${similarityKey}" + FROM + "${this.workspaceName}"."${this.collectionName}" + ${_filter ? `WHERE ${_filter}` : ""} + ORDER BY + "${similarityKey}" ${this.similarityOrder} + LIMIT + ${k} + `, + parameters: [ + { + name: "query", + type: "", + value: `[${query.toString()}]`, + }, + ], + }, + }) + ).results?.map((rocksetDoc) => [ + new Document<Record<string, object>>({ + pageContent: rocksetDoc[this.textKey], + metadata: (({ + [this.textKey]: t, + [similarityKey]: s, + ...rocksetDoc + }) => rocksetDoc)(rocksetDoc), + }), + rocksetDoc[similarityKey] as number, + ]) ?? [] + ); + } + + /** + * Constructs and returns a RocksetStore object given texts to store. 
+ * @param {string[]} texts The texts to store + * @param {object[] | object} metadatas The metadatas that correspond + * to @param texts + * @param {Embeddings} embeddings The object used to embed queries + * and page content + * @param {RocksetLibArgs} dbConfig The options to be passed into the + * RocksetStore constructor + * @returns {RocksetStore} + */ + static async fromTexts( + texts: string[], + metadatas: object[] | object, + embeddings: Embeddings, + dbConfig: RocksetLibArgs + ): Promise<RocksetStore> { + const docs: Document[] = []; + for (let i = 0; i < texts.length; i += 1) { + const metadata = Array.isArray(metadatas) ? metadatas[i] : metadatas; + const newDoc = new Document({ + pageContent: texts[i], + metadata, + }); + docs.push(newDoc); + } + + return RocksetStore.fromDocuments(docs, embeddings, dbConfig); + } + + /** + * Constructs, adds docs to, and returns a RocksetStore object + * @param {Document[]} docs The Documents to store + * @param {Embeddings} embeddings The object used to embed queries + * and page content + * @param {RocksetLibArgs} dbConfig The options to be passed into the + * RocksetStore constructor + * @returns {RocksetStore} + */ + static async fromDocuments( + docs: Document[], + embeddings: Embeddings, + dbConfig: RocksetLibArgs + ): Promise<RocksetStore> { + const args = { ...dbConfig, textKey: dbConfig.textKey ?? "text" }; + const instance = new this(embeddings, args); + await instance.addDocuments(docs); + return instance; + } + + /** + * Checks if a Rockset collection exists. + * @param {RocksetLibArgs} dbConfig The object containing the collection + * and workspace names + * @return {boolean} whether the collection exists + */ + private static async collectionExists(dbConfig: RocksetLibArgs) { + try { + await dbConfig.client.collections.getCollection( + dbConfig.workspaceName ?? 
"commons", + dbConfig.collectionName + ); + } catch (err) { + if ( + (err as CollectionNotFoundError).message_key === + "COLLECTION_DOES_NOT_EXIST" + ) { + return false; + } + throw err; + } + return true; + } + + /** + * Checks whether a Rockset collection is ready to be queried. + * @param {RocksetLibArgs} dbConfig The object containing the collection + * name and workspace + * @return {boolean} whether the collection is ready + */ + private static async collectionReady(dbConfig: RocksetLibArgs) { + return ( + ( + await dbConfig.client.collections.getCollection( + dbConfig.workspaceName ?? "commons", + dbConfig.collectionName + ) + ).data?.status === Collection.StatusEnum.READY + ); + } + + /** + * Deletes the collection this RocksetStore uses + * @param {boolean?} waitUntilDeletion Whether to wait until the + * collection no longer + * exists + */ + async destroy(waitUntilDeletion?: boolean) { + await this.client.collections.deleteCollection( + this.workspaceName, + this.collectionName + ); + this.destroyed = true; + if (waitUntilDeletion) { + while ( + await RocksetStore.collectionExists({ + collectionName: this.collectionName, + client: this.client, + }) + ); + } + } + + /** + * Checks if this RocksetStore has been destroyed. + * @throws {RocksetStoreDestroyedError} if it has. + */ + private checkIfDestroyed() { + if (this.destroyed) { + throw new RocksetStoreDestroyedError(); + } + } + + /** + * Creates a new Rockset collection and returns a RocksetStore that + * uses it + * @param {Embeddings} embeddings Object used to embed queries and + * page content + * @param {RocksetLibArgs} dbConfig The options to be passed into the + * RocksetStore constructor + * @param {CreateCollectionRequest?} collectionOptions The arguments to send with the + * HTTP request when creating the + * collection. Setting a field mapping + * that `VECTOR_ENFORCE`s is recommended + * when using this function. 
See + * https://rockset.com/docs/vector-functions/#vector_enforce + * @returns {RocksetStore} + */ + static async withNewCollection( + embeddings: Embeddings, + dbConfig: RocksetLibArgs, + collectionOptions?: CreateCollectionRequest + ): Promise<RocksetStore> { + if ( + collectionOptions?.name && + dbConfig.collectionName !== collectionOptions?.name + ) { + throw new RocksetStoreError( + "`dbConfig.name` and `collectionOptions.name` do not match" + ); + } + await dbConfig.client.collections.createCollection( + dbConfig.workspaceName ?? "commons", + collectionOptions || { name: dbConfig.collectionName } + ); + while ( + !(await this.collectionExists(dbConfig)) || + !(await this.collectionReady(dbConfig)) + ); + return new this(embeddings, dbConfig); + } + + public get similarityMetric() { + return this._similarityMetric; + } + + public set similarityMetric(metric: SimilarityMetric) { + this._similarityMetric = metric; + this.setSimilarityOrder(); + } +} diff --git a/langchain/src/vectorstores/tests/rockset.int.test.ts b/langchain/src/vectorstores/tests/rockset.int.test.ts new file mode 100644 index 000000000000..88d93bac8283 --- /dev/null +++ b/langchain/src/vectorstores/tests/rockset.int.test.ts @@ -0,0 +1,89 @@ +/* eslint-disable no-process-env */ +/* eslint-disable @typescript-eslint/no-non-null-assertion */ +import rockset from "@rockset/client"; +import { test, expect } from "@jest/globals"; +import { OpenAIEmbeddings } from "../../embeddings/openai.js"; +import { RocksetStore, SimilarityMetric } from "../rockset.js"; +import { Document } from "../../document.js"; +import { formatDocumentsAsString } from "../../util/document.js"; + +const getPageContents = formatDocumentsAsString; + +const embeddings = new OpenAIEmbeddings(); +let store: RocksetStore | undefined; + +const docs = [ + new Document({ + pageContent: "Tomatoes are red.", + metadata: { subject: "tomatoes" }, + }), + new Document({ + pageContent: "Tomatoes are small.", + metadata: { subject: 
"tomatoes" }, + }), + new Document({ + pageContent: "Apples are juicy.", + metadata: { subject: "apples" }, + }), +]; + +test.skip("create new collection as a RocksetVectorStore", async () => { + store = await RocksetStore.withNewCollection(embeddings, { + collectionName: "langchain_demo", + client: rockset.default( + process.env.ROCKSET_API_KEY ?? "", + `https://api.${process.env.ROCKSET_API_REGION ?? "usw2a1"}.rockset.com` + ), + }); +}); + +test.skip("add to RocksetVectorStore", async () => { + expect(store).toBeDefined(); + expect((await store!.addDocuments(docs))?.length).toBe(docs.length); +}); + +test.skip("query RocksetVectorStore with cosine sim", async () => { + expect(store).toBeDefined(); + const relevantDocs = await store!.similaritySearch( + "What color are tomatoes?" + ); + expect(getPageContents(relevantDocs)).toEqual(getPageContents(relevantDocs)); +}); + +test.skip("query RocksetVectorStore with dot product", async () => { + expect(store).toBeDefined(); + store!.similarityMetric = SimilarityMetric.DotProduct; + const relevantDocs = await store!.similaritySearch( + "What color are tomatoes?" + ); + expect(getPageContents(relevantDocs)).toEqual(getPageContents(relevantDocs)); +}); + +test.skip("query RocksetVectorStore with euclidean distance", async () => { + expect(store).toBeDefined(); + store!.similarityMetric = SimilarityMetric.EuclideanDistance; + const relevantDocs = await store!.similaritySearch( + "What color are tomatoes?" 
+ ); + expect(getPageContents(relevantDocs)).toEqual(getPageContents(relevantDocs)); +}); + +test.skip("query RocksetVectorStore with metadata filter", async () => { + expect(store).toBeDefined(); + const relevantDocs = await store!.similaritySearch( + "What color are tomatoes?", + undefined, + "subject='apples'" + ); + expect(relevantDocs.length).toBe(1); + expect(getPageContents(relevantDocs)).toEqual(getPageContents([docs[2]])); +}); + +test.skip("query RocksetVectorStore with k", async () => { + expect(store).toBeDefined(); + const relevantDocs = await store!.similaritySearch( + "What color are tomatoes?", + 1 + ); + expect(relevantDocs.length).toBe(1); +}); diff --git a/yarn.lock b/yarn.lock index d93ca570c2e6..5aadd9a48b25 100644 --- a/yarn.lock +++ b/yarn.lock @@ -8918,6 +8918,18 @@ __metadata: languageName: node linkType: hard +"@rockset/client@npm:^0.9.1": + version: 0.9.1 + resolution: "@rockset/client@npm:0.9.1" + dependencies: + "@types/node-fetch": ^2.5.3 + fetch-ponyfill: ^7.1.0 + node-fetch: ^2.6.7 + url: ^0.11.0 + checksum: eed056b229d47ba0de4e092c1157061c0602ae62f9a650a8a089a1977a86f40a9465d8286ca5fc7075d315f0444d60ea13d39e39934373472e7dc9c72f6375fc + languageName: node + linkType: hard + "@rushstack/eslint-patch@npm:^1.3.3": version: 1.5.1 resolution: "@rushstack/eslint-patch@npm:1.5.1" @@ -11427,6 +11439,16 @@ __metadata: languageName: node linkType: hard +"@types/node-fetch@npm:^2.5.3": + version: 2.6.9 + resolution: "@types/node-fetch@npm:2.6.9" + dependencies: + "@types/node": "*" + form-data: ^4.0.0 + checksum: 212269aff4b251477c13c33cee6cea23e4fd630be6c0bfa3714968cce7efd7055b52f2f82aab3394596d8c758335cc802e7c5fa3f775e7f2a472fa914c90dc15 + languageName: node + linkType: hard + "@types/node@npm:*": version: 18.14.6 resolution: "@types/node@npm:18.14.6" @@ -17693,6 +17715,7 @@ __metadata: "@planetscale/database": ^1.8.0 "@prisma/client": ^4.11.0 "@raycast/api": ^1.55.2 + "@rockset/client": ^0.9.1 "@supabase/supabase-js": ^2.10.0 
"@tensorflow/tfjs-backend-cpu": ^4.4.0 "@tsconfig/recommended": ^1.0.2 @@ -18166,6 +18189,15 @@ __metadata: languageName: node linkType: hard +"fetch-ponyfill@npm:^7.1.0": + version: 7.1.0 + resolution: "fetch-ponyfill@npm:7.1.0" + dependencies: + node-fetch: ~2.6.1 + checksum: 7fd497dd5f7db890e80193de5bc1cd0115a62400272cd9a992849288e66886fcdb0724ea1ed161be7b8db2daeafda8c58d0259acdda42d6561155dbcdbb0720a + languageName: node + linkType: hard + "fflate@npm:^0.4.1": version: 0.4.8 resolution: "fflate@npm:0.4.8" @@ -22228,6 +22260,7 @@ __metadata: "@planetscale/database": ^1.8.0 "@qdrant/js-client-rest": ^1.2.0 "@raycast/api": ^1.55.2 + "@rockset/client": ^0.9.1 "@smithy/eventstream-codec": ^2.0.5 "@smithy/protocol-http": ^3.0.6 "@smithy/signature-v4": ^2.0.10 @@ -22383,6 +22416,7 @@ __metadata: "@planetscale/database": ^1.8.0 "@qdrant/js-client-rest": ^1.2.0 "@raycast/api": ^1.55.2 + "@rockset/client": ^0.9.1 "@smithy/eventstream-codec": ^2.0.5 "@smithy/protocol-http": ^3.0.6 "@smithy/signature-v4": ^2.0.10 @@ -22509,6 +22543,8 @@ __metadata: optional: true "@raycast/api": optional: true + "@rockset/client": + optional: true "@smithy/eventstream-codec": optional: true "@smithy/protocol-http": @@ -26835,7 +26871,7 @@ __metadata: languageName: node linkType: hard -"punycode@npm:^1.3.2": +"punycode@npm:^1.3.2, punycode@npm:^1.4.1": version: 1.4.1 resolution: "punycode@npm:1.4.1" checksum: fa6e698cb53db45e4628559e557ddaf554103d2a96a1d62892c8f4032cd3bc8871796cae9eabc1bc700e2b6677611521ce5bb1d9a27700086039965d0cf34518 @@ -26927,7 +26963,7 @@ __metadata: languageName: node linkType: hard -"qs@npm:^6.7.0": +"qs@npm:^6.11.2, qs@npm:^6.7.0": version: 6.11.2 resolution: "qs@npm:6.11.2" dependencies: @@ -30994,6 +31030,16 @@ __metadata: languageName: node linkType: hard +"url@npm:^0.11.0": + version: 0.11.3 + resolution: "url@npm:0.11.3" + dependencies: + punycode: ^1.4.1 + qs: ^6.11.2 + checksum: 
f9e7886f46a16f96d2e42fbcc5d682c231c55ef5442c1ff66150c0f6556f6e3a97d094a84f51be15ec2432711d212eb60426659ce418f5fcadeaa3f601532c4e + languageName: node + linkType: hard + "use-composed-ref@npm:^1.3.0": version: 1.3.0 resolution: "use-composed-ref@npm:1.3.0"