From d625ddd5fd55577f85c41e31687e1ed1a00d775b Mon Sep 17 00:00:00 2001
From: Kshitij Wadhwa <kw@rockset.com>
Date: Thu, 16 Nov 2023 17:44:38 -0800
Subject: [PATCH] Integrate Rockset as a vector store (#3231)

* Integrate Rockset as a vector store

* address comments

* Mark Rockset as requiring optional dep

* Fix lint

* Fix build

* Format

---------

Co-authored-by: jacoblee93 <jacoblee93@gmail.com>
---
 docs/api_refs/typedoc.json                    |   1 +
 .../integrations/vectorstores/rockset.mdx     |  26 +
 examples/package.json                         |   1 +
 examples/src/indexes/vector_stores/rockset.ts |  31 ++
 langchain/.env.example                        |   3 +
 langchain/.gitignore                          |   3 +
 langchain/package.json                        |  13 +
 langchain/scripts/check-tree-shaking.js       |   1 +
 langchain/scripts/create-entrypoints.js       |   2 +
 langchain/src/load/import_constants.ts        |   1 +
 langchain/src/load/import_type.d.ts           |   3 +
 langchain/src/vectorstores/rockset.ts         | 453 ++++++++++++++++++
 .../vectorstores/tests/rockset.int.test.ts    |  89 ++++
 yarn.lock                                     |  50 +-
 14 files changed, 675 insertions(+), 2 deletions(-)
 create mode 100644 docs/core_docs/docs/integrations/vectorstores/rockset.mdx
 create mode 100644 examples/src/indexes/vector_stores/rockset.ts
 create mode 100644 langchain/src/vectorstores/rockset.ts
 create mode 100644 langchain/src/vectorstores/tests/rockset.int.test.ts

diff --git a/docs/api_refs/typedoc.json b/docs/api_refs/typedoc.json
index 1709b7ea552f..0b191f95bb14 100644
--- a/docs/api_refs/typedoc.json
+++ b/docs/api_refs/typedoc.json
@@ -120,6 +120,7 @@
     "./langchain/src/vectorstores/typeorm.ts",
     "./langchain/src/vectorstores/myscale.ts",
     "./langchain/src/vectorstores/redis.ts",
+    "./langchain/src/vectorstores/rockset.ts",
     "./langchain/src/vectorstores/typesense.ts",
     "./langchain/src/vectorstores/singlestore.ts",
     "./langchain/src/vectorstores/tigris.ts",
diff --git a/docs/core_docs/docs/integrations/vectorstores/rockset.mdx b/docs/core_docs/docs/integrations/vectorstores/rockset.mdx
new file mode 100644
index 000000000000..a59f60dd4610
--- /dev/null
+++ b/docs/core_docs/docs/integrations/vectorstores/rockset.mdx
@@ -0,0 +1,26 @@
+---
+sidebar_class_name: node-only
+---
+
+import CodeBlock from "@theme/CodeBlock";
+
+# Rockset
+
+[Rockset](https://rockset.com) is a real-time analytics SQL database that runs in the cloud.
+Rockset provides vector search capabilities, in the form of [SQL functions](https://rockset.com/docs/vector-functions/#vector-distance-functions), to support AI applications that rely on text similarity.
+
+## Setup
+
+Install the Rockset client.
+
+```bash
+yarn add @rockset/client
+```
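+
+You will also need a Rockset API key. The usage example below reads it from the `ROCKSET_API_KEY` environment variable, along with an optional `ROCKSET_API_REGION` (defaulting to `usw2a1`). A minimal sketch of constructing the Rockset client yourself, using those same environment variables:
+
+```typescript
+import * as rockset from "@rockset/client";
+
+// Construct the Rockset client from an API key and the regional API server URL
+// (mirrors the usage example below).
+const client = rockset.default.default(
+  process.env.ROCKSET_API_KEY ?? "",
+  `https://api.${process.env.ROCKSET_API_REGION ?? "usw2a1"}.rockset.com`
+);
+```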
+
+### Usage
+
+import UsageExample from "@examples/indexes/vector_stores/rockset.ts";
+
+Below is an example showcasing how to use OpenAI and Rockset to answer questions about a text file:
+
+<CodeBlock language="typescript">{UsageExample}</CodeBlock>
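+
+Results can also be narrowed with a SQL `WHERE` clause, either by setting `filter` in the store's constructor arguments or by passing one to `similaritySearch`. A minimal sketch, assuming a `store` created as above whose documents carry a `subject` metadata field:
+
+```typescript
+// Return up to 5 documents whose "subject" column equals "apples".
+const filteredDocs = await store.similaritySearch(
+  "What fruits are juicy?",
+  5,
+  "subject='apples'"
+);
+console.log(filteredDocs);
+```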
diff --git a/examples/package.json b/examples/package.json
index 41045cc37526..6f60d363cf5e 100644
--- a/examples/package.json
+++ b/examples/package.json
@@ -32,6 +32,7 @@
     "@planetscale/database": "^1.8.0",
     "@prisma/client": "^4.11.0",
     "@raycast/api": "^1.55.2",
+    "@rockset/client": "^0.9.1",
     "@supabase/supabase-js": "^2.10.0",
     "@tensorflow/tfjs-backend-cpu": "^4.4.0",
     "@upstash/redis": "^1.20.6",
diff --git a/examples/src/indexes/vector_stores/rockset.ts b/examples/src/indexes/vector_stores/rockset.ts
new file mode 100644
index 000000000000..037868ff67a4
--- /dev/null
+++ b/examples/src/indexes/vector_stores/rockset.ts
@@ -0,0 +1,31 @@
+import * as rockset from "@rockset/client";
+import { ChatOpenAI } from "langchain/chat_models/openai";
+import { RetrievalQAChain } from "langchain/chains";
+import { OpenAIEmbeddings } from "langchain/embeddings/openai";
+import { RocksetStore } from "langchain/vectorstores/rockset";
+import { RecursiveCharacterTextSplitter } from "langchain/text_splitter";
+import { readFileSync } from "fs";
+
+export const run = async () => {
+  // Create a Rockset-backed vector store in a new collection, embedding documents with OpenAI.
+  const store = await RocksetStore.withNewCollection(new OpenAIEmbeddings(), {
+    // Construct the Rockset client from an API key and the regional API server URL.
+    client: rockset.default.default(
+      process.env.ROCKSET_API_KEY ?? "",
+      `https://api.${process.env.ROCKSET_API_REGION ?? "usw2a1"}.rockset.com`
+    ),
+    collectionName: "langchain_demo",
+  });
+
+  const model = new ChatOpenAI({ modelName: "gpt-3.5-turbo" });
+  const chain = RetrievalQAChain.fromLLM(model, store.asRetriever());
+  const text = readFileSync("state_of_the_union.txt", "utf8");
+  const docs = await new RecursiveCharacterTextSplitter().createDocuments([
+    text,
+  ]);
+
+  await store.addDocuments(docs);
+  const response = await chain.call({
+    query: "What is America's role in Ukraine?",
+  });
+  console.log(response.text);
+  await store.destroy(); // deletes the Rockset collection backing the store
+};
diff --git a/langchain/.env.example b/langchain/.env.example
index 1a3a21e60da0..5c9c2524bcd1 100644
--- a/langchain/.env.example
+++ b/langchain/.env.example
@@ -40,6 +40,9 @@ MYSCALE_USERNAME=ADD_YOURS_HERE
 MYSCALE_PASSWORD=ADD_YOURS_HERE
 FIGMA_ACCESS_TOKEN=ADD_YOURS_HERE
 REDIS_URL=ADD_YOURS_HERE
+ROCKSET_API_KEY=ADD_YOURS_HERE
+# defaults to "usw2a1" (Oregon)
+ROCKSET_API_REGION=ADD_YOURS_HERE
 SINGLESTORE_HOST=ADD_YOURS_HERE
 SINGLESTORE_PORT=ADD_YOURS_HERE
 SINGLESTORE_USERNAME=ADD_YOURS_HERE
diff --git a/langchain/.gitignore b/langchain/.gitignore
index 648193b1d593..16476fc15bff 100644
--- a/langchain/.gitignore
+++ b/langchain/.gitignore
@@ -304,6 +304,9 @@ vectorstores/myscale.d.ts
 vectorstores/redis.cjs
 vectorstores/redis.js
 vectorstores/redis.d.ts
+vectorstores/rockset.cjs
+vectorstores/rockset.js
+vectorstores/rockset.d.ts
 vectorstores/typesense.cjs
 vectorstores/typesense.js
 vectorstores/typesense.d.ts
diff --git a/langchain/package.json b/langchain/package.json
index 653d8888f6c3..a151813e0fb5 100644
--- a/langchain/package.json
+++ b/langchain/package.json
@@ -316,6 +316,9 @@
     "vectorstores/redis.cjs",
     "vectorstores/redis.js",
     "vectorstores/redis.d.ts",
+    "vectorstores/rockset.cjs",
+    "vectorstores/rockset.js",
+    "vectorstores/rockset.d.ts",
     "vectorstores/typesense.cjs",
     "vectorstores/typesense.js",
     "vectorstores/typesense.d.ts",
@@ -857,6 +860,7 @@
     "@planetscale/database": "^1.8.0",
     "@qdrant/js-client-rest": "^1.2.0",
     "@raycast/api": "^1.55.2",
+    "@rockset/client": "^0.9.1",
     "@smithy/eventstream-codec": "^2.0.5",
     "@smithy/protocol-http": "^3.0.6",
     "@smithy/signature-v4": "^2.0.10",
@@ -993,6 +997,7 @@
     "@planetscale/database": "^1.8.0",
     "@qdrant/js-client-rest": "^1.2.0",
     "@raycast/api": "^1.55.2",
+    "@rockset/client": "^0.9.1",
     "@smithy/eventstream-codec": "^2.0.5",
     "@smithy/protocol-http": "^3.0.6",
     "@smithy/signature-v4": "^2.0.10",
@@ -1148,6 +1153,9 @@
     "@raycast/api": {
       "optional": true
     },
+    "@rockset/client": {
+      "optional": true
+    },
     "@smithy/eventstream-codec": {
       "optional": true
     },
@@ -1907,6 +1915,11 @@
       "import": "./vectorstores/redis.js",
       "require": "./vectorstores/redis.cjs"
     },
+    "./vectorstores/rockset": {
+      "types": "./vectorstores/rockset.d.ts",
+      "import": "./vectorstores/rockset.js",
+      "require": "./vectorstores/rockset.cjs"
+    },
     "./vectorstores/typesense": {
       "types": "./vectorstores/typesense.d.ts",
       "import": "./vectorstores/typesense.js",
diff --git a/langchain/scripts/check-tree-shaking.js b/langchain/scripts/check-tree-shaking.js
index 5fa95b186b99..106a43421af4 100644
--- a/langchain/scripts/check-tree-shaking.js
+++ b/langchain/scripts/check-tree-shaking.js
@@ -31,6 +31,7 @@ export function listExternals() {
     "convex",
     "convex/server",
     "convex/values",
+    "@rockset/client/dist/codegen/api.js",
     "mysql2/promise",
     "pdf-parse/lib/pdf.js/v1.10.100/build/pdf.js",
     "@zilliz/milvus2-sdk-node/dist/milvus/const/Milvus.js",
diff --git a/langchain/scripts/create-entrypoints.js b/langchain/scripts/create-entrypoints.js
index 8d680b5097c4..2e098a0e9e81 100644
--- a/langchain/scripts/create-entrypoints.js
+++ b/langchain/scripts/create-entrypoints.js
@@ -119,6 +119,7 @@ const entrypoints = {
   "vectorstores/typeorm": "vectorstores/typeorm",
   "vectorstores/myscale": "vectorstores/myscale",
   "vectorstores/redis": "vectorstores/redis",
+  "vectorstores/rockset": "vectorstores/rockset",
   "vectorstores/typesense": "vectorstores/typesense",
   "vectorstores/singlestore": "vectorstores/singlestore",
   "vectorstores/tigris": "vectorstores/tigris",
@@ -393,6 +394,7 @@ const requiresOptionalDependency = [
   "vectorstores/myscale",
   "vectorstores/neo4j_vector",
   "vectorstores/redis",
+  "vectorstores/rockset",
   "vectorstores/singlestore",
   "vectorstores/typesense",
   "vectorstores/tigris",
diff --git a/langchain/src/load/import_constants.ts b/langchain/src/load/import_constants.ts
index d49c73496311..0663a2e468f2 100644
--- a/langchain/src/load/import_constants.ts
+++ b/langchain/src/load/import_constants.ts
@@ -65,6 +65,7 @@ export const optionalImportEntrypoints = [
   "langchain/vectorstores/typeorm",
   "langchain/vectorstores/myscale",
   "langchain/vectorstores/redis",
+  "langchain/vectorstores/rockset",
   "langchain/vectorstores/typesense",
   "langchain/vectorstores/singlestore",
   "langchain/vectorstores/tigris",
diff --git a/langchain/src/load/import_type.d.ts b/langchain/src/load/import_type.d.ts
index 04db2e05870f..8afff8e158cc 100644
--- a/langchain/src/load/import_type.d.ts
+++ b/langchain/src/load/import_type.d.ts
@@ -193,6 +193,9 @@ export interface OptionalImportMap {
   "langchain/vectorstores/redis"?:
     | typeof import("../vectorstores/redis.js")
     | Promise<typeof import("../vectorstores/redis.js")>;
+  "langchain/vectorstores/rockset"?:
+    | typeof import("../vectorstores/rockset.js")
+    | Promise<typeof import("../vectorstores/rockset.js")>;
   "langchain/vectorstores/typesense"?:
     | typeof import("../vectorstores/typesense.js")
     | Promise<typeof import("../vectorstores/typesense.js")>;
diff --git a/langchain/src/vectorstores/rockset.ts b/langchain/src/vectorstores/rockset.ts
new file mode 100644
index 000000000000..38a4f21dc5e3
--- /dev/null
+++ b/langchain/src/vectorstores/rockset.ts
@@ -0,0 +1,453 @@
+import { MainApi } from "@rockset/client";
+import type { CreateCollectionRequest } from "@rockset/client/dist/codegen/api.d.ts";
+import { Collection } from "@rockset/client/dist/codegen/api.js";
+
+import { VectorStore } from "./base.js";
+import { Embeddings } from "../embeddings/base.js";
+import { Document } from "../document.js";
+
+/**
+ * Generic Rockset vector storage error
+ */
+export class RocksetStoreError extends Error {
+  /**
+   * Constructs a RocksetStoreError
+   * @param message   The error message
+   */
+  constructor(message: string) {
+    super(message);
+    this.name = this.constructor.name;
+  }
+}
+
+/**
+ * Error that is thrown when a RocksetStore function is called
+ * after `destroy()` is called (meaning the collection would be
+ * deleted).
+ */
+export class RocksetStoreDestroyedError extends RocksetStoreError {
+  constructor() {
+    super("The Rockset store has been destroyed");
+    this.name = this.constructor.name;
+  }
+}
+
+/**
+ * Functions to measure vector distance/similarity by.
+ * See https://rockset.com/docs/vector-functions/#vector-distance-functions
+ * @enum SimilarityMetric
+ */
+export const SimilarityMetric = {
+  CosineSimilarity: "COSINE_SIM",
+  EuclideanDistance: "EUCLIDEAN_DIST",
+  DotProduct: "DOT_PRODUCT",
+} as const;
+
+export type SimilarityMetric =
+  (typeof SimilarityMetric)[keyof typeof SimilarityMetric];
+
+interface CollectionNotFoundError {
+  message_key: string;
+}
+
+/**
+ * Vector store arguments
+ * @interface RocksetLibArgs
+ */
+export interface RocksetLibArgs {
+  /**
+   * The Rockset client object constructed with `rocksetConfigure`
+   * @type {MainApi}
+   */
+  client: MainApi;
+  /**
+   * The name of the Rockset collection to store vectors
+   * @type {string}
+   */
+  collectionName: string;
+  /**
+   * The name of the Rockset workspace that holds @member collectionName
+   * @type {string}
+   */
+  workspaceName?: string;
+  /**
+   * The name of the collection column to contain page content of documents
+   * @type {string}
+   */
+  textKey?: string;
+  /**
+   * The name of the collection column to contain vectors
+   * @type {string}
+   */
+  embeddingKey?: string;
+  /**
+   * The SQL `WHERE` clause to filter by
+   * @type {string}
+   */
+  filter?: string;
+  /**
+   * The metric used to measure vector relationship
+   * @type {SimilarityMetric}
+   */
+  similarityMetric?: SimilarityMetric;
+}
+
+/**
+ * Exposes Rockset's vector store/search functionality
+ */
+export class RocksetStore extends VectorStore {
+  declare FilterType: string;
+
+  client: MainApi;
+
+  collectionName: string;
+
+  workspaceName: string;
+
+  textKey: string;
+
+  embeddingKey: string;
+
+  filter?: string;
+
+  private _similarityMetric: SimilarityMetric;
+
+  private similarityOrder: "ASC" | "DESC";
+
+  private destroyed: boolean;
+
+  /**
+   * Gets a string representation of the type of this VectorStore
+   * @returns {"rockset"}
+   */
+  _vectorstoreType(): "rockset" {
+    return "rockset";
+  }
+
+  /**
+   * Constructs a new RocksetStore
+   * @param {Embeddings} embeddings  Object used to embed queries and
+   *                                 page content
+   * @param {RocksetLibArgs} args
+   */
+  constructor(embeddings: Embeddings, args: RocksetLibArgs) {
+    super(embeddings, args);
+
+    this.embeddings = embeddings;
+    this.client = args.client;
+    this.collectionName = args.collectionName;
+    this.workspaceName = args.workspaceName ?? "commons";
+    this.textKey = args.textKey ?? "text";
+    this.embeddingKey = args.embeddingKey ?? "embedding";
+    this.filter = args.filter;
+    this.similarityMetric =
+      args.similarityMetric ?? SimilarityMetric.CosineSimilarity;
+    this.setSimilarityOrder();
+  }
+
+  /**
+   * Sets the object's similarity order based on what
+   * SimilarityMetric is being used
+   */
+  private setSimilarityOrder() {
+    this.checkIfDestroyed();
+    this.similarityOrder =
+      this.similarityMetric === SimilarityMetric.EuclideanDistance
+        ? "ASC"
+        : "DESC";
+  }
+
+  /**
+   * Embeds and adds Documents to the store.
+   * @param {Document[]} documents  The documents to store
+   * @returns {Promise<string[]?>}   The _id's of the documents added
+   */
+  async addDocuments(documents: Document[]): Promise<string[] | undefined> {
+    const texts = documents.map(({ pageContent }) => pageContent);
+    return await this.addVectors(
+      await this.embeddings.embedDocuments(texts),
+      documents
+    );
+  }
+
+  /**
+   * Adds vectors to the store given their corresponding Documents
+   * @param {number[][]} vectors   The vectors to store
+   * @param {Document[]} documents The Documents they represent
+   * @return {Promise<string[]?>}  The _id's of the added documents
+   */
+  async addVectors(vectors: number[][], documents: Document[]) {
+    this.checkIfDestroyed();
+    const rocksetDocs = [];
+    for (let i = 0; i < documents.length; i += 1) {
+      const currDoc = documents[i];
+      const currVector = vectors[i];
+      rocksetDocs.push({
+        [this.textKey]: currDoc.pageContent,
+        [this.embeddingKey]: currVector,
+        ...currDoc.metadata,
+      });
+    }
+
+    return (
+      await this.client.documents.addDocuments(
+        this.workspaceName,
+        this.collectionName,
+        {
+          data: rocksetDocs,
+        }
+      )
+    ).data?.map((docStatus) => docStatus._id || "");
+  }
+
+  /**
+   * Deletes Rockset documents given their _id's
+   * @param {string[]} ids  The IDs of the documents to remove
+   */
+  async delete(ids: string[]): Promise<void> {
+    this.checkIfDestroyed();
+    await this.client.documents.deleteDocuments(
+      this.workspaceName,
+      this.collectionName,
+      {
+        data: ids.map((id) => ({ _id: id })),
+      }
+    );
+  }
+
+  /**
+   * Gets the most relevant documents to a query along
+   * with their similarity score. The returned documents
+   * are ordered by similarity (most similar at the first
+   * index)
+   * @param {number[]} query  The embedded query to search
+   *                          the store by
+   * @param {number} k        The number of documents to retrieve
+   * @param {string?} filter  The SQL `WHERE` clause to filter by
+   */
+  async similaritySearchVectorWithScore(
+    query: number[],
+    k: number,
+    filter?: string
+  ): Promise<[Document, number][]> {
+    this.checkIfDestroyed();
+    if (filter && this.filter) {
+      throw new RocksetStoreError(
+        "cannot provide both `filter` and `this.filter`"
+      );
+    }
+    const similarityKey = "similarity";
+    const _filter = filter ?? this.filter;
+    return (
+      (
+        await this.client.queries.query({
+          sql: {
+            query: `
+          SELECT
+            * EXCEPT("${this.embeddingKey}"),
+            "${this.textKey}",
+            ${this.similarityMetric}(:query, "${
+              this.embeddingKey
+            }") AS "${similarityKey}"
+          FROM 
+            "${this.workspaceName}"."${this.collectionName}"
+          ${_filter ? `WHERE ${_filter}` : ""}
+          ORDER BY
+            "${similarityKey}" ${this.similarityOrder}
+          LIMIT
+            ${k}
+        `,
+            parameters: [
+              {
+                name: "query",
+                type: "",
+                value: `[${query.toString()}]`,
+              },
+            ],
+          },
+        })
+      ).results?.map((rocksetDoc) => [
+        new Document<Record<string, object>>({
+          pageContent: rocksetDoc[this.textKey],
+          metadata: (({
+            [this.textKey]: t,
+            [similarityKey]: s,
+            ...rocksetDoc
+          }) => rocksetDoc)(rocksetDoc),
+        }),
+        rocksetDoc[similarityKey] as number,
+      ]) ?? []
+    );
+  }
+
+  /**
+   * Constructs and returns a RocksetStore object given texts to store.
+   * @param {string[]} texts               The texts to store
+   * @param {object[] | object} metadatas  The metadatas that correspond
+   *                                       to @param texts
+   * @param {Embeddings} embeddings        The object used to embed queries
+   *                                       and page content
+   * @param {RocksetLibArgs} dbConfig      The options to be passed into the
+   *                                       RocksetStore constructor
+   * @returns {RocksetStore}
+   */
+  static async fromTexts(
+    texts: string[],
+    metadatas: object[] | object,
+    embeddings: Embeddings,
+    dbConfig: RocksetLibArgs
+  ): Promise<RocksetStore> {
+    const docs: Document[] = [];
+    for (let i = 0; i < texts.length; i += 1) {
+      const metadata = Array.isArray(metadatas) ? metadatas[i] : metadatas;
+      const newDoc = new Document({
+        pageContent: texts[i],
+        metadata,
+      });
+      docs.push(newDoc);
+    }
+
+    return RocksetStore.fromDocuments(docs, embeddings, dbConfig);
+  }
+
+  /**
+   * Constructs, adds docs to, and returns a RocksetStore object
+   * @param {Document[]} docs          The Documents to store
+   * @param {Embeddings} embeddings    The object used to embed queries
+   *                                   and page content
+   * @param {RocksetLibArgs} dbConfig  The options to be passed into the
+   *                                   RocksetStore constructor
+   * @returns {RocksetStore}
+   */
+  static async fromDocuments(
+    docs: Document[],
+    embeddings: Embeddings,
+    dbConfig: RocksetLibArgs
+  ): Promise<RocksetStore> {
+    const args = { ...dbConfig, textKey: dbConfig.textKey ?? "text" };
+    const instance = new this(embeddings, args);
+    await instance.addDocuments(docs);
+    return instance;
+  }
+
+  /**
+   * Checks if a Rockset collection exists.
+   * @param {RocksetLibArgs} dbConfig  The object containing the collection
+   *                                   and workspace names
+   * @return {boolean}                 whether the collection exists
+   */
+  private static async collectionExists(dbConfig: RocksetLibArgs) {
+    try {
+      await dbConfig.client.collections.getCollection(
+        dbConfig.workspaceName ?? "commons",
+        dbConfig.collectionName
+      );
+    } catch (err) {
+      if (
+        (err as CollectionNotFoundError).message_key ===
+        "COLLECTION_DOES_NOT_EXIST"
+      ) {
+        return false;
+      }
+      throw err;
+    }
+    return true;
+  }
+
+  /**
+   * Checks whether a Rockset collection is ready to be queried.
+   * @param {RocksetLibArgs} dbConfig  The object containing the collection
+   *                                   name and workspace
+   * @return {boolean}                 whether the collection is ready
+   */
+  private static async collectionReady(dbConfig: RocksetLibArgs) {
+    return (
+      (
+        await dbConfig.client.collections.getCollection(
+          dbConfig.workspaceName ?? "commons",
+          dbConfig.collectionName
+        )
+      ).data?.status === Collection.StatusEnum.READY
+    );
+  }
+
+  /**
+   * Deletes the collection this RocksetStore uses
+   * @param {boolean?} waitUntilDeletion  Whether to wait until the
+   *                                      collection has been fully
+   *                                      deleted before returning
+   */
+  async destroy(waitUntilDeletion?: boolean) {
+    await this.client.collections.deleteCollection(
+      this.workspaceName,
+      this.collectionName
+    );
+    this.destroyed = true;
+    if (waitUntilDeletion) {
+      while (
+        await RocksetStore.collectionExists({
+          collectionName: this.collectionName,
+          client: this.client,
+        })
+      );
+    }
+  }
+
+  /**
+   * Checks if this RocksetStore has been destroyed.
+   * @throws {RocksetStoreDestroyedError} if it has.
+   */
+  private checkIfDestroyed() {
+    if (this.destroyed) {
+      throw new RocksetStoreDestroyedError();
+    }
+  }
+
+  /**
+   * Creates a new Rockset collection and returns a RocksetStore that
+   * uses it
+   * @param {Embeddings} embeddings    Object used to embed queries and
+   *                                   page content
+   * @param {RocksetLibArgs} dbConfig  The options to be passed into the
+   *                                   RocksetStore constructor
+   * @param {CreateCollectionRequest?} collectionOptions  The arguments to send with the
+   *                                                      HTTP request when creating the
+   *                                                      collection. Setting a field mapping
+   *                                                      that `VECTOR_ENFORCE`s is recommended
+   *                                                      when using this function. See
+   *                                                      https://rockset.com/docs/vector-functions/#vector_enforce
+   * @returns {RocksetStore}
+   */
+  static async withNewCollection(
+    embeddings: Embeddings,
+    dbConfig: RocksetLibArgs,
+    collectionOptions?: CreateCollectionRequest
+  ): Promise<RocksetStore> {
+    if (
+      collectionOptions?.name &&
+      dbConfig.collectionName !== collectionOptions?.name
+    ) {
+      throw new RocksetStoreError(
+        "`dbConfig.name` and `collectionOptions.name` do not match"
+      );
+    }
+    await dbConfig.client.collections.createCollection(
+      dbConfig.workspaceName ?? "commons",
+      collectionOptions || { name: dbConfig.collectionName }
+    );
+    while (
+      !(await this.collectionExists(dbConfig)) ||
+      !(await this.collectionReady(dbConfig))
+    );
+    return new this(embeddings, dbConfig);
+  }
+
+  public get similarityMetric() {
+    return this._similarityMetric;
+  }
+
+  public set similarityMetric(metric: SimilarityMetric) {
+    this._similarityMetric = metric;
+    this.setSimilarityOrder();
+  }
+}
diff --git a/langchain/src/vectorstores/tests/rockset.int.test.ts b/langchain/src/vectorstores/tests/rockset.int.test.ts
new file mode 100644
index 000000000000..88d93bac8283
--- /dev/null
+++ b/langchain/src/vectorstores/tests/rockset.int.test.ts
@@ -0,0 +1,89 @@
+/* eslint-disable no-process-env */
+/* eslint-disable @typescript-eslint/no-non-null-assertion */
+import rockset from "@rockset/client";
+import { test, expect } from "@jest/globals";
+import { OpenAIEmbeddings } from "../../embeddings/openai.js";
+import { RocksetStore, SimilarityMetric } from "../rockset.js";
+import { Document } from "../../document.js";
+import { formatDocumentsAsString } from "../../util/document.js";
+
+const getPageContents = formatDocumentsAsString;
+
+const embeddings = new OpenAIEmbeddings();
+let store: RocksetStore | undefined;
+
+const docs = [
+  new Document({
+    pageContent: "Tomatoes are red.",
+    metadata: { subject: "tomatoes" },
+  }),
+  new Document({
+    pageContent: "Tomatoes are small.",
+    metadata: { subject: "tomatoes" },
+  }),
+  new Document({
+    pageContent: "Apples are juicy.",
+    metadata: { subject: "apples" },
+  }),
+];
+
+test.skip("create new collection as a RocksetVectorStore", async () => {
+  store = await RocksetStore.withNewCollection(embeddings, {
+    collectionName: "langchain_demo",
+    client: rockset.default(
+      process.env.ROCKSET_API_KEY ?? "",
+      `https://api.${process.env.ROCKSET_API_REGION ?? "usw2a1"}.rockset.com`
+    ),
+  });
+});
+
+test.skip("add to RocksetVectorStore", async () => {
+  expect(store).toBeDefined();
+  expect((await store!.addDocuments(docs))?.length).toBe(docs.length);
+});
+
+test.skip("query RocksetVectorStore with cosine sim", async () => {
+  expect(store).toBeDefined();
+  const relevantDocs = await store!.similaritySearch(
+    "What color are tomatoes?"
+  );
+  expect(getPageContents(relevantDocs)).toContain(docs[0].pageContent);
+});
+
+test.skip("query RocksetVectorStore with dot product", async () => {
+  expect(store).toBeDefined();
+  store!.similarityMetric = SimilarityMetric.DotProduct;
+  const relevantDocs = await store!.similaritySearch(
+    "What color are tomatoes?"
+  );
+  expect(getPageContents(relevantDocs)).toContain(docs[0].pageContent);
+});
+
+test.skip("query RocksetVectorStore with euclidean distance", async () => {
+  expect(store).toBeDefined();
+  store!.similarityMetric = SimilarityMetric.EuclideanDistance;
+  const relevantDocs = await store!.similaritySearch(
+    "What color are tomatoes?"
+  );
+  expect(getPageContents(relevantDocs)).toContain(docs[0].pageContent);
+});
+
+test.skip("query RocksetVectorStore with metadata filter", async () => {
+  expect(store).toBeDefined();
+  const relevantDocs = await store!.similaritySearch(
+    "What color are tomatoes?",
+    undefined,
+    "subject='apples'"
+  );
+  expect(relevantDocs.length).toBe(1);
+  expect(getPageContents(relevantDocs)).toEqual(getPageContents([docs[2]]));
+});
+
+test.skip("query RocksetVectorStore with k", async () => {
+  expect(store).toBeDefined();
+  const relevantDocs = await store!.similaritySearch(
+    "What color are tomatoes?",
+    1
+  );
+  expect(relevantDocs.length).toBe(1);
+});
diff --git a/yarn.lock b/yarn.lock
index d93ca570c2e6..5aadd9a48b25 100644
--- a/yarn.lock
+++ b/yarn.lock
@@ -8918,6 +8918,18 @@ __metadata:
   languageName: node
   linkType: hard
 
+"@rockset/client@npm:^0.9.1":
+  version: 0.9.1
+  resolution: "@rockset/client@npm:0.9.1"
+  dependencies:
+    "@types/node-fetch": ^2.5.3
+    fetch-ponyfill: ^7.1.0
+    node-fetch: ^2.6.7
+    url: ^0.11.0
+  checksum: eed056b229d47ba0de4e092c1157061c0602ae62f9a650a8a089a1977a86f40a9465d8286ca5fc7075d315f0444d60ea13d39e39934373472e7dc9c72f6375fc
+  languageName: node
+  linkType: hard
+
 "@rushstack/eslint-patch@npm:^1.3.3":
   version: 1.5.1
   resolution: "@rushstack/eslint-patch@npm:1.5.1"
@@ -11427,6 +11439,16 @@ __metadata:
   languageName: node
   linkType: hard
 
+"@types/node-fetch@npm:^2.5.3":
+  version: 2.6.9
+  resolution: "@types/node-fetch@npm:2.6.9"
+  dependencies:
+    "@types/node": "*"
+    form-data: ^4.0.0
+  checksum: 212269aff4b251477c13c33cee6cea23e4fd630be6c0bfa3714968cce7efd7055b52f2f82aab3394596d8c758335cc802e7c5fa3f775e7f2a472fa914c90dc15
+  languageName: node
+  linkType: hard
+
 "@types/node@npm:*":
   version: 18.14.6
   resolution: "@types/node@npm:18.14.6"
@@ -17693,6 +17715,7 @@ __metadata:
     "@planetscale/database": ^1.8.0
     "@prisma/client": ^4.11.0
     "@raycast/api": ^1.55.2
+    "@rockset/client": ^0.9.1
     "@supabase/supabase-js": ^2.10.0
     "@tensorflow/tfjs-backend-cpu": ^4.4.0
     "@tsconfig/recommended": ^1.0.2
@@ -18166,6 +18189,15 @@ __metadata:
   languageName: node
   linkType: hard
 
+"fetch-ponyfill@npm:^7.1.0":
+  version: 7.1.0
+  resolution: "fetch-ponyfill@npm:7.1.0"
+  dependencies:
+    node-fetch: ~2.6.1
+  checksum: 7fd497dd5f7db890e80193de5bc1cd0115a62400272cd9a992849288e66886fcdb0724ea1ed161be7b8db2daeafda8c58d0259acdda42d6561155dbcdbb0720a
+  languageName: node
+  linkType: hard
+
 "fflate@npm:^0.4.1":
   version: 0.4.8
   resolution: "fflate@npm:0.4.8"
@@ -22228,6 +22260,7 @@ __metadata:
     "@planetscale/database": ^1.8.0
     "@qdrant/js-client-rest": ^1.2.0
     "@raycast/api": ^1.55.2
+    "@rockset/client": ^0.9.1
     "@smithy/eventstream-codec": ^2.0.5
     "@smithy/protocol-http": ^3.0.6
     "@smithy/signature-v4": ^2.0.10
@@ -22383,6 +22416,7 @@ __metadata:
     "@planetscale/database": ^1.8.0
     "@qdrant/js-client-rest": ^1.2.0
     "@raycast/api": ^1.55.2
+    "@rockset/client": ^0.9.1
     "@smithy/eventstream-codec": ^2.0.5
     "@smithy/protocol-http": ^3.0.6
     "@smithy/signature-v4": ^2.0.10
@@ -22509,6 +22543,8 @@ __metadata:
       optional: true
     "@raycast/api":
       optional: true
+    "@rockset/client":
+      optional: true
     "@smithy/eventstream-codec":
       optional: true
     "@smithy/protocol-http":
@@ -26835,7 +26871,7 @@ __metadata:
   languageName: node
   linkType: hard
 
-"punycode@npm:^1.3.2":
+"punycode@npm:^1.3.2, punycode@npm:^1.4.1":
   version: 1.4.1
   resolution: "punycode@npm:1.4.1"
   checksum: fa6e698cb53db45e4628559e557ddaf554103d2a96a1d62892c8f4032cd3bc8871796cae9eabc1bc700e2b6677611521ce5bb1d9a27700086039965d0cf34518
@@ -26927,7 +26963,7 @@ __metadata:
   languageName: node
   linkType: hard
 
-"qs@npm:^6.7.0":
+"qs@npm:^6.11.2, qs@npm:^6.7.0":
   version: 6.11.2
   resolution: "qs@npm:6.11.2"
   dependencies:
@@ -30994,6 +31030,16 @@ __metadata:
   languageName: node
   linkType: hard
 
+"url@npm:^0.11.0":
+  version: 0.11.3
+  resolution: "url@npm:0.11.3"
+  dependencies:
+    punycode: ^1.4.1
+    qs: ^6.11.2
+  checksum: f9e7886f46a16f96d2e42fbcc5d682c231c55ef5442c1ff66150c0f6556f6e3a97d094a84f51be15ec2432711d212eb60426659ce418f5fcadeaa3f601532c4e
+  languageName: node
+  linkType: hard
+
 "use-composed-ref@npm:^1.3.0":
   version: 1.3.0
   resolution: "use-composed-ref@npm:1.3.0"