From 82908e4096cb2ccefb262c0c3347be8d16568881 Mon Sep 17 00:00:00 2001
From: Henry Heng <hzj94@hotmail.com>
Date: Mon, 22 Apr 2024 21:15:34 +0100
Subject: [PATCH 01/18] google-genai[patch]: update google/generative-ai
 version (#5115)

* update google/generative-ai version

* update leading string check gemini-1.5

* Format

---------

Co-authored-by: jacoblee93 <jacoblee93@gmail.com>
---
 examples/package.json                         |  2 +-
 libs/langchain-google-genai/package.json      |  2 +-
 .../langchain-google-genai/src/chat_models.ts |  2 +-
 libs/langchain-google-genai/src/utils.ts      |  9 +++++++--
 yarn.lock                                     | 19 ++++++-------------
 5 files changed, 16 insertions(+), 18 deletions(-)

diff --git a/examples/package.json b/examples/package.json
index 7225d3940ca0..62a828548b2a 100644
--- a/examples/package.json
+++ b/examples/package.json
@@ -29,7 +29,7 @@
     "@getmetal/metal-sdk": "^4.0.0",
     "@getzep/zep-js": "^0.9.0",
     "@gomomento/sdk": "^1.51.1",
-    "@google/generative-ai": "^0.1.0",
+    "@google/generative-ai": "^0.7.0",
     "@langchain/anthropic": "workspace:*",
     "@langchain/azure-openai": "workspace:*",
     "@langchain/cloudflare": "workspace:*",
diff --git a/libs/langchain-google-genai/package.json b/libs/langchain-google-genai/package.json
index aeef9d7f8912..aa89ae872530 100644
--- a/libs/langchain-google-genai/package.json
+++ b/libs/langchain-google-genai/package.json
@@ -39,7 +39,7 @@
   "author": "LangChain",
   "license": "MIT",
   "dependencies": {
-    "@google/generative-ai": "^0.1.3",
+    "@google/generative-ai": "^0.7.0",
     "@langchain/core": "~0.1.5"
   },
   "devDependencies": {
diff --git a/libs/langchain-google-genai/src/chat_models.ts b/libs/langchain-google-genai/src/chat_models.ts
index 5e62472c4c48..fca4a2be5898 100644
--- a/libs/langchain-google-genai/src/chat_models.ts
+++ b/libs/langchain-google-genai/src/chat_models.ts
@@ -186,7 +186,7 @@ export class ChatGoogleGenerativeAI
   private client: GenerativeModel;
 
   get _isMultimodalModel() {
-    return this.model.includes("vision");
+    return this.model.includes("vision") || this.model.startsWith("gemini-1.5");
   }
 
   constructor(fields?: GoogleGenerativeAIChatInput) {
diff --git a/libs/langchain-google-genai/src/utils.ts b/libs/langchain-google-genai/src/utils.ts
index 509359ed9790..6cbe4a1b94b5 100644
--- a/libs/langchain-google-genai/src/utils.ts
+++ b/libs/langchain-google-genai/src/utils.ts
@@ -67,10 +67,15 @@ export function convertMessageContentToParts(
       if (!isMultimodalModel) {
         throw new Error(`This model does not support images`);
       }
-      if (typeof c.image_url !== "string") {
+      let source;
+      if (typeof c.image_url === "string") {
+        source = c.image_url;
+      } else if (typeof c.image_url === "object" && "url" in c.image_url) {
+        source = c.image_url.url;
+      } else {
         throw new Error("Please provide image as base64 encoded data URL");
       }
-      const [dm, data] = c.image_url.split(",");
+      const [dm, data] = source.split(",");
       if (!dm.startsWith("data:")) {
         throw new Error("Please provide image as base64 encoded data URL");
       }
diff --git a/yarn.lock b/yarn.lock
index 7db4a01f6c6a..dfa1792aa94d 100644
--- a/yarn.lock
+++ b/yarn.lock
@@ -8124,17 +8124,10 @@ __metadata:
   languageName: node
   linkType: hard
 
-"@google/generative-ai@npm:^0.1.0":
-  version: 0.1.0
-  resolution: "@google/generative-ai@npm:0.1.0"
-  checksum: 80a3eebcd831ad894e6c7a828a0519c7974eee10aa5c765aa837dc7dab36017e5dc3a08470acc4955a5b44013b77358760d5b197e83016f416d1c3b39d265a79
-  languageName: node
-  linkType: hard
-
-"@google/generative-ai@npm:^0.1.3":
-  version: 0.1.3
-  resolution: "@google/generative-ai@npm:0.1.3"
-  checksum: 6ab4e214c5f792c9dce66aa00268dd75295be093ec9305ccb8e2251210e5a6680a17ec9f041d8108ee3a2ce49e2f26bc9a30ef97e17a2d83818a92a824f6efd1
+"@google/generative-ai@npm:^0.7.0":
+  version: 0.7.1
+  resolution: "@google/generative-ai@npm:0.7.1"
+  checksum: 536c7c75545c93731f0ab1fa9be6c88c64ead6ab6b24e70763e592e163041444f9ae78e2095019cd0e27fc18cbdc1ecaf1fdfd3561ca0a61577f720ddbaba1f2
   languageName: node
   linkType: hard
 
@@ -9496,7 +9489,7 @@ __metadata:
   version: 0.0.0-use.local
   resolution: "@langchain/google-genai@workspace:libs/langchain-google-genai"
   dependencies:
-    "@google/generative-ai": ^0.1.3
+    "@google/generative-ai": ^0.7.0
     "@jest/globals": ^29.5.0
     "@langchain/core": ~0.1.5
     "@langchain/scripts": ~0.0
@@ -21723,7 +21716,7 @@ __metadata:
     "@getmetal/metal-sdk": ^4.0.0
     "@getzep/zep-js": ^0.9.0
     "@gomomento/sdk": ^1.51.1
-    "@google/generative-ai": ^0.1.0
+    "@google/generative-ai": ^0.7.0
     "@langchain/anthropic": "workspace:*"
     "@langchain/azure-openai": "workspace:*"
     "@langchain/cloudflare": "workspace:*"

From 93070985a4f21d32a1a7cd8d5dc9c3902cf1d6d4 Mon Sep 17 00:00:00 2001
From: Mohammed Bilal Shareef <bilalshareef.h@gmail.com>
Date: Tue, 23 Apr 2024 01:48:32 +0530
Subject: [PATCH 02/18] community[patch]: Invoke toTitleCase only when string
 is present to avoid errors (#5145)

* Invoke toTitleCase only when string is present to avoid errors

* fix: prettier formatting issues

---------

Co-authored-by: Brace Sproul <braceasproul@gmail.com>
---
 .../src/experimental/graph_transformers/llm.ts         | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/libs/langchain-community/src/experimental/graph_transformers/llm.ts b/libs/langchain-community/src/experimental/graph_transformers/llm.ts
index fdb6ed75d127..41167e09ad6f 100644
--- a/libs/langchain-community/src/experimental/graph_transformers/llm.ts
+++ b/libs/langchain-community/src/experimental/graph_transformers/llm.ts
@@ -129,7 +129,7 @@ function createSchema(allowedNodes: string[], allowedRelationships: string[]) {
 function mapToBaseNode(node: any): Node {
   return new Node({
     id: node.id,
-    type: toTitleCase(node.type || ""),
+    type: node.type ? toTitleCase(node.type) : "",
   });
 }
 
@@ -138,11 +138,15 @@ function mapToBaseRelationship(relationship: any): Relationship {
   return new Relationship({
     source: new Node({
       id: relationship.sourceNodeId,
-      type: toTitleCase(relationship.sourceNodeType || ""),
+      type: relationship.sourceNodeType
+        ? toTitleCase(relationship.sourceNodeType)
+        : "",
     }),
     target: new Node({
       id: relationship.targetNodeId,
-      type: toTitleCase(relationship.targetNodeType || ""),
+      type: relationship.targetNodeType
+        ? toTitleCase(relationship.targetNodeType)
+        : "",
     }),
     type: relationship.relationshipType.replace(" ", "_").toUpperCase(),
   });

From ed05a2fd85c5f2d11b89f5d83c7bb8c21f021d20 Mon Sep 17 00:00:00 2001
From: Jacob Lee <jacoblee93@gmail.com>
Date: Mon, 22 Apr 2024 14:19:34 -0700
Subject: [PATCH 03/18] google-genai[patch]: Release 0.0.12 (#5174)

---
 libs/langchain-google-genai/package.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libs/langchain-google-genai/package.json b/libs/langchain-google-genai/package.json
index aa89ae872530..02c10361b130 100644
--- a/libs/langchain-google-genai/package.json
+++ b/libs/langchain-google-genai/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@langchain/google-genai",
-  "version": "0.0.11",
+  "version": "0.0.12",
   "description": "Sample integration for LangChain.js",
   "type": "module",
   "engines": {

From 99bfed20382d32fbf70aecede3cc305479ef470d Mon Sep 17 00:00:00 2001
From: Sam Trost <trost.sam@gmail.com>
Date: Mon, 22 Apr 2024 17:51:44 -0400
Subject: [PATCH 04/18] community[patch]: allow dynamic opensearch vector,
 text, and metadata field names (#5165)

* allow dynamic opensearch vector, text, and metadata field names

* Fix typing

---------

Co-authored-by: jacoblee93 <jacoblee93@gmail.com>
---
 .../src/vectorstores/opensearch.ts            | 36 ++++++++++++-------
 1 file changed, 24 insertions(+), 12 deletions(-)

diff --git a/libs/langchain-community/src/vectorstores/opensearch.ts b/libs/langchain-community/src/vectorstores/opensearch.ts
index 79c7360d010b..2f434928e245 100644
--- a/libs/langchain-community/src/vectorstores/opensearch.ts
+++ b/libs/langchain-community/src/vectorstores/opensearch.ts
@@ -29,6 +29,9 @@ interface VectorSearchOptions {
  */
 export interface OpenSearchClientArgs {
   readonly client: Client;
+  readonly vectorFieldName?: string;
+  readonly textFieldName?: string;
+  readonly metadataFieldName?: string;
   readonly service?: "es" | "aoss";
   readonly indexName?: string;
 
@@ -91,6 +94,12 @@ export class OpenSearchVectorStore extends VectorStore {
 
   private readonly m: number;
 
+  private readonly vectorFieldName: string;
+
+  private readonly textFieldName: string;
+
+  private readonly metadataFieldName: string;
+
   _vectorstoreType(): string {
     return "opensearch";
   }
@@ -105,6 +114,9 @@ export class OpenSearchVectorStore extends VectorStore {
     this.efSearch = args.vectorSearchOptions?.efSearch ?? 512;
     this.numberOfShards = args.vectorSearchOptions?.numberOfShards ?? 5;
     this.numberOfReplicas = args.vectorSearchOptions?.numberOfReplicas ?? 1;
+    this.vectorFieldName = args.vectorFieldName ?? "embedding";
+    this.textFieldName = args.textFieldName ?? "text";
+    this.metadataFieldName = args.metadataFieldName ?? "metadata";
 
     this.client = args.client;
     this.indexName = args.indexName ?? "documents";
@@ -161,9 +173,9 @@ export class OpenSearchVectorStore extends VectorStore {
           },
         },
         {
-          embedding,
-          metadata: documents[idx].metadata,
-          text: documents[idx].pageContent,
+          [this.vectorFieldName]: embedding,
+          [this.textFieldName]: documents[idx].pageContent,
+          [this.metadataFieldName]: documents[idx].metadata,
         },
       ];
 
@@ -204,7 +216,7 @@ export class OpenSearchVectorStore extends VectorStore {
             must: [
               {
                 knn: {
-                  embedding: { vector: query, k },
+                  [this.vectorFieldName]: { vector: query, k },
                 },
               },
             ],
@@ -219,8 +231,8 @@ export class OpenSearchVectorStore extends VectorStore {
     // eslint-disable-next-line @typescript-eslint/no-explicit-any
     return body.hits.hits.map((hit: any) => [
       new Document({
-        pageContent: hit._source.text,
-        metadata: hit._source.metadata,
+        pageContent: hit._source[this.textFieldName],
+        metadata: hit._source[this.metadataFieldName],
       }),
       hit._score,
     ]);
@@ -306,22 +318,22 @@ export class OpenSearchVectorStore extends VectorStore {
         dynamic_templates: [
           {
             // map all metadata properties to be keyword
-            "metadata.*": {
+            [`${this.metadataFieldName}.*`]: {
               match_mapping_type: "string",
               mapping: { type: "keyword" },
             },
           },
           {
-            "metadata.loc": {
+            [`${this.metadataFieldName}.loc`]: {
               match_mapping_type: "object",
               mapping: { type: "object" },
             },
           },
         ],
         properties: {
-          text: { type: "text" },
-          metadata: { type: "object" },
-          embedding: {
+          [this.textFieldName]: { type: "text" },
+          [this.metadataFieldName]: { type: "object" },
+          [this.vectorFieldName]: {
             type: "knn_vector",
             dimension,
             method: {
@@ -373,7 +385,7 @@ export class OpenSearchVectorStore extends VectorStore {
     const must = [];
     const must_not = [];
     for (const [key, value] of Object.entries(filter)) {
-      const metadataKey = `metadata.${key}`;
+      const metadataKey = `${this.metadataFieldName}.${key}`;
       if (value) {
         if (typeof value === "object" && !Array.isArray(value)) {
           if ("exists" in value) {

From fcb2797eccab0d462e3262e6933beebdfede1373 Mon Sep 17 00:00:00 2001
From: Anush <anushshetty90@gmail.com>
Date: Tue, 23 Apr 2024 03:24:10 +0530
Subject: [PATCH 05/18] refactor: configurable keys Qdrant (#5172)

---
 .../docs/integrations/vectorstores/qdrant.mdx |  4 ----
 .../src/vectorstores/qdrant.ts                | 22 +++++++++++++++----
 2 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/docs/core_docs/docs/integrations/vectorstores/qdrant.mdx b/docs/core_docs/docs/integrations/vectorstores/qdrant.mdx
index 2d0467357d05..1c616c2cf940 100644
--- a/docs/core_docs/docs/integrations/vectorstores/qdrant.mdx
+++ b/docs/core_docs/docs/integrations/vectorstores/qdrant.mdx
@@ -6,10 +6,6 @@ sidebar_class_name: node-only
 
 [Qdrant](https://qdrant.tech/) is a vector similarity search engine. It provides a production-ready service with a convenient API to store, search, and manage points - vectors with an additional payload.
 
-:::tip Compatibility
-Only available on Node.js.
-:::
-
 ## Setup
 
 1. Run a Qdrant instance with Docker on your computer by following the [Qdrant setup instructions](https://qdrant.tech/documentation/install/).
diff --git a/libs/langchain-community/src/vectorstores/qdrant.ts b/libs/langchain-community/src/vectorstores/qdrant.ts
index 7925c09bdae1..e2b6d5bc8024 100644
--- a/libs/langchain-community/src/vectorstores/qdrant.ts
+++ b/libs/langchain-community/src/vectorstores/qdrant.ts
@@ -6,6 +6,9 @@ import { VectorStore } from "@langchain/core/vectorstores";
 import { Document } from "@langchain/core/documents";
 import { getEnvironmentVariable } from "@langchain/core/utils/env";
 
+const CONTENT_KEY = "content";
+const METADATA_KEY = "metadata";
+
 /**
  * Interface for the arguments that can be passed to the
  * `QdrantVectorStore` constructor. It includes options for specifying a
@@ -20,6 +23,8 @@ export interface QdrantLibArgs {
   collectionConfig?: QdrantSchemas["CreateCollection"];
   // eslint-disable-next-line @typescript-eslint/no-explicit-any
   customPayload?: Record<string, any>[];
+  contentPayloadKey?: string;
+  metadataPayloadKey?: string;
 }
 
 export type QdrantAddDocumentOptions = {
@@ -59,6 +64,10 @@ export class QdrantVectorStore extends VectorStore {
 
   collectionConfig?: QdrantSchemas["CreateCollection"];
 
+  contentPayloadKey: string;
+
+  metadataPayloadKey: string;
+
   _vectorstoreType(): string {
     return "qdrant";
   }
@@ -83,6 +92,10 @@ export class QdrantVectorStore extends VectorStore {
     this.collectionName = args.collectionName ?? "documents";
 
     this.collectionConfig = args.collectionConfig;
+
+    this.contentPayloadKey = args.contentPayloadKey ?? CONTENT_KEY;
+
+    this.metadataPayloadKey = args.metadataPayloadKey ?? METADATA_KEY;
   }
 
   /**
@@ -129,8 +142,8 @@ export class QdrantVectorStore extends VectorStore {
       id: uuid(),
       vector: embedding,
       payload: {
-        content: documents[idx].pageContent,
-        metadata: documents[idx].metadata,
+        [this.contentPayloadKey]: documents[idx].pageContent,
+        [this.metadataPayloadKey]: documents[idx].metadata,
         customPayload: documentOptions?.customPayload[idx],
       },
     }));
@@ -181,8 +194,9 @@ export class QdrantVectorStore extends VectorStore {
       results as QdrantSearchResponse[]
     ).map((res) => [
       new Document({
-        metadata: res.payload.metadata,
-        pageContent: res.payload.content,
+        // eslint-disable-next-line @typescript-eslint/no-explicit-any
+        metadata: res.payload[this.metadataPayloadKey] as Record<string, any>,
+        pageContent: res.payload[this.contentPayloadKey] as string,
       }),
       res.score,
     ]);

From b0c383dfa9391b55bd296cbc3fd35065d3d381fe Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicol=C3=B2=20Boschi?= <boschi1997@gmail.com>
Date: Mon, 22 Apr 2024 23:54:25 +0200
Subject: [PATCH 06/18] community[patch]: AstraDB: add option to skip create
 collection call (#5170)

* community[feat]: AstraDB: add option to skip create collection call

* test
---
 .../src/vectorstores/astradb.ts               | 20 ++++++++++---
 .../vectorstores/tests/astradb.int.test.ts    | 29 +++++++++++++++++++
 2 files changed, 45 insertions(+), 4 deletions(-)

diff --git a/libs/langchain-community/src/vectorstores/astradb.ts b/libs/langchain-community/src/vectorstores/astradb.ts
index 60f3d66d3b56..cffe092cfaee 100644
--- a/libs/langchain-community/src/vectorstores/astradb.ts
+++ b/libs/langchain-community/src/vectorstores/astradb.ts
@@ -29,6 +29,7 @@ export interface AstraLibArgs extends AsyncCallerParams {
   namespace?: string;
   idKey?: string;
   contentKey?: string;
+  skipCollectionProvisioning?: boolean;
   collectionOptions?: CreateCollectionOptions<any>;
   batchSize?: number;
 }
@@ -56,6 +57,8 @@ export class AstraDBVectorStore extends VectorStore {
 
   caller: AsyncCaller;
 
+  private readonly skipCollectionProvisioning: boolean;
+
   _vectorstoreType(): string {
     return "astradb";
   }
@@ -72,6 +75,7 @@ export class AstraDBVectorStore extends VectorStore {
       idKey,
       contentKey,
       batchSize,
+      skipCollectionProvisioning,
       ...callerArgs
     } = args;
     const dataAPIClient = new DataAPIClient(token, { caller: ["langchainjs"] });
@@ -91,6 +95,12 @@ export class AstraDBVectorStore extends VectorStore {
     this.contentKey = contentKey ?? "text";
     this.batchSize = batchSize && batchSize <= 20 ? batchSize : 20;
     this.caller = new AsyncCaller(callerArgs);
+    this.skipCollectionProvisioning = skipCollectionProvisioning ?? false;
+    if (this.skipCollectionProvisioning && this.collectionOptions) {
+      throw new Error(
+        "If 'skipCollectionProvisioning' has been set to true, 'collectionOptions' must not be defined"
+      );
+    }
   }
 
   /**
@@ -100,10 +110,12 @@ export class AstraDBVectorStore extends VectorStore {
    * @returns Promise that resolves if connected to the collection.
    */
   async initialize(): Promise<void> {
-    await this.astraDBClient.createCollection(
-      this.collectionName,
-      this.collectionOptions
-    );
+    if (!this.skipCollectionProvisioning) {
+      await this.astraDBClient.createCollection(
+        this.collectionName,
+        this.collectionOptions
+      );
+    }
     this.collection = await this.astraDBClient.collection(this.collectionName);
     console.debug("Connected to Astra DB collection");
   }
diff --git a/libs/langchain-community/src/vectorstores/tests/astradb.int.test.ts b/libs/langchain-community/src/vectorstores/tests/astradb.int.test.ts
index 16e41f1a473c..c13555ebd22b 100644
--- a/libs/langchain-community/src/vectorstores/tests/astradb.int.test.ts
+++ b/libs/langchain-community/src/vectorstores/tests/astradb.int.test.ts
@@ -163,4 +163,33 @@ describe.skip("AstraDBVectorStore", () => {
       );
     }
   }, 60000);
+
+  test("skipCollectionProvisioning", async () => {
+    let store = new AstraDBVectorStore(new FakeEmbeddings(), {
+      ...astraConfig,
+      skipCollectionProvisioning: true,
+      collectionOptions: undefined,
+    });
+    await store.initialize();
+    try {
+      await store.similaritySearch("test");
+      fail("Should have thrown error");
+      // eslint-disable-next-line @typescript-eslint/no-explicit-any
+    } catch (e: any) {
+      expect(e.message).toContain("'default_keyspace.langchain_test'");
+    }
+    store = new AstraDBVectorStore(new FakeEmbeddings(), {
+      ...astraConfig,
+      skipCollectionProvisioning: false,
+      collectionOptions: {
+        checkExists: false,
+        vector: {
+          dimension: 4,
+          metric: "cosine",
+        },
+      },
+    });
+    await store.initialize();
+    await store.similaritySearch("test");
+  });
 });

From 137a3be855716d33601eb3e93e44d59b423b0533 Mon Sep 17 00:00:00 2001
From: davidfant <david@fant.io>
Date: Mon, 22 Apr 2024 23:16:17 +0100
Subject: [PATCH 07/18] langchain[patch]: Parallelize applyEvaluators used by
 `runOnDataset` (#5127)

* parallelize applyEvaluators

* Add concurrency with AsyncCaller

---------

Co-authored-by: jacoblee93 <jacoblee93@gmail.com>
---
 langchain/src/smith/runner_utils.ts | 85 +++++++++++++++++++----------
 1 file changed, 56 insertions(+), 29 deletions(-)

diff --git a/langchain/src/smith/runner_utils.ts b/langchain/src/smith/runner_utils.ts
index c6d013438250..4377ec7a9523 100644
--- a/langchain/src/smith/runner_utils.ts
+++ b/langchain/src/smith/runner_utils.ts
@@ -10,6 +10,7 @@ import {
 import { LangChainTracer } from "@langchain/core/tracers/tracer_langchain";
 import { BaseTracer } from "@langchain/core/tracers/base";
 import { ChainValues } from "@langchain/core/utils/types";
+import { AsyncCaller } from "@langchain/core/utils/async_caller";
 import {
   Client,
   Example,
@@ -522,46 +523,71 @@ const applyEvaluators = async ({
   runs,
   examples,
   client,
+  maxConcurrency,
 }: {
   evaluation: LoadedEvalConfig;
   runs: Run[];
   examples: Example[];
   client: Client;
-}) => {
+  maxConcurrency: number;
+}): Promise<{
+  [key: string]: {
+    execution_time?: number;
+    run_id: string;
+    feedback: Feedback[];
+  };
+}> => {
   // TODO: Parallelize and/or put in callbacks to speed up evals.
   const { evaluators } = evaluation;
   const progress = new ProgressBar({
     total: examples.length,
     format: "Running Evaluators: {bar} {percentage}% | {value}/{total}\n",
   });
-  const results: Record<
-    string,
-    { run_id: string; execution_time?: number; feedback: Feedback[] }
-  > = {};
-  for (let i = 0; i < runs.length; i += 1) {
-    const run = runs[i];
-    const example = examples[i];
-    const evaluatorResults = await Promise.allSettled(
-      evaluators.map((evaluator) =>
-        client.evaluateRun(run, evaluator, {
-          referenceExample: example,
-          loadChildRuns: false,
-        })
-      )
-    );
-    progress.increment();
-    results[example.id] = {
-      execution_time:
-        run?.end_time && run.start_time
-          ? run.end_time - run.start_time
-          : undefined,
-      feedback: evaluatorResults.map((evalResult) =>
-        evalResult.status === "fulfilled" ? evalResult.value : evalResult.reason
-      ),
-      run_id: run.id,
-    };
-  }
-  return results;
+  const caller = new AsyncCaller({
+    maxConcurrency,
+  });
+  const requests = runs.map(
+    async (
+      run,
+      i
+    ): Promise<{
+      run_id: string;
+      execution_time?: number;
+      feedback: Feedback[];
+    }> =>
+      caller.call(async () => {
+        const evaluatorResults = await Promise.allSettled(
+          evaluators.map((evaluator) =>
+            client.evaluateRun(run, evaluator, {
+              referenceExample: examples[i],
+              loadChildRuns: false,
+            })
+          )
+        );
+        progress.increment();
+        return {
+          execution_time:
+            run?.end_time && run.start_time
+              ? run.end_time - run.start_time
+              : undefined,
+          feedback: evaluatorResults.map((evalResult) =>
+            evalResult.status === "fulfilled"
+              ? evalResult.value
+              : evalResult.reason
+          ),
+          run_id: run.id,
+        };
+      })
+  );
+  const results = await Promise.all(requests);
+
+  return results.reduce(
+    (acc, result, i) => ({
+      ...acc,
+      [examples[i].id]: result,
+    }),
+    {}
+  );
 };
 
 export type EvalResults = {
@@ -733,6 +759,7 @@ export async function runOnDataset(
       runs,
       examples,
       client: testClient,
+      maxConcurrency: testConcurrency,
     });
   }
   const results: EvalResults = {

From 81a5195c742865a2c6fda4a862221a1e2df8c0cd Mon Sep 17 00:00:00 2001
From: Katarina Supe <61758502+katarinasupe@users.noreply.github.com>
Date: Tue, 23 Apr 2024 00:16:32 +0200
Subject: [PATCH 08/18] docs[patch]: Update Memgraph docs (#5171)

* Update Memgraph docs

* Format

---------

Co-authored-by: jacoblee93 <jacoblee93@gmail.com>
---
 .../experimental/graph_databases/memgraph.mdx        | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/docs/core_docs/docs/modules/data_connection/experimental/graph_databases/memgraph.mdx b/docs/core_docs/docs/modules/data_connection/experimental/graph_databases/memgraph.mdx
index 2582b0cc4078..f4e4e10e9bea 100644
--- a/docs/core_docs/docs/modules/data_connection/experimental/graph_databases/memgraph.mdx
+++ b/docs/core_docs/docs/modules/data_connection/experimental/graph_databases/memgraph.mdx
@@ -18,7 +18,7 @@ npm install @langchain/openai neo4j-driver @langchain/community
 
 Memgraph bundles the database along with various analytical tools into distinct
 Docker images. If you're new to Memgraph or you're in a developing stage, we
-recommend using the `memgraph-platform` image. Besides the database, it also
+recommend running Memgraph Platform with Docker Compose. Besides the database, it also
 includes all the tools you might need to analyze your data, such as command-line
 interface [mgconsole](https://memgraph.com/docs/getting-started/cli), web
 interface [Memgraph Lab](https://memgraph.com/docs/data-visualization) and a
@@ -28,8 +28,16 @@ complete set of algorithms within a
 With the Docker running in the background, run the following command in the
 console:
 
+Linux/macOS:
+
 ```bash
-docker run -p 7687:7687 -p 7444:7444 -p 3000:3000 --name memgraph memgraph/memgraph-platform
+curl https://install.memgraph.com | sh
+```
+
+Windows:
+
+```powershell
+iwr https://windows.memgraph.com | iex
 ```
 
 For other options of installation, check the [Getting started guide](https://memgraph.com/docs/getting-started).

From 39bd3c2a247601aa9ef11fe43f1c23134aea8b7d Mon Sep 17 00:00:00 2001
From: Jacob Lee <jacoblee93@gmail.com>
Date: Mon, 22 Apr 2024 15:30:58 -0700
Subject: [PATCH 09/18] community[patch]: Release 0.0.51 (#5176)

---
 libs/langchain-community/package.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libs/langchain-community/package.json b/libs/langchain-community/package.json
index 4ace57e72241..eb46050b7254 100644
--- a/libs/langchain-community/package.json
+++ b/libs/langchain-community/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@langchain/community",
-  "version": "0.0.50",
+  "version": "0.0.51",
   "description": "Third-party integrations for LangChain.js",
   "type": "module",
   "engines": {

From a8e3e2d7ea190ee50baf4531feec94c98883d7f5 Mon Sep 17 00:00:00 2001
From: Brace Sproul <braceasproul@gmail.com>
Date: Mon, 22 Apr 2024 15:41:20 -0700
Subject: [PATCH 10/18] docs[minor]: Make providers index page show all
 integration pkgs (#5175)

* docs[minor]: Make providers index page show all integrations

* cr

* nit
---
 docs/core_docs/.gitignore                     |  6 ++--
 .../docs/integrations/platforms/index.mdx     | 30 +++++++++++++++++++
 docs/core_docs/sidebars.js                    |  7 ++---
 3 files changed, 37 insertions(+), 6 deletions(-)
 create mode 100644 docs/core_docs/docs/integrations/platforms/index.mdx

diff --git a/docs/core_docs/.gitignore b/docs/core_docs/.gitignore
index 3277be5e78ff..310e12e5ff76 100644
--- a/docs/core_docs/.gitignore
+++ b/docs/core_docs/.gitignore
@@ -109,10 +109,12 @@ docs/use_cases/extraction/how_to/examples.md
 docs/use_cases/extraction/how_to/examples.mdx
 docs/modules/model_io/output_parsers/custom.md
 docs/modules/model_io/output_parsers/custom.mdx
-docs/modules/model_io/chat/function_calling.md
-docs/modules/model_io/chat/function_calling.mdx
 docs/modules/memory/chat_messages/custom.md
 docs/modules/memory/chat_messages/custom.mdx
+docs/modules/model_io/chat/response_metadata.md
+docs/modules/model_io/chat/response_metadata.mdx
+docs/modules/model_io/chat/function_calling.md
+docs/modules/model_io/chat/function_calling.mdx
 docs/modules/data_connection/vectorstores/custom.md
 docs/modules/data_connection/vectorstores/custom.mdx
 docs/modules/agents/agent_types/tool_calling.md
diff --git a/docs/core_docs/docs/integrations/platforms/index.mdx b/docs/core_docs/docs/integrations/platforms/index.mdx
new file mode 100644
index 000000000000..3cdd03e83d2e
--- /dev/null
+++ b/docs/core_docs/docs/integrations/platforms/index.mdx
@@ -0,0 +1,30 @@
+---
+sidebar_position: 0
+sidebar_class_name: hidden
+---
+
+# Providers
+
+LangChain integrates with many providers.
+
+## Partner Packages
+
+These providers have standalone `@langchain/{provider}` packages for improved versioning, dependency management and testing.
+
+- [Anthropic](https://www.npmjs.com/package/@langchain/anthropic)
+- [Azure OpenAI](https://www.npmjs.com/package/@langchain/azure-openai)
+- [Cloudflare](https://www.npmjs.com/package/@langchain/cloudflare)
+- [Cohere](https://www.npmjs.com/package/@langchain/cohere)
+- [Exa](https://www.npmjs.com/package/@langchain/exa)
+- [Google GenAI](https://www.npmjs.com/package/@langchain/google-genai)
+- [Google VertexAI](https://www.npmjs.com/package/@langchain/google-vertexai)
+- [Google VertexAI Web](https://www.npmjs.com/package/@langchain/google-vertexai-web)
+- [Groq](https://www.npmjs.com/package/@langchain/groq)
+- [MistralAI](https://www.npmjs.com/package/@langchain/mistralai)
+- [MongoDB](https://www.npmjs.com/package/@langchain/mongodb)
+- [Nomic](https://www.npmjs.com/package/@langchain/nomic)
+- [OpenAI](https://www.npmjs.com/package/@langchain/openai)
+- [Pinecone](https://www.npmjs.com/package/@langchain/pinecone)
+- [Redis](https://www.npmjs.com/package/@langchain/redis)
+- [Weaviate](https://www.npmjs.com/package/@langchain/weaviate)
+- [Yandex](https://www.npmjs.com/package/@langchain/yandex)
diff --git a/docs/core_docs/sidebars.js b/docs/core_docs/sidebars.js
index 01c76b7ebde3..c21b26ea5201 100644
--- a/docs/core_docs/sidebars.js
+++ b/docs/core_docs/sidebars.js
@@ -200,12 +200,11 @@ module.exports = {
     {
       type: "category",
       label: "Providers",
-      collapsed: true,
+      collapsed: false,
       items: [{ type: "autogenerated", dirName: "integrations/platforms" }],
       link: {
-        type: "generated-index",
-        description: "LangChain.js integration providers.",
-        slug: "integrations/platforms",
+        type: "doc",
+        id: "integrations/platforms/index",
       },
     },
     {

From d3339ae12c69444563c891d098f3130b6434f534 Mon Sep 17 00:00:00 2001
From: Jacob Lee <jacoblee93@gmail.com>
Date: Mon, 22 Apr 2024 15:55:08 -0700
Subject: [PATCH 11/18] cohere[patch]: Make CohereRerank extend
 BaseDocumentCompressor (#5177)

* Make Cohere rerank extend BaseDocumentCompressor

* Bump dep
---
 libs/langchain-cohere/package.json  | 2 +-
 libs/langchain-cohere/src/rerank.ts | 4 +++-
 yarn.lock                           | 4 ++--
 3 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/libs/langchain-cohere/package.json b/libs/langchain-cohere/package.json
index e306e02224e0..eff56785e32b 100644
--- a/libs/langchain-cohere/package.json
+++ b/libs/langchain-cohere/package.json
@@ -38,7 +38,7 @@
   "author": "LangChain",
   "license": "MIT",
   "dependencies": {
-    "@langchain/core": "~0.1",
+    "@langchain/core": "~0.1.58",
     "cohere-ai": "^7.9.3"
   },
   "devDependencies": {
diff --git a/libs/langchain-cohere/src/rerank.ts b/libs/langchain-cohere/src/rerank.ts
index 4fbb86a3b8bd..2b29f19dbfb4 100644
--- a/libs/langchain-cohere/src/rerank.ts
+++ b/libs/langchain-cohere/src/rerank.ts
@@ -1,4 +1,5 @@
 import { DocumentInterface } from "@langchain/core/documents";
+import { BaseDocumentCompressor } from "@langchain/core/retrievers/document_compressors";
 import { getEnvironmentVariable } from "@langchain/core/utils/env";
 import { CohereClient } from "cohere-ai";
 
@@ -27,7 +28,7 @@ export interface CohereRerankArgs {
 /**
  * Document compressor that uses `Cohere Rerank API`.
  */
-export class CohereRerank {
+export class CohereRerank extends BaseDocumentCompressor {
   model = "rerank-english-v2.0";
 
   topN = 3;
@@ -37,6 +38,7 @@ export class CohereRerank {
   maxChunksPerDoc: number | undefined;
 
   constructor(fields?: CohereRerankArgs) {
+    super();
     const token = fields?.apiKey ?? getEnvironmentVariable("COHERE_API_KEY");
     if (!token) {
       throw new Error("No API key provided for CohereRerank.");
diff --git a/yarn.lock b/yarn.lock
index dfa1792aa94d..67b2d17cfde0 100644
--- a/yarn.lock
+++ b/yarn.lock
@@ -8895,7 +8895,7 @@ __metadata:
   resolution: "@langchain/cohere@workspace:libs/langchain-cohere"
   dependencies:
     "@jest/globals": ^29.5.0
-    "@langchain/core": ~0.1
+    "@langchain/core": ~0.1.58
     "@langchain/scripts": ~0.0
     "@swc/core": ^1.3.90
     "@swc/jest": ^0.2.29
@@ -9346,7 +9346,7 @@ __metadata:
   languageName: unknown
   linkType: soft
 
-"@langchain/core@workspace:*, @langchain/core@workspace:langchain-core, @langchain/core@~0.1, @langchain/core@~0.1.5, @langchain/core@~0.1.56, @langchain/core@~0.1.9":
+"@langchain/core@workspace:*, @langchain/core@workspace:langchain-core, @langchain/core@~0.1, @langchain/core@~0.1.5, @langchain/core@~0.1.56, @langchain/core@~0.1.58, @langchain/core@~0.1.9":
   version: 0.0.0-use.local
   resolution: "@langchain/core@workspace:langchain-core"
   dependencies:

From c35fd25628a483611c8425241aba462578436fb8 Mon Sep 17 00:00:00 2001
From: Jacob Lee <jacoblee93@gmail.com>
Date: Mon, 22 Apr 2024 15:58:25 -0700
Subject: [PATCH 12/18] cohere[patch]: Release 0.0.8 (#5178)

---
 libs/langchain-cohere/package.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libs/langchain-cohere/package.json b/libs/langchain-cohere/package.json
index eff56785e32b..31f156982f1e 100644
--- a/libs/langchain-cohere/package.json
+++ b/libs/langchain-cohere/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@langchain/cohere",
-  "version": "0.0.7",
+  "version": "0.0.8",
   "description": "Cohere integration for LangChain.js",
   "type": "module",
   "engines": {

From 6407078ce6bfa4989c45d7f1c96df9ee8baea5e5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Karol=20Fabja=C5=84czuk?= <karol-f@users.noreply.github.com>
Date: Tue, 23 Apr 2024 01:26:18 +0200
Subject: [PATCH 13/18] langchain[patch]: Add possibility to rerank retrieved
 docs in ParentDocumentRetriever and MultiQueryRetriever (#4738)

* feat: add Document Compressor to chain to allow rerank

* feat: add example

* fix: typo

* feat: rerank child documents instead of parent ones

* feat: improve example

* feat: add compressor to Multi Query Retriever

* feat: remove example

* feat: remove example

* fix: missing docs

* feat: remove default value of threshold score to adjust to new Cohere models better

* feat: make filtering optional and configurable

* docs: add examples

* fix: type checking so that the build passes

* Fix lint

---------

Co-authored-by: jacoblee93 <jacoblee93@gmail.com>
---
 .../retrievers/parent-document-retriever.mdx  | 10 ++
 .../parent_document_retriever_rerank.ts       | 93 +++++++++++++++++++
 langchain/src/retrievers/multi_query.ts       | 42 +++++++--
 langchain/src/retrievers/parent_document.ts   | 22 ++++-
 4 files changed, 157 insertions(+), 10 deletions(-)
 create mode 100644 examples/src/retrievers/parent_document_retriever_rerank.ts

diff --git a/docs/core_docs/docs/modules/data_connection/retrievers/parent-document-retriever.mdx b/docs/core_docs/docs/modules/data_connection/retrievers/parent-document-retriever.mdx
index 6233ce57896f..a8855a2edbaa 100644
--- a/docs/core_docs/docs/modules/data_connection/retrievers/parent-document-retriever.mdx
+++ b/docs/core_docs/docs/modules/data_connection/retrievers/parent-document-retriever.mdx
@@ -6,6 +6,7 @@ import CodeBlock from "@theme/CodeBlock";
 import Example from "@examples/retrievers/parent_document_retriever.ts";
 import ExampleWithScoreThreshold from "@examples/retrievers/parent_document_retriever_score_threshold.ts";
 import ExampleWithChunkHeader from "@examples/retrievers/parent_document_retriever_chunk_header.ts";
+import ExampleWithRerank from "@examples/retrievers/parent_document_retriever_rerank.ts";
 
 # Parent Document Retriever
 
@@ -50,3 +51,12 @@ Tagging each document with metadata is a solution if you know what to filter aga
 This is particularly important if you have several fine-grained child chunks that need to be correctly retrieved from the vector store.
 
 <CodeBlock language="typescript">{ExampleWithChunkHeader}</CodeBlock>
+
+## With Reranking
+
+When many documents from the vector store are passed to an LLM, the final answer may draw on information from
+irrelevant chunks, making it less precise and sometimes incorrect. Passing many irrelevant documents also makes
+each call more expensive.
+So there are two reasons to use reranking: precision and cost.
+
+<CodeBlock language="typescript">{ExampleWithRerank}</CodeBlock>
diff --git a/examples/src/retrievers/parent_document_retriever_rerank.ts b/examples/src/retrievers/parent_document_retriever_rerank.ts
new file mode 100644
index 000000000000..bce20352a4f4
--- /dev/null
+++ b/examples/src/retrievers/parent_document_retriever_rerank.ts
@@ -0,0 +1,93 @@
+import { OpenAIEmbeddings } from "@langchain/openai";
+import { CohereRerank } from "@langchain/cohere";
+import { HNSWLib } from "@langchain/community/vectorstores/hnswlib";
+import { InMemoryStore } from "langchain/storage/in_memory";
+import {
+  ParentDocumentRetriever,
+  type SubDocs,
+} from "langchain/retrievers/parent_document";
+import { RecursiveCharacterTextSplitter } from "langchain/text_splitter";
+
+// init Cohere Rerank. Remember to add COHERE_API_KEY to your .env
+const reranker = new CohereRerank({
+  topN: 50,
+  model: "rerank-multilingual-v2.0",
+});
+
+export function documentCompressorFiltering({
+  relevanceScore,
+}: { relevanceScore?: number } = {}) {
+  return (docs: SubDocs) => {
+    let outputDocs = docs;
+
+    if (relevanceScore) {
+      const docsRelevanceScoreValues = docs.map(
+        (doc) => doc?.metadata?.relevanceScore
+      );
+      outputDocs = docs.filter(
+        (_doc, index) =>
+          (docsRelevanceScoreValues?.[index] || 1) >= relevanceScore
+      );
+    }
+
+    return outputDocs;
+  };
+}
+
+const splitter = new RecursiveCharacterTextSplitter({
+  chunkSize: 500,
+  chunkOverlap: 0,
+});
+
+const jimDocs = await splitter.createDocuments([`Jim favorite color is blue.`]);
+
+const pamDocs = await splitter.createDocuments([`Pam favorite color is red.`]);
+
+const vectorstore = await HNSWLib.fromDocuments([], new OpenAIEmbeddings());
+const docstore = new InMemoryStore();
+
+const retriever = new ParentDocumentRetriever({
+  vectorstore,
+  docstore,
+  // Very small chunks for demo purposes.
+  // Use a bigger chunk size for serious use-cases.
+  childSplitter: new RecursiveCharacterTextSplitter({
+    chunkSize: 10,
+    chunkOverlap: 0,
+  }),
+  childK: 50,
+  parentK: 5,
+  // We add Reranker
+  documentCompressor: reranker,
+  documentCompressorFilteringFn: documentCompressorFiltering({
+    relevanceScore: 0.3,
+  }),
+});
+
+const docs = jimDocs.concat(pamDocs);
+await retriever.addDocuments(docs);
+
+// This searches the vector store and returns documents for the LLM that are already reranked and sorted,
+// keeping only those that meet the minimum relevance score
+const retrievedDocs = await retriever.getRelevantDocuments(
+  "What is Pam's favorite color?"
+);
+
+// Pam's favorite color is returned first!
+console.log(JSON.stringify(retrievedDocs, null, 2));
+/*
+  [
+    {
+      "pageContent": "Pam favorite color is red.",
+      "metadata": {
+        "relevanceScore": 0.9,
+        "loc": {
+          "lines": {
+            "from": 1,
+            "to": 1
+          }
+        }
+      }
+    }
+  ]
+*/
diff --git a/langchain/src/retrievers/multi_query.ts b/langchain/src/retrievers/multi_query.ts
index 085ac2ef82b6..14d38f628de7 100644
--- a/langchain/src/retrievers/multi_query.ts
+++ b/langchain/src/retrievers/multi_query.ts
@@ -9,11 +9,15 @@ import { BaseOutputParser } from "@langchain/core/output_parsers";
 import { PromptTemplate, BasePromptTemplate } from "@langchain/core/prompts";
 import { CallbackManagerForRetrieverRun } from "@langchain/core/callbacks/manager";
 import { LLMChain } from "../chains/llm_chain.js";
+import type { BaseDocumentCompressor } from "./document_compressors/index.js";
 
 interface LineList {
   lines: string[];
 }
 
+// eslint-disable-next-line @typescript-eslint/no-explicit-any
+export type MultiDocs = Document<Record<string, any>>[];
+
 class LineListOutputParser extends BaseOutputParser<LineList> {
   static lc_name() {
     return "LineListOutputParser";
@@ -66,6 +70,8 @@ export interface MultiQueryRetrieverInput extends BaseRetrieverInput {
   llmChain: LLMChain<LineList>;
   queryCount?: number;
   parserKey?: string;
+  documentCompressor?: BaseDocumentCompressor | undefined;
+  documentCompressorFilteringFn?: (docs: MultiDocs) => MultiDocs;
 }
 
 /**
@@ -96,12 +102,18 @@ export class MultiQueryRetriever extends BaseRetriever {
 
   private parserKey = "lines";
 
+  documentCompressor: BaseDocumentCompressor | undefined;
+
+  documentCompressorFilteringFn?: MultiQueryRetrieverInput["documentCompressorFilteringFn"];
+
   constructor(fields: MultiQueryRetrieverInput) {
     super(fields);
     this.retriever = fields.retriever;
     this.llmChain = fields.llmChain;
     this.queryCount = fields.queryCount ?? this.queryCount;
     this.parserKey = fields.parserKey ?? this.parserKey;
+    this.documentCompressor = fields.documentCompressor;
+    this.documentCompressorFilteringFn = fields.documentCompressorFilteringFn;
   }
 
   static fromLLM(
@@ -145,13 +157,15 @@ export class MultiQueryRetriever extends BaseRetriever {
     runManager?: CallbackManagerForRetrieverRun
   ): Promise<Document[]> {
     const documents: Document[] = [];
-    for (const query of queries) {
-      const docs = await this.retriever.getRelevantDocuments(
-        query,
-        runManager?.getChild()
-      );
-      documents.push(...docs);
-    }
+    await Promise.all(
+      queries.map(async (query) => {
+        const docs = await this.retriever.getRelevantDocuments(
+          query,
+          runManager?.getChild()
+        );
+        documents.push(...docs);
+      })
+    );
     return documents;
   }
 
@@ -177,6 +191,18 @@ export class MultiQueryRetriever extends BaseRetriever {
     const queries = await this._generateQueries(question, runManager);
     const documents = await this._retrieveDocuments(queries, runManager);
     const uniqueDocuments = this._uniqueUnion(documents);
-    return uniqueDocuments;
+
+    let outputDocs = uniqueDocuments;
+    if (this.documentCompressor && uniqueDocuments.length) {
+      outputDocs = await this.documentCompressor.compressDocuments(
+        uniqueDocuments,
+        question
+      );
+      if (this.documentCompressorFilteringFn) {
+        outputDocs = this.documentCompressorFilteringFn(outputDocs);
+      }
+    }
+
+    return outputDocs;
   }
 }
diff --git a/langchain/src/retrievers/parent_document.ts b/langchain/src/retrievers/parent_document.ts
index eb05475bfc04..ca06d4b760ea 100644
--- a/langchain/src/retrievers/parent_document.ts
+++ b/langchain/src/retrievers/parent_document.ts
@@ -5,6 +5,7 @@ import {
   type VectorStoreRetrieverInterface,
 } from "@langchain/core/vectorstores";
 import { Document } from "@langchain/core/documents";
+import type { BaseDocumentCompressor } from "./document_compressors/index.js";
 import {
   TextSplitter,
   TextSplitterChunkHeaderOptions,
@@ -14,6 +15,9 @@ import {
   type MultiVectorRetrieverInput,
 } from "./multi_vector.js";
 
+// eslint-disable-next-line @typescript-eslint/no-explicit-any
+export type SubDocs = Document<Record<string, any>>[];
+
 /**
  * Interface for the fields required to initialize a
  * ParentDocumentRetriever instance.
@@ -26,6 +30,8 @@ export type ParentDocumentRetrieverFields = MultiVectorRetrieverInput & {
    * the `.similaritySearch` method of the vectorstore.
    */
   childDocumentRetriever?: VectorStoreRetrieverInterface<VectorStoreInterface>;
+  documentCompressor?: BaseDocumentCompressor | undefined;
+  documentCompressorFilteringFn?: (docs: SubDocs) => SubDocs;
 };
 
 /**
@@ -81,6 +87,10 @@ export class ParentDocumentRetriever extends MultiVectorRetriever {
     | VectorStoreRetrieverInterface<VectorStoreInterface>
     | undefined;
 
+  documentCompressor: BaseDocumentCompressor | undefined;
+
+  documentCompressorFilteringFn?: ParentDocumentRetrieverFields["documentCompressorFilteringFn"];
+
   constructor(fields: ParentDocumentRetrieverFields) {
     super(fields);
     this.vectorstore = fields.vectorstore;
@@ -90,17 +100,25 @@ export class ParentDocumentRetriever extends MultiVectorRetriever {
     this.childK = fields.childK;
     this.parentK = fields.parentK;
     this.childDocumentRetriever = fields.childDocumentRetriever;
+    this.documentCompressor = fields.documentCompressor;
+    this.documentCompressorFilteringFn = fields.documentCompressorFilteringFn;
   }
 
   async _getRelevantDocuments(query: string): Promise<Document[]> {
-    // eslint-disable-next-line @typescript-eslint/no-explicit-any
-    let subDocs: Document<Record<string, any>>[] = [];
+    let subDocs: SubDocs = [];
     if (this.childDocumentRetriever) {
       subDocs = await this.childDocumentRetriever.getRelevantDocuments(query);
     } else {
       subDocs = await this.vectorstore.similaritySearch(query, this.childK);
     }
 
+    if (this.documentCompressor && subDocs.length) {
+      subDocs = await this.documentCompressor.compressDocuments(subDocs, query);
+      if (this.documentCompressorFilteringFn) {
+        subDocs = this.documentCompressorFilteringFn(subDocs);
+      }
+    }
+
     // Maintain order
     const parentDocIds: string[] = [];
     for (const doc of subDocs) {

From e5b03e5ff13f87bdec7dbb2601a212b2fd13f230 Mon Sep 17 00:00:00 2001
From: jacoblee93 <jacoblee93@gmail.com>
Date: Mon, 22 Apr 2024 16:31:11 -0700
Subject: [PATCH 14/18] Release 0.1.35

---
 langchain/package.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/langchain/package.json b/langchain/package.json
index 79770e815452..d3afe39b6231 100644
--- a/langchain/package.json
+++ b/langchain/package.json
@@ -1,6 +1,6 @@
 {
   "name": "langchain",
-  "version": "0.1.34",
+  "version": "0.1.35",
   "description": "Typescript bindings for langchain",
   "type": "module",
   "engines": {

From 8b7865952655a7b730f799421249cab48cc16f08 Mon Sep 17 00:00:00 2001
From: Mauricio Cirelli <cirelli.mauricio@gmail.com>
Date: Mon, 22 Apr 2024 20:50:04 -0300
Subject: [PATCH 15/18] core[patch]: Passing the input object to the Retry
 Attempt Handler. (#5081)

* Passing the input object to the Retry Attempt Handler.

* Adds test

* Lint

* Passing just the first input that caused exception on batch calls.

* Fixing test case and formatting.

* Fixing lint issue.

---------

Co-authored-by: Mauricio Cirelli <mauricio.cirelli@quantatec.com.br>
Co-authored-by: jacoblee93 <jacoblee93@gmail.com>
---
 langchain-core/src/runnables/base.ts          | 20 ++++++--
 .../runnables/tests/runnable_retry.test.ts    | 46 +++++++++++++++++++
 2 files changed, 61 insertions(+), 5 deletions(-)

diff --git a/langchain-core/src/runnables/base.ts b/langchain-core/src/runnables/base.ts
index 103c4fc2eb82..181831391365 100644
--- a/langchain-core/src/runnables/base.ts
+++ b/langchain-core/src/runnables/base.ts
@@ -61,8 +61,13 @@ export type RunnableLike<RunInput = any, RunOutput = any> =
   | RunnableFunc<RunInput, RunOutput>
   | RunnableMapLike<RunInput, RunOutput>;
 
-// eslint-disable-next-line @typescript-eslint/no-explicit-any
-export type RunnableRetryFailedAttemptHandler = (error: any) => any;
+export type RunnableRetryFailedAttemptHandler = (
+  // eslint-disable-next-line @typescript-eslint/no-explicit-any
+  error: any,
+  // eslint-disable-next-line @typescript-eslint/no-explicit-any
+  input: any
+  // eslint-disable-next-line @typescript-eslint/no-explicit-any
+) => any;
 
 // eslint-disable-next-line @typescript-eslint/no-explicit-any
 export function _coerceToDict(value: any, defaultKey: string) {
@@ -1268,7 +1273,7 @@ export class RunnableRetry<
   protected maxAttemptNumber = 3;
 
   // eslint-disable-next-line @typescript-eslint/no-explicit-any
-  onFailedAttempt?: RunnableRetryFailedAttemptHandler = () => {};
+  onFailedAttempt: RunnableRetryFailedAttemptHandler = () => {};
 
   constructor(
     fields: RunnableBindingArgs<RunInput, RunOutput, CallOptions> & {
@@ -1303,7 +1308,8 @@ export class RunnableRetry<
           this._patchConfigForRetry(attemptNumber, config, runManager)
         ),
       {
-        onFailedAttempt: this.onFailedAttempt,
+        // eslint-disable-next-line @typescript-eslint/no-explicit-any
+        onFailedAttempt: (error: any) => this.onFailedAttempt(error, input),
         retries: Math.max(this.maxAttemptNumber - 1, 0),
         randomize: true,
       }
@@ -1362,6 +1368,8 @@ export class RunnableRetry<
             if (result instanceof Error) {
               if (firstException === undefined) {
                 firstException = result;
+                // eslint-disable-next-line @typescript-eslint/no-explicit-any
+                (firstException as any).input = remainingInputs[i];
               }
             }
             resultsMap[resultMapIndex.toString()] = result;
@@ -1372,7 +1380,9 @@ export class RunnableRetry<
           return results;
         },
         {
-          onFailedAttempt: this.onFailedAttempt,
+          // eslint-disable-next-line @typescript-eslint/no-explicit-any
+          onFailedAttempt: (error: any) =>
+            this.onFailedAttempt(error, error.input),
           retries: Math.max(this.maxAttemptNumber - 1, 0),
           randomize: true,
         }
diff --git a/langchain-core/src/runnables/tests/runnable_retry.test.ts b/langchain-core/src/runnables/tests/runnable_retry.test.ts
index a16f1a7b1cc7..64d0875f56cc 100644
--- a/langchain-core/src/runnables/tests/runnable_retry.test.ts
+++ b/langchain-core/src/runnables/tests/runnable_retry.test.ts
@@ -21,6 +21,28 @@ test("RunnableRetry invoke", async () => {
   expect(result).toEqual(3);
 });
 
+test("RunnableRetry invoke with a failed attempt handler", async () => {
+  let attemptCount = 0;
+  const runnable = new RunnableLambda({
+    func: (_thing: unknown) => {
+      attemptCount += 1;
+      if (attemptCount < 3) {
+        throw new Error("TEST ERROR");
+      } else {
+        return attemptCount;
+      }
+    },
+  });
+  const runnableRetry = runnable.withRetry({
+    onFailedAttempt: (error, input) => {
+      expect(error.message).toBe("TEST ERROR");
+      expect(input).toBe("test");
+    },
+  });
+  const result = await runnableRetry.invoke("test");
+  expect(result).toEqual(3);
+});
+
 test("RunnableRetry batch with thrown errors", async () => {
   const runnable = new RunnableLambda({
     func: (_thing: unknown) => {
@@ -79,3 +101,27 @@ test("RunnableRetry batch should not retry successful requests", async () => {
   expect(attemptCount).toEqual(5);
   expect(result.sort()).toEqual([3, 4, 5]);
 });
+
+test("RunnableRetry batch with an onFailedAttempt handler", async () => {
+  let attemptCount = 0;
+  const runnable = new RunnableLambda({
+    func: (_thing: unknown) => {
+      attemptCount += 1;
+      if (attemptCount < 3) {
+        throw new Error("TEST ERROR");
+      } else {
+        return attemptCount;
+      }
+    },
+  });
+  const runnableRetry = runnable.withRetry({
+    stopAfterAttempt: 2,
+    onFailedAttempt: (error, input) => {
+      expect(error.message).toEqual("TEST ERROR");
+      expect(input).toEqual("test1");
+    },
+  });
+  const result = await runnableRetry.batch(["test1", "test2", "test3"]);
+  expect(attemptCount).toEqual(5);
+  expect(result.sort()).toEqual([3, 4, 5]);
+});

From dd46dc9db7ed778c8f9eb5878bbc9fadb7edc286 Mon Sep 17 00:00:00 2001
From: Huaichen <Huaichen@gmail.com>
Date: Mon, 22 Apr 2024 19:58:04 -0400
Subject: [PATCH 16/18] core[patch]: Fix "Bad control character in string
 literal" (#5160)

* Fixed "Bad control character in string literal"

The error "Bad control character in string literal" occurs when calling JSON.parse(json). This typically happens because the JSON string contains control characters that are not allowed, such as newline characters. Any control characters in the JSON string should be escaped before parsing it using JSON.parse().

* Added a test for PR#5135

Added a test for PR #5135. Added a few newlines to the JSON text, which would result in a "Bad control character in string literal" error before the fix.

* Format

---------

Co-authored-by: jacoblee93 <jacoblee93@gmail.com>
---
 .../src/output_parsers/structured.ts          | 10 +++++++++-
 .../output_parsers/tests/structured.test.ts   | 20 +++++++++++++++++++
 2 files changed, 29 insertions(+), 1 deletion(-)

diff --git a/langchain-core/src/output_parsers/structured.ts b/langchain-core/src/output_parsers/structured.ts
index de04cbef0895..e9ebbc69dc74 100644
--- a/langchain-core/src/output_parsers/structured.ts
+++ b/langchain-core/src/output_parsers/structured.ts
@@ -104,7 +104,15 @@ ${JSON.stringify(zodToJsonSchema(this.schema))}
       const json = text.includes("```")
         ? text.trim().split(/```(?:json)?/)[1]
         : text.trim();
-      return await this.schema.parseAsync(JSON.parse(json));
+
+      const escapedJson = json
+        .replace(/"([^"\\]*(\\.[^"\\]*)*)"/g, (_match, capturedGroup) => {
+          const escapedInsideQuotes = capturedGroup.replace(/\n/g, "\\n");
+          return `"${escapedInsideQuotes}"`;
+        })
+        .replace(/\n/g, "");
+
+      return await this.schema.parseAsync(JSON.parse(escapedJson));
     } catch (e) {
       throw new OutputParserException(
         `Failed to parse. Text: "${text}". Error: ${e}`,
diff --git a/langchain-core/src/output_parsers/tests/structured.test.ts b/langchain-core/src/output_parsers/tests/structured.test.ts
index f0c9f68fce76..433431cd44b5 100644
--- a/langchain-core/src/output_parsers/tests/structured.test.ts
+++ b/langchain-core/src/output_parsers/tests/structured.test.ts
@@ -198,3 +198,23 @@ Here is the JSON Schema instance your output must adhere to. Include the enclosi
 "
 `);
 });
+
+test("StructuredOutputParser.fromZodSchema parsing newlines", async () => {
+  const parser = StructuredOutputParser.fromZodSchema(
+    z
+      .object({
+        url: z.string().describe("A link to the resource"),
+        summary: z.string().describe("A summary"),
+      })
+      .describe("Only One object")
+  );
+
+  expect(
+    await parser.parse(
+      '```\n{"url": "value", "summary": "line1,\nline2,\nline3"}```'
+    )
+  ).toEqual({
+    url: "value",
+    summary: "line1,\nline2,\nline3",
+  });
+});

From 76193ec0db1cebd55ea49456b373bae7c83838eb Mon Sep 17 00:00:00 2001
From: Jacob Lee <jacoblee93@gmail.com>
Date: Mon, 22 Apr 2024 17:11:07 -0700
Subject: [PATCH 17/18] core[patch]: 0.1.59 (#5181)

---
 langchain-core/package.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/langchain-core/package.json b/langchain-core/package.json
index 25e8a4b7436b..8f1a0019f368 100644
--- a/langchain-core/package.json
+++ b/langchain-core/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@langchain/core",
-  "version": "0.1.58",
+  "version": "0.1.59",
   "description": "Core LangChain.js abstractions and schemas",
   "type": "module",
   "engines": {

From b9d86b16bd0788144f8f27cf5d18960c43da49af Mon Sep 17 00:00:00 2001
From: Jacob Lee <jacoblee93@gmail.com>
Date: Mon, 22 Apr 2024 23:25:50 -0700
Subject: [PATCH 18/18] Init text splitter package (#5183)

---
 langchain/package.json                        |   3 +-
 langchain/src/text_splitter.ts                | 804 +-----------------
 libs/langchain-textsplitters/.eslintrc.cjs    |  66 ++
 libs/langchain-textsplitters/.gitignore       |   7 +
 libs/langchain-textsplitters/.prettierrc      |  19 +
 libs/langchain-textsplitters/.release-it.json |  10 +
 libs/langchain-textsplitters/LICENSE          |  21 +
 libs/langchain-textsplitters/README.md        |  53 ++
 libs/langchain-textsplitters/jest.config.cjs  |  21 +
 libs/langchain-textsplitters/jest.env.cjs     |  12 +
 .../langchain.config.js                       |  22 +
 libs/langchain-textsplitters/package.json     |  90 ++
 .../scripts/jest-setup-after-env.js           |   3 +
 libs/langchain-textsplitters/src/index.ts     |   1 +
 .../src/tests/code_text_splitter.test.ts      | 318 +++++++
 .../src/tests/text_splitter.test.ts           | 514 +++++++++++
 .../src/text_splitter.ts                      | 803 +++++++++++++++++
 .../langchain-textsplitters/tsconfig.cjs.json |   8 +
 libs/langchain-textsplitters/tsconfig.json    |  23 +
 yarn.lock                                     |  41 +
 20 files changed, 2035 insertions(+), 804 deletions(-)
 create mode 100644 libs/langchain-textsplitters/.eslintrc.cjs
 create mode 100644 libs/langchain-textsplitters/.gitignore
 create mode 100644 libs/langchain-textsplitters/.prettierrc
 create mode 100644 libs/langchain-textsplitters/.release-it.json
 create mode 100644 libs/langchain-textsplitters/LICENSE
 create mode 100644 libs/langchain-textsplitters/README.md
 create mode 100644 libs/langchain-textsplitters/jest.config.cjs
 create mode 100644 libs/langchain-textsplitters/jest.env.cjs
 create mode 100644 libs/langchain-textsplitters/langchain.config.js
 create mode 100644 libs/langchain-textsplitters/package.json
 create mode 100644 libs/langchain-textsplitters/scripts/jest-setup-after-env.js
 create mode 100644 libs/langchain-textsplitters/src/index.ts
 create mode 100644 libs/langchain-textsplitters/src/tests/code_text_splitter.test.ts
 create mode 100644 libs/langchain-textsplitters/src/tests/text_splitter.test.ts
 create mode 100644 libs/langchain-textsplitters/src/text_splitter.ts
 create mode 100644 libs/langchain-textsplitters/tsconfig.cjs.json
 create mode 100644 libs/langchain-textsplitters/tsconfig.json

diff --git a/langchain/package.json b/langchain/package.json
index d3afe39b6231..6f4928e9e166 100644
--- a/langchain/package.json
+++ b/langchain/package.json
@@ -1190,7 +1190,7 @@
   "homepage": "https://github.com/langchain-ai/langchainjs/tree/main/langchain/",
   "scripts": {
     "build": "yarn run build:deps && yarn clean && yarn build:esm && yarn build:cjs && yarn build:scripts",
-    "build:deps": "yarn run turbo:command build --filter=@langchain/openai --filter=@langchain/community --concurrency=1",
+    "build:deps": "yarn run turbo:command build --filter=@langchain/openai --filter=@langchain/community --filter=@langchain/textsplitters --concurrency=1",
     "build:esm": "NODE_OPTIONS=--max-old-space-size=4096 tsc --outDir dist/ && rimraf dist/tests dist/**/tests",
     "build:cjs": "NODE_OPTIONS=--max-old-space-size=4096 tsc --outDir dist-cjs/ -p tsconfig.cjs.json && yarn move-cjs-to-dist && rimraf dist-cjs",
     "build:watch": "yarn create-entrypoints && tsc --outDir dist/ --watch",
@@ -1515,6 +1515,7 @@
     "@langchain/community": "~0.0.47",
     "@langchain/core": "~0.1.56",
     "@langchain/openai": "~0.0.28",
+    "@langchain/textsplitters": "~0.0.0",
     "binary-extensions": "^2.2.0",
     "js-tiktoken": "^1.0.7",
     "js-yaml": "^4.1.0",
diff --git a/langchain/src/text_splitter.ts b/langchain/src/text_splitter.ts
index 095ea3e796ca..ab5479bce0ee 100644
--- a/langchain/src/text_splitter.ts
+++ b/langchain/src/text_splitter.ts
@@ -1,803 +1 @@
-import type * as tiktoken from "js-tiktoken";
-import { Document, BaseDocumentTransformer } from "@langchain/core/documents";
-import { getEncoding } from "@langchain/core/utils/tiktoken";
-
-export interface TextSplitterParams {
-  chunkSize: number;
-  chunkOverlap: number;
-  keepSeparator: boolean;
-  lengthFunction?:
-    | ((text: string) => number)
-    | ((text: string) => Promise<number>);
-}
-
-export type TextSplitterChunkHeaderOptions = {
-  chunkHeader?: string;
-  chunkOverlapHeader?: string;
-  appendChunkOverlapHeader?: boolean;
-};
-
-export abstract class TextSplitter
-  extends BaseDocumentTransformer
-  implements TextSplitterParams
-{
-  lc_namespace = ["langchain", "document_transformers", "text_splitters"];
-
-  chunkSize = 1000;
-
-  chunkOverlap = 200;
-
-  keepSeparator = false;
-
-  lengthFunction:
-    | ((text: string) => number)
-    | ((text: string) => Promise<number>);
-
-  constructor(fields?: Partial<TextSplitterParams>) {
-    super(fields);
-    this.chunkSize = fields?.chunkSize ?? this.chunkSize;
-    this.chunkOverlap = fields?.chunkOverlap ?? this.chunkOverlap;
-    this.keepSeparator = fields?.keepSeparator ?? this.keepSeparator;
-    this.lengthFunction =
-      fields?.lengthFunction ?? ((text: string) => text.length);
-    if (this.chunkOverlap >= this.chunkSize) {
-      throw new Error("Cannot have chunkOverlap >= chunkSize");
-    }
-  }
-
-  async transformDocuments(
-    documents: Document[],
-    chunkHeaderOptions: TextSplitterChunkHeaderOptions = {}
-  ): Promise<Document[]> {
-    return this.splitDocuments(documents, chunkHeaderOptions);
-  }
-
-  abstract splitText(text: string): Promise<string[]>;
-
-  protected splitOnSeparator(text: string, separator: string): string[] {
-    let splits;
-    if (separator) {
-      if (this.keepSeparator) {
-        const regexEscapedSeparator = separator.replace(
-          /[/\-\\^$*+?.()|[\]{}]/g,
-          "\\$&"
-        );
-        splits = text.split(new RegExp(`(?=${regexEscapedSeparator})`));
-      } else {
-        splits = text.split(separator);
-      }
-    } else {
-      splits = text.split("");
-    }
-    return splits.filter((s) => s !== "");
-  }
-
-  async createDocuments(
-    texts: string[],
-    // eslint-disable-next-line @typescript-eslint/no-explicit-any
-    metadatas: Record<string, any>[] = [],
-    chunkHeaderOptions: TextSplitterChunkHeaderOptions = {}
-  ): Promise<Document[]> {
-    // if no metadata is provided, we create an empty one for each text
-    // eslint-disable-next-line @typescript-eslint/no-explicit-any
-    const _metadatas: Record<string, any>[] =
-      metadatas.length > 0
-        ? metadatas
-        : [...Array(texts.length)].map(() => ({}));
-    const {
-      chunkHeader = "",
-      chunkOverlapHeader = "(cont'd) ",
-      appendChunkOverlapHeader = false,
-    } = chunkHeaderOptions;
-    const documents = new Array<Document>();
-    for (let i = 0; i < texts.length; i += 1) {
-      const text = texts[i];
-      let lineCounterIndex = 1;
-      let prevChunk = null;
-      let indexPrevChunk = -1;
-      for (const chunk of await this.splitText(text)) {
-        let pageContent = chunkHeader;
-
-        // we need to count the \n that are in the text before getting removed by the splitting
-        const indexChunk = text.indexOf(chunk, indexPrevChunk + 1);
-        if (prevChunk === null) {
-          const newLinesBeforeFirstChunk = this.numberOfNewLines(
-            text,
-            0,
-            indexChunk
-          );
-          lineCounterIndex += newLinesBeforeFirstChunk;
-        } else {
-          const indexEndPrevChunk =
-            indexPrevChunk + (await this.lengthFunction(prevChunk));
-          if (indexEndPrevChunk < indexChunk) {
-            const numberOfIntermediateNewLines = this.numberOfNewLines(
-              text,
-              indexEndPrevChunk,
-              indexChunk
-            );
-            lineCounterIndex += numberOfIntermediateNewLines;
-          } else if (indexEndPrevChunk > indexChunk) {
-            const numberOfIntermediateNewLines = this.numberOfNewLines(
-              text,
-              indexChunk,
-              indexEndPrevChunk
-            );
-            lineCounterIndex -= numberOfIntermediateNewLines;
-          }
-          if (appendChunkOverlapHeader) {
-            pageContent += chunkOverlapHeader;
-          }
-        }
-        const newLinesCount = this.numberOfNewLines(chunk);
-
-        const loc =
-          _metadatas[i].loc && typeof _metadatas[i].loc === "object"
-            ? { ..._metadatas[i].loc }
-            : {};
-        loc.lines = {
-          from: lineCounterIndex,
-          to: lineCounterIndex + newLinesCount,
-        };
-        const metadataWithLinesNumber = {
-          ..._metadatas[i],
-          loc,
-        };
-
-        pageContent += chunk;
-        documents.push(
-          new Document({
-            pageContent,
-            metadata: metadataWithLinesNumber,
-          })
-        );
-        lineCounterIndex += newLinesCount;
-        prevChunk = chunk;
-        indexPrevChunk = indexChunk;
-      }
-    }
-    return documents;
-  }
-
-  private numberOfNewLines(text: string, start?: number, end?: number) {
-    const textSection = text.slice(start, end);
-    return (textSection.match(/\n/g) || []).length;
-  }
-
-  async splitDocuments(
-    documents: Document[],
-    chunkHeaderOptions: TextSplitterChunkHeaderOptions = {}
-  ): Promise<Document[]> {
-    const selectedDocuments = documents.filter(
-      (doc) => doc.pageContent !== undefined
-    );
-    const texts = selectedDocuments.map((doc) => doc.pageContent);
-    const metadatas = selectedDocuments.map((doc) => doc.metadata);
-    return this.createDocuments(texts, metadatas, chunkHeaderOptions);
-  }
-
-  private joinDocs(docs: string[], separator: string): string | null {
-    const text = docs.join(separator).trim();
-    return text === "" ? null : text;
-  }
-
-  async mergeSplits(splits: string[], separator: string): Promise<string[]> {
-    const docs: string[] = [];
-    const currentDoc: string[] = [];
-    let total = 0;
-    for (const d of splits) {
-      const _len = await this.lengthFunction(d);
-      if (
-        total + _len + currentDoc.length * separator.length >
-        this.chunkSize
-      ) {
-        if (total > this.chunkSize) {
-          console.warn(
-            `Created a chunk of size ${total}, +
-which is longer than the specified ${this.chunkSize}`
-          );
-        }
-        if (currentDoc.length > 0) {
-          const doc = this.joinDocs(currentDoc, separator);
-          if (doc !== null) {
-            docs.push(doc);
-          }
-          // Keep on popping if:
-          // - we have a larger chunk than in the chunk overlap
-          // - or if we still have any chunks and the length is long
-          while (
-            total > this.chunkOverlap ||
-            (total + _len + currentDoc.length * separator.length >
-              this.chunkSize &&
-              total > 0)
-          ) {
-            total -= await this.lengthFunction(currentDoc[0]);
-            currentDoc.shift();
-          }
-        }
-      }
-      currentDoc.push(d);
-      total += _len;
-    }
-    const doc = this.joinDocs(currentDoc, separator);
-    if (doc !== null) {
-      docs.push(doc);
-    }
-    return docs;
-  }
-}
-
-export interface CharacterTextSplitterParams extends TextSplitterParams {
-  separator: string;
-}
-
-export class CharacterTextSplitter
-  extends TextSplitter
-  implements CharacterTextSplitterParams
-{
-  static lc_name() {
-    return "CharacterTextSplitter";
-  }
-
-  separator = "\n\n";
-
-  constructor(fields?: Partial<CharacterTextSplitterParams>) {
-    super(fields);
-    this.separator = fields?.separator ?? this.separator;
-  }
-
-  async splitText(text: string): Promise<string[]> {
-    // First we naively split the large input into a bunch of smaller ones.
-    const splits = this.splitOnSeparator(text, this.separator);
-    return this.mergeSplits(splits, this.keepSeparator ? "" : this.separator);
-  }
-}
-
-export interface RecursiveCharacterTextSplitterParams
-  extends TextSplitterParams {
-  separators: string[];
-}
-
-export const SupportedTextSplitterLanguages = [
-  "cpp",
-  "go",
-  "java",
-  "js",
-  "php",
-  "proto",
-  "python",
-  "rst",
-  "ruby",
-  "rust",
-  "scala",
-  "swift",
-  "markdown",
-  "latex",
-  "html",
-  "sol",
-] as const;
-
-export type SupportedTextSplitterLanguage =
-  (typeof SupportedTextSplitterLanguages)[number];
-
-export class RecursiveCharacterTextSplitter
-  extends TextSplitter
-  implements RecursiveCharacterTextSplitterParams
-{
-  static lc_name() {
-    return "RecursiveCharacterTextSplitter";
-  }
-
-  separators: string[] = ["\n\n", "\n", " ", ""];
-
-  constructor(fields?: Partial<RecursiveCharacterTextSplitterParams>) {
-    super(fields);
-    this.separators = fields?.separators ?? this.separators;
-    this.keepSeparator = fields?.keepSeparator ?? true;
-  }
-
-  private async _splitText(text: string, separators: string[]) {
-    const finalChunks: string[] = [];
-
-    // Get appropriate separator to use
-    let separator: string = separators[separators.length - 1];
-    let newSeparators;
-    for (let i = 0; i < separators.length; i += 1) {
-      const s = separators[i];
-      if (s === "") {
-        separator = s;
-        break;
-      }
-      if (text.includes(s)) {
-        separator = s;
-        newSeparators = separators.slice(i + 1);
-        break;
-      }
-    }
-
-    // Now that we have the separator, split the text
-    const splits = this.splitOnSeparator(text, separator);
-
-    // Now go merging things, recursively splitting longer texts.
-    let goodSplits: string[] = [];
-    const _separator = this.keepSeparator ? "" : separator;
-    for (const s of splits) {
-      if ((await this.lengthFunction(s)) < this.chunkSize) {
-        goodSplits.push(s);
-      } else {
-        if (goodSplits.length) {
-          const mergedText = await this.mergeSplits(goodSplits, _separator);
-          finalChunks.push(...mergedText);
-          goodSplits = [];
-        }
-        if (!newSeparators) {
-          finalChunks.push(s);
-        } else {
-          const otherInfo = await this._splitText(s, newSeparators);
-          finalChunks.push(...otherInfo);
-        }
-      }
-    }
-    if (goodSplits.length) {
-      const mergedText = await this.mergeSplits(goodSplits, _separator);
-      finalChunks.push(...mergedText);
-    }
-    return finalChunks;
-  }
-
-  async splitText(text: string): Promise<string[]> {
-    return this._splitText(text, this.separators);
-  }
-
-  static fromLanguage(
-    language: SupportedTextSplitterLanguage,
-    options?: Partial<RecursiveCharacterTextSplitterParams>
-  ) {
-    return new RecursiveCharacterTextSplitter({
-      ...options,
-      separators:
-        RecursiveCharacterTextSplitter.getSeparatorsForLanguage(language),
-    });
-  }
-
-  static getSeparatorsForLanguage(language: SupportedTextSplitterLanguage) {
-    if (language === "cpp") {
-      return [
-        // Split along class definitions
-        "\nclass ",
-        // Split along function definitions
-        "\nvoid ",
-        "\nint ",
-        "\nfloat ",
-        "\ndouble ",
-        // Split along control flow statements
-        "\nif ",
-        "\nfor ",
-        "\nwhile ",
-        "\nswitch ",
-        "\ncase ",
-        // Split by the normal type of lines
-        "\n\n",
-        "\n",
-        " ",
-        "",
-      ];
-    } else if (language === "go") {
-      return [
-        // Split along function definitions
-        "\nfunc ",
-        "\nvar ",
-        "\nconst ",
-        "\ntype ",
-        // Split along control flow statements
-        "\nif ",
-        "\nfor ",
-        "\nswitch ",
-        "\ncase ",
-        // Split by the normal type of lines
-        "\n\n",
-        "\n",
-        " ",
-        "",
-      ];
-    } else if (language === "java") {
-      return [
-        // Split along class definitions
-        "\nclass ",
-        // Split along method definitions
-        "\npublic ",
-        "\nprotected ",
-        "\nprivate ",
-        "\nstatic ",
-        // Split along control flow statements
-        "\nif ",
-        "\nfor ",
-        "\nwhile ",
-        "\nswitch ",
-        "\ncase ",
-        // Split by the normal type of lines
-        "\n\n",
-        "\n",
-        " ",
-        "",
-      ];
-    } else if (language === "js") {
-      return [
-        // Split along function definitions
-        "\nfunction ",
-        "\nconst ",
-        "\nlet ",
-        "\nvar ",
-        "\nclass ",
-        // Split along control flow statements
-        "\nif ",
-        "\nfor ",
-        "\nwhile ",
-        "\nswitch ",
-        "\ncase ",
-        "\ndefault ",
-        // Split by the normal type of lines
-        "\n\n",
-        "\n",
-        " ",
-        "",
-      ];
-    } else if (language === "php") {
-      return [
-        // Split along function definitions
-        "\nfunction ",
-        // Split along class definitions
-        "\nclass ",
-        // Split along control flow statements
-        "\nif ",
-        "\nforeach ",
-        "\nwhile ",
-        "\ndo ",
-        "\nswitch ",
-        "\ncase ",
-        // Split by the normal type of lines
-        "\n\n",
-        "\n",
-        " ",
-        "",
-      ];
-    } else if (language === "proto") {
-      return [
-        // Split along message definitions
-        "\nmessage ",
-        // Split along service definitions
-        "\nservice ",
-        // Split along enum definitions
-        "\nenum ",
-        // Split along option definitions
-        "\noption ",
-        // Split along import statements
-        "\nimport ",
-        // Split along syntax declarations
-        "\nsyntax ",
-        // Split by the normal type of lines
-        "\n\n",
-        "\n",
-        " ",
-        "",
-      ];
-    } else if (language === "python") {
-      return [
-        // First, try to split along class definitions
-        "\nclass ",
-        "\ndef ",
-        "\n\tdef ",
-        // Now split by the normal type of lines
-        "\n\n",
-        "\n",
-        " ",
-        "",
-      ];
-    } else if (language === "rst") {
-      return [
-        // Split along section titles
-        "\n===\n",
-        "\n---\n",
-        "\n***\n",
-        // Split along directive markers
-        "\n.. ",
-        // Split by the normal type of lines
-        "\n\n",
-        "\n",
-        " ",
-        "",
-      ];
-    } else if (language === "ruby") {
-      return [
-        // Split along method definitions
-        "\ndef ",
-        "\nclass ",
-        // Split along control flow statements
-        "\nif ",
-        "\nunless ",
-        "\nwhile ",
-        "\nfor ",
-        "\ndo ",
-        "\nbegin ",
-        "\nrescue ",
-        // Split by the normal type of lines
-        "\n\n",
-        "\n",
-        " ",
-        "",
-      ];
-    } else if (language === "rust") {
-      return [
-        // Split along function definitions
-        "\nfn ",
-        "\nconst ",
-        "\nlet ",
-        // Split along control flow statements
-        "\nif ",
-        "\nwhile ",
-        "\nfor ",
-        "\nloop ",
-        "\nmatch ",
-        "\nconst ",
-        // Split by the normal type of lines
-        "\n\n",
-        "\n",
-        " ",
-        "",
-      ];
-    } else if (language === "scala") {
-      return [
-        // Split along class definitions
-        "\nclass ",
-        "\nobject ",
-        // Split along method definitions
-        "\ndef ",
-        "\nval ",
-        "\nvar ",
-        // Split along control flow statements
-        "\nif ",
-        "\nfor ",
-        "\nwhile ",
-        "\nmatch ",
-        "\ncase ",
-        // Split by the normal type of lines
-        "\n\n",
-        "\n",
-        " ",
-        "",
-      ];
-    } else if (language === "swift") {
-      return [
-        // Split along function definitions
-        "\nfunc ",
-        // Split along class definitions
-        "\nclass ",
-        "\nstruct ",
-        "\nenum ",
-        // Split along control flow statements
-        "\nif ",
-        "\nfor ",
-        "\nwhile ",
-        "\ndo ",
-        "\nswitch ",
-        "\ncase ",
-        // Split by the normal type of lines
-        "\n\n",
-        "\n",
-        " ",
-        "",
-      ];
-    } else if (language === "markdown") {
-      return [
-        // First, try to split along Markdown headings (starting with level 2)
-        "\n## ",
-        "\n### ",
-        "\n#### ",
-        "\n##### ",
-        "\n###### ",
-        // Note the alternative syntax for headings (below) is not handled here
-        // Heading level 2
-        // ---------------
-        // End of code block
-        "```\n\n",
-        // Horizontal lines
-        "\n\n***\n\n",
-        "\n\n---\n\n",
-        "\n\n___\n\n",
-        // Note that this splitter doesn't handle horizontal lines defined
-        // by *three or more* of ***, ---, or ___, but this is not handled
-        "\n\n",
-        "\n",
-        " ",
-        "",
-      ];
-    } else if (language === "latex") {
-      return [
-        // First, try to split along Latex sections
-        "\n\\chapter{",
-        "\n\\section{",
-        "\n\\subsection{",
-        "\n\\subsubsection{",
-
-        // Now split by environments
-        "\n\\begin{enumerate}",
-        "\n\\begin{itemize}",
-        "\n\\begin{description}",
-        "\n\\begin{list}",
-        "\n\\begin{quote}",
-        "\n\\begin{quotation}",
-        "\n\\begin{verse}",
-        "\n\\begin{verbatim}",
-
-        // Now split by math environments
-        "\n\\begin{align}",
-        "$$",
-        "$",
-
-        // Now split by the normal type of lines
-        "\n\n",
-        "\n",
-        " ",
-        "",
-      ];
-    } else if (language === "html") {
-      return [
-        // First, try to split along HTML tags
-        "<body>",
-        "<div>",
-        "<p>",
-        "<br>",
-        "<li>",
-        "<h1>",
-        "<h2>",
-        "<h3>",
-        "<h4>",
-        "<h5>",
-        "<h6>",
-        "<span>",
-        "<table>",
-        "<tr>",
-        "<td>",
-        "<th>",
-        "<ul>",
-        "<ol>",
-        "<header>",
-        "<footer>",
-        "<nav>",
-        // Head
-        "<head>",
-        "<style>",
-        "<script>",
-        "<meta>",
-        "<title>",
-        // Normal type of lines
-        " ",
-        "",
-      ];
-    } else if (language === "sol") {
-      return [
-        // Split along compiler informations definitions
-        "\npragma ",
-        "\nusing ",
-        // Split along contract definitions
-        "\ncontract ",
-        "\ninterface ",
-        "\nlibrary ",
-        // Split along method definitions
-        "\nconstructor ",
-        "\ntype ",
-        "\nfunction ",
-        "\nevent ",
-        "\nmodifier ",
-        "\nerror ",
-        "\nstruct ",
-        "\nenum ",
-        // Split along control flow statements
-        "\nif ",
-        "\nfor ",
-        "\nwhile ",
-        "\ndo while ",
-        "\nassembly ",
-        // Split by the normal type of lines
-        "\n\n",
-        "\n",
-        " ",
-        "",
-      ];
-    } else {
-      throw new Error(`Language ${language} is not supported.`);
-    }
-  }
-}
-
-export interface TokenTextSplitterParams extends TextSplitterParams {
-  encodingName: tiktoken.TiktokenEncoding;
-  allowedSpecial: "all" | Array<string>;
-  disallowedSpecial: "all" | Array<string>;
-}
-
-/**
- * Implementation of splitter which looks at tokens.
- */
-export class TokenTextSplitter
-  extends TextSplitter
-  implements TokenTextSplitterParams
-{
-  static lc_name() {
-    return "TokenTextSplitter";
-  }
-
-  encodingName: tiktoken.TiktokenEncoding;
-
-  allowedSpecial: "all" | Array<string>;
-
-  disallowedSpecial: "all" | Array<string>;
-
-  private tokenizer: tiktoken.Tiktoken;
-
-  constructor(fields?: Partial<TokenTextSplitterParams>) {
-    super(fields);
-
-    this.encodingName = fields?.encodingName ?? "gpt2";
-    this.allowedSpecial = fields?.allowedSpecial ?? [];
-    this.disallowedSpecial = fields?.disallowedSpecial ?? "all";
-  }
-
-  async splitText(text: string): Promise<string[]> {
-    if (!this.tokenizer) {
-      this.tokenizer = await getEncoding(this.encodingName);
-    }
-
-    const splits: string[] = [];
-
-    const input_ids = this.tokenizer.encode(
-      text,
-      this.allowedSpecial,
-      this.disallowedSpecial
-    );
-
-    let start_idx = 0;
-
-    while (start_idx < input_ids.length) {
-      if (start_idx > 0) {
-        start_idx -= this.chunkOverlap;
-      }
-      const end_idx = Math.min(start_idx + this.chunkSize, input_ids.length);
-      const chunk_ids = input_ids.slice(start_idx, end_idx);
-      splits.push(this.tokenizer.decode(chunk_ids));
-      start_idx = end_idx;
-    }
-
-    return splits;
-  }
-}
-
-export type MarkdownTextSplitterParams = TextSplitterParams;
-
-export class MarkdownTextSplitter
-  extends RecursiveCharacterTextSplitter
-  implements MarkdownTextSplitterParams
-{
-  constructor(fields?: Partial<MarkdownTextSplitterParams>) {
-    super({
-      ...fields,
-      separators:
-        RecursiveCharacterTextSplitter.getSeparatorsForLanguage("markdown"),
-    });
-  }
-}
-
-export type LatexTextSplitterParams = TextSplitterParams;
-
-export class LatexTextSplitter
-  extends RecursiveCharacterTextSplitter
-  implements LatexTextSplitterParams
-{
-  constructor(fields?: Partial<LatexTextSplitterParams>) {
-    super({
-      ...fields,
-      separators:
-        RecursiveCharacterTextSplitter.getSeparatorsForLanguage("latex"),
-    });
-  }
-}
+export * from "@langchain/textsplitters";
diff --git a/libs/langchain-textsplitters/.eslintrc.cjs b/libs/langchain-textsplitters/.eslintrc.cjs
new file mode 100644
index 000000000000..344f8a9d6cd9
--- /dev/null
+++ b/libs/langchain-textsplitters/.eslintrc.cjs
@@ -0,0 +1,66 @@
+module.exports = {
+  extends: [
+    "airbnb-base",
+    "eslint:recommended",
+    "prettier",
+    "plugin:@typescript-eslint/recommended",
+  ],
+  parserOptions: {
+    ecmaVersion: 12,
+    parser: "@typescript-eslint/parser",
+    project: "./tsconfig.json",
+    sourceType: "module",
+  },
+  plugins: ["@typescript-eslint", "no-instanceof"],
+  ignorePatterns: [
+    ".eslintrc.cjs",
+    "scripts",
+    "node_modules",
+    "dist",
+    "dist-cjs",
+    "*.js",
+    "*.cjs",
+    "*.d.ts",
+  ],
+  rules: {
+    "no-process-env": 2,
+    "no-instanceof/no-instanceof": 2,
+    "@typescript-eslint/explicit-module-boundary-types": 0,
+    "@typescript-eslint/no-empty-function": 0,
+    "@typescript-eslint/no-shadow": 0,
+    "@typescript-eslint/no-empty-interface": 0,
+    "@typescript-eslint/no-use-before-define": ["error", "nofunc"],
+    "@typescript-eslint/no-unused-vars": ["warn", { args: "none" }],
+    "@typescript-eslint/no-floating-promises": "error",
+    "@typescript-eslint/no-misused-promises": "error",
+    camelcase: 0,
+    "class-methods-use-this": 0,
+    "import/extensions": [2, "ignorePackages"],
+    "import/no-extraneous-dependencies": [
+      "error",
+      { devDependencies: ["**/*.test.ts"] },
+    ],
+    "import/no-unresolved": 0,
+    "import/prefer-default-export": 0,
+    "keyword-spacing": "error",
+    "max-classes-per-file": 0,
+    "max-len": 0,
+    "no-await-in-loop": 0,
+    "no-bitwise": 0,
+    "no-console": 0,
+    "no-restricted-syntax": 0,
+    "no-shadow": 0,
+    "no-continue": 0,
+    "no-void": 0,
+    "no-underscore-dangle": 0,
+    "no-use-before-define": 0,
+    "no-useless-constructor": 0,
+    "no-return-await": 0,
+    "consistent-return": 0,
+    "no-else-return": 0,
+    "func-names": 0,
+    "no-lonely-if": 0,
+    "prefer-rest-params": 0,
+    "new-cap": ["error", { properties: false, capIsNew: false }],
+  },
+};
diff --git a/libs/langchain-textsplitters/.gitignore b/libs/langchain-textsplitters/.gitignore
new file mode 100644
index 000000000000..c10034e2f1be
--- /dev/null
+++ b/libs/langchain-textsplitters/.gitignore
@@ -0,0 +1,7 @@
+index.cjs
+index.js
+index.d.ts
+index.d.cts
+node_modules
+dist
+.yarn
diff --git a/libs/langchain-textsplitters/.prettierrc b/libs/langchain-textsplitters/.prettierrc
new file mode 100644
index 000000000000..ba08ff04f677
--- /dev/null
+++ b/libs/langchain-textsplitters/.prettierrc
@@ -0,0 +1,19 @@
+{
+  "$schema": "https://json.schemastore.org/prettierrc",
+  "printWidth": 80,
+  "tabWidth": 2,
+  "useTabs": false,
+  "semi": true,
+  "singleQuote": false,
+  "quoteProps": "as-needed",
+  "jsxSingleQuote": false,
+  "trailingComma": "es5",
+  "bracketSpacing": true,
+  "arrowParens": "always",
+  "requirePragma": false,
+  "insertPragma": false,
+  "proseWrap": "preserve",
+  "htmlWhitespaceSensitivity": "css",
+  "vueIndentScriptAndStyle": false,
+  "endOfLine": "lf"
+}
diff --git a/libs/langchain-textsplitters/.release-it.json b/libs/langchain-textsplitters/.release-it.json
new file mode 100644
index 000000000000..522ee6abf705
--- /dev/null
+++ b/libs/langchain-textsplitters/.release-it.json
@@ -0,0 +1,10 @@
+{
+  "github": {
+    "release": true,
+    "autoGenerate": true,
+    "tokenRef": "GITHUB_TOKEN_RELEASE"
+  },
+  "npm": {
+    "versionArgs": ["--workspaces-update=false"]
+  }
+}
diff --git a/libs/langchain-textsplitters/LICENSE b/libs/langchain-textsplitters/LICENSE
new file mode 100644
index 000000000000..8cd8f501eb49
--- /dev/null
+++ b/libs/langchain-textsplitters/LICENSE
@@ -0,0 +1,21 @@
+The MIT License
+
+Copyright (c) 2023 LangChain
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
\ No newline at end of file
diff --git a/libs/langchain-textsplitters/README.md b/libs/langchain-textsplitters/README.md
new file mode 100644
index 000000000000..5ab3019e7c2f
--- /dev/null
+++ b/libs/langchain-textsplitters/README.md
@@ -0,0 +1,53 @@
+# 🦜✂️ @langchain/textsplitters
+
+This package contains various implementations of LangChain.js text splitters, most commonly used as part of retrieval-augmented generation (RAG) pipelines.
+
+## Installation
+
+```bash npm2yarn
+npm install @langchain/textsplitters
+```
+
+## Development
+
+To develop the `@langchain/textsplitters` package, you'll need to follow these instructions:
+
+### Install dependencies
+
+```bash
+yarn install
+```
+
+### Build the package
+
+```bash
+yarn build
+```
+
+Or from the repo root:
+
+```bash
+yarn build --filter=@langchain/textsplitters
+```
+
+### Run tests
+
+Test files should live within a `tests/` folder in the `src/` folder. Unit tests should end in `.test.ts` and integration tests should
+end in `.int.test.ts`:
+
+```bash
+$ yarn test
+$ yarn test:int
+```
+
+### Lint & Format
+
+Run the linter & formatter to ensure your code is up to standard:
+
+```bash
+yarn lint && yarn format
+```
+
+### Adding new entrypoints
+
+If you add a new file to be exported, either import & re-export from `src/index.ts`, or add it to the `entrypoints` field in the `config` variable located inside `langchain.config.js` and run `yarn build` to generate the new entrypoint.
diff --git a/libs/langchain-textsplitters/jest.config.cjs b/libs/langchain-textsplitters/jest.config.cjs
new file mode 100644
index 000000000000..994826496bc5
--- /dev/null
+++ b/libs/langchain-textsplitters/jest.config.cjs
@@ -0,0 +1,21 @@
+/** @type {import('ts-jest').JestConfigWithTsJest} */
+module.exports = {
+  preset: "ts-jest/presets/default-esm",
+  testEnvironment: "./jest.env.cjs",
+  modulePathIgnorePatterns: ["dist/", "docs/"],
+  moduleNameMapper: {
+    "^(\\.{1,2}/.*)\\.js$": "$1",
+  },
+  transform: {
+    "^.+\\.tsx?$": ["@swc/jest"],
+  },
+  transformIgnorePatterns: [
+    "/node_modules/",
+    "\\.pnp\\.[^\\/]+$",
+    "./scripts/jest-setup-after-env.js",
+  ],
+  setupFiles: ["dotenv/config"],
+  testTimeout: 20_000,
+  passWithNoTests: true,
+  collectCoverageFrom: ["src/**/*.ts"],
+};
diff --git a/libs/langchain-textsplitters/jest.env.cjs b/libs/langchain-textsplitters/jest.env.cjs
new file mode 100644
index 000000000000..2ccedccb8672
--- /dev/null
+++ b/libs/langchain-textsplitters/jest.env.cjs
@@ -0,0 +1,12 @@
+const { TestEnvironment } = require("jest-environment-node");
+
+class AdjustedTestEnvironmentToSupportFloat32Array extends TestEnvironment {
+  constructor(config, context) {
+    // Make `instanceof Float32Array` return true in tests
+    // to avoid https://github.com/xenova/transformers.js/issues/57 and https://github.com/jestjs/jest/issues/2549
+    super(config, context);
+    this.global.Float32Array = Float32Array;
+  }
+}
+
+module.exports = AdjustedTestEnvironmentToSupportFloat32Array;
diff --git a/libs/langchain-textsplitters/langchain.config.js b/libs/langchain-textsplitters/langchain.config.js
new file mode 100644
index 000000000000..46b1a2b31264
--- /dev/null
+++ b/libs/langchain-textsplitters/langchain.config.js
@@ -0,0 +1,22 @@
+import { resolve, dirname } from "node:path";
+import { fileURLToPath } from "node:url";
+
+/**
+ * @param {string} relativePath
+ * @returns {string}
+ */
+function abs(relativePath) {
+  return resolve(dirname(fileURLToPath(import.meta.url)), relativePath);
+}
+
+export const config = {
+  internals: [/node\:/, /@langchain\/core\//],
+  entrypoints: {
+    index: "index",
+  },
+  requiresOptionalDependency: [],
+  tsConfigPath: resolve("./tsconfig.json"),
+  cjsSource: "./dist-cjs",
+  cjsDestination: "./dist",
+  abs,
+};
diff --git a/libs/langchain-textsplitters/package.json b/libs/langchain-textsplitters/package.json
new file mode 100644
index 000000000000..1578d30a1fb9
--- /dev/null
+++ b/libs/langchain-textsplitters/package.json
@@ -0,0 +1,90 @@
+{
+  "name": "@langchain/textsplitters",
+  "version": "0.0.0",
+  "description": "Various implementations of LangChain.js text splitters",
+  "type": "module",
+  "engines": {
+    "node": ">=18"
+  },
+  "main": "./index.js",
+  "types": "./index.d.ts",
+  "repository": {
+    "type": "git",
+    "url": "git@github.com:langchain-ai/langchainjs.git"
+  },
+  "homepage": "https://github.com/langchain-ai/langchainjs/tree/main/libs/langchain-textsplitters/",
+  "scripts": {
+    "build": "yarn clean && yarn build:esm && yarn build:cjs && yarn build:scripts",
+    "build:esm": "NODE_OPTIONS=--max-old-space-size=4096 tsc --outDir dist/ && rm -rf dist/tests dist/**/tests",
+    "build:cjs": "NODE_OPTIONS=--max-old-space-size=4096 tsc --outDir dist-cjs/ -p tsconfig.cjs.json && yarn move-cjs-to-dist && rm -rf dist-cjs",
+    "build:watch": "yarn create-entrypoints && tsc --outDir dist/ --watch",
+    "build:scripts": "yarn create-entrypoints && yarn check-tree-shaking",
+    "lint:eslint": "NODE_OPTIONS=--max-old-space-size=4096 eslint --cache --ext .ts,.js src/",
+    "lint:dpdm": "dpdm --exit-code circular:1 --no-warning --no-tree src/*.ts src/**/*.ts",
+    "lint": "yarn lint:eslint && yarn lint:dpdm",
+    "lint:fix": "yarn lint:eslint --fix && yarn lint:dpdm",
+    "clean": "rm -rf dist/ && NODE_OPTIONS=--max-old-space-size=4096 yarn lc-build --config ./langchain.config.js --create-entrypoints --pre",
+    "prepack": "yarn build",
+    "test": "NODE_OPTIONS=--experimental-vm-modules jest --testPathIgnorePatterns=\\.int\\.test.ts --testTimeout 30000 --maxWorkers=50%",
+    "test:watch": "NODE_OPTIONS=--experimental-vm-modules jest --watch --testPathIgnorePatterns=\\.int\\.test.ts",
+    "test:single": "NODE_OPTIONS=--experimental-vm-modules yarn run jest --config jest.config.cjs --testTimeout 100000",
+    "test:int": "NODE_OPTIONS=--experimental-vm-modules jest --testPathPattern=\\.int\\.test.ts --testTimeout 100000 --maxWorkers=50%",
+    "format": "prettier --config .prettierrc --write \"src\"",
+    "format:check": "prettier --config .prettierrc --check \"src\"",
+    "move-cjs-to-dist": "yarn lc-build --config ./langchain.config.js --move-cjs-dist",
+    "create-entrypoints": "yarn lc-build --config ./langchain.config.js --create-entrypoints",
+    "check-tree-shaking": "yarn lc-build --config ./langchain.config.js --tree-shaking"
+  },
+  "author": "LangChain",
+  "license": "MIT",
+  "dependencies": {
+    "@langchain/core": "~0.1",
+    "js-tiktoken": "^1.0.11"
+  },
+  "devDependencies": {
+    "@jest/globals": "^29.5.0",
+    "@langchain/scripts": "~0.0",
+    "@swc/core": "^1.3.90",
+    "@swc/jest": "^0.2.29",
+    "@tsconfig/recommended": "^1.0.3",
+    "@typescript-eslint/eslint-plugin": "^6.12.0",
+    "@typescript-eslint/parser": "^6.12.0",
+    "dotenv": "^16.3.1",
+    "dpdm": "^3.12.0",
+    "eslint": "^8.33.0",
+    "eslint-config-airbnb-base": "^15.0.0",
+    "eslint-config-prettier": "^8.6.0",
+    "eslint-plugin-import": "^2.27.5",
+    "eslint-plugin-no-instanceof": "^1.0.1",
+    "eslint-plugin-prettier": "^4.2.1",
+    "jest": "^29.5.0",
+    "jest-environment-node": "^29.6.4",
+    "prettier": "^2.8.3",
+    "release-it": "^15.10.1",
+    "rollup": "^4.5.2",
+    "ts-jest": "^29.1.0",
+    "typescript": "<5.2.0"
+  },
+  "publishConfig": {
+    "access": "public"
+  },
+  "exports": {
+    ".": {
+      "types": {
+        "import": "./index.d.ts",
+        "require": "./index.d.cts",
+        "default": "./index.d.ts"
+      },
+      "import": "./index.js",
+      "require": "./index.cjs"
+    },
+    "./package.json": "./package.json"
+  },
+  "files": [
+    "dist/",
+    "index.cjs",
+    "index.js",
+    "index.d.ts",
+    "index.d.cts"
+  ]
+}
diff --git a/libs/langchain-textsplitters/scripts/jest-setup-after-env.js b/libs/langchain-textsplitters/scripts/jest-setup-after-env.js
new file mode 100644
index 000000000000..778cf7437a20
--- /dev/null
+++ b/libs/langchain-textsplitters/scripts/jest-setup-after-env.js
@@ -0,0 +1,3 @@
+import { awaitAllCallbacks } from "@langchain/core/callbacks/promises";
+
+afterAll(awaitAllCallbacks);
diff --git a/libs/langchain-textsplitters/src/index.ts b/libs/langchain-textsplitters/src/index.ts
new file mode 100644
index 000000000000..6ce2a186b8fc
--- /dev/null
+++ b/libs/langchain-textsplitters/src/index.ts
@@ -0,0 +1 @@
+export * from "./text_splitter.js";
diff --git a/libs/langchain-textsplitters/src/tests/code_text_splitter.test.ts b/libs/langchain-textsplitters/src/tests/code_text_splitter.test.ts
new file mode 100644
index 000000000000..8d2119198f22
--- /dev/null
+++ b/libs/langchain-textsplitters/src/tests/code_text_splitter.test.ts
@@ -0,0 +1,318 @@
+import { test, expect } from "@jest/globals";
+import { RecursiveCharacterTextSplitter } from "../text_splitter.js";
+
+test("Python code splitter", async () => {
+  const splitter = RecursiveCharacterTextSplitter.fromLanguage("python", {
+    chunkSize: 16,
+    chunkOverlap: 0,
+  });
+  const code = `def hello_world():
+  print("Hello, World!")
+# Call the function
+hello_world()`;
+  const chunks = await splitter.splitText(code);
+  expect(chunks).toStrictEqual([
+    "def",
+    "hello_world():",
+    'print("Hello,',
+    'World!")',
+    "# Call the",
+    "function",
+    "hello_world()",
+  ]);
+});
+
+test("Golang code splitter", async () => {
+  const splitter = RecursiveCharacterTextSplitter.fromLanguage("go", {
+    chunkSize: 16,
+    chunkOverlap: 0,
+  });
+  const code = `package main
+import "fmt"
+func helloWorld() {
+    fmt.Println("Hello, World!")
+}
+func main() {
+    helloWorld()
+}`;
+  const chunks = await splitter.splitText(code);
+  expect(chunks).toStrictEqual([
+    "package main",
+    'import "fmt"',
+    "func",
+    "helloWorld() {",
+    'fmt.Println("He',
+    "llo,",
+    'World!")',
+    "}",
+    "func main() {",
+    "helloWorld()",
+    "}",
+  ]);
+});
+
+test("RST code splitter", async () => {
+  const splitter = RecursiveCharacterTextSplitter.fromLanguage("rst", {
+    chunkSize: 16,
+    chunkOverlap: 0,
+  });
+  const code = `Sample Document
+===============
+Section
+-------
+This is the content of the section.
+Lists
+-----
+- Item 1
+- Item 2
+- Item 3`;
+  const chunks = await splitter.splitText(code);
+  expect(chunks).toStrictEqual([
+    "Sample Document",
+    "===============",
+    "Section\n-------",
+    "This is the",
+    "content of the",
+    "section.",
+    "Lists\n-----",
+    "- Item 1",
+    "- Item 2",
+    "- Item 3",
+  ]);
+});
+
+test("Proto code splitter", async () => {
+  const splitter = RecursiveCharacterTextSplitter.fromLanguage("proto", {
+    chunkSize: 16,
+    chunkOverlap: 0,
+  });
+  const code = `syntax = "proto3";
+package example;
+message Person {
+    string name = 1;
+    int32 age = 2;
+    repeated string hobbies = 3;
+}`;
+  const chunks = await splitter.splitText(code);
+  expect(chunks).toStrictEqual([
+    "syntax =",
+    '"proto3";',
+    "package",
+    "example;",
+    "message Person",
+    "{",
+    "string name",
+    "= 1;",
+    "int32 age =",
+    "2;",
+    "repeated",
+    "string hobbies",
+    "= 3;",
+    "}",
+  ]);
+});
+
+test("JS code splitter", async () => {
+  const splitter = RecursiveCharacterTextSplitter.fromLanguage("js", {
+    chunkSize: 16,
+    chunkOverlap: 0,
+  });
+  const code = `function helloWorld() {
+  console.log("Hello, World!");
+}
+// Call the function
+helloWorld();`;
+  const chunks = await splitter.splitText(code);
+  expect(chunks).toStrictEqual([
+    "function",
+    "helloWorld() {",
+    'console.log("He',
+    "llo,",
+    'World!");',
+    "}",
+    "// Call the",
+    "function",
+    "helloWorld();",
+  ]);
+});
+
+test("Java code splitter", async () => {
+  const splitter = RecursiveCharacterTextSplitter.fromLanguage("java", {
+    chunkSize: 16,
+    chunkOverlap: 0,
+  });
+  const code = `public class HelloWorld {
+  public static void main(String[] args) {
+      System.out.println("Hello, World!");
+  }
+}`;
+  const chunks = await splitter.splitText(code);
+  expect(chunks).toStrictEqual([
+    "public class",
+    "HelloWorld {",
+    "public static",
+    "void",
+    "main(String[]",
+    "args) {",
+    "System.out.prin",
+    'tln("Hello,',
+    'World!");',
+    "}\n}",
+  ]);
+});
+
+test("CPP code splitter", async () => {
+  const splitter = RecursiveCharacterTextSplitter.fromLanguage("cpp", {
+    chunkSize: 16,
+    chunkOverlap: 0,
+  });
+  const code = `#include <iostream>
+int main() {
+    std::cout << "Hello, World!" << std::endl;
+    return 0;
+}`;
+  const chunks = await splitter.splitText(code);
+  expect(chunks).toStrictEqual([
+    "#include",
+    "<iostream>",
+    "int main() {",
+    "std::cout",
+    '<< "Hello,',
+    'World!" <<',
+    "std::endl;",
+    "return 0;\n}",
+  ]);
+});
+
+test("Scala code splitter", async () => {
+  const splitter = RecursiveCharacterTextSplitter.fromLanguage("scala", {
+    chunkSize: 16,
+    chunkOverlap: 0,
+  });
+  const code = `object HelloWorld {
+  def main(args: Array[String]): Unit = {
+    println("Hello, World!")
+  }
+}`;
+  const chunks = await splitter.splitText(code);
+  expect(chunks).toStrictEqual([
+    "object",
+    "HelloWorld {",
+    "def",
+    "main(args:",
+    "Array[String]):",
+    "Unit = {",
+    'println("Hello,',
+    'World!")',
+    "}\n}",
+  ]);
+});
+
+test("Ruby code splitter", async () => {
+  const splitter = RecursiveCharacterTextSplitter.fromLanguage("ruby", {
+    chunkSize: 16,
+    chunkOverlap: 0,
+  });
+  const code = `def hello_world
+  puts "Hello, World!"
+end
+hello_world`;
+  const chunks = await splitter.splitText(code);
+  expect(chunks).toStrictEqual([
+    "def hello_world",
+    'puts "Hello,',
+    'World!"',
+    "end\nhello_world",
+  ]);
+});
+
+test("PHP code splitter", async () => {
+  const splitter = RecursiveCharacterTextSplitter.fromLanguage("php", {
+    chunkSize: 16,
+    chunkOverlap: 0,
+  });
+  const code = `<?php
+function hello_world() {
+    echo "Hello, World!";
+}
+hello_world();
+?>`;
+  const chunks = await splitter.splitText(code);
+  expect(chunks).toStrictEqual([
+    "<?php",
+    "function",
+    "hello_world() {",
+    "echo",
+    '"Hello,',
+    'World!";',
+    "}",
+    "hello_world();",
+    "?>",
+  ]);
+});
+
+test("Swift code splitter", async () => {
+  const splitter = RecursiveCharacterTextSplitter.fromLanguage("swift", {
+    chunkSize: 16,
+    chunkOverlap: 0,
+  });
+  const code = `func helloWorld() {
+  print("Hello, World!")
+}
+helloWorld()`;
+  const chunks = await splitter.splitText(code);
+  expect(chunks).toStrictEqual([
+    "func",
+    "helloWorld() {",
+    'print("Hello,',
+    'World!")',
+    "}\nhelloWorld()",
+  ]);
+});
+
+test("Rust code splitter", async () => {
+  const splitter = RecursiveCharacterTextSplitter.fromLanguage("rust", {
+    chunkSize: 16,
+    chunkOverlap: 0,
+  });
+  const code = `fn main() {
+  println!("Hello, World!");
+}`;
+  const chunks = await splitter.splitText(code);
+  expect(chunks).toStrictEqual([
+    "fn main() {",
+    'println!("Hello',
+    ",",
+    'World!");',
+    "}",
+  ]);
+});
+
+test("Solidity code splitter", async () => {
+  const splitter = RecursiveCharacterTextSplitter.fromLanguage("sol", {
+    chunkSize: 16,
+    chunkOverlap: 0,
+  });
+  const code = `pragma solidity ^0.8.20;
+  contract HelloWorld {
+    function add(uint a, uint b) pure public returns(uint) {
+      return  a + b;
+    }
+  }
+  `;
+  const chunks = await splitter.splitText(code);
+  expect(chunks).toStrictEqual([
+    "pragma solidity",
+    "^0.8.20;",
+    "contract",
+    "HelloWorld {",
+    "function",
+    "add(uint a,",
+    "uint b) pure",
+    "public",
+    "returns(uint) {",
+    "return  a",
+    "+ b;",
+    "}\n  }",
+  ]);
+});
diff --git a/libs/langchain-textsplitters/src/tests/text_splitter.test.ts b/libs/langchain-textsplitters/src/tests/text_splitter.test.ts
new file mode 100644
index 000000000000..104efd090320
--- /dev/null
+++ b/libs/langchain-textsplitters/src/tests/text_splitter.test.ts
@@ -0,0 +1,514 @@
+import { describe, expect, test } from "@jest/globals";
+import { Document } from "@langchain/core/documents";
+import {
+  CharacterTextSplitter,
+  LatexTextSplitter,
+  MarkdownTextSplitter,
+  RecursiveCharacterTextSplitter,
+  TokenTextSplitter,
+} from "../text_splitter.js";
+
+function textLineGenerator(char: string, length: number) {
+  const line = new Array(length).join(char);
+  return `${line}\n`;
+}
+
+describe("Character text splitter", () => {
+  test("Test splitting by character count.", async () => {
+    const text = "foo bar baz 123";
+    const splitter = new CharacterTextSplitter({
+      separator: " ",
+      chunkSize: 7,
+      chunkOverlap: 3,
+    });
+    const output = await splitter.splitText(text);
+    const expectedOutput = ["foo bar", "bar baz", "baz 123"];
+    expect(output).toEqual(expectedOutput);
+  });
+
+  test("Test splitting by character count doesn't create empty documents.", async () => {
+    const text = "foo  bar";
+    const splitter = new CharacterTextSplitter({
+      separator: " ",
+      chunkSize: 2,
+      chunkOverlap: 0,
+    });
+    const output = await splitter.splitText(text);
+    const expectedOutput = ["foo", "bar"];
+    expect(output).toEqual(expectedOutput);
+  });
+
+  test("Test splitting by character count on long words.", async () => {
+    const text = "foo bar baz a a";
+    const splitter = new CharacterTextSplitter({
+      separator: " ",
+      chunkSize: 3,
+      chunkOverlap: 1,
+    });
+    const output = await splitter.splitText(text);
+    const expectedOutput = ["foo", "bar", "baz", "a a"];
+    expect(output).toEqual(expectedOutput);
+  });
+
+  test("Test splitting by character count when shorter words are first.", async () => {
+    const text = "a a foo bar baz";
+    const splitter = new CharacterTextSplitter({
+      separator: " ",
+      chunkSize: 3,
+      chunkOverlap: 1,
+    });
+    const output = await splitter.splitText(text);
+    const expectedOutput = ["a a", "foo", "bar", "baz"];
+    expect(output).toEqual(expectedOutput);
+  });
+
+  test("Test splitting by characters when splits not found easily.", async () => {
+    const text = "foo bar baz 123";
+    const splitter = new CharacterTextSplitter({
+      separator: " ",
+      chunkSize: 1,
+      chunkOverlap: 0,
+    });
+    const output = await splitter.splitText(text);
+    const expectedOutput = ["foo", "bar", "baz", "123"];
+    expect(output).toEqual(expectedOutput);
+  });
+
+  test("Test invalid arguments.", () => {
+    expect(() => {
+      const res = new CharacterTextSplitter({ chunkSize: 2, chunkOverlap: 4 });
+      console.log(res);
+    }).toThrow();
+  });
+
+  test("Test create documents method.", async () => {
+    const texts = ["foo bar", "baz"];
+    const splitter = new CharacterTextSplitter({
+      separator: " ",
+      chunkSize: 3,
+      chunkOverlap: 0,
+    });
+    const docs = await splitter.createDocuments(texts);
+    const metadata = { loc: { lines: { from: 1, to: 1 } } };
+    const expectedDocs = [
+      new Document({ pageContent: "foo", metadata }),
+      new Document({ pageContent: "bar", metadata }),
+      new Document({ pageContent: "baz", metadata }),
+    ];
+    expect(docs).toEqual(expectedDocs);
+  });
+
+  test("Test create documents with metadata method.", async () => {
+    const texts = ["foo bar", "baz"];
+    const splitter = new CharacterTextSplitter({
+      separator: " ",
+      chunkSize: 3,
+      chunkOverlap: 0,
+    });
+    const docs = await splitter.createDocuments(texts, [
+      { source: "1" },
+      { source: "2" },
+    ]);
+    const loc = { lines: { from: 1, to: 1 } };
+    const expectedDocs = [
+      new Document({ pageContent: "foo", metadata: { source: "1", loc } }),
+      new Document({
+        pageContent: "bar",
+        metadata: { source: "1", loc },
+      }),
+      new Document({ pageContent: "baz", metadata: { source: "2", loc } }),
+    ];
+    expect(docs).toEqual(expectedDocs);
+  });
+
+  test("Test create documents method with metadata and an added chunk header.", async () => {
+    const texts = ["foo bar", "baz"];
+    const splitter = new CharacterTextSplitter({
+      separator: " ",
+      chunkSize: 3,
+      chunkOverlap: 0,
+    });
+    const docs = await splitter.createDocuments(
+      texts,
+      [{ source: "1" }, { source: "2" }],
+      {
+        chunkHeader: `SOURCE NAME: testing\n-----\n`,
+        appendChunkOverlapHeader: true,
+      }
+    );
+    const loc = { lines: { from: 1, to: 1 } };
+    const expectedDocs = [
+      new Document({
+        pageContent: "SOURCE NAME: testing\n-----\nfoo",
+        metadata: { source: "1", loc },
+      }),
+      new Document({
+        pageContent: "SOURCE NAME: testing\n-----\n(cont'd) bar",
+        metadata: { source: "1", loc },
+      }),
+      new Document({
+        pageContent: "SOURCE NAME: testing\n-----\nbaz",
+        metadata: { source: "2", loc },
+      }),
+    ];
+    expect(docs).toEqual(expectedDocs);
+  });
+});
+
+describe("RecursiveCharacter text splitter", () => {
+  test("One unique chunk", async () => {
+    const splitter = new RecursiveCharacterTextSplitter({
+      chunkSize: 100,
+      chunkOverlap: 0,
+    });
+    const content = textLineGenerator("A", 70);
+
+    const docs = await splitter.createDocuments([content]);
+
+    const expectedDocs = [
+      new Document({
+        pageContent: content.trim(),
+        metadata: { loc: { lines: { from: 1, to: 1 } } },
+      }),
+    ];
+
+    expect(docs).toEqual(expectedDocs);
+  });
+
+  test("Test iterative text splitter.", async () => {
+    const text = `Hi.\n\nI'm Harrison.\n\nHow? Are? You?\nOkay then f f f f.
+This is a weird text to write, but gotta test the splittingggg some how.\n\n
+Bye!\n\n-H.`;
+    const splitter = new RecursiveCharacterTextSplitter({
+      chunkSize: 10,
+      chunkOverlap: 1,
+    });
+    const output = await splitter.splitText(text);
+    const expectedOutput = [
+      "Hi.",
+      "I'm",
+      "Harrison.",
+      "How? Are?",
+      "You?",
+      "Okay then",
+      "f f f f.",
+      "This is a",
+      "weird",
+      "text to",
+      "write,",
+      "but gotta",
+      "test the",
+      "splitting",
+      "gggg",
+      "some how.",
+      "Bye!",
+      "-H.",
+    ];
+    expect(output).toEqual(expectedOutput);
+  });
+
+  test("A basic chunked document", async () => {
+    const splitter = new RecursiveCharacterTextSplitter({
+      chunkSize: 100,
+      chunkOverlap: 0,
+    });
+    const line1 = textLineGenerator("A", 70);
+    const line2 = textLineGenerator("B", 70);
+    const content = line1 + line2;
+
+    const docs = await splitter.createDocuments([content]);
+
+    const expectedDocs = [
+      new Document({
+        pageContent: line1.trim(),
+        metadata: { loc: { lines: { from: 1, to: 1 } } },
+      }),
+      new Document({
+        pageContent: line2.trim(),
+        metadata: { loc: { lines: { from: 2, to: 2 } } },
+      }),
+    ];
+
+    expect(docs).toEqual(expectedDocs);
+  });
+
+  test("A chunked document with similar text", async () => {
+    const splitter = new RecursiveCharacterTextSplitter({
+      chunkSize: 100,
+      chunkOverlap: 0,
+    });
+    const line = textLineGenerator("A", 70);
+    const content = line + line;
+
+    const docs = await splitter.createDocuments([content]);
+
+    const expectedDocs = [
+      new Document({
+        pageContent: line.trim(),
+        metadata: { loc: { lines: { from: 1, to: 1 } } },
+      }),
+      new Document({
+        pageContent: line.trim(),
+        metadata: { loc: { lines: { from: 2, to: 2 } } },
+      }),
+    ];
+
+    expect(docs).toEqual(expectedDocs);
+  });
+
+  test("A chunked document starting with new lines", async () => {
+    const splitter = new RecursiveCharacterTextSplitter({
+      chunkSize: 100,
+      chunkOverlap: 0,
+    });
+    const line1 = textLineGenerator("\n", 2);
+    const line2 = textLineGenerator("A", 70);
+    const line3 = textLineGenerator("\n", 4);
+    const line4 = textLineGenerator("B", 70);
+    const line5 = textLineGenerator("\n", 4);
+    const content = line1 + line2 + line3 + line4 + line5;
+
+    const docs = await splitter.createDocuments([content]);
+
+    const expectedDocs = [
+      new Document({
+        pageContent: line2.trim(),
+        metadata: { loc: { lines: { from: 3, to: 3 } } },
+      }),
+      new Document({
+        pageContent: line4.trim(),
+        metadata: { loc: { lines: { from: 8, to: 8 } } },
+      }),
+    ];
+
+    expect(docs).toEqual(expectedDocs);
+  });
+
+  test("A chunked with overlap", async () => {
+    const splitter = new RecursiveCharacterTextSplitter({
+      chunkSize: 100,
+      chunkOverlap: 30,
+    });
+    const line1 = textLineGenerator("A", 70);
+    const line2 = textLineGenerator("B", 20);
+    const line3 = textLineGenerator("C", 70);
+    const content = line1 + line2 + line3;
+
+    const docs = await splitter.createDocuments([content]);
+
+    const expectedDocs = [
+      new Document({
+        pageContent: line1 + line2.trim(),
+        metadata: { loc: { lines: { from: 1, to: 2 } } },
+      }),
+      new Document({
+        pageContent: line2 + line3.trim(),
+        metadata: { loc: { lines: { from: 2, to: 3 } } },
+      }),
+    ];
+
+    expect(docs).toEqual(expectedDocs);
+  });
+
+  test("Chunks with overlap that contains new lines", async () => {
+    const splitter = new RecursiveCharacterTextSplitter({
+      chunkSize: 100,
+      chunkOverlap: 30,
+    });
+    const line1 = textLineGenerator("A", 70);
+    const line2 = textLineGenerator("B", 10);
+    const line3 = textLineGenerator("C", 10);
+    const line4 = textLineGenerator("D", 70);
+    const content = line1 + line2 + line3 + line4;
+
+    const docs = await splitter.createDocuments([content]);
+
+    const expectedDocs = [
+      new Document({
+        pageContent: line1 + line2 + line3.trim(),
+        metadata: { loc: { lines: { from: 1, to: 3 } } },
+      }),
+      new Document({
+        pageContent: line2 + line3 + line4.trim(),
+        metadata: { loc: { lines: { from: 2, to: 4 } } },
+      }),
+    ];
+    expect(docs).toEqual(expectedDocs);
+  });
+});
+
+test("Separator length is considered correctly for chunk size", async () => {
+  const text = "aa ab ac ba bb";
+  const splitter = new RecursiveCharacterTextSplitter({
+    keepSeparator: false,
+    chunkSize: 7,
+    chunkOverlap: 3,
+  });
+  const output = await splitter.splitText(text);
+  const expectedOutput = ["aa ab", "ab ac", "ac ba", "ba bb"];
+
+  expect(output).toEqual(expectedOutput);
+});
+
+test("Token text splitter", async () => {
+  const text = "foo bar baz a a";
+  const splitter = new TokenTextSplitter({
+    encodingName: "r50k_base",
+    chunkSize: 3,
+    chunkOverlap: 0,
+  });
+  const output = await splitter.splitText(text);
+  const expectedOutput = ["foo bar b", "az a a"];
+
+  expect(output).toEqual(expectedOutput);
+});
+
+test("Token text splitter overlap when last chunk is large", async () => {
+  const text = "foo bar baz a a";
+  const splitter = new TokenTextSplitter({
+    encodingName: "r50k_base",
+    chunkSize: 5,
+    chunkOverlap: 3,
+  });
+  const output = await splitter.splitText(text);
+  const expectedOutput = ["foo bar baz a", " baz a a"];
+
+  expect(output).toEqual(expectedOutput);
+});
+
+test("Test markdown text splitter", async () => {
+  const text =
+    "# 🦜️🔗 LangChain\n" +
+    "\n" +
+    "⚡ Building applications with LLMs through composability ⚡\n" +
+    "\n" +
+    "## Quick Install\n" +
+    "\n" +
+    "```bash\n" +
+    "# Hopefully this code block isn't split\n" +
+    "pip install langchain\n" +
+    "```\n" +
+    "\n" +
+    "As an open source project in a rapidly developing field, we are extremely open to contributions.";
+  const splitter = new MarkdownTextSplitter({
+    chunkSize: 100,
+    chunkOverlap: 0,
+  });
+  const output = await splitter.splitText(text);
+
+  const expectedOutput = [
+    "# 🦜️🔗 LangChain\n\n⚡ Building applications with LLMs through composability ⚡",
+    "## Quick Install\n\n```bash\n# Hopefully this code block isn't split\npip install langchain",
+    "```",
+    "As an open source project in a rapidly developing field, we are extremely open to contributions.",
+  ];
+  expect(output).toEqual(expectedOutput);
+});
+
+test("Test latex text splitter.", async () => {
+  const text = `\\begin{document}
+\\title{🦜️🔗 LangChain}
+⚡ Building applications with LLMs through composability ⚡
+
+\\section{Quick Install}
+
+\\begin{verbatim}
+Hopefully this code block isn't split
+yarn add langchain
+\\end{verbatim}
+
+As an open source project in a rapidly developing field, we are extremely open to contributions.
+
+\\end{document}`;
+  const splitter = new LatexTextSplitter({
+    chunkSize: 100,
+    chunkOverlap: 0,
+  });
+  const output = await splitter.splitText(text);
+
+  const expectedOutput = [
+    "\\begin{document}\n\\title{🦜️🔗 LangChain}\n⚡ Building applications with LLMs through composability ⚡",
+    "\\section{Quick Install}",
+    "\\begin{verbatim}\nHopefully this code block isn't split\nyarn add langchain\n\\end{verbatim}",
+    "As an open source project in a rapidly developing field, we are extremely open to contributions.",
+    "\\end{document}",
+  ];
+  expect(output).toEqual(expectedOutput);
+});
+
+test("Test HTML text splitter", async () => {
+  const text = `<!DOCTYPE html>
+<html>
+  <head>
+    <title>🦜️🔗 LangChain</title>
+    <style>
+      body {
+        font-family: Arial, sans-serif;
+      }
+      h1 {
+        color: darkblue;
+      }
+    </style>
+  </head>
+  <body>
+    <div>
+      <h1>🦜️🔗 LangChain</h1>
+      <p>⚡ Building applications with LLMs through composability ⚡</p>
+    </div>
+    <div>
+      As an open source project in a rapidly developing field, we are extremely open to contributions.
+    </div>
+  </body>
+</html>`;
+  const splitter = RecursiveCharacterTextSplitter.fromLanguage("html", {
+    chunkSize: 175,
+    chunkOverlap: 20,
+  });
+  const output = await splitter.splitText(text);
+
+  const expectedOutput = [
+    "<!DOCTYPE html>\n<html>",
+    "<head>\n    <title>🦜️🔗 LangChain</title>",
+    `<style>\n      body {
+        font-family: Arial, sans-serif;
+      }
+      h1 {
+        color: darkblue;
+      }
+    </style>
+  </head>`,
+    `<body>
+    <div>
+      <h1>🦜️🔗 LangChain</h1>
+      <p>⚡ Building applications with LLMs through composability ⚡</p>
+    </div>`,
+    `<div>
+      As an open source project in a rapidly developing field, we are extremely open to contributions.
+    </div>
+  </body>
+</html>`,
+  ];
+  expect(output).toEqual(expectedOutput);
+});
+
+test("Test lines loc on iterative text splitter.", async () => {
+  const text = `Hi.\nI'm Harrison.\n\nHow?\na\nb`;
+  const splitter = new RecursiveCharacterTextSplitter({
+    chunkSize: 20,
+    chunkOverlap: 1,
+  });
+  const docs = await splitter.createDocuments([text]);
+
+  const expectedDocs = [
+    new Document({
+      pageContent: "Hi.\nI'm Harrison.",
+      metadata: { loc: { lines: { from: 1, to: 2 } } },
+    }),
+    new Document({
+      pageContent: "How?\na\nb",
+      metadata: { loc: { lines: { from: 4, to: 6 } } },
+    }),
+  ];
+
+  expect(docs).toEqual(expectedDocs);
+});
diff --git a/libs/langchain-textsplitters/src/text_splitter.ts b/libs/langchain-textsplitters/src/text_splitter.ts
new file mode 100644
index 000000000000..095ea3e796ca
--- /dev/null
+++ b/libs/langchain-textsplitters/src/text_splitter.ts
@@ -0,0 +1,803 @@
+import type * as tiktoken from "js-tiktoken";
+import { Document, BaseDocumentTransformer } from "@langchain/core/documents";
+import { getEncoding } from "@langchain/core/utils/tiktoken";
+
+export interface TextSplitterParams { // Options accepted (as a Partial) by the TextSplitter constructor.
+  chunkSize: number; // TextSplitter defaults this to 1000.
+  chunkOverlap: number; // TextSplitter defaults this to 200; its constructor throws unless it is < chunkSize.
+  keepSeparator: boolean; // When true, splitOnSeparator leaves the separator at the start of the piece that follows it.
+  lengthFunction?: // Measures a text, synchronously or not; TextSplitter falls back to text.length.
+    | ((text: string) => number)
+    | ((text: string) => Promise<number>);
+}
+
+export type TextSplitterChunkHeaderOptions = { // Header options read by TextSplitter.createDocuments.
+  chunkHeader?: string; // Placed at the start of every chunk's pageContent; defaults to "".
+  chunkOverlapHeader?: string; // Added after chunkHeader on every chunk except a text's first; defaults to "(cont'd) ".
+  appendChunkOverlapHeader?: boolean; // chunkOverlapHeader is only added when this is true; defaults to false.
+};
+
+/** Shared base of the splitters in this file: chunk options, chunk merging and Document creation. */
+export abstract class TextSplitter
+  extends BaseDocumentTransformer
+  implements TextSplitterParams
+{
+  lc_namespace = ["langchain", "document_transformers", "text_splitters"];
+
+  chunkSize = 1000;
+
+  chunkOverlap = 200;
+
+  keepSeparator = false;
+
+  lengthFunction:
+    | ((text: string) => number)
+    | ((text: string) => Promise<number>);
+
+  /** @throws Error when `chunkOverlap` is not strictly smaller than `chunkSize`. */
+  constructor(fields?: Partial<TextSplitterParams>) {
+    super(fields);
+    this.chunkSize = fields?.chunkSize ?? this.chunkSize;
+    this.chunkOverlap = fields?.chunkOverlap ?? this.chunkOverlap;
+    this.keepSeparator = fields?.keepSeparator ?? this.keepSeparator;
+    this.lengthFunction =
+      fields?.lengthFunction ?? ((text: string) => text.length);
+    if (this.chunkOverlap >= this.chunkSize) {
+      throw new Error("Cannot have chunkOverlap >= chunkSize");
+    }
+  }
+
+  /** Document-transformer entry point; delegates to `splitDocuments`. */
+  async transformDocuments(
+    documents: Document[],
+    chunkHeaderOptions: TextSplitterChunkHeaderOptions = {}
+  ): Promise<Document[]> {
+    return this.splitDocuments(documents, chunkHeaderOptions);
+  }
+
+  /** Splits `text` into chunks; each subclass supplies its own strategy. */
+  abstract splitText(text: string): Promise<string[]>;
+
+  /**
+   * Splits `text` on a literal `separator` and drops empty pieces. With
+   * `keepSeparator` the separator stays at the start of the piece after it;
+   * an empty separator splits the text into individual UTF-16 code units.
+   */
+  protected splitOnSeparator(text: string, separator: string): string[] {
+    let splits: string[];
+    if (!separator) {
+      splits = text.split("");
+    } else if (this.keepSeparator) {
+      // Escape regex metacharacters, then split on a lookahead so the separator is kept.
+      const regexEscapedSeparator = separator.replace(
+        /[/\-\\^$*+?.()|[\]{}]/g,
+        "\\$&"
+      );
+      splits = text.split(new RegExp(`(?=${regexEscapedSeparator})`));
+    } else {
+      splits = text.split(separator);
+    }
+    return splits.filter((s) => s !== "");
+  }
+
+  /**
+   * Splits every text and wraps each chunk in a Document whose
+   * `metadata.loc.lines` holds the chunk's 1-based `from`/`to` line numbers.
+   * `metadatas[i]` is copied onto the chunks of `texts[i]` (`{}` if missing);
+   * `chunkHeaderOptions` controls the headers prepended to each chunk.
+   */
+  async createDocuments(
+    texts: string[],
+    // eslint-disable-next-line @typescript-eslint/no-explicit-any
+    metadatas: Record<string, any>[] = [],
+    chunkHeaderOptions: TextSplitterChunkHeaderOptions = {}
+  ): Promise<Document[]> {
+    const {
+      chunkHeader = "",
+      chunkOverlapHeader = "(cont'd) ",
+      appendChunkOverlapHeader = false,
+    } = chunkHeaderOptions;
+    const documents: Document[] = [];
+    for (let i = 0; i < texts.length; i += 1) {
+      const text = texts[i];
+      // A metadatas array shorter than texts must not crash on the missing entry.
+      // eslint-disable-next-line @typescript-eslint/no-explicit-any
+      const metadata: Record<string, any> = metadatas[i] ?? {};
+      let lineCounterIndex = 1;
+      let prevChunk: string | null = null;
+      let indexPrevChunk = -1;
+      for (const chunk of await this.splitText(text)) {
+        // Find the chunk in the source so newlines dropped by splitting still count.
+        const indexChunk = text.indexOf(chunk, indexPrevChunk + 1);
+        let pageContent = chunkHeader;
+        if (prevChunk === null) {
+          lineCounterIndex += this.numberOfNewLines(text, 0, indexChunk);
+        } else {
+          // Offsets are in characters, so use the string length, not lengthFunction.
+          const indexEndPrevChunk = indexPrevChunk + prevChunk.length;
+          // A gap before this chunk advances the counter; an overlap rewinds it.
+          const newLinesBetween = this.numberOfNewLines(
+            text,
+            Math.min(indexEndPrevChunk, indexChunk),
+            Math.max(indexEndPrevChunk, indexChunk)
+          );
+          lineCounterIndex +=
+            indexEndPrevChunk < indexChunk ? newLinesBetween : -newLinesBetween;
+          if (appendChunkOverlapHeader) {
+            pageContent += chunkOverlapHeader;
+          }
+        }
+        pageContent += chunk;
+        const newLinesCount = this.numberOfNewLines(chunk);
+        // Copy a caller-provided loc object instead of mutating it.
+        const loc =
+          metadata.loc && typeof metadata.loc === "object"
+            ? { ...metadata.loc }
+            : {};
+        loc.lines = {
+          from: lineCounterIndex,
+          to: lineCounterIndex + newLinesCount,
+        };
+        documents.push(
+          new Document({ pageContent, metadata: { ...metadata, loc } })
+        );
+        lineCounterIndex += newLinesCount;
+        prevChunk = chunk;
+        indexPrevChunk = indexChunk;
+      }
+    }
+    return documents;
+  }
+
+  /** Counts the "\n" characters in `text.slice(start, end)`. */
+  private numberOfNewLines(text: string, start?: number, end?: number) {
+    const textSection = text.slice(start, end);
+    return (textSection.match(/\n/g) || []).length;
+  }
+
+  /** Splits each document's `pageContent` (skipping undefined ones), carrying its metadata onto the chunks. */
+  async splitDocuments(
+    documents: Document[],
+    chunkHeaderOptions: TextSplitterChunkHeaderOptions = {}
+  ): Promise<Document[]> {
+    const selectedDocuments = documents.filter(
+      (doc) => doc.pageContent !== undefined
+    );
+    const texts = selectedDocuments.map((doc) => doc.pageContent);
+    const metadatas = selectedDocuments.map((doc) => doc.metadata);
+    return this.createDocuments(texts, metadatas, chunkHeaderOptions);
+  }
+
+  /** Joins the pieces with `separator` and trims; returns null when nothing is left. */
+  private joinDocs(docs: string[], separator: string): string | null {
+    const text = docs.join(separator).trim();
+    return text === "" ? null : text;
+  }
+
+  /**
+   * Greedily packs consecutive `splits` into chunks joined by `separator`.
+   * After a chunk is emitted, pieces are dropped from its front until no
+   * more than `chunkOverlap` remains and the next piece fits; what is left
+   * seeds the following chunk.
+   */
+  async mergeSplits(splits: string[], separator: string): Promise<string[]> {
+    const docs: string[] = [];
+    const currentDoc: string[] = [];
+    let total = 0;
+    for (const d of splits) {
+      const _len = await this.lengthFunction(d);
+      if (
+        total + _len + currentDoc.length * separator.length >
+        this.chunkSize
+      ) {
+        if (total > this.chunkSize) {
+          // Single-line message: the old two-line template literal printed
+          // a stray ", +" followed by a line break.
+          console.warn(
+            `Created a chunk of size ${total}, which is longer than the specified ${this.chunkSize}`
+          );
+        }
+        if (currentDoc.length > 0) {
+          const doc = this.joinDocs(currentDoc, separator);
+          if (doc !== null) {
+            docs.push(doc);
+          }
+          // Keep on popping if:
+          // - we have a larger chunk than in the chunk overlap
+          // - or if we still have any chunks and the length is long
+          while (
+            total > this.chunkOverlap ||
+            (total + _len + currentDoc.length * separator.length >
+              this.chunkSize &&
+              total > 0)
+          ) {
+            total -= await this.lengthFunction(currentDoc[0]);
+            currentDoc.shift();
+          }
+        }
+      }
+      currentDoc.push(d);
+      total += _len;
+    }
+    const doc = this.joinDocs(currentDoc, separator);
+    if (doc !== null) {
+      docs.push(doc);
+    }
+    return docs;
+  }
+}
+
+export interface CharacterTextSplitterParams extends TextSplitterParams {
+  separator: string; // Literal string to split on; CharacterTextSplitter defaults it to "\n\n".
+}
+
+/** Splits text on one fixed separator, then merges the pieces into chunks. */
+export class CharacterTextSplitter
+  extends TextSplitter
+  implements CharacterTextSplitterParams
+{
+  static lc_name() {
+    return "CharacterTextSplitter";
+  }
+
+  separator = "\n\n";
+
+  constructor(fields?: Partial<CharacterTextSplitterParams>) {
+    super(fields);
+    this.separator = fields?.separator ?? this.separator;
+  }
+
+  async splitText(text: string): Promise<string[]> {
+    const glue = this.keepSeparator ? "" : this.separator; // kept separators are already inside the pieces
+    return this.mergeSplits(this.splitOnSeparator(text, this.separator), glue);
+  }
+}
+
+export interface RecursiveCharacterTextSplitterParams
+  extends TextSplitterParams {
+  separators: string[]; // Tried in array order by RecursiveCharacterTextSplitter; "" splits between every character.
+}
+
+export const SupportedTextSplitterLanguages = [ // Language keys handled by RecursiveCharacterTextSplitter.getSeparatorsForLanguage.
+  "cpp",
+  "go",
+  "java",
+  "js",
+  "php",
+  "proto",
+  "python",
+  "rst",
+  "ruby",
+  "rust",
+  "scala",
+  "swift",
+  "markdown",
+  "latex",
+  "html",
+  "sol",
+] as const;
+
+export type SupportedTextSplitterLanguage = // Union of the string literals listed in SupportedTextSplitterLanguages.
+  (typeof SupportedTextSplitterLanguages)[number];
+
+/** Splits on the first separator of a priority list found in the text, recursing on oversized pieces. */
+export class RecursiveCharacterTextSplitter
+  extends TextSplitter
+  implements RecursiveCharacterTextSplitterParams
+{
+  static lc_name() {
+    return "RecursiveCharacterTextSplitter";
+  }
+
+  separators: string[] = ["\n\n", "\n", " ", ""];
+
+  constructor(fields?: Partial<RecursiveCharacterTextSplitterParams>) {
+    super(fields);
+    // Unlike the base class, separators are kept unless the caller opts out.
+    this.keepSeparator = fields?.keepSeparator ?? true;
+    this.separators = fields?.separators ?? this.separators;
+  }
+
+  /**
+   * Splits `text` on the first entry of `separators` that occurs in it, then
+   * re-splits every piece not shorter than `chunkSize` with the entries after it.
+   */
+  private async _splitText(text: string, separators: string[]) {
+    // "" always matches; like finding no match at all, it leaves no
+    // finer separators to recurse with.
+    const matchIndex = separators.findIndex(
+      (candidate) => candidate === "" || text.includes(candidate)
+    );
+    const separator: string =
+      matchIndex === -1
+        ? separators[separators.length - 1]
+        : separators[matchIndex];
+    const remainingSeparators =
+      matchIndex !== -1 && separator !== ""
+        ? separators.slice(matchIndex + 1)
+        : undefined;
+    const joiner = this.keepSeparator ? "" : separator;
+
+    const chunks: string[] = [];
+    let pending: string[] = [];
+    // Merges the buffered short pieces into chunks and empties the buffer.
+    const flushPending = async () => {
+      if (pending.length) {
+        chunks.push(...(await this.mergeSplits(pending, joiner)));
+        pending = [];
+      }
+    };
+
+    for (const piece of this.splitOnSeparator(text, separator)) {
+      if ((await this.lengthFunction(piece)) < this.chunkSize) {
+        pending.push(piece);
+      } else {
+        await flushPending();
+        if (remainingSeparators) {
+          chunks.push(...(await this._splitText(piece, remainingSeparators)));
+        } else {
+          // No finer separator left: emit the oversized piece as is.
+          chunks.push(piece);
+        }
+      }
+    }
+    await flushPending();
+    return chunks;
+  }
+
+  /** Splits `text` using this instance's `separators`. */
+  async splitText(text: string): Promise<string[]> {
+    return this._splitText(text, this.separators);
+  }
+
+  /** Builds a splitter that uses the separator list of `language`. */
+  static fromLanguage(
+    language: SupportedTextSplitterLanguage,
+    options?: Partial<RecursiveCharacterTextSplitterParams>
+  ) {
+    const separators =
+      RecursiveCharacterTextSplitter.getSeparatorsForLanguage(language);
+    // Listed after the spread so it overrides any options.separators.
+    return new RecursiveCharacterTextSplitter({ ...options, separators });
+  }
+
+  /** Returns the ordered separator list for `language`; throws an Error for an unsupported value. */
+  static getSeparatorsForLanguage(language: SupportedTextSplitterLanguage) {
+    switch (language) {
+      case "cpp":
+        return [
+          // Split along class definitions
+          "\nclass ",
+          // Split along function definitions
+          "\nvoid ",
+          "\nint ",
+          "\nfloat ",
+          "\ndouble ",
+          // Split along control flow statements
+          "\nif ",
+          "\nfor ",
+          "\nwhile ",
+          "\nswitch ",
+          "\ncase ",
+          // Split by the normal type of lines
+          "\n\n",
+          "\n",
+          " ",
+          "",
+        ];
+      case "go":
+        return [
+          // Split along function definitions
+          "\nfunc ",
+          "\nvar ",
+          "\nconst ",
+          "\ntype ",
+          // Split along control flow statements
+          "\nif ",
+          "\nfor ",
+          "\nswitch ",
+          "\ncase ",
+          // Split by the normal type of lines
+          "\n\n",
+          "\n",
+          " ",
+          "",
+        ];
+      case "java":
+        return [
+          // Split along class definitions
+          "\nclass ",
+          // Split along method definitions
+          "\npublic ",
+          "\nprotected ",
+          "\nprivate ",
+          "\nstatic ",
+          // Split along control flow statements
+          "\nif ",
+          "\nfor ",
+          "\nwhile ",
+          "\nswitch ",
+          "\ncase ",
+          // Split by the normal type of lines
+          "\n\n",
+          "\n",
+          " ",
+          "",
+        ];
+      case "js":
+        return [
+          // Split along function definitions
+          "\nfunction ",
+          "\nconst ",
+          "\nlet ",
+          "\nvar ",
+          "\nclass ",
+          // Split along control flow statements
+          "\nif ",
+          "\nfor ",
+          "\nwhile ",
+          "\nswitch ",
+          "\ncase ",
+          "\ndefault ",
+          // Split by the normal type of lines
+          "\n\n",
+          "\n",
+          " ",
+          "",
+        ];
+      case "php":
+        return [
+          // Split along function definitions
+          "\nfunction ",
+          // Split along class definitions
+          "\nclass ",
+          // Split along control flow statements
+          "\nif ",
+          "\nforeach ",
+          "\nwhile ",
+          "\ndo ",
+          "\nswitch ",
+          "\ncase ",
+          // Split by the normal type of lines
+          "\n\n",
+          "\n",
+          " ",
+          "",
+        ];
+      case "proto":
+        return [
+          // Split along message definitions
+          "\nmessage ",
+          // Split along service definitions
+          "\nservice ",
+          // Split along enum definitions
+          "\nenum ",
+          // Split along option definitions
+          "\noption ",
+          // Split along import statements
+          "\nimport ",
+          // Split along syntax declarations
+          "\nsyntax ",
+          // Split by the normal type of lines
+          "\n\n",
+          "\n",
+          " ",
+          "",
+        ];
+      case "python":
+        return [
+          // First, try to split along class definitions
+          "\nclass ",
+          "\ndef ",
+          "\n\tdef ",
+          // Now split by the normal type of lines
+          "\n\n",
+          "\n",
+          " ",
+          "",
+        ];
+      case "rst":
+        return [
+          // Split along section titles
+          "\n===\n",
+          "\n---\n",
+          "\n***\n",
+          // Split along directive markers
+          "\n.. ",
+          // Split by the normal type of lines
+          "\n\n",
+          "\n",
+          " ",
+          "",
+        ];
+      case "ruby":
+        return [
+          // Split along method definitions
+          "\ndef ",
+          "\nclass ",
+          // Split along control flow statements
+          "\nif ",
+          "\nunless ",
+          "\nwhile ",
+          "\nfor ",
+          "\ndo ",
+          "\nbegin ",
+          "\nrescue ",
+          // Split by the normal type of lines
+          "\n\n",
+          "\n",
+          " ",
+          "",
+        ];
+      case "rust":
+        return [
+          // Split along function definitions
+          "\nfn ",
+          "\nconst ",
+          "\nlet ",
+          // Split along control flow statements
+          "\nif ",
+          "\nwhile ",
+          "\nfor ",
+          "\nloop ",
+          "\nmatch ",
+          "\nconst ",
+          // Split by the normal type of lines
+          "\n\n",
+          "\n",
+          " ",
+          "",
+        ];
+      case "scala":
+        return [
+          // Split along class definitions
+          "\nclass ",
+          "\nobject ",
+          // Split along method definitions
+          "\ndef ",
+          "\nval ",
+          "\nvar ",
+          // Split along control flow statements
+          "\nif ",
+          "\nfor ",
+          "\nwhile ",
+          "\nmatch ",
+          "\ncase ",
+          // Split by the normal type of lines
+          "\n\n",
+          "\n",
+          " ",
+          "",
+        ];
+      case "swift":
+        return [
+          // Split along function definitions
+          "\nfunc ",
+          // Split along class definitions
+          "\nclass ",
+          "\nstruct ",
+          "\nenum ",
+          // Split along control flow statements
+          "\nif ",
+          "\nfor ",
+          "\nwhile ",
+          "\ndo ",
+          "\nswitch ",
+          "\ncase ",
+          // Split by the normal type of lines
+          "\n\n",
+          "\n",
+          " ",
+          "",
+        ];
+      case "markdown":
+        return [
+          // First, try to split along Markdown headings (starting with level 2)
+          "\n## ",
+          "\n### ",
+          "\n#### ",
+          "\n##### ",
+          "\n###### ",
+          // The alternative, underlined heading syntax is not handled here:
+          //   Heading level 2
+          //   ---------------
+          // Next separator: end of a code block
+          "```\n\n",
+          // Horizontal lines
+          "\n\n***\n\n",
+          "\n\n---\n\n",
+          "\n\n___\n\n",
+          // Only rules written with exactly three characters are matched;
+          // longer runs of *, - or _ are not handled
+          "\n\n",
+          "\n",
+          " ",
+          "",
+        ];
+      case "latex":
+        return [
+          // First, try to split along Latex sections
+          "\n\\chapter{",
+          "\n\\section{",
+          "\n\\subsection{",
+          "\n\\subsubsection{",
+          // Now split by environments
+          "\n\\begin{enumerate}",
+          "\n\\begin{itemize}",
+          "\n\\begin{description}",
+          "\n\\begin{list}",
+          "\n\\begin{quote}",
+          "\n\\begin{quotation}",
+          "\n\\begin{verse}",
+          "\n\\begin{verbatim}",
+          // Now split by math environments
+          "\n\\begin{align}",
+          "$$",
+          "$",
+          // Now split by the normal type of lines
+          "\n\n",
+          "\n",
+          " ",
+          "",
+        ];
+      case "html":
+        return [
+          // First, try to split along HTML tags
+          "<body>",
+          "<div>",
+          "<p>",
+          "<br>",
+          "<li>",
+          "<h1>",
+          "<h2>",
+          "<h3>",
+          "<h4>",
+          "<h5>",
+          "<h6>",
+          "<span>",
+          "<table>",
+          "<tr>",
+          "<td>",
+          "<th>",
+          "<ul>",
+          "<ol>",
+          "<header>",
+          "<footer>",
+          "<nav>",
+          // Head
+          "<head>",
+          "<style>",
+          "<script>",
+          "<meta>",
+          "<title>",
+          // Normal type of lines
+          " ",
+          "",
+        ];
+      case "sol":
+        return [
+          // Split along compiler informations definitions
+          "\npragma ",
+          "\nusing ",
+          // Split along contract definitions
+          "\ncontract ",
+          "\ninterface ",
+          "\nlibrary ",
+          // Split along method definitions
+          "\nconstructor ",
+          "\ntype ",
+          "\nfunction ",
+          "\nevent ",
+          "\nmodifier ",
+          "\nerror ",
+          "\nstruct ",
+          "\nenum ",
+          // Split along control flow statements
+          "\nif ",
+          "\nfor ",
+          "\nwhile ",
+          "\ndo while ",
+          "\nassembly ",
+          // Split by the normal type of lines
+          "\n\n",
+          "\n",
+          " ",
+          "",
+        ];
+      default:
+        throw new Error(`Language ${language} is not supported.`);
+    }
+  }
+}
+
+export interface TokenTextSplitterParams extends TextSplitterParams {
+  encodingName: tiktoken.TiktokenEncoding; // Encoding loaded through getEncoding; TokenTextSplitter defaults it to "gpt2".
+  allowedSpecial: "all" | Array<string>; // Forwarded to tokenizer.encode; TokenTextSplitter defaults it to [].
+  disallowedSpecial: "all" | Array<string>; // Forwarded to tokenizer.encode; TokenTextSplitter defaults it to "all".
+}
+
+/**
+ * Splitter that works on tokens: the text is encoded with a tiktoken
+ * encoding, cut into windows of at most `chunkSize` tokens that overlap by
+ * `chunkOverlap` tokens, and each window is decoded back into a string.
+ */
+export class TokenTextSplitter
+  extends TextSplitter
+  implements TokenTextSplitterParams
+{
+  static lc_name() {
+    return "TokenTextSplitter";
+  }
+
+  encodingName: tiktoken.TiktokenEncoding;
+
+  allowedSpecial: "all" | Array<string>;
+
+  disallowedSpecial: "all" | Array<string>;
+
+  private tokenizer: tiktoken.Tiktoken;
+
+  constructor(fields?: Partial<TokenTextSplitterParams>) {
+    super(fields);
+    this.encodingName = fields?.encodingName ?? "gpt2";
+    this.allowedSpecial = fields?.allowedSpecial ?? [];
+    this.disallowedSpecial = fields?.disallowedSpecial ?? "all";
+  }
+
+  /**
+   * Encodes `text` and returns the decoded token windows, in order.
+   */
+  async splitText(text: string): Promise<string[]> {
+    // The encoding is loaded on first use and cached on the instance.
+    if (!this.tokenizer) {
+      this.tokenizer = await getEncoding(this.encodingName);
+    }
+    const { chunkSize, chunkOverlap } = this;
+    const tokenIds = this.tokenizer.encode(
+      text,
+      this.allowedSpecial,
+      this.disallowedSpecial
+    );
+
+    const chunks: string[] = [];
+    let end = 0;
+    while (end < tokenIds.length) {
+      // Every window after the first starts `chunkOverlap` tokens early.
+      const start = end > 0 ? end - chunkOverlap : 0;
+      end = Math.min(start + chunkSize, tokenIds.length);
+      chunks.push(this.tokenizer.decode(tokenIds.slice(start, end)));
+    }
+
+    return chunks;
+  }
+}
+
+export type MarkdownTextSplitterParams = TextSplitterParams; // Alias: MarkdownTextSplitter takes the base splitter options.
+
+/** RecursiveCharacterTextSplitter preconfigured with the "markdown" separators. */
+export class MarkdownTextSplitter
+  extends RecursiveCharacterTextSplitter
+  implements MarkdownTextSplitterParams
+{
+  constructor(fields?: Partial<MarkdownTextSplitterParams>) {
+    const separators =
+      RecursiveCharacterTextSplitter.getSeparatorsForLanguage("markdown");
+    // Listed after the spread so the markdown separators always win.
+    super({ ...fields, separators });
+  }
+}
+
+export type LatexTextSplitterParams = TextSplitterParams; // Alias: LatexTextSplitter takes the base splitter options.
+
+/** RecursiveCharacterTextSplitter preconfigured with the "latex" separators. */
+export class LatexTextSplitter
+  extends RecursiveCharacterTextSplitter
+  implements LatexTextSplitterParams
+{
+  constructor(fields?: Partial<LatexTextSplitterParams>) {
+    const separators =
+      RecursiveCharacterTextSplitter.getSeparatorsForLanguage("latex");
+    // Listed after the spread so the latex separators always win.
+    super({ ...fields, separators });
+  }
+}
diff --git a/libs/langchain-textsplitters/tsconfig.cjs.json b/libs/langchain-textsplitters/tsconfig.cjs.json
new file mode 100644
index 000000000000..3b7026ea406c
--- /dev/null
+++ b/libs/langchain-textsplitters/tsconfig.cjs.json
@@ -0,0 +1,8 @@
+{
+  "extends": "./tsconfig.json",
+  "compilerOptions": {
+    "module": "commonjs",
+    "declaration": false
+  },
+  "exclude": ["node_modules", "dist", "docs", "**/tests"]
+}
diff --git a/libs/langchain-textsplitters/tsconfig.json b/libs/langchain-textsplitters/tsconfig.json
new file mode 100644
index 000000000000..bc85d83b6229
--- /dev/null
+++ b/libs/langchain-textsplitters/tsconfig.json
@@ -0,0 +1,23 @@
+{
+  "extends": "@tsconfig/recommended",
+  "compilerOptions": {
+    "outDir": "../dist",
+    "rootDir": "./src",
+    "target": "ES2021",
+    "lib": ["ES2021", "ES2022.Object", "DOM"],
+    "module": "ES2020",
+    "moduleResolution": "nodenext",
+    "esModuleInterop": true,
+    "declaration": true,
+    "noImplicitReturns": true,
+    "noFallthroughCasesInSwitch": true,
+    "noUnusedLocals": true,
+    "noUnusedParameters": true,
+    "useDefineForClassFields": true,
+    "strictPropertyInitialization": false,
+    "allowJs": true,
+    "strict": true
+  },
+  "include": ["src/**/*"],
+  "exclude": ["node_modules", "dist", "docs"]
+}
diff --git a/yarn.lock b/yarn.lock
index 67b2d17cfde0..9ac8389f4de9 100644
--- a/yarn.lock
+++ b/yarn.lock
@@ -9883,6 +9883,37 @@ __metadata:
   languageName: unknown
   linkType: soft
 
+"@langchain/textsplitters@workspace:libs/langchain-textsplitters, @langchain/textsplitters@~0.0.0":
+  version: 0.0.0-use.local
+  resolution: "@langchain/textsplitters@workspace:libs/langchain-textsplitters"
+  dependencies:
+    "@jest/globals": ^29.5.0
+    "@langchain/core": ~0.1
+    "@langchain/scripts": ~0.0
+    "@swc/core": ^1.3.90
+    "@swc/jest": ^0.2.29
+    "@tsconfig/recommended": ^1.0.3
+    "@typescript-eslint/eslint-plugin": ^6.12.0
+    "@typescript-eslint/parser": ^6.12.0
+    dotenv: ^16.3.1
+    dpdm: ^3.12.0
+    eslint: ^8.33.0
+    eslint-config-airbnb-base: ^15.0.0
+    eslint-config-prettier: ^8.6.0
+    eslint-plugin-import: ^2.27.5
+    eslint-plugin-no-instanceof: ^1.0.1
+    eslint-plugin-prettier: ^4.2.1
+    jest: ^29.5.0
+    jest-environment-node: ^29.6.4
+    js-tiktoken: ^1.0.11
+    prettier: ^2.8.3
+    release-it: ^15.10.1
+    rollup: ^4.5.2
+    ts-jest: ^29.1.0
+    typescript: <5.2.0
+  languageName: unknown
+  linkType: soft
+
 "@langchain/weaviate@workspace:*, @langchain/weaviate@workspace:libs/langchain-weaviate":
   version: 0.0.0-use.local
   resolution: "@langchain/weaviate@workspace:libs/langchain-weaviate"
@@ -26140,6 +26171,15 @@ __metadata:
   languageName: node
   linkType: hard
 
+"js-tiktoken@npm:^1.0.11":
+  version: 1.0.11
+  resolution: "js-tiktoken@npm:1.0.11"
+  dependencies:
+    base64-js: ^1.5.1
+  checksum: 0cb3e81f28bdf72b35994a864c1466639e2424943bc66377e85e2c7984aa914d53617a6787a6e97930365a0ca9ba503357a8266f83e32fc610ba1dbee105d134
+  languageName: node
+  linkType: hard
+
 "js-tiktoken@npm:^1.0.7":
   version: 1.0.7
   resolution: "js-tiktoken@npm:1.0.7"
@@ -26629,6 +26669,7 @@ __metadata:
     "@langchain/core": ~0.1.56
     "@langchain/openai": ~0.0.28
     "@langchain/scripts": ~0.0
+    "@langchain/textsplitters": ~0.0.0
     "@notionhq/client": ^2.2.10
     "@pinecone-database/pinecone": ^1.1.0
     "@supabase/supabase-js": ^2.10.0