From 82908e4096cb2ccefb262c0c3347be8d16568881 Mon Sep 17 00:00:00 2001 From: Henry Heng Date: Mon, 22 Apr 2024 21:15:34 +0100 Subject: [PATCH 01/18] google-genai[patch]: update google/generative-ai version (#5115) * update google/generative-ai version * update leading string check gemini-1.5 * Format --------- Co-authored-by: jacoblee93 --- examples/package.json | 2 +- libs/langchain-google-genai/package.json | 2 +- .../langchain-google-genai/src/chat_models.ts | 2 +- libs/langchain-google-genai/src/utils.ts | 9 +++++++-- yarn.lock | 19 ++++++------------- 5 files changed, 16 insertions(+), 18 deletions(-) diff --git a/examples/package.json b/examples/package.json index 7225d3940ca0..62a828548b2a 100644 --- a/examples/package.json +++ b/examples/package.json @@ -29,7 +29,7 @@ "@getmetal/metal-sdk": "^4.0.0", "@getzep/zep-js": "^0.9.0", "@gomomento/sdk": "^1.51.1", - "@google/generative-ai": "^0.1.0", + "@google/generative-ai": "^0.7.0", "@langchain/anthropic": "workspace:*", "@langchain/azure-openai": "workspace:*", "@langchain/cloudflare": "workspace:*", diff --git a/libs/langchain-google-genai/package.json b/libs/langchain-google-genai/package.json index aeef9d7f8912..aa89ae872530 100644 --- a/libs/langchain-google-genai/package.json +++ b/libs/langchain-google-genai/package.json @@ -39,7 +39,7 @@ "author": "LangChain", "license": "MIT", "dependencies": { - "@google/generative-ai": "^0.1.3", + "@google/generative-ai": "^0.7.0", "@langchain/core": "~0.1.5" }, "devDependencies": { diff --git a/libs/langchain-google-genai/src/chat_models.ts b/libs/langchain-google-genai/src/chat_models.ts index 5e62472c4c48..fca4a2be5898 100644 --- a/libs/langchain-google-genai/src/chat_models.ts +++ b/libs/langchain-google-genai/src/chat_models.ts @@ -186,7 +186,7 @@ export class ChatGoogleGenerativeAI private client: GenerativeModel; get _isMultimodalModel() { - return this.model.includes("vision"); + return this.model.includes("vision") || this.model.startsWith("gemini-1.5"); } 
constructor(fields?: GoogleGenerativeAIChatInput) { diff --git a/libs/langchain-google-genai/src/utils.ts b/libs/langchain-google-genai/src/utils.ts index 509359ed9790..6cbe4a1b94b5 100644 --- a/libs/langchain-google-genai/src/utils.ts +++ b/libs/langchain-google-genai/src/utils.ts @@ -67,10 +67,15 @@ export function convertMessageContentToParts( if (!isMultimodalModel) { throw new Error(`This model does not support images`); } - if (typeof c.image_url !== "string") { + let source; + if (typeof c.image_url === "string") { + source = c.image_url; + } else if (typeof c.image_url === "object" && "url" in c.image_url) { + source = c.image_url.url; + } else { throw new Error("Please provide image as base64 encoded data URL"); } - const [dm, data] = c.image_url.split(","); + const [dm, data] = source.split(","); if (!dm.startsWith("data:")) { throw new Error("Please provide image as base64 encoded data URL"); } diff --git a/yarn.lock b/yarn.lock index 7db4a01f6c6a..dfa1792aa94d 100644 --- a/yarn.lock +++ b/yarn.lock @@ -8124,17 +8124,10 @@ __metadata: languageName: node linkType: hard -"@google/generative-ai@npm:^0.1.0": - version: 0.1.0 - resolution: "@google/generative-ai@npm:0.1.0" - checksum: 80a3eebcd831ad894e6c7a828a0519c7974eee10aa5c765aa837dc7dab36017e5dc3a08470acc4955a5b44013b77358760d5b197e83016f416d1c3b39d265a79 - languageName: node - linkType: hard - -"@google/generative-ai@npm:^0.1.3": - version: 0.1.3 - resolution: "@google/generative-ai@npm:0.1.3" - checksum: 6ab4e214c5f792c9dce66aa00268dd75295be093ec9305ccb8e2251210e5a6680a17ec9f041d8108ee3a2ce49e2f26bc9a30ef97e17a2d83818a92a824f6efd1 +"@google/generative-ai@npm:^0.7.0": + version: 0.7.1 + resolution: "@google/generative-ai@npm:0.7.1" + checksum: 536c7c75545c93731f0ab1fa9be6c88c64ead6ab6b24e70763e592e163041444f9ae78e2095019cd0e27fc18cbdc1ecaf1fdfd3561ca0a61577f720ddbaba1f2 languageName: node linkType: hard @@ -9496,7 +9489,7 @@ __metadata: version: 0.0.0-use.local resolution: 
"@langchain/google-genai@workspace:libs/langchain-google-genai" dependencies: - "@google/generative-ai": ^0.1.3 + "@google/generative-ai": ^0.7.0 "@jest/globals": ^29.5.0 "@langchain/core": ~0.1.5 "@langchain/scripts": ~0.0 @@ -21723,7 +21716,7 @@ __metadata: "@getmetal/metal-sdk": ^4.0.0 "@getzep/zep-js": ^0.9.0 "@gomomento/sdk": ^1.51.1 - "@google/generative-ai": ^0.1.0 + "@google/generative-ai": ^0.7.0 "@langchain/anthropic": "workspace:*" "@langchain/azure-openai": "workspace:*" "@langchain/cloudflare": "workspace:*" From 93070985a4f21d32a1a7cd8d5dc9c3902cf1d6d4 Mon Sep 17 00:00:00 2001 From: Mohammed Bilal Shareef Date: Tue, 23 Apr 2024 01:48:32 +0530 Subject: [PATCH 02/18] community[patch]: Invoke toTitleCase only when string is present to avoid errors (#5145) * Invoke toTitleCase only when string is present to avoid errors * fix: prettier formatting issues --------- Co-authored-by: Brace Sproul --- .../src/experimental/graph_transformers/llm.ts | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/libs/langchain-community/src/experimental/graph_transformers/llm.ts b/libs/langchain-community/src/experimental/graph_transformers/llm.ts index fdb6ed75d127..41167e09ad6f 100644 --- a/libs/langchain-community/src/experimental/graph_transformers/llm.ts +++ b/libs/langchain-community/src/experimental/graph_transformers/llm.ts @@ -129,7 +129,7 @@ function createSchema(allowedNodes: string[], allowedRelationships: string[]) { function mapToBaseNode(node: any): Node { return new Node({ id: node.id, - type: toTitleCase(node.type || ""), + type: node.type ? toTitleCase(node.type) : "", }); } @@ -138,11 +138,15 @@ function mapToBaseRelationship(relationship: any): Relationship { return new Relationship({ source: new Node({ id: relationship.sourceNodeId, - type: toTitleCase(relationship.sourceNodeType || ""), + type: relationship.sourceNodeType + ? 
toTitleCase(relationship.sourceNodeType) + : "", }), target: new Node({ id: relationship.targetNodeId, - type: toTitleCase(relationship.targetNodeType || ""), + type: relationship.targetNodeType + ? toTitleCase(relationship.targetNodeType) + : "", }), type: relationship.relationshipType.replace(" ", "_").toUpperCase(), }); From ed05a2fd85c5f2d11b89f5d83c7bb8c21f021d20 Mon Sep 17 00:00:00 2001 From: Jacob Lee Date: Mon, 22 Apr 2024 14:19:34 -0700 Subject: [PATCH 03/18] google-genai[patch]: Release 0.0.12 (#5174) --- libs/langchain-google-genai/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/langchain-google-genai/package.json b/libs/langchain-google-genai/package.json index aa89ae872530..02c10361b130 100644 --- a/libs/langchain-google-genai/package.json +++ b/libs/langchain-google-genai/package.json @@ -1,6 +1,6 @@ { "name": "@langchain/google-genai", - "version": "0.0.11", + "version": "0.0.12", "description": "Sample integration for LangChain.js", "type": "module", "engines": { From 99bfed20382d32fbf70aecede3cc305479ef470d Mon Sep 17 00:00:00 2001 From: Sam Trost Date: Mon, 22 Apr 2024 17:51:44 -0400 Subject: [PATCH 04/18] community[patch]: allow dynamic opensearch vector, text, and metadata field names (#5165) * allow dynamic opensearch vector, text, and metadata field names * Fix typing --------- Co-authored-by: jacoblee93 --- .../src/vectorstores/opensearch.ts | 36 ++++++++++++------- 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/libs/langchain-community/src/vectorstores/opensearch.ts b/libs/langchain-community/src/vectorstores/opensearch.ts index 79c7360d010b..2f434928e245 100644 --- a/libs/langchain-community/src/vectorstores/opensearch.ts +++ b/libs/langchain-community/src/vectorstores/opensearch.ts @@ -29,6 +29,9 @@ interface VectorSearchOptions { */ export interface OpenSearchClientArgs { readonly client: Client; + readonly vectorFieldName?: string; + readonly textFieldName?: string; + readonly 
metadataFieldName?: string; readonly service?: "es" | "aoss"; readonly indexName?: string; @@ -91,6 +94,12 @@ export class OpenSearchVectorStore extends VectorStore { private readonly m: number; + private readonly vectorFieldName: string; + + private readonly textFieldName: string; + + private readonly metadataFieldName: string; + _vectorstoreType(): string { return "opensearch"; } @@ -105,6 +114,9 @@ export class OpenSearchVectorStore extends VectorStore { this.efSearch = args.vectorSearchOptions?.efSearch ?? 512; this.numberOfShards = args.vectorSearchOptions?.numberOfShards ?? 5; this.numberOfReplicas = args.vectorSearchOptions?.numberOfReplicas ?? 1; + this.vectorFieldName = args.vectorFieldName ?? "embedding"; + this.textFieldName = args.textFieldName ?? "text"; + this.metadataFieldName = args.metadataFieldName ?? "metadata"; this.client = args.client; this.indexName = args.indexName ?? "documents"; @@ -161,9 +173,9 @@ export class OpenSearchVectorStore extends VectorStore { }, }, { - embedding, - metadata: documents[idx].metadata, - text: documents[idx].pageContent, + [this.vectorFieldName]: embedding, + [this.textFieldName]: documents[idx].pageContent, + [this.metadataFieldName]: documents[idx].metadata, }, ]; @@ -204,7 +216,7 @@ export class OpenSearchVectorStore extends VectorStore { must: [ { knn: { - embedding: { vector: query, k }, + [this.vectorFieldName]: { vector: query, k }, }, }, ], @@ -219,8 +231,8 @@ export class OpenSearchVectorStore extends VectorStore { // eslint-disable-next-line @typescript-eslint/no-explicit-any return body.hits.hits.map((hit: any) => [ new Document({ - pageContent: hit._source.text, - metadata: hit._source.metadata, + pageContent: hit._source[this.textFieldName], + metadata: hit._source[this.metadataFieldName], }), hit._score, ]); @@ -306,22 +318,22 @@ export class OpenSearchVectorStore extends VectorStore { dynamic_templates: [ { // map all metadata properties to be keyword - "metadata.*": { + 
[`${this.metadataFieldName}.*`]: { match_mapping_type: "string", mapping: { type: "keyword" }, }, }, { - "metadata.loc": { + [`${this.metadataFieldName}.loc`]: { match_mapping_type: "object", mapping: { type: "object" }, }, }, ], properties: { - text: { type: "text" }, - metadata: { type: "object" }, - embedding: { + [this.textFieldName]: { type: "text" }, + [this.metadataFieldName]: { type: "object" }, + [this.vectorFieldName]: { type: "knn_vector", dimension, method: { @@ -373,7 +385,7 @@ export class OpenSearchVectorStore extends VectorStore { const must = []; const must_not = []; for (const [key, value] of Object.entries(filter)) { - const metadataKey = `metadata.${key}`; + const metadataKey = `${this.metadataFieldName}.${key}`; if (value) { if (typeof value === "object" && !Array.isArray(value)) { if ("exists" in value) { From fcb2797eccab0d462e3262e6933beebdfede1373 Mon Sep 17 00:00:00 2001 From: Anush Date: Tue, 23 Apr 2024 03:24:10 +0530 Subject: [PATCH 05/18] refactor: configurable keys Qdrant (#5172) --- .../docs/integrations/vectorstores/qdrant.mdx | 4 ---- .../src/vectorstores/qdrant.ts | 22 +++++++++++++++---- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/docs/core_docs/docs/integrations/vectorstores/qdrant.mdx b/docs/core_docs/docs/integrations/vectorstores/qdrant.mdx index 2d0467357d05..1c616c2cf940 100644 --- a/docs/core_docs/docs/integrations/vectorstores/qdrant.mdx +++ b/docs/core_docs/docs/integrations/vectorstores/qdrant.mdx @@ -6,10 +6,6 @@ sidebar_class_name: node-only [Qdrant](https://qdrant.tech/) is a vector similarity search engine. It provides a production-ready service with a convenient API to store, search, and manage points - vectors with an additional payload. -:::tip Compatibility -Only available on Node.js. -::: - ## Setup 1. Run a Qdrant instance with Docker on your computer by following the [Qdrant setup instructions](https://qdrant.tech/documentation/install/). 
diff --git a/libs/langchain-community/src/vectorstores/qdrant.ts b/libs/langchain-community/src/vectorstores/qdrant.ts index 7925c09bdae1..e2b6d5bc8024 100644 --- a/libs/langchain-community/src/vectorstores/qdrant.ts +++ b/libs/langchain-community/src/vectorstores/qdrant.ts @@ -6,6 +6,9 @@ import { VectorStore } from "@langchain/core/vectorstores"; import { Document } from "@langchain/core/documents"; import { getEnvironmentVariable } from "@langchain/core/utils/env"; +const CONTENT_KEY = "content"; +const METADATA_KEY = "metadata"; + /** * Interface for the arguments that can be passed to the * `QdrantVectorStore` constructor. It includes options for specifying a @@ -20,6 +23,8 @@ export interface QdrantLibArgs { collectionConfig?: QdrantSchemas["CreateCollection"]; // eslint-disable-next-line @typescript-eslint/no-explicit-any customPayload?: Record[]; + contentPayloadKey?: string; + metadataPayloadKey?: string; } export type QdrantAddDocumentOptions = { @@ -59,6 +64,10 @@ export class QdrantVectorStore extends VectorStore { collectionConfig?: QdrantSchemas["CreateCollection"]; + contentPayloadKey: string; + + metadataPayloadKey: string; + _vectorstoreType(): string { return "qdrant"; } @@ -83,6 +92,10 @@ export class QdrantVectorStore extends VectorStore { this.collectionName = args.collectionName ?? "documents"; this.collectionConfig = args.collectionConfig; + + this.contentPayloadKey = args.contentPayloadKey ?? CONTENT_KEY; + + this.metadataPayloadKey = args.metadataPayloadKey ?? 
METADATA_KEY; } /** @@ -129,8 +142,8 @@ export class QdrantVectorStore extends VectorStore { id: uuid(), vector: embedding, payload: { - content: documents[idx].pageContent, - metadata: documents[idx].metadata, + [this.contentPayloadKey]: documents[idx].pageContent, + [this.metadataPayloadKey]: documents[idx].metadata, customPayload: documentOptions?.customPayload[idx], }, })); @@ -181,8 +194,9 @@ export class QdrantVectorStore extends VectorStore { results as QdrantSearchResponse[] ).map((res) => [ new Document({ - metadata: res.payload.metadata, - pageContent: res.payload.content, + // eslint-disable-next-line @typescript-eslint/no-explicit-any + metadata: res.payload[this.metadataPayloadKey] as Record, + pageContent: res.payload[this.contentPayloadKey] as string, }), res.score, ]); From b0c383dfa9391b55bd296cbc3fd35065d3d381fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=B2=20Boschi?= Date: Mon, 22 Apr 2024 23:54:25 +0200 Subject: [PATCH 06/18] community[patch]: AstraDB: add option to skip create collection call (#5170) * community[feat]: AstraDB: add option to skip create collection call * test --- .../src/vectorstores/astradb.ts | 20 ++++++++++--- .../vectorstores/tests/astradb.int.test.ts | 29 +++++++++++++++++++ 2 files changed, 45 insertions(+), 4 deletions(-) diff --git a/libs/langchain-community/src/vectorstores/astradb.ts b/libs/langchain-community/src/vectorstores/astradb.ts index 60f3d66d3b56..cffe092cfaee 100644 --- a/libs/langchain-community/src/vectorstores/astradb.ts +++ b/libs/langchain-community/src/vectorstores/astradb.ts @@ -29,6 +29,7 @@ export interface AstraLibArgs extends AsyncCallerParams { namespace?: string; idKey?: string; contentKey?: string; + skipCollectionProvisioning?: boolean; collectionOptions?: CreateCollectionOptions; batchSize?: number; } @@ -56,6 +57,8 @@ export class AstraDBVectorStore extends VectorStore { caller: AsyncCaller; + private readonly skipCollectionProvisioning: boolean; + _vectorstoreType(): string { return 
"astradb"; } @@ -72,6 +75,7 @@ export class AstraDBVectorStore extends VectorStore { idKey, contentKey, batchSize, + skipCollectionProvisioning, ...callerArgs } = args; const dataAPIClient = new DataAPIClient(token, { caller: ["langchainjs"] }); @@ -91,6 +95,12 @@ export class AstraDBVectorStore extends VectorStore { this.contentKey = contentKey ?? "text"; this.batchSize = batchSize && batchSize <= 20 ? batchSize : 20; this.caller = new AsyncCaller(callerArgs); + this.skipCollectionProvisioning = skipCollectionProvisioning ?? false; + if (this.skipCollectionProvisioning && this.collectionOptions) { + throw new Error( + "If 'skipCollectionProvisioning' has been set to true, 'collectionOptions' must not be defined" + ); + } } /** @@ -100,10 +110,12 @@ export class AstraDBVectorStore extends VectorStore { * @returns Promise that resolves if connected to the collection. */ async initialize(): Promise { - await this.astraDBClient.createCollection( - this.collectionName, - this.collectionOptions - ); + if (!this.skipCollectionProvisioning) { + await this.astraDBClient.createCollection( + this.collectionName, + this.collectionOptions + ); + } this.collection = await this.astraDBClient.collection(this.collectionName); console.debug("Connected to Astra DB collection"); } diff --git a/libs/langchain-community/src/vectorstores/tests/astradb.int.test.ts b/libs/langchain-community/src/vectorstores/tests/astradb.int.test.ts index 16e41f1a473c..c13555ebd22b 100644 --- a/libs/langchain-community/src/vectorstores/tests/astradb.int.test.ts +++ b/libs/langchain-community/src/vectorstores/tests/astradb.int.test.ts @@ -163,4 +163,33 @@ describe.skip("AstraDBVectorStore", () => { ); } }, 60000); + + test("skipCollectionProvisioning", async () => { + let store = new AstraDBVectorStore(new FakeEmbeddings(), { + ...astraConfig, + skipCollectionProvisioning: true, + collectionOptions: undefined, + }); + await store.initialize(); + try { + await store.similaritySearch("test"); + fail("Should 
have thrown error"); + // eslint-disable-next-line @typescript-eslint/no-explicit-any + } catch (e: any) { + expect(e.message).toContain("'default_keyspace.langchain_test'"); + } + store = new AstraDBVectorStore(new FakeEmbeddings(), { + ...astraConfig, + skipCollectionProvisioning: false, + collectionOptions: { + checkExists: false, + vector: { + dimension: 4, + metric: "cosine", + }, + }, + }); + await store.initialize(); + await store.similaritySearch("test"); + }); }); From 137a3be855716d33601eb3e93e44d59b423b0533 Mon Sep 17 00:00:00 2001 From: davidfant Date: Mon, 22 Apr 2024 23:16:17 +0100 Subject: [PATCH 07/18] langchain[patch]: Parallelize applyEvaluators used by `runOnDataset` (#5127) * parallelize applyEvaluators * Add concurrency with AsyncCaller --------- Co-authored-by: jacoblee93 --- langchain/src/smith/runner_utils.ts | 85 +++++++++++++++++++---------- 1 file changed, 56 insertions(+), 29 deletions(-) diff --git a/langchain/src/smith/runner_utils.ts b/langchain/src/smith/runner_utils.ts index c6d013438250..4377ec7a9523 100644 --- a/langchain/src/smith/runner_utils.ts +++ b/langchain/src/smith/runner_utils.ts @@ -10,6 +10,7 @@ import { import { LangChainTracer } from "@langchain/core/tracers/tracer_langchain"; import { BaseTracer } from "@langchain/core/tracers/base"; import { ChainValues } from "@langchain/core/utils/types"; +import { AsyncCaller } from "@langchain/core/utils/async_caller"; import { Client, Example, @@ -522,46 +523,71 @@ const applyEvaluators = async ({ runs, examples, client, + maxConcurrency, }: { evaluation: LoadedEvalConfig; runs: Run[]; examples: Example[]; client: Client; -}) => { + maxConcurrency: number; +}): Promise<{ + [key: string]: { + execution_time?: number; + run_id: string; + feedback: Feedback[]; + }; +}> => { // TODO: Parallelize and/or put in callbacks to speed up evals. 
const { evaluators } = evaluation; const progress = new ProgressBar({ total: examples.length, format: "Running Evaluators: {bar} {percentage}% | {value}/{total}\n", }); - const results: Record< - string, - { run_id: string; execution_time?: number; feedback: Feedback[] } - > = {}; - for (let i = 0; i < runs.length; i += 1) { - const run = runs[i]; - const example = examples[i]; - const evaluatorResults = await Promise.allSettled( - evaluators.map((evaluator) => - client.evaluateRun(run, evaluator, { - referenceExample: example, - loadChildRuns: false, - }) - ) - ); - progress.increment(); - results[example.id] = { - execution_time: - run?.end_time && run.start_time - ? run.end_time - run.start_time - : undefined, - feedback: evaluatorResults.map((evalResult) => - evalResult.status === "fulfilled" ? evalResult.value : evalResult.reason - ), - run_id: run.id, - }; - } - return results; + const caller = new AsyncCaller({ + maxConcurrency, + }); + const requests = runs.map( + async ( + run, + i + ): Promise<{ + run_id: string; + execution_time?: number; + feedback: Feedback[]; + }> => + caller.call(async () => { + const evaluatorResults = await Promise.allSettled( + evaluators.map((evaluator) => + client.evaluateRun(run, evaluator, { + referenceExample: examples[i], + loadChildRuns: false, + }) + ) + ); + progress.increment(); + return { + execution_time: + run?.end_time && run.start_time + ? run.end_time - run.start_time + : undefined, + feedback: evaluatorResults.map((evalResult) => + evalResult.status === "fulfilled" + ? 
evalResult.value + : evalResult.reason + ), + run_id: run.id, + }; + }) + ); + const results = await Promise.all(requests); + + return results.reduce( + (acc, result, i) => ({ + ...acc, + [examples[i].id]: result, + }), + {} + ); }; export type EvalResults = { @@ -733,6 +759,7 @@ export async function runOnDataset( runs, examples, client: testClient, + maxConcurrency: testConcurrency, }); } const results: EvalResults = { From 81a5195c742865a2c6fda4a862221a1e2df8c0cd Mon Sep 17 00:00:00 2001 From: Katarina Supe <61758502+katarinasupe@users.noreply.github.com> Date: Tue, 23 Apr 2024 00:16:32 +0200 Subject: [PATCH 08/18] docs[patch]: Update Memgraph docs (#5171) * Update Memgraph docs * Format --------- Co-authored-by: jacoblee93 --- .../experimental/graph_databases/memgraph.mdx | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/docs/core_docs/docs/modules/data_connection/experimental/graph_databases/memgraph.mdx b/docs/core_docs/docs/modules/data_connection/experimental/graph_databases/memgraph.mdx index 2582b0cc4078..f4e4e10e9bea 100644 --- a/docs/core_docs/docs/modules/data_connection/experimental/graph_databases/memgraph.mdx +++ b/docs/core_docs/docs/modules/data_connection/experimental/graph_databases/memgraph.mdx @@ -18,7 +18,7 @@ npm install @langchain/openai neo4j-driver @langchain/community Memgraph bundles the database along with various analytical tools into distinct Docker images. If you're new to Memgraph or you're in a developing stage, we -recommend using the `memgraph-platform` image. Besides the database, it also +recommend running Memgraph Platform with Docker Compose. 
Besides the database, it also includes all the tools you might need to analyze your data, such as command-line interface [mgconsole](https://memgraph.com/docs/getting-started/cli), web interface [Memgraph Lab](https://memgraph.com/docs/data-visualization) and a @@ -28,8 +28,16 @@ complete set of algorithms within a With the Docker running in the background, run the following command in the console: +Linux/MacOS: + ```bash -docker run -p 7687:7687 -p 7444:7444 -p 3000:3000 --name memgraph memgraph/memgraph-platform +curl https://install.memgraph.com | sh +``` + +Windows: + +``` +iwr https://windows.memgraph.com | iex ``` For other options of installation, check the [Getting started guide](https://memgraph.com/docs/getting-started). From 39bd3c2a247601aa9ef11fe43f1c23134aea8b7d Mon Sep 17 00:00:00 2001 From: Jacob Lee Date: Mon, 22 Apr 2024 15:30:58 -0700 Subject: [PATCH 09/18] community[patch]: Release 0.0.51 (#5176) --- libs/langchain-community/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/langchain-community/package.json b/libs/langchain-community/package.json index 4ace57e72241..eb46050b7254 100644 --- a/libs/langchain-community/package.json +++ b/libs/langchain-community/package.json @@ -1,6 +1,6 @@ { "name": "@langchain/community", - "version": "0.0.50", + "version": "0.0.51", "description": "Third-party integrations for LangChain.js", "type": "module", "engines": { From a8e3e2d7ea190ee50baf4531feec94c98883d7f5 Mon Sep 17 00:00:00 2001 From: Brace Sproul Date: Mon, 22 Apr 2024 15:41:20 -0700 Subject: [PATCH 10/18] docs[minor]: Make providers index page show all integration pkgs (#5175) * docs[minor]: Make providers index page show all integrations * cr * nit --- docs/core_docs/.gitignore | 6 ++-- .../docs/integrations/platforms/index.mdx | 30 +++++++++++++++++++ docs/core_docs/sidebars.js | 7 ++--- 3 files changed, 37 insertions(+), 6 deletions(-) create mode 100644 docs/core_docs/docs/integrations/platforms/index.mdx diff 
--git a/docs/core_docs/.gitignore b/docs/core_docs/.gitignore index 3277be5e78ff..310e12e5ff76 100644 --- a/docs/core_docs/.gitignore +++ b/docs/core_docs/.gitignore @@ -109,10 +109,12 @@ docs/use_cases/extraction/how_to/examples.md docs/use_cases/extraction/how_to/examples.mdx docs/modules/model_io/output_parsers/custom.md docs/modules/model_io/output_parsers/custom.mdx -docs/modules/model_io/chat/function_calling.md -docs/modules/model_io/chat/function_calling.mdx docs/modules/memory/chat_messages/custom.md docs/modules/memory/chat_messages/custom.mdx +docs/modules/model_io/chat/response_metadata.md +docs/modules/model_io/chat/response_metadata.mdx +docs/modules/model_io/chat/function_calling.md +docs/modules/model_io/chat/function_calling.mdx docs/modules/data_connection/vectorstores/custom.md docs/modules/data_connection/vectorstores/custom.mdx docs/modules/agents/agent_types/tool_calling.md diff --git a/docs/core_docs/docs/integrations/platforms/index.mdx b/docs/core_docs/docs/integrations/platforms/index.mdx new file mode 100644 index 000000000000..3cdd03e83d2e --- /dev/null +++ b/docs/core_docs/docs/integrations/platforms/index.mdx @@ -0,0 +1,30 @@ +--- +sidebar_position: 0 +sidebar_class_name: hidden +--- + +# Providers + +LangChain integrates with many providers. + +## Partner Packages + +These providers have standalone `@langchain/{provider}` packages for improved versioning, dependency management and testing. 
+ +- [Anthropic](https://www.npmjs.com/package/@langchain/anthropic) +- [Azure OpenAI](https://www.npmjs.com/package/@langchain/azure-openai) +- [Cloudflare](https://www.npmjs.com/package/@langchain/cloudflare) +- [Cohere](https://www.npmjs.com/package/@langchain/cohere) +- [Exa](https://www.npmjs.com/package/@langchain/exa) +- [Google GenAI](https://www.npmjs.com/package/@langchain/google-genai) +- [Google VertexAI](https://www.npmjs.com/package/@langchain/google-vertexai) +- [Google VertexAI Web](https://www.npmjs.com/package/@langchain/google-vertexai-web) +- [Groq](https://www.npmjs.com/package/@langchain/groq) +- [MistralAI](https://www.npmjs.com/package/@langchain/mistralai) +- [MongoDB](https://www.npmjs.com/package/@langchain/mongodb) +- [Nomic](https://www.npmjs.com/package/@langchain/nomic) +- [OpenAI](https://www.npmjs.com/package/@langchain/openai) +- [Pinecone](https://www.npmjs.com/package/@langchain/pinecone) +- [Redis](https://www.npmjs.com/package/@langchain/redis) +- [Weaviate](https://www.npmjs.com/package/@langchain/weaviate) +- [Yandex](https://www.npmjs.com/package/@langchain/yandex) diff --git a/docs/core_docs/sidebars.js b/docs/core_docs/sidebars.js index 01c76b7ebde3..c21b26ea5201 100644 --- a/docs/core_docs/sidebars.js +++ b/docs/core_docs/sidebars.js @@ -200,12 +200,11 @@ module.exports = { { type: "category", label: "Providers", - collapsed: true, + collapsed: false, items: [{ type: "autogenerated", dirName: "integrations/platforms" }], link: { - type: "generated-index", - description: "LangChain.js integration providers.", - slug: "integrations/platforms", + type: "doc", + id: "integrations/platforms/index", }, }, { From d3339ae12c69444563c891d098f3130b6434f534 Mon Sep 17 00:00:00 2001 From: Jacob Lee Date: Mon, 22 Apr 2024 15:55:08 -0700 Subject: [PATCH 11/18] cohere[patch]: Make CohereRerank extend BaseDocumentCompressor (#5177) * Make Cohere rerank extend BaseDocumentCompressor * Bump dep --- libs/langchain-cohere/package.json | 2 +- 
libs/langchain-cohere/src/rerank.ts | 4 +++- yarn.lock | 4 ++-- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/libs/langchain-cohere/package.json b/libs/langchain-cohere/package.json index e306e02224e0..eff56785e32b 100644 --- a/libs/langchain-cohere/package.json +++ b/libs/langchain-cohere/package.json @@ -38,7 +38,7 @@ "author": "LangChain", "license": "MIT", "dependencies": { - "@langchain/core": "~0.1", + "@langchain/core": "~0.1.58", "cohere-ai": "^7.9.3" }, "devDependencies": { diff --git a/libs/langchain-cohere/src/rerank.ts b/libs/langchain-cohere/src/rerank.ts index 4fbb86a3b8bd..2b29f19dbfb4 100644 --- a/libs/langchain-cohere/src/rerank.ts +++ b/libs/langchain-cohere/src/rerank.ts @@ -1,4 +1,5 @@ import { DocumentInterface } from "@langchain/core/documents"; +import { BaseDocumentCompressor } from "@langchain/core/retrievers/document_compressors"; import { getEnvironmentVariable } from "@langchain/core/utils/env"; import { CohereClient } from "cohere-ai"; @@ -27,7 +28,7 @@ export interface CohereRerankArgs { /** * Document compressor that uses `Cohere Rerank API`. */ -export class CohereRerank { +export class CohereRerank extends BaseDocumentCompressor { model = "rerank-english-v2.0"; topN = 3; @@ -37,6 +38,7 @@ export class CohereRerank { maxChunksPerDoc: number | undefined; constructor(fields?: CohereRerankArgs) { + super(); const token = fields?.apiKey ?? 
getEnvironmentVariable("COHERE_API_KEY"); if (!token) { throw new Error("No API key provided for CohereRerank."); diff --git a/yarn.lock b/yarn.lock index dfa1792aa94d..67b2d17cfde0 100644 --- a/yarn.lock +++ b/yarn.lock @@ -8895,7 +8895,7 @@ __metadata: resolution: "@langchain/cohere@workspace:libs/langchain-cohere" dependencies: "@jest/globals": ^29.5.0 - "@langchain/core": ~0.1 + "@langchain/core": ~0.1.58 "@langchain/scripts": ~0.0 "@swc/core": ^1.3.90 "@swc/jest": ^0.2.29 @@ -9346,7 +9346,7 @@ __metadata: languageName: unknown linkType: soft -"@langchain/core@workspace:*, @langchain/core@workspace:langchain-core, @langchain/core@~0.1, @langchain/core@~0.1.5, @langchain/core@~0.1.56, @langchain/core@~0.1.9": +"@langchain/core@workspace:*, @langchain/core@workspace:langchain-core, @langchain/core@~0.1, @langchain/core@~0.1.5, @langchain/core@~0.1.56, @langchain/core@~0.1.58, @langchain/core@~0.1.9": version: 0.0.0-use.local resolution: "@langchain/core@workspace:langchain-core" dependencies: From c35fd25628a483611c8425241aba462578436fb8 Mon Sep 17 00:00:00 2001 From: Jacob Lee Date: Mon, 22 Apr 2024 15:58:25 -0700 Subject: [PATCH 12/18] cohere[patch]: Release 0.0.8 (#5178) --- libs/langchain-cohere/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/langchain-cohere/package.json b/libs/langchain-cohere/package.json index eff56785e32b..31f156982f1e 100644 --- a/libs/langchain-cohere/package.json +++ b/libs/langchain-cohere/package.json @@ -1,6 +1,6 @@ { "name": "@langchain/cohere", - "version": "0.0.7", + "version": "0.0.8", "description": "Cohere integration for LangChain.js", "type": "module", "engines": { From 6407078ce6bfa4989c45d7f1c96df9ee8baea5e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karol=20Fabja=C5=84czuk?= Date: Tue, 23 Apr 2024 01:26:18 +0200 Subject: [PATCH 13/18] langchain[patch]: Add possibility to rerank retrieved docs in ParentDocumentRetriever and MultiQueryRetriever (#4738) * feat: add Document Compressor to 
chain to allow rerank * feat: add example * fix: typo * feat: rerank child documents instead of parents one * feat: improve example * feat: add compressor to Multi Query Retreiver * feat: remove example * feat: remove example * fix: missing docs * feat: remove default value of threshold score to adjust to new Cohere models better * feat: make filtering optional and configurable * docs: add examples * fix: type checking so it allow for build * Fix lint --------- Co-authored-by: jacoblee93 --- .../retrievers/parent-document-retriever.mdx | 10 ++ .../parent_document_retriever_rerank.ts | 93 +++++++++++++++++++ langchain/src/retrievers/multi_query.ts | 42 +++++++-- langchain/src/retrievers/parent_document.ts | 22 ++++- 4 files changed, 157 insertions(+), 10 deletions(-) create mode 100644 examples/src/retrievers/parent_document_retriever_rerank.ts diff --git a/docs/core_docs/docs/modules/data_connection/retrievers/parent-document-retriever.mdx b/docs/core_docs/docs/modules/data_connection/retrievers/parent-document-retriever.mdx index 6233ce57896f..a8855a2edbaa 100644 --- a/docs/core_docs/docs/modules/data_connection/retrievers/parent-document-retriever.mdx +++ b/docs/core_docs/docs/modules/data_connection/retrievers/parent-document-retriever.mdx @@ -6,6 +6,7 @@ import CodeBlock from "@theme/CodeBlock"; import Example from "@examples/retrievers/parent_document_retriever.ts"; import ExampleWithScoreThreshold from "@examples/retrievers/parent_document_retriever_score_threshold.ts"; import ExampleWithChunkHeader from "@examples/retrievers/parent_document_retriever_chunk_header.ts"; +import ExampleWithRerank from "@examples/retrievers/parent_document_retriever_rerank.ts"; # Parent Document Retriever @@ -50,3 +51,12 @@ Tagging each document with metadata is a solution if you know what to filter aga This is particularly important if you have several fine-grained child chunks that need to be correctly retrieved from the vector store. 
{ExampleWithChunkHeader} + +## With Reranking + +When many documents from the vector store are passed to the LLM, final answers sometimes include information from +irrelevant chunks, making them less precise and sometimes incorrect. Passing many irrelevant documents also makes each call +more expensive. +So there are two reasons to use reranking: precision and cost. + +{ExampleWithRerank} diff --git a/examples/src/retrievers/parent_document_retriever_rerank.ts b/examples/src/retrievers/parent_document_retriever_rerank.ts new file mode 100644 index 000000000000..bce20352a4f4 --- /dev/null +++ b/examples/src/retrievers/parent_document_retriever_rerank.ts @@ -0,0 +1,93 @@ +import { OpenAIEmbeddings } from "@langchain/openai"; +import { CohereRerank } from "@langchain/cohere"; +import { HNSWLib } from "@langchain/community/vectorstores/hnswlib"; +import { InMemoryStore } from "langchain/storage/in_memory"; +import { + ParentDocumentRetriever, + type SubDocs, +} from "langchain/retrievers/parent_document"; +import { RecursiveCharacterTextSplitter } from "langchain/text_splitter"; + +// init Cohere Rerank. 
Remember to add COHERE_API_KEY to your .env +const reranker = new CohereRerank({ + topN: 50, + model: "rerank-multilingual-v2.0", +}); + +export function documentCompressorFiltering({ + relevanceScore, +}: { relevanceScore?: number } = {}) { + return (docs: SubDocs) => { + let outputDocs = docs; + + if (relevanceScore) { + const docsRelevanceScoreValues = docs.map( + (doc) => doc?.metadata?.relevanceScore + ); + outputDocs = docs.filter( + (_doc, index) => + (docsRelevanceScoreValues?.[index] || 1) >= relevanceScore + ); + } + + return outputDocs; + }; +} + +const splitter = new RecursiveCharacterTextSplitter({ + chunkSize: 500, + chunkOverlap: 0, +}); + +const jimDocs = await splitter.createDocuments([`Jim favorite color is blue.`]); + +const pamDocs = await splitter.createDocuments([`Pam favorite color is red.`]); + +const vectorstore = await HNSWLib.fromDocuments([], new OpenAIEmbeddings()); +const docstore = new InMemoryStore(); + +const retriever = new ParentDocumentRetriever({ + vectorstore, + docstore, + // Very small chunks for demo purposes. + // Use a bigger chunk size for serious use-cases. + childSplitter: new RecursiveCharacterTextSplitter({ + chunkSize: 10, + chunkOverlap: 0, + }), + childK: 50, + parentK: 5, + // We add Reranker + documentCompressor: reranker, + documentCompressorFilteringFn: documentCompressorFiltering({ + relevanceScore: 0.3, + }), +}); + +const docs = jimDocs.concat(pamDocs); +await retriever.addDocuments(docs); + +// This searches the vector store and returns documents for the LLM that are already reranked, sorted, +// and filtered by the configured minimum relevance score +const retrievedDocs = await retriever.getRelevantDocuments( + "What is Pam's favorite color?" +); + +// Pam's favorite color is returned first! 
+console.log(JSON.stringify(retrievedDocs, null, 2)); +/* + [ + { + "pageContent": "Pam favorite color is red.", + "metadata": { + "relevanceScore": 0.9, + "loc": { + "lines": { + "from": 1, + "to": 1 + } + } + } + } + ] +*/ diff --git a/langchain/src/retrievers/multi_query.ts b/langchain/src/retrievers/multi_query.ts index 085ac2ef82b6..14d38f628de7 100644 --- a/langchain/src/retrievers/multi_query.ts +++ b/langchain/src/retrievers/multi_query.ts @@ -9,11 +9,15 @@ import { BaseOutputParser } from "@langchain/core/output_parsers"; import { PromptTemplate, BasePromptTemplate } from "@langchain/core/prompts"; import { CallbackManagerForRetrieverRun } from "@langchain/core/callbacks/manager"; import { LLMChain } from "../chains/llm_chain.js"; +import type { BaseDocumentCompressor } from "./document_compressors/index.js"; interface LineList { lines: string[]; } +// eslint-disable-next-line @typescript-eslint/no-explicit-any +export type MultiDocs = Document>[]; + class LineListOutputParser extends BaseOutputParser { static lc_name() { return "LineListOutputParser"; @@ -66,6 +70,8 @@ export interface MultiQueryRetrieverInput extends BaseRetrieverInput { llmChain: LLMChain; queryCount?: number; parserKey?: string; + documentCompressor?: BaseDocumentCompressor | undefined; + documentCompressorFilteringFn?: (docs: MultiDocs) => MultiDocs; } /** @@ -96,12 +102,18 @@ export class MultiQueryRetriever extends BaseRetriever { private parserKey = "lines"; + documentCompressor: BaseDocumentCompressor | undefined; + + documentCompressorFilteringFn?: MultiQueryRetrieverInput["documentCompressorFilteringFn"]; + constructor(fields: MultiQueryRetrieverInput) { super(fields); this.retriever = fields.retriever; this.llmChain = fields.llmChain; this.queryCount = fields.queryCount ?? this.queryCount; this.parserKey = fields.parserKey ?? 
this.parserKey; + this.documentCompressor = fields.documentCompressor; + this.documentCompressorFilteringFn = fields.documentCompressorFilteringFn; } static fromLLM( @@ -145,13 +157,15 @@ export class MultiQueryRetriever extends BaseRetriever { runManager?: CallbackManagerForRetrieverRun ): Promise { const documents: Document[] = []; - for (const query of queries) { - const docs = await this.retriever.getRelevantDocuments( - query, - runManager?.getChild() - ); - documents.push(...docs); - } + await Promise.all( + queries.map(async (query) => { + const docs = await this.retriever.getRelevantDocuments( + query, + runManager?.getChild() + ); + documents.push(...docs); + }) + ); return documents; } @@ -177,6 +191,18 @@ export class MultiQueryRetriever extends BaseRetriever { const queries = await this._generateQueries(question, runManager); const documents = await this._retrieveDocuments(queries, runManager); const uniqueDocuments = this._uniqueUnion(documents); - return uniqueDocuments; + + let outputDocs = uniqueDocuments; + if (this.documentCompressor && uniqueDocuments.length) { + outputDocs = await this.documentCompressor.compressDocuments( + uniqueDocuments, + question + ); + if (this.documentCompressorFilteringFn) { + outputDocs = this.documentCompressorFilteringFn(outputDocs); + } + } + + return outputDocs; } } diff --git a/langchain/src/retrievers/parent_document.ts b/langchain/src/retrievers/parent_document.ts index eb05475bfc04..ca06d4b760ea 100644 --- a/langchain/src/retrievers/parent_document.ts +++ b/langchain/src/retrievers/parent_document.ts @@ -5,6 +5,7 @@ import { type VectorStoreRetrieverInterface, } from "@langchain/core/vectorstores"; import { Document } from "@langchain/core/documents"; +import type { BaseDocumentCompressor } from "./document_compressors/index.js"; import { TextSplitter, TextSplitterChunkHeaderOptions, @@ -14,6 +15,9 @@ import { type MultiVectorRetrieverInput, } from "./multi_vector.js"; +// eslint-disable-next-line 
@typescript-eslint/no-explicit-any +export type SubDocs = Document>[]; + /** * Interface for the fields required to initialize a * ParentDocumentRetriever instance. @@ -26,6 +30,8 @@ export type ParentDocumentRetrieverFields = MultiVectorRetrieverInput & { * the `.similaritySearch` method of the vectorstore. */ childDocumentRetriever?: VectorStoreRetrieverInterface; + documentCompressor?: BaseDocumentCompressor | undefined; + documentCompressorFilteringFn?: (docs: SubDocs) => SubDocs; }; /** @@ -81,6 +87,10 @@ export class ParentDocumentRetriever extends MultiVectorRetriever { | VectorStoreRetrieverInterface | undefined; + documentCompressor: BaseDocumentCompressor | undefined; + + documentCompressorFilteringFn?: ParentDocumentRetrieverFields["documentCompressorFilteringFn"]; + constructor(fields: ParentDocumentRetrieverFields) { super(fields); this.vectorstore = fields.vectorstore; @@ -90,17 +100,25 @@ export class ParentDocumentRetriever extends MultiVectorRetriever { this.childK = fields.childK; this.parentK = fields.parentK; this.childDocumentRetriever = fields.childDocumentRetriever; + this.documentCompressor = fields.documentCompressor; + this.documentCompressorFilteringFn = fields.documentCompressorFilteringFn; } async _getRelevantDocuments(query: string): Promise { - // eslint-disable-next-line @typescript-eslint/no-explicit-any - let subDocs: Document>[] = []; + let subDocs: SubDocs = []; if (this.childDocumentRetriever) { subDocs = await this.childDocumentRetriever.getRelevantDocuments(query); } else { subDocs = await this.vectorstore.similaritySearch(query, this.childK); } + if (this.documentCompressor && subDocs.length) { + subDocs = await this.documentCompressor.compressDocuments(subDocs, query); + if (this.documentCompressorFilteringFn) { + subDocs = this.documentCompressorFilteringFn(subDocs); + } + } + // Maintain order const parentDocIds: string[] = []; for (const doc of subDocs) { From e5b03e5ff13f87bdec7dbb2601a212b2fd13f230 Mon Sep 17 00:00:00 
2001 From: jacoblee93 Date: Mon, 22 Apr 2024 16:31:11 -0700 Subject: [PATCH 14/18] Release 0.1.35 --- langchain/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/langchain/package.json b/langchain/package.json index 79770e815452..d3afe39b6231 100644 --- a/langchain/package.json +++ b/langchain/package.json @@ -1,6 +1,6 @@ { "name": "langchain", - "version": "0.1.34", + "version": "0.1.35", "description": "Typescript bindings for langchain", "type": "module", "engines": { From 8b7865952655a7b730f799421249cab48cc16f08 Mon Sep 17 00:00:00 2001 From: Mauricio Cirelli Date: Mon, 22 Apr 2024 20:50:04 -0300 Subject: [PATCH 15/18] core[patch]: Passing the input object to the Retry Attempt Handler. (#5081) * Passing the input object to the Retry Attempt Handler. * Adds test * Lint * Passing just the first input that caused exception on batch calls. * Fixing test case and formatting. * Fixing lint issue. --------- Co-authored-by: Mauricio Cirelli Co-authored-by: jacoblee93 --- langchain-core/src/runnables/base.ts | 20 ++++++-- .../runnables/tests/runnable_retry.test.ts | 46 +++++++++++++++++++ 2 files changed, 61 insertions(+), 5 deletions(-) diff --git a/langchain-core/src/runnables/base.ts b/langchain-core/src/runnables/base.ts index 103c4fc2eb82..181831391365 100644 --- a/langchain-core/src/runnables/base.ts +++ b/langchain-core/src/runnables/base.ts @@ -61,8 +61,13 @@ export type RunnableLike = | RunnableFunc | RunnableMapLike; -// eslint-disable-next-line @typescript-eslint/no-explicit-any -export type RunnableRetryFailedAttemptHandler = (error: any) => any; +export type RunnableRetryFailedAttemptHandler = ( + // eslint-disable-next-line @typescript-eslint/no-explicit-any + error: any, + // eslint-disable-next-line @typescript-eslint/no-explicit-any + input: any + // eslint-disable-next-line @typescript-eslint/no-explicit-any +) => any; // eslint-disable-next-line @typescript-eslint/no-explicit-any export function _coerceToDict(value: any, 
defaultKey: string) { @@ -1268,7 +1273,7 @@ export class RunnableRetry< protected maxAttemptNumber = 3; // eslint-disable-next-line @typescript-eslint/no-explicit-any - onFailedAttempt?: RunnableRetryFailedAttemptHandler = () => {}; + onFailedAttempt: RunnableRetryFailedAttemptHandler = () => {}; constructor( fields: RunnableBindingArgs & { @@ -1303,7 +1308,8 @@ export class RunnableRetry< this._patchConfigForRetry(attemptNumber, config, runManager) ), { - onFailedAttempt: this.onFailedAttempt, + // eslint-disable-next-line @typescript-eslint/no-explicit-any + onFailedAttempt: (error: any) => this.onFailedAttempt(error, input), retries: Math.max(this.maxAttemptNumber - 1, 0), randomize: true, } @@ -1362,6 +1368,8 @@ export class RunnableRetry< if (result instanceof Error) { if (firstException === undefined) { firstException = result; + // eslint-disable-next-line @typescript-eslint/no-explicit-any + (firstException as any).input = remainingInputs[i]; } } resultsMap[resultMapIndex.toString()] = result; @@ -1372,7 +1380,9 @@ export class RunnableRetry< return results; }, { - onFailedAttempt: this.onFailedAttempt, + // eslint-disable-next-line @typescript-eslint/no-explicit-any + onFailedAttempt: (error: any) => + this.onFailedAttempt(error, error.input), retries: Math.max(this.maxAttemptNumber - 1, 0), randomize: true, } diff --git a/langchain-core/src/runnables/tests/runnable_retry.test.ts b/langchain-core/src/runnables/tests/runnable_retry.test.ts index a16f1a7b1cc7..64d0875f56cc 100644 --- a/langchain-core/src/runnables/tests/runnable_retry.test.ts +++ b/langchain-core/src/runnables/tests/runnable_retry.test.ts @@ -21,6 +21,28 @@ test("RunnableRetry invoke", async () => { expect(result).toEqual(3); }); +test("RunnableRetry invoke with a failed attempt handler", async () => { + let attemptCount = 0; + const runnable = new RunnableLambda({ + func: (_thing: unknown) => { + attemptCount += 1; + if (attemptCount < 3) { + throw new Error("TEST ERROR"); + } else { + 
return attemptCount; + } + }, + }); + const runnableRetry = runnable.withRetry({ + onFailedAttempt: (error, input) => { + expect(error.message).toBe("TEST ERROR"); + expect(input).toBe("test"); + }, + }); + const result = await runnableRetry.invoke("test"); + expect(result).toEqual(3); +}); + test("RunnableRetry batch with thrown errors", async () => { const runnable = new RunnableLambda({ func: (_thing: unknown) => { @@ -79,3 +101,27 @@ test("RunnableRetry batch should not retry successful requests", async () => { expect(attemptCount).toEqual(5); expect(result.sort()).toEqual([3, 4, 5]); }); + +test("RunnableRetry batch with an onFailedAttempt handler", async () => { + let attemptCount = 0; + const runnable = new RunnableLambda({ + func: (_thing: unknown) => { + attemptCount += 1; + if (attemptCount < 3) { + throw new Error("TEST ERROR"); + } else { + return attemptCount; + } + }, + }); + const runnableRetry = runnable.withRetry({ + stopAfterAttempt: 2, + onFailedAttempt: (error, input) => { + expect(error.message).toEqual("TEST ERROR"); + expect(input).toEqual("test1"); + }, + }); + const result = await runnableRetry.batch(["test1", "test2", "test3"]); + expect(attemptCount).toEqual(5); + expect(result.sort()).toEqual([3, 4, 5]); +}); From dd46dc9db7ed778c8f9eb5878bbc9fadb7edc286 Mon Sep 17 00:00:00 2001 From: Huaichen Date: Mon, 22 Apr 2024 19:58:04 -0400 Subject: [PATCH 16/18] core[patch]: Fix "Bad control character in string literal" (#5160) * Fixed "Bad control character in string literal" The error "Bad control character in string literal" occurs when calling JSON.parse(json). This typically happens because the JSON string contains control characters that are not allowed, such as newline characters. Any control characters in the JSON string should be escaped before parsing it using JSON.parse(). * Added a test for PR#5135 Added a test for PR# #5135. 
Added a few newlines to the JSON text, which will result in an "Bad control character in string literal" error before the fix. * Format --------- Co-authored-by: jacoblee93 --- .../src/output_parsers/structured.ts | 10 +++++++++- .../output_parsers/tests/structured.test.ts | 20 +++++++++++++++++++ 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/langchain-core/src/output_parsers/structured.ts b/langchain-core/src/output_parsers/structured.ts index de04cbef0895..e9ebbc69dc74 100644 --- a/langchain-core/src/output_parsers/structured.ts +++ b/langchain-core/src/output_parsers/structured.ts @@ -104,7 +104,15 @@ ${JSON.stringify(zodToJsonSchema(this.schema))} const json = text.includes("```") ? text.trim().split(/```(?:json)?/)[1] : text.trim(); - return await this.schema.parseAsync(JSON.parse(json)); + + const escapedJson = json + .replace(/"([^"\\]*(\\.[^"\\]*)*)"/g, (_match, capturedGroup) => { + const escapedInsideQuotes = capturedGroup.replace(/\n/g, "\\n"); + return `"${escapedInsideQuotes}"`; + }) + .replace(/\n/g, ""); + + return await this.schema.parseAsync(JSON.parse(escapedJson)); } catch (e) { throw new OutputParserException( `Failed to parse. Text: "${text}". Error: ${e}`, diff --git a/langchain-core/src/output_parsers/tests/structured.test.ts b/langchain-core/src/output_parsers/tests/structured.test.ts index f0c9f68fce76..433431cd44b5 100644 --- a/langchain-core/src/output_parsers/tests/structured.test.ts +++ b/langchain-core/src/output_parsers/tests/structured.test.ts @@ -198,3 +198,23 @@ Here is the JSON Schema instance your output must adhere to. 
Include the enclosi " `); }); + +test("StructuredOutputParser.fromZodSchema parsing newlines", async () => { + const parser = StructuredOutputParser.fromZodSchema( + z + .object({ + url: z.string().describe("A link to the resource"), + summary: z.string().describe("A summary"), + }) + .describe("Only One object") + ); + + expect( + await parser.parse( + '```\n{"url": "value", "summary": "line1,\nline2,\nline3"}```' + ) + ).toEqual({ + url: "value", + summary: "line1,\nline2,\nline3", + }); +}); From 76193ec0db1cebd55ea49456b373bae7c83838eb Mon Sep 17 00:00:00 2001 From: Jacob Lee Date: Mon, 22 Apr 2024 17:11:07 -0700 Subject: [PATCH 17/18] core[patch]: 0.1.59 (#5181) --- langchain-core/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/langchain-core/package.json b/langchain-core/package.json index 25e8a4b7436b..8f1a0019f368 100644 --- a/langchain-core/package.json +++ b/langchain-core/package.json @@ -1,6 +1,6 @@ { "name": "@langchain/core", - "version": "0.1.58", + "version": "0.1.59", "description": "Core LangChain.js abstractions and schemas", "type": "module", "engines": { From b9d86b16bd0788144f8f27cf5d18960c43da49af Mon Sep 17 00:00:00 2001 From: Jacob Lee Date: Mon, 22 Apr 2024 23:25:50 -0700 Subject: [PATCH 18/18] Init text splitter package (#5183) --- langchain/package.json | 3 +- langchain/src/text_splitter.ts | 804 +----------------- libs/langchain-textsplitters/.eslintrc.cjs | 66 ++ libs/langchain-textsplitters/.gitignore | 7 + libs/langchain-textsplitters/.prettierrc | 19 + libs/langchain-textsplitters/.release-it.json | 10 + libs/langchain-textsplitters/LICENSE | 21 + libs/langchain-textsplitters/README.md | 53 ++ libs/langchain-textsplitters/jest.config.cjs | 21 + libs/langchain-textsplitters/jest.env.cjs | 12 + .../langchain.config.js | 22 + libs/langchain-textsplitters/package.json | 90 ++ .../scripts/jest-setup-after-env.js | 3 + libs/langchain-textsplitters/src/index.ts | 1 + .../src/tests/code_text_splitter.test.ts | 
318 +++++++ .../src/tests/text_splitter.test.ts | 514 +++++++++++ .../src/text_splitter.ts | 803 +++++++++++++++++ .../langchain-textsplitters/tsconfig.cjs.json | 8 + libs/langchain-textsplitters/tsconfig.json | 23 + yarn.lock | 41 + 20 files changed, 2035 insertions(+), 804 deletions(-) create mode 100644 libs/langchain-textsplitters/.eslintrc.cjs create mode 100644 libs/langchain-textsplitters/.gitignore create mode 100644 libs/langchain-textsplitters/.prettierrc create mode 100644 libs/langchain-textsplitters/.release-it.json create mode 100644 libs/langchain-textsplitters/LICENSE create mode 100644 libs/langchain-textsplitters/README.md create mode 100644 libs/langchain-textsplitters/jest.config.cjs create mode 100644 libs/langchain-textsplitters/jest.env.cjs create mode 100644 libs/langchain-textsplitters/langchain.config.js create mode 100644 libs/langchain-textsplitters/package.json create mode 100644 libs/langchain-textsplitters/scripts/jest-setup-after-env.js create mode 100644 libs/langchain-textsplitters/src/index.ts create mode 100644 libs/langchain-textsplitters/src/tests/code_text_splitter.test.ts create mode 100644 libs/langchain-textsplitters/src/tests/text_splitter.test.ts create mode 100644 libs/langchain-textsplitters/src/text_splitter.ts create mode 100644 libs/langchain-textsplitters/tsconfig.cjs.json create mode 100644 libs/langchain-textsplitters/tsconfig.json diff --git a/langchain/package.json b/langchain/package.json index d3afe39b6231..6f4928e9e166 100644 --- a/langchain/package.json +++ b/langchain/package.json @@ -1190,7 +1190,7 @@ "homepage": "https://github.com/langchain-ai/langchainjs/tree/main/langchain/", "scripts": { "build": "yarn run build:deps && yarn clean && yarn build:esm && yarn build:cjs && yarn build:scripts", - "build:deps": "yarn run turbo:command build --filter=@langchain/openai --filter=@langchain/community --concurrency=1", + "build:deps": "yarn run turbo:command build --filter=@langchain/openai 
--filter=@langchain/community --filter=@langchain/textsplitters --concurrency=1", "build:esm": "NODE_OPTIONS=--max-old-space-size=4096 tsc --outDir dist/ && rimraf dist/tests dist/**/tests", "build:cjs": "NODE_OPTIONS=--max-old-space-size=4096 tsc --outDir dist-cjs/ -p tsconfig.cjs.json && yarn move-cjs-to-dist && rimraf dist-cjs", "build:watch": "yarn create-entrypoints && tsc --outDir dist/ --watch", @@ -1515,6 +1515,7 @@ "@langchain/community": "~0.0.47", "@langchain/core": "~0.1.56", "@langchain/openai": "~0.0.28", + "@langchain/textsplitters": "~0.0.0", "binary-extensions": "^2.2.0", "js-tiktoken": "^1.0.7", "js-yaml": "^4.1.0", diff --git a/langchain/src/text_splitter.ts b/langchain/src/text_splitter.ts index 095ea3e796ca..ab5479bce0ee 100644 --- a/langchain/src/text_splitter.ts +++ b/langchain/src/text_splitter.ts @@ -1,803 +1 @@ -import type * as tiktoken from "js-tiktoken"; -import { Document, BaseDocumentTransformer } from "@langchain/core/documents"; -import { getEncoding } from "@langchain/core/utils/tiktoken"; - -export interface TextSplitterParams { - chunkSize: number; - chunkOverlap: number; - keepSeparator: boolean; - lengthFunction?: - | ((text: string) => number) - | ((text: string) => Promise); -} - -export type TextSplitterChunkHeaderOptions = { - chunkHeader?: string; - chunkOverlapHeader?: string; - appendChunkOverlapHeader?: boolean; -}; - -export abstract class TextSplitter - extends BaseDocumentTransformer - implements TextSplitterParams -{ - lc_namespace = ["langchain", "document_transformers", "text_splitters"]; - - chunkSize = 1000; - - chunkOverlap = 200; - - keepSeparator = false; - - lengthFunction: - | ((text: string) => number) - | ((text: string) => Promise); - - constructor(fields?: Partial) { - super(fields); - this.chunkSize = fields?.chunkSize ?? this.chunkSize; - this.chunkOverlap = fields?.chunkOverlap ?? this.chunkOverlap; - this.keepSeparator = fields?.keepSeparator ?? 
this.keepSeparator; - this.lengthFunction = - fields?.lengthFunction ?? ((text: string) => text.length); - if (this.chunkOverlap >= this.chunkSize) { - throw new Error("Cannot have chunkOverlap >= chunkSize"); - } - } - - async transformDocuments( - documents: Document[], - chunkHeaderOptions: TextSplitterChunkHeaderOptions = {} - ): Promise { - return this.splitDocuments(documents, chunkHeaderOptions); - } - - abstract splitText(text: string): Promise; - - protected splitOnSeparator(text: string, separator: string): string[] { - let splits; - if (separator) { - if (this.keepSeparator) { - const regexEscapedSeparator = separator.replace( - /[/\-\\^$*+?.()|[\]{}]/g, - "\\$&" - ); - splits = text.split(new RegExp(`(?=${regexEscapedSeparator})`)); - } else { - splits = text.split(separator); - } - } else { - splits = text.split(""); - } - return splits.filter((s) => s !== ""); - } - - async createDocuments( - texts: string[], - // eslint-disable-next-line @typescript-eslint/no-explicit-any - metadatas: Record[] = [], - chunkHeaderOptions: TextSplitterChunkHeaderOptions = {} - ): Promise { - // if no metadata is provided, we create an empty one for each text - // eslint-disable-next-line @typescript-eslint/no-explicit-any - const _metadatas: Record[] = - metadatas.length > 0 - ? 
metadatas - : [...Array(texts.length)].map(() => ({})); - const { - chunkHeader = "", - chunkOverlapHeader = "(cont'd) ", - appendChunkOverlapHeader = false, - } = chunkHeaderOptions; - const documents = new Array(); - for (let i = 0; i < texts.length; i += 1) { - const text = texts[i]; - let lineCounterIndex = 1; - let prevChunk = null; - let indexPrevChunk = -1; - for (const chunk of await this.splitText(text)) { - let pageContent = chunkHeader; - - // we need to count the \n that are in the text before getting removed by the splitting - const indexChunk = text.indexOf(chunk, indexPrevChunk + 1); - if (prevChunk === null) { - const newLinesBeforeFirstChunk = this.numberOfNewLines( - text, - 0, - indexChunk - ); - lineCounterIndex += newLinesBeforeFirstChunk; - } else { - const indexEndPrevChunk = - indexPrevChunk + (await this.lengthFunction(prevChunk)); - if (indexEndPrevChunk < indexChunk) { - const numberOfIntermediateNewLines = this.numberOfNewLines( - text, - indexEndPrevChunk, - indexChunk - ); - lineCounterIndex += numberOfIntermediateNewLines; - } else if (indexEndPrevChunk > indexChunk) { - const numberOfIntermediateNewLines = this.numberOfNewLines( - text, - indexChunk, - indexEndPrevChunk - ); - lineCounterIndex -= numberOfIntermediateNewLines; - } - if (appendChunkOverlapHeader) { - pageContent += chunkOverlapHeader; - } - } - const newLinesCount = this.numberOfNewLines(chunk); - - const loc = - _metadatas[i].loc && typeof _metadatas[i].loc === "object" - ? 
{ ..._metadatas[i].loc } - : {}; - loc.lines = { - from: lineCounterIndex, - to: lineCounterIndex + newLinesCount, - }; - const metadataWithLinesNumber = { - ..._metadatas[i], - loc, - }; - - pageContent += chunk; - documents.push( - new Document({ - pageContent, - metadata: metadataWithLinesNumber, - }) - ); - lineCounterIndex += newLinesCount; - prevChunk = chunk; - indexPrevChunk = indexChunk; - } - } - return documents; - } - - private numberOfNewLines(text: string, start?: number, end?: number) { - const textSection = text.slice(start, end); - return (textSection.match(/\n/g) || []).length; - } - - async splitDocuments( - documents: Document[], - chunkHeaderOptions: TextSplitterChunkHeaderOptions = {} - ): Promise { - const selectedDocuments = documents.filter( - (doc) => doc.pageContent !== undefined - ); - const texts = selectedDocuments.map((doc) => doc.pageContent); - const metadatas = selectedDocuments.map((doc) => doc.metadata); - return this.createDocuments(texts, metadatas, chunkHeaderOptions); - } - - private joinDocs(docs: string[], separator: string): string | null { - const text = docs.join(separator).trim(); - return text === "" ? 
null : text; - } - - async mergeSplits(splits: string[], separator: string): Promise { - const docs: string[] = []; - const currentDoc: string[] = []; - let total = 0; - for (const d of splits) { - const _len = await this.lengthFunction(d); - if ( - total + _len + currentDoc.length * separator.length > - this.chunkSize - ) { - if (total > this.chunkSize) { - console.warn( - `Created a chunk of size ${total}, + -which is longer than the specified ${this.chunkSize}` - ); - } - if (currentDoc.length > 0) { - const doc = this.joinDocs(currentDoc, separator); - if (doc !== null) { - docs.push(doc); - } - // Keep on popping if: - // - we have a larger chunk than in the chunk overlap - // - or if we still have any chunks and the length is long - while ( - total > this.chunkOverlap || - (total + _len + currentDoc.length * separator.length > - this.chunkSize && - total > 0) - ) { - total -= await this.lengthFunction(currentDoc[0]); - currentDoc.shift(); - } - } - } - currentDoc.push(d); - total += _len; - } - const doc = this.joinDocs(currentDoc, separator); - if (doc !== null) { - docs.push(doc); - } - return docs; - } -} - -export interface CharacterTextSplitterParams extends TextSplitterParams { - separator: string; -} - -export class CharacterTextSplitter - extends TextSplitter - implements CharacterTextSplitterParams -{ - static lc_name() { - return "CharacterTextSplitter"; - } - - separator = "\n\n"; - - constructor(fields?: Partial) { - super(fields); - this.separator = fields?.separator ?? this.separator; - } - - async splitText(text: string): Promise { - // First we naively split the large input into a bunch of smaller ones. - const splits = this.splitOnSeparator(text, this.separator); - return this.mergeSplits(splits, this.keepSeparator ? 
"" : this.separator); - } -} - -export interface RecursiveCharacterTextSplitterParams - extends TextSplitterParams { - separators: string[]; -} - -export const SupportedTextSplitterLanguages = [ - "cpp", - "go", - "java", - "js", - "php", - "proto", - "python", - "rst", - "ruby", - "rust", - "scala", - "swift", - "markdown", - "latex", - "html", - "sol", -] as const; - -export type SupportedTextSplitterLanguage = - (typeof SupportedTextSplitterLanguages)[number]; - -export class RecursiveCharacterTextSplitter - extends TextSplitter - implements RecursiveCharacterTextSplitterParams -{ - static lc_name() { - return "RecursiveCharacterTextSplitter"; - } - - separators: string[] = ["\n\n", "\n", " ", ""]; - - constructor(fields?: Partial) { - super(fields); - this.separators = fields?.separators ?? this.separators; - this.keepSeparator = fields?.keepSeparator ?? true; - } - - private async _splitText(text: string, separators: string[]) { - const finalChunks: string[] = []; - - // Get appropriate separator to use - let separator: string = separators[separators.length - 1]; - let newSeparators; - for (let i = 0; i < separators.length; i += 1) { - const s = separators[i]; - if (s === "") { - separator = s; - break; - } - if (text.includes(s)) { - separator = s; - newSeparators = separators.slice(i + 1); - break; - } - } - - // Now that we have the separator, split the text - const splits = this.splitOnSeparator(text, separator); - - // Now go merging things, recursively splitting longer texts. - let goodSplits: string[] = []; - const _separator = this.keepSeparator ? 
"" : separator; - for (const s of splits) { - if ((await this.lengthFunction(s)) < this.chunkSize) { - goodSplits.push(s); - } else { - if (goodSplits.length) { - const mergedText = await this.mergeSplits(goodSplits, _separator); - finalChunks.push(...mergedText); - goodSplits = []; - } - if (!newSeparators) { - finalChunks.push(s); - } else { - const otherInfo = await this._splitText(s, newSeparators); - finalChunks.push(...otherInfo); - } - } - } - if (goodSplits.length) { - const mergedText = await this.mergeSplits(goodSplits, _separator); - finalChunks.push(...mergedText); - } - return finalChunks; - } - - async splitText(text: string): Promise { - return this._splitText(text, this.separators); - } - - static fromLanguage( - language: SupportedTextSplitterLanguage, - options?: Partial - ) { - return new RecursiveCharacterTextSplitter({ - ...options, - separators: - RecursiveCharacterTextSplitter.getSeparatorsForLanguage(language), - }); - } - - static getSeparatorsForLanguage(language: SupportedTextSplitterLanguage) { - if (language === "cpp") { - return [ - // Split along class definitions - "\nclass ", - // Split along function definitions - "\nvoid ", - "\nint ", - "\nfloat ", - "\ndouble ", - // Split along control flow statements - "\nif ", - "\nfor ", - "\nwhile ", - "\nswitch ", - "\ncase ", - // Split by the normal type of lines - "\n\n", - "\n", - " ", - "", - ]; - } else if (language === "go") { - return [ - // Split along function definitions - "\nfunc ", - "\nvar ", - "\nconst ", - "\ntype ", - // Split along control flow statements - "\nif ", - "\nfor ", - "\nswitch ", - "\ncase ", - // Split by the normal type of lines - "\n\n", - "\n", - " ", - "", - ]; - } else if (language === "java") { - return [ - // Split along class definitions - "\nclass ", - // Split along method definitions - "\npublic ", - "\nprotected ", - "\nprivate ", - "\nstatic ", - // Split along control flow statements - "\nif ", - "\nfor ", - "\nwhile ", - "\nswitch ", - "\ncase 
", - // Split by the normal type of lines - "\n\n", - "\n", - " ", - "", - ]; - } else if (language === "js") { - return [ - // Split along function definitions - "\nfunction ", - "\nconst ", - "\nlet ", - "\nvar ", - "\nclass ", - // Split along control flow statements - "\nif ", - "\nfor ", - "\nwhile ", - "\nswitch ", - "\ncase ", - "\ndefault ", - // Split by the normal type of lines - "\n\n", - "\n", - " ", - "", - ]; - } else if (language === "php") { - return [ - // Split along function definitions - "\nfunction ", - // Split along class definitions - "\nclass ", - // Split along control flow statements - "\nif ", - "\nforeach ", - "\nwhile ", - "\ndo ", - "\nswitch ", - "\ncase ", - // Split by the normal type of lines - "\n\n", - "\n", - " ", - "", - ]; - } else if (language === "proto") { - return [ - // Split along message definitions - "\nmessage ", - // Split along service definitions - "\nservice ", - // Split along enum definitions - "\nenum ", - // Split along option definitions - "\noption ", - // Split along import statements - "\nimport ", - // Split along syntax declarations - "\nsyntax ", - // Split by the normal type of lines - "\n\n", - "\n", - " ", - "", - ]; - } else if (language === "python") { - return [ - // First, try to split along class definitions - "\nclass ", - "\ndef ", - "\n\tdef ", - // Now split by the normal type of lines - "\n\n", - "\n", - " ", - "", - ]; - } else if (language === "rst") { - return [ - // Split along section titles - "\n===\n", - "\n---\n", - "\n***\n", - // Split along directive markers - "\n.. 
", - // Split by the normal type of lines - "\n\n", - "\n", - " ", - "", - ]; - } else if (language === "ruby") { - return [ - // Split along method definitions - "\ndef ", - "\nclass ", - // Split along control flow statements - "\nif ", - "\nunless ", - "\nwhile ", - "\nfor ", - "\ndo ", - "\nbegin ", - "\nrescue ", - // Split by the normal type of lines - "\n\n", - "\n", - " ", - "", - ]; - } else if (language === "rust") { - return [ - // Split along function definitions - "\nfn ", - "\nconst ", - "\nlet ", - // Split along control flow statements - "\nif ", - "\nwhile ", - "\nfor ", - "\nloop ", - "\nmatch ", - "\nconst ", - // Split by the normal type of lines - "\n\n", - "\n", - " ", - "", - ]; - } else if (language === "scala") { - return [ - // Split along class definitions - "\nclass ", - "\nobject ", - // Split along method definitions - "\ndef ", - "\nval ", - "\nvar ", - // Split along control flow statements - "\nif ", - "\nfor ", - "\nwhile ", - "\nmatch ", - "\ncase ", - // Split by the normal type of lines - "\n\n", - "\n", - " ", - "", - ]; - } else if (language === "swift") { - return [ - // Split along function definitions - "\nfunc ", - // Split along class definitions - "\nclass ", - "\nstruct ", - "\nenum ", - // Split along control flow statements - "\nif ", - "\nfor ", - "\nwhile ", - "\ndo ", - "\nswitch ", - "\ncase ", - // Split by the normal type of lines - "\n\n", - "\n", - " ", - "", - ]; - } else if (language === "markdown") { - return [ - // First, try to split along Markdown headings (starting with level 2) - "\n## ", - "\n### ", - "\n#### ", - "\n##### ", - "\n###### ", - // Note the alternative syntax for headings (below) is not handled here - // Heading level 2 - // --------------- - // End of code block - "```\n\n", - // Horizontal lines - "\n\n***\n\n", - "\n\n---\n\n", - "\n\n___\n\n", - // Note that this splitter doesn't handle horizontal lines defined - // by *three or more* of ***, ---, or ___, but this is not handled - 
"\n\n", - "\n", - " ", - "", - ]; - } else if (language === "latex") { - return [ - // First, try to split along Latex sections - "\n\\chapter{", - "\n\\section{", - "\n\\subsection{", - "\n\\subsubsection{", - - // Now split by environments - "\n\\begin{enumerate}", - "\n\\begin{itemize}", - "\n\\begin{description}", - "\n\\begin{list}", - "\n\\begin{quote}", - "\n\\begin{quotation}", - "\n\\begin{verse}", - "\n\\begin{verbatim}", - - // Now split by math environments - "\n\\begin{align}", - "$$", - "$", - - // Now split by the normal type of lines - "\n\n", - "\n", - " ", - "", - ]; - } else if (language === "html") { - return [ - // First, try to split along HTML tags - "", - "
", - "

", - "
", - "

  • ", - "

    ", - "

    ", - "

    ", - "

    ", - "

    ", - "
    ", - "", - "", - "", - "
    ", - "", - "
      ", - "
        ", - "
        ", - "