Commit

Merge remote-tracking branch 'upstream/main' into nsc/firecrawl-loader
nickscamara committed Apr 23, 2024
2 parents 4327d2a + b9d86b1 commit f9a09ec
Showing 47 changed files with 2,501 additions and 908 deletions.
6 changes: 4 additions & 2 deletions docs/core_docs/.gitignore
@@ -109,10 +109,12 @@ docs/use_cases/extraction/how_to/examples.md
docs/use_cases/extraction/how_to/examples.mdx
docs/modules/model_io/output_parsers/custom.md
docs/modules/model_io/output_parsers/custom.mdx
docs/modules/model_io/chat/function_calling.md
docs/modules/model_io/chat/function_calling.mdx
docs/modules/memory/chat_messages/custom.md
docs/modules/memory/chat_messages/custom.mdx
docs/modules/model_io/chat/response_metadata.md
docs/modules/model_io/chat/response_metadata.mdx
docs/modules/model_io/chat/function_calling.md
docs/modules/model_io/chat/function_calling.mdx
docs/modules/data_connection/vectorstores/custom.md
docs/modules/data_connection/vectorstores/custom.mdx
docs/modules/agents/agent_types/tool_calling.md
30 changes: 30 additions & 0 deletions docs/core_docs/docs/integrations/platforms/index.mdx
@@ -0,0 +1,30 @@
---
sidebar_position: 0
sidebar_class_name: hidden
---

# Providers

LangChain integrates with many providers.

## Partner Packages

These providers have standalone `@langchain/{provider}` packages for improved versioning, dependency management, and testing. A minimal usage sketch follows the list below.

- [Anthropic](https://www.npmjs.com/package/@langchain/anthropic)
- [Azure OpenAI](https://www.npmjs.com/package/@langchain/azure-openai)
- [Cloudflare](https://www.npmjs.com/package/@langchain/cloudflare)
- [Cohere](https://www.npmjs.com/package/@langchain/cohere)
- [Exa](https://www.npmjs.com/package/@langchain/exa)
- [Google GenAI](https://www.npmjs.com/package/@langchain/google-genai)
- [Google VertexAI](https://www.npmjs.com/package/@langchain/google-vertexai)
- [Google VertexAI Web](https://www.npmjs.com/package/@langchain/google-vertexai-web)
- [Groq](https://www.npmjs.com/package/@langchain/groq)
- [MistralAI](https://www.npmjs.com/package/@langchain/mistralai)
- [MongoDB](https://www.npmjs.com/package/@langchain/mongodb)
- [Nomic](https://www.npmjs.com/package/@langchain/nomic)
- [OpenAI](https://www.npmjs.com/package/@langchain/openai)
- [Pinecone](https://www.npmjs.com/package/@langchain/pinecone)
- [Redis](https://www.npmjs.com/package/@langchain/redis)
- [Weaviate](https://www.npmjs.com/package/@langchain/weaviate)
- [Yandex](https://www.npmjs.com/package/@langchain/yandex)
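
For orientation only (not part of this commit), here is a minimal sketch of how one of these partner packages is typically installed and used, taking `@langchain/openai` as an example; the model name and prompt are placeholders:

```typescript
// npm install @langchain/openai
import { ChatOpenAI } from "@langchain/openai";

// Assumes OPENAI_API_KEY is set in the environment; the model name is a placeholder.
const model = new ChatOpenAI({ modelName: "gpt-3.5-turbo" });

const response = await model.invoke("What is LangChain?");
console.log(response.content);
```

The other partner packages follow the same pattern: install the scoped package, then import its classes directly from `@langchain/{provider}`.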
4 changes: 0 additions & 4 deletions docs/core_docs/docs/integrations/vectorstores/qdrant.mdx
@@ -6,10 +6,6 @@ sidebar_class_name: node-only

[Qdrant](https://qdrant.tech/) is a vector similarity search engine. It provides a production-ready service with a convenient API to store, search, and manage points - vectors with an additional payload.

:::tip Compatibility
Only available on Node.js.
:::

## Setup

1. Run a Qdrant instance with Docker on your computer by following the [Qdrant setup instructions](https://qdrant.tech/documentation/install/).
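
For reference (not part of this diff), a rough sketch of connecting to a locally running Qdrant instance from LangChain.js; the collection name is a placeholder, and the exact option names should be checked against the current `@langchain/community` documentation:

```typescript
import { OpenAIEmbeddings } from "@langchain/openai";
import { QdrantVectorStore } from "@langchain/community/vectorstores/qdrant";

// Assumes Qdrant is reachable on its default port and OPENAI_API_KEY is set;
// "my-collection" is a placeholder collection name.
const vectorStore = await QdrantVectorStore.fromExistingCollection(
  new OpenAIEmbeddings(),
  {
    url: "http://localhost:6333",
    collectionName: "my-collection",
  }
);

const results = await vectorStore.similaritySearch("example query", 2);
console.log(results);
```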
@@ -18,7 +18,7 @@ npm install @langchain/openai neo4j-driver @langchain/community

Memgraph bundles the database along with various analytical tools into distinct
Docker images. If you're new to Memgraph or you're in a developing stage, we
recommend using the `memgraph-platform` image. Besides the database, it also
recommend running Memgraph Platform with Docker Compose. Besides the database, it also
includes all the tools you might need to analyze your data, such as command-line
interface [mgconsole](https://memgraph.com/docs/getting-started/cli), web
interface [Memgraph Lab](https://memgraph.com/docs/data-visualization) and a
@@ -28,8 +28,16 @@ complete set of algorithms within a
With Docker running in the background, run the following command in the
console:

Linux/MacOS:

```bash
docker run -p 7687:7687 -p 7444:7444 -p 3000:3000 --name memgraph memgraph/memgraph-platform
curl https://install.memgraph.com | sh
```

Windows:

```
iwr https://windows.memgraph.com | iex
```

For other installation options, check the [Getting started guide](https://memgraph.com/docs/getting-started).
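
For orientation (not part of this diff), a minimal sketch of verifying the connection from Node.js using the `neo4j-driver` package installed above; Memgraph speaks the Bolt protocol, and the empty credentials are an assumption for a fresh local install:

```typescript
import neo4j from "neo4j-driver";

// Connect to the local Memgraph instance over Bolt.
// Empty credentials are assumed for a default local setup; adjust if auth is enabled.
const driver = neo4j.driver("bolt://localhost:7687", neo4j.auth.basic("", ""));
const session = driver.session();

try {
  const result = await session.run("RETURN 'Connected to Memgraph' AS message");
  console.log(result.records[0].get("message"));
} finally {
  await session.close();
  await driver.close();
}
```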
@@ -6,6 +6,7 @@ import CodeBlock from "@theme/CodeBlock";
import Example from "@examples/retrievers/parent_document_retriever.ts";
import ExampleWithScoreThreshold from "@examples/retrievers/parent_document_retriever_score_threshold.ts";
import ExampleWithChunkHeader from "@examples/retrievers/parent_document_retriever_chunk_header.ts";
import ExampleWithRerank from "@examples/retrievers/parent_document_retriever_rerank.ts";

# Parent Document Retriever

@@ -50,3 +51,12 @@ Tagging each document with metadata is a solution if you know what to filter against.
This is particularly important if you have several fine-grained child chunks that need to be correctly retrieved from the vector store.

<CodeBlock language="typescript">{ExampleWithChunkHeader}</CodeBlock>

## With Reranking

When many retrieved documents are passed to the LLM, the final answer can draw on irrelevant chunks, making it less precise and sometimes incorrect. Passing many irrelevant documents also makes the call more expensive.
Reranking therefore helps on two fronts: precision and cost.

<CodeBlock language="typescript">{ExampleWithRerank}</CodeBlock>
7 changes: 3 additions & 4 deletions docs/core_docs/sidebars.js
@@ -200,12 +200,11 @@ module.exports = {
    {
      type: "category",
      label: "Providers",
      collapsed: true,
      collapsed: false,
      items: [{ type: "autogenerated", dirName: "integrations/platforms" }],
      link: {
        type: "generated-index",
        description: "LangChain.js integration providers.",
        slug: "integrations/platforms",
        type: "doc",
        id: "integrations/platforms/index",
      },
    },
    {
2 changes: 1 addition & 1 deletion examples/package.json
@@ -29,7 +29,7 @@
"@getmetal/metal-sdk": "^4.0.0",
"@getzep/zep-js": "^0.9.0",
"@gomomento/sdk": "^1.51.1",
"@google/generative-ai": "^0.1.0",
"@google/generative-ai": "^0.7.0",
"@langchain/anthropic": "workspace:*",
"@langchain/azure-openai": "workspace:*",
"@langchain/cloudflare": "workspace:*",
93 changes: 93 additions & 0 deletions examples/src/retrievers/parent_document_retriever_rerank.ts
@@ -0,0 +1,93 @@
import { OpenAIEmbeddings } from "@langchain/openai";
import { CohereRerank } from "@langchain/cohere";
import { HNSWLib } from "@langchain/community/vectorstores/hnswlib";
import { InMemoryStore } from "langchain/storage/in_memory";
import {
  ParentDocumentRetriever,
  type SubDocs,
} from "langchain/retrievers/parent_document";
import { RecursiveCharacterTextSplitter } from "langchain/text_splitter";

// Initialize Cohere Rerank. Remember to add COHERE_API_KEY to your .env file.
const reranker = new CohereRerank({
  topN: 50,
  model: "rerank-multilingual-v2.0",
});

export function documentCompressorFiltering({
  relevanceScore,
}: { relevanceScore?: number } = {}) {
  return (docs: SubDocs) => {
    let outputDocs = docs;

    if (relevanceScore) {
      const docsRelevanceScoreValues = docs.map(
        (doc) => doc?.metadata?.relevanceScore
      );
      outputDocs = docs.filter(
        (_doc, index) =>
          (docsRelevanceScoreValues?.[index] || 1) >= relevanceScore
      );
    }

    return outputDocs;
  };
}

const splitter = new RecursiveCharacterTextSplitter({
  chunkSize: 500,
  chunkOverlap: 0,
});

const jimDocs = await splitter.createDocuments([`Jim favorite color is blue.`]);

const pamDocs = await splitter.createDocuments([`Pam favorite color is red.`]);

const vectorstore = await HNSWLib.fromDocuments([], new OpenAIEmbeddings());
const docstore = new InMemoryStore();

const retriever = new ParentDocumentRetriever({
  vectorstore,
  docstore,
  // Very small chunks for demo purposes.
  // Use a bigger chunk size for serious use-cases.
  childSplitter: new RecursiveCharacterTextSplitter({
    chunkSize: 10,
    chunkOverlap: 0,
  }),
  childK: 50,
  parentK: 5,
  // Attach the reranker as a document compressor.
  documentCompressor: reranker,
  documentCompressorFilteringFn: documentCompressorFiltering({
    relevanceScore: 0.3,
  }),
});

const docs = jimDocs.concat(pamDocs);
await retriever.addDocuments(docs);

// This searches the vector store and returns documents that are already reranked,
// sorted, and filtered by the minimum relevance score, ready to pass to the LLM.
const retrievedDocs = await retriever.getRelevantDocuments(
  "What is Pam's favorite color?"
);

// Pam's favorite color is returned first!
console.log(JSON.stringify(retrievedDocs, null, 2));
/*
  [
    {
      "pageContent": "Pam favorite color is red.",
      "metadata": {
        "relevanceScore": 0.9,
        "loc": {
          "lines": {
            "from": 1,
            "to": 1
          }
        }
      }
    }
  ]
*/
2 changes: 1 addition & 1 deletion langchain-core/package.json
@@ -1,6 +1,6 @@
{
"name": "@langchain/core",
"version": "0.1.58",
"version": "0.1.59",
"description": "Core LangChain.js abstractions and schemas",
"type": "module",
"engines": {
10 changes: 9 additions & 1 deletion langchain-core/src/output_parsers/structured.ts
@@ -104,7 +104,15 @@ ${JSON.stringify(zodToJsonSchema(this.schema))}
      const json = text.includes("```")
        ? text.trim().split(/```(?:json)?/)[1]
        : text.trim();
      return await this.schema.parseAsync(JSON.parse(json));

      const escapedJson = json
        .replace(/"([^"\\]*(\\.[^"\\]*)*)"/g, (_match, capturedGroup) => {
          const escapedInsideQuotes = capturedGroup.replace(/\n/g, "\\n");
          return `"${escapedInsideQuotes}"`;
        })
        .replace(/\n/g, "");

      return await this.schema.parseAsync(JSON.parse(escapedJson));
    } catch (e) {
      throw new OutputParserException(
        `Failed to parse. Text: "${text}". Error: ${e}`,
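
For context (not part of the commit), a small sketch of what the added escaping does: literal newlines inside quoted JSON string values are converted to `\n` escapes, and any remaining newlines outside strings are dropped, so model output containing raw line breaks can still be parsed:

```typescript
// Illustration only: the same escaping applied to a standalone string.
const raw = '{"url": "value", "summary": "line1,\nline2"}'; // literal newline inside a string value

const escaped = raw
  .replace(/"([^"\\]*(\\.[^"\\]*)*)"/g, (_match, capturedGroup) => {
    // Escape newlines that appear inside quoted string values.
    return `"${capturedGroup.replace(/\n/g, "\\n")}"`;
  })
  .replace(/\n/g, ""); // remove any newlines outside of strings

console.log(JSON.parse(escaped).summary); // "line1,\nline2" parses back with a real newline
```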
20 changes: 20 additions & 0 deletions langchain-core/src/output_parsers/tests/structured.test.ts
@@ -198,3 +198,23 @@ Here is the JSON Schema instance your output must adhere to. Include the enclosi
"
`);
});

test("StructuredOutputParser.fromZodSchema parsing newlines", async () => {
const parser = StructuredOutputParser.fromZodSchema(
z
.object({
url: z.string().describe("A link to the resource"),
summary: z.string().describe("A summary"),
})
.describe("Only One object")
);

expect(
await parser.parse(
'```\n{"url": "value", "summary": "line1,\nline2,\nline3"}```'
)
).toEqual({
url: "value",
summary: "line1,\nline2,\nline3",
});
});
20 changes: 15 additions & 5 deletions langchain-core/src/runnables/base.ts
@@ -61,8 +61,13 @@ export type RunnableLike<RunInput = any, RunOutput = any> =
  | RunnableFunc<RunInput, RunOutput>
  | RunnableMapLike<RunInput, RunOutput>;

// eslint-disable-next-line @typescript-eslint/no-explicit-any
export type RunnableRetryFailedAttemptHandler = (error: any) => any;
export type RunnableRetryFailedAttemptHandler = (
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  error: any,
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  input: any
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
) => any;

// eslint-disable-next-line @typescript-eslint/no-explicit-any
export function _coerceToDict(value: any, defaultKey: string) {
@@ -1268,7 +1273,7 @@ export class RunnableRetry<
  protected maxAttemptNumber = 3;

  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  onFailedAttempt?: RunnableRetryFailedAttemptHandler = () => {};
  onFailedAttempt: RunnableRetryFailedAttemptHandler = () => {};

  constructor(
    fields: RunnableBindingArgs<RunInput, RunOutput, CallOptions> & {
@@ -1303,7 +1308,8 @@
          this._patchConfigForRetry(attemptNumber, config, runManager)
        ),
      {
        onFailedAttempt: this.onFailedAttempt,
        // eslint-disable-next-line @typescript-eslint/no-explicit-any
        onFailedAttempt: (error: any) => this.onFailedAttempt(error, input),
        retries: Math.max(this.maxAttemptNumber - 1, 0),
        randomize: true,
      }
@@ -1362,6 +1368,8 @@
          if (result instanceof Error) {
            if (firstException === undefined) {
              firstException = result;
              // eslint-disable-next-line @typescript-eslint/no-explicit-any
              (firstException as any).input = remainingInputs[i];
            }
          }
          resultsMap[resultMapIndex.toString()] = result;
@@ -1372,7 +1380,9 @@
        return results;
      },
      {
        onFailedAttempt: this.onFailedAttempt,
        // eslint-disable-next-line @typescript-eslint/no-explicit-any
        onFailedAttempt: (error: any) =>
          this.onFailedAttempt(error, error.input),
        retries: Math.max(this.maxAttemptNumber - 1, 0),
        randomize: true,
      }
46 changes: 46 additions & 0 deletions langchain-core/src/runnables/tests/runnable_retry.test.ts
@@ -21,6 +21,28 @@ test("RunnableRetry invoke", async () => {
  expect(result).toEqual(3);
});

test("RunnableRetry invoke with a failed attempt handler", async () => {
  let attemptCount = 0;
  const runnable = new RunnableLambda({
    func: (_thing: unknown) => {
      attemptCount += 1;
      if (attemptCount < 3) {
        throw new Error("TEST ERROR");
      } else {
        return attemptCount;
      }
    },
  });
  const runnableRetry = runnable.withRetry({
    onFailedAttempt: (error, input) => {
      expect(error.message).toBe("TEST ERROR");
      expect(input).toBe("test");
    },
  });
  const result = await runnableRetry.invoke("test");
  expect(result).toEqual(3);
});

test("RunnableRetry batch with thrown errors", async () => {
  const runnable = new RunnableLambda({
    func: (_thing: unknown) => {
@@ -79,3 +101,27 @@ test("RunnableRetry batch should not retry successful requests", async () => {
  expect(attemptCount).toEqual(5);
  expect(result.sort()).toEqual([3, 4, 5]);
});

test("RunnableRetry batch with an onFailedAttempt handler", async () => {
  let attemptCount = 0;
  const runnable = new RunnableLambda({
    func: (_thing: unknown) => {
      attemptCount += 1;
      if (attemptCount < 3) {
        throw new Error("TEST ERROR");
      } else {
        return attemptCount;
      }
    },
  });
  const runnableRetry = runnable.withRetry({
    stopAfterAttempt: 2,
    onFailedAttempt: (error, input) => {
      expect(error.message).toEqual("TEST ERROR");
      expect(input).toEqual("test1");
    },
  });
  const result = await runnableRetry.batch(["test1", "test2", "test3"]);
  expect(attemptCount).toEqual(5);
  expect(result.sort()).toEqual([3, 4, 5]);
});