Commit

Merge branch 'main' of https://github.com/hwchase17/langchainjs into jacob/tool_messages

jacoblee93 committed Apr 9, 2024
2 parents 0a08d20 + fc2f9de commit 79bb8c1
Showing 31 changed files with 890 additions and 76 deletions.
32 changes: 32 additions & 0 deletions docs/core_docs/docs/integrations/text_embedding/baidu_qianfan.mdx
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
---
sidebar_class_name: node-only
---

# Baidu Qianfan

The `BaiduQianfanEmbeddings` class uses the Baidu Qianfan API to generate embeddings for a given text.

## Setup

Official Website: https://cloud.baidu.com/doc/WENXINWORKSHOP/s/alj562vvu

An API key is required to use this embedding model. You can get one by registering at https://cloud.baidu.com/doc/WENXINWORKSHOP/s/alj562vvu.

Please set the acquired API key as an environment variable named `BAIDU_API_KEY`, and set your secret key as an environment variable named `BAIDU_SECRET_KEY`.
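For example, in a Unix-like shell you can export both variables before starting your app (the values below are placeholders for your own credentials):

```bash
export BAIDU_API_KEY="your-api-key"
export BAIDU_SECRET_KEY="your-secret-key"
```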

Then, you'll need to install the [`@langchain/community`](https://www.npmjs.com/package/@langchain/community) package:

import IntegrationInstallTooltip from "@mdx_components/integration_install_tooltip.mdx";

<IntegrationInstallTooltip></IntegrationInstallTooltip>

```bash npm2yarn
npm install @langchain/community
```

## Usage

import CodeBlock from "@theme/CodeBlock";
import BaiduQianFanExample from "@examples/embeddings/baidu_qianfan.ts";

<CodeBlock language="typescript">{BaiduQianFanExample}</CodeBlock>
Expand Up @@ -29,7 +29,7 @@ LangChain provides several advanced retrieval types. A full list is below, along
| [Vectorstore](/docs/modules/data_connection/retrievers/vectorstore) | Vectorstore | No | If you are just getting started and looking for something quick and easy. | This is the simplest method and the one that is easiest to get started with. It involves creating embeddings for each piece of text. |
| [ParentDocument](/docs/modules/data_connection/retrievers/parent-document-retriever) | Vectorstore + Document Store | No | If your pages have lots of smaller pieces of distinct information that are best indexed by themselves, but best retrieved all together. | This involves indexing multiple chunks for each document. Then you find the chunks that are most similar in embedding space, but you retrieve the whole parent document and return that (rather than individual chunks). |
| [Multi Vector](/docs/modules/data_connection/retrievers/multi-vector-retriever) | Vectorstore + Document Store | Sometimes during indexing | If you are able to extract information from documents that you think is more relevant to index than the text itself. | This involves creating multiple vectors for each document. Each vector could be created in a myriad of ways - examples include summaries of the text and hypothetical questions. |
| [Self Query](/docs/modules/data_connection/retrievers/self_query/) | Vectorstore | Yes | If users are asking questions that are better answered by fetching documents based on metadata rather than similarity with the text. | This uses an LLM to transform user input into two things: (1) a string to look up semantically, (2) a metadata filer to go along with it. This is useful because oftentimes questions are about the METADATA of documents (not the content itself). |
| [Self Query](/docs/modules/data_connection/retrievers/self_query/) | Vectorstore | Yes | If users are asking questions that are better answered by fetching documents based on metadata rather than similarity with the text. | This uses an LLM to transform user input into two things: (1) a string to look up semantically, (2) a metadata filter to go along with it. This is useful because oftentimes questions are about the METADATA of documents (not the content itself). |
| [Contextual Compression](/docs/modules/data_connection/retrievers/contextual_compression) | Any | Sometimes | If you are finding that your retrieved documents contain too much irrelevant information and are distracting the LLM. | This puts a post-processing step on top of another retriever and extracts only the most relevant information from retrieved documents. This can be done with embeddings or an LLM. |
| [Time-Weighted Vectorstore](/docs/modules/data_connection/retrievers/time_weighted_vectorstore) | Vectorstore | No | If you have timestamps associated with your documents, and you want to retrieve the most recent ones | This fetches documents based on a combination of semantic similarity (as in normal vector retrieval) and recency (looking at timestamps of indexed documents) |
| [Multi-Query Retriever](/docs/modules/data_connection/retrievers/multi-query-retriever) | Any | Yes | If users are asking questions that are complex and require multiple pieces of distinct information to respond | This uses an LLM to generate multiple queries from the original one. This is useful when the original query needs pieces of information about multiple topics to be properly answered. By generating multiple queries, we can then fetch documents for each of them. |
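To make the first table row concrete, here is a toy, self-contained sketch of vectorstore-style retrieval: every text is embedded once at index time, the query is embedded at query time, and the most similar texts are returned. This is not LangChain's implementation; the hashed bag-of-words `embed` function is only a stand-in for a real embedding model.

```typescript
// Toy stand-in for an embedding model: a hashed bag-of-words vector.
function embed(text: string, dims = 16): number[] {
  const vec = new Array(dims).fill(0);
  for (const word of text.toLowerCase().split(/\W+/).filter(Boolean)) {
    let h = 0;
    for (const ch of word) h = (h * 31 + ch.charCodeAt(0)) % dims;
    vec[h] += 1;
  }
  return vec;
}

// Cosine similarity between two vectors of equal length.
function cosine(a: number[], b: number[]): number {
  let dot = 0, na = 0, nb = 0;
  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i];
    na += a[i] * a[i];
    nb += b[i] * b[i];
  }
  return dot / (Math.sqrt(na) * Math.sqrt(nb) || 1);
}

class ToyVectorstoreRetriever {
  private index: { text: string; vector: number[] }[] = [];

  // Index time: embed each piece of text once.
  add(texts: string[]) {
    for (const text of texts) this.index.push({ text, vector: embed(text) });
  }

  // Query time: embed the query and return the k most similar texts.
  retrieve(query: string, k = 1): string[] {
    const q = embed(query);
    return [...this.index]
      .sort((x, y) => cosine(q, y.vector) - cosine(q, x.vector))
      .slice(0, k)
      .map((d) => d.text);
  }
}

const retriever = new ToyVectorstoreRetriever();
retriever.add(["cats purr", "dogs bark", "stocks fell today"]);
console.log(retriever.retrieve("why do cats purr?")); // top hit should be "cats purr"
```

The real retrievers differ mainly in what happens around this core loop: ParentDocument swaps the matched chunk for its parent document, Self Query adds a metadata filter, and so on.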
Expand Down
253 changes: 253 additions & 0 deletions docs/core_docs/docs/use_cases/graph/construction.ipynb
@@ -0,0 +1,253 @@
{
"cells": [
{
"cell_type": "raw",
"id": "5e61b0f2-15b9-4241-9ab5-ff0f3f732232",
"metadata": {},
"source": [
"---\n",
"sidebar_position: 1\n",
"---"
]
},
{
"cell_type": "markdown",
"id": "846ef4f4-ee38-4a42-a7d3-1a23826e4830",
"metadata": {},
"source": [
"# Constructing knowledge graphs\n",
"In this guide we'll go over the basic ways of constructing a knowledge graph based on unstructured text. The constructed graph can then be used as a knowledge base in a RAG application. At a high level, the steps of constructing a knowledge graph from text are:\n",
"\n",
"1. Extracting structured information from text: a model is used to extract structured graph information from the text.\n",
"2. Storing into a graph database: storing the extracted structured graph information in a graph database enables downstream RAG applications."
]
},
{
"cell_type": "markdown",
"id": "26677b08",
"metadata": {},
"source": [
"## Setup\n",
"#### Install dependencies\n",
"\n",
"```{=mdx}\n",
"import IntegrationInstallTooltip from \"@mdx_components/integration_install_tooltip.mdx\";\n",
"import Npm2Yarn from \"@theme/Npm2Yarn\";\n",
"\n",
"<IntegrationInstallTooltip></IntegrationInstallTooltip>\n",
"\n",
"<Npm2Yarn>\n",
" langchain @langchain/community @langchain/openai neo4j-driver zod\n",
"</Npm2Yarn>\n",
"```\n",
"\n",
"#### Set environment variables\n",
"\n",
"We'll use OpenAI in this example:\n",
"\n",
"```env\n",
"OPENAI_API_KEY=your-api-key\n",
"\n",
"# Optional, use LangSmith for best-in-class observability\n",
"LANGSMITH_API_KEY=your-api-key\n",
"LANGCHAIN_TRACING_V2=true\n",
"```\n",
"\n",
"Next, we need to define Neo4j credentials.\n",
"Follow [these installation steps](https://neo4j.com/docs/operations-manual/current/installation/) to set up a Neo4j database.\n",
"\n",
"```env\n",
"NEO4J_URI=\"bolt://localhost:7687\"\n",
"NEO4J_USERNAME=\"neo4j\"\n",
"NEO4J_PASSWORD=\"password\"\n",
"```"
]
},
{
"cell_type": "markdown",
"id": "50fa4510-29b7-49b6-8496-5e86f694e81f",
"metadata": {},
"source": [
"The example below creates a connection to a Neo4j database."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4ee9ef7a-eef9-4289-b9fd-8fbc31041688",
"metadata": {},
"outputs": [],
"source": [
"import \"neo4j-driver\";\n",
"import { Neo4jGraph } from \"@langchain/community/graphs/neo4j_graph\";\n",
"\n",
"const url = Deno.env.get(\"NEO4J_URI\");\n",
"const username = Deno.env.get(\"NEO4J_USERNAME\");\n",
"const password = Deno.env.get(\"NEO4J_PASSWORD\");\n",
"const graph = await Neo4jGraph.initialize({ url, username, password });"
]
},
{
"cell_type": "markdown",
"id": "0cb0ea30-ca55-4f35-aad6-beb57453de66",
"metadata": {},
"source": [
"## LLM Graph Transformer\n",
"Extracting graph data from text turns unstructured information into a structured format, making it easier to navigate complex relationships and patterns. The LLMGraphTransformer converts text documents into structured graph documents by leveraging an LLM to parse and categorize entities and their relationships. The choice of LLM significantly influences the output, determining the accuracy and nuance of the extracted graph data."
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "e1a19424-6046-40c2-81d1-f3b88193a293",
"metadata": {},
"outputs": [],
"source": [
"import { ChatOpenAI } from \"@langchain/openai\";\n",
"import { LLMGraphTransformer } from \"@langchain/community/experimental/graph_transformers/llm\";\n",
"\n",
"const model = new ChatOpenAI({\n",
" temperature: 0,\n",
" modelName: \"gpt-4-turbo-preview\",\n",
"});\n",
"\n",
"const llmGraphTransformer = new LLMGraphTransformer({\n",
" llm: model\n",
"});\n"
]
},
{
"cell_type": "markdown",
"id": "9c14084c-37a7-4a9c-a026-74e12961c781",
"metadata": {},
"source": [
"Now we can pass in example text and examine the results."
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "bbfe0d8f-982e-46e6-88fb-8a4f0d850b07",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Nodes: 8\n",
"Relationships:7\n"
]
}
],
"source": [
"import { Document } from \"@langchain/core/documents\";\n",
"\n",
"let text = `\n",
"Marie Curie was a Polish and naturalised-French physicist and chemist who conducted pioneering research on radioactivity.\n",
"She was the first woman to win a Nobel Prize, the first person to win a Nobel Prize twice, and the only person to win a Nobel Prize in two scientific fields.\n",
"Her husband, Pierre Curie, was a co-winner of her first Nobel Prize, making them the first-ever married couple to win the Nobel Prize and launching the Curie family legacy of five Nobel Prizes.\n",
"She was, in 1906, the first woman to become a professor at the University of Paris.\n",
"`\n",
"\n",
"const result = await llmGraphTransformer.convertToGraphDocuments([\n",
" new Document({ pageContent: text }),\n",
"]);\n",
"\n",
"console.log(`Nodes: ${result[0].nodes.length}`);\n",
"console.log(`Relationships:${result[0].relationships.length}`);"
]
},
{
"cell_type": "markdown",
"id": "a8afbf13-05d0-4383-8050-f88b8c2f6fab",
"metadata": {},
"source": [
"Note that the graph construction process is non-deterministic since we are using an LLM. Therefore, you might get slightly different results on each execution.\n",
"Examine the following image to better grasp the structure of the generated knowledge graph.\n",
"\n",
"![graph_construction1.png](../../../static/img/graph_construction1.png)\n",
"\n",
"Additionally, you have the flexibility to define specific types of nodes and relationships for extraction according to your requirements."
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "6f92929f-74fb-4db2-b7e1-eb1e9d386a67",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Nodes: 6\n",
"Relationships:4\n"
]
}
],
"source": [
"const llmGraphTransformerFiltered = new LLMGraphTransformer({\n",
" llm: model,\n",
" allowedNodes: [\"PERSON\", \"COUNTRY\", \"ORGANIZATION\"],\n",
" allowedRelationships: [\"NATIONALITY\", \"LOCATED_IN\", \"WORKED_AT\", \"SPOUSE\"],\n",
" strictMode: false\n",
"});\n",
"\n",
"const result_filtered = await llmGraphTransformerFiltered.convertToGraphDocuments([\n",
" new Document({ pageContent: text }),\n",
"]);\n",
"\n",
"console.log(`Nodes: ${result_filtered[0].nodes.length}`);\n",
"console.log(`Relationships:${result_filtered[0].relationships.length}`);"
]
},
{
"cell_type": "markdown",
"id": "f66c6756-6efb-4b1e-9b5d-87ed914a5212",
"metadata": {},
"source": [
"For a better understanding of the generated graph, we can again visualize it.\n",
"\n",
"![graph_construction2.png](../../../static/img/graph_construction2.png)\n",
"\n",
"## Storing to graph database\n",
"The generated graph documents can be stored to a graph database using the `addGraphDocuments` method."
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "8ef3e21d-f1c2-45e2-9511-4920d1cf6e7e",
"metadata": {},
"outputs": [],
"source": [
"await graph.addGraphDocuments(result_filtered);"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e67382aa-7324-4983-b834-1fdd841cc92c",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Deno",
"language": "typescript",
"name": "deno"
},
"language_info": {
"file_extension": ".ts",
"mimetype": "text/x.typescript",
"name": "typescript",
"nb_converter": "script",
"pygments_lexer": "typescript",
"version": "5.4.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
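To make the graph documents returned by the transformer more tangible, here is a hand-built sketch in plain TypeScript. The interfaces are simplified, hypothetical stand-ins — the real classes in `@langchain/community` differ in detail — and the nodes and relationships are ones the filtered transformer above could plausibly extract from the Curie text.

```typescript
// Hypothetical, simplified shapes for illustration only; not the real
// GraphDocument API from @langchain/community.
interface GraphNode {
  id: string;
  type: string;
}

interface Relationship {
  source: string;
  target: string;
  type: string;
}

interface GraphDocument {
  nodes: GraphNode[];
  relationships: Relationship[];
}

// A hand-built graph for the Marie Curie passage, restricted to the
// allowedNodes / allowedRelationships used in the filtered example.
const doc: GraphDocument = {
  nodes: [
    { id: "Marie Curie", type: "PERSON" },
    { id: "Pierre Curie", type: "PERSON" },
    { id: "Poland", type: "COUNTRY" },
    { id: "France", type: "COUNTRY" },
    { id: "University of Paris", type: "ORGANIZATION" },
  ],
  relationships: [
    { source: "Marie Curie", target: "Poland", type: "NATIONALITY" },
    { source: "Marie Curie", target: "France", type: "NATIONALITY" },
    { source: "Marie Curie", target: "Pierre Curie", type: "SPOUSE" },
    { source: "Marie Curie", target: "University of Paris", type: "WORKED_AT" },
  ],
};

console.log(`Nodes: ${doc.nodes.length}`);
console.log(`Relationships: ${doc.relationships.length}`);
```

Restricting node and relationship types this way trades recall for a cleaner, more queryable schema in the downstream graph database.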
24 changes: 14 additions & 10 deletions docs/core_docs/docs/use_cases/graph/index.ipynb
Expand Up @@ -32,8 +32,16 @@
"\n",
"* [Prompting strategies](/docs/use_cases/graph/prompting): Advanced prompt engineering techniques.\n",
"* [Mapping values](/docs/use_cases/graph/mapping): Techniques for mapping values from questions to database.\n",
"* [Semantic layer](/docs/use_cases/graph/semantic): Techniques for implementing semantic layers."
"* [Semantic layer](/docs/use_cases/graph/semantic): Techniques for implementing semantic layers.\n",
"* [Constructing graphs](/docs/use_cases/graph/construction): Techniques for constructing knowledge graphs.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand All @@ -43,16 +51,12 @@
"name": "deno"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"file_extension": ".ts",
"mimetype": "text/x.typescript",
"name": "typescript",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.5"
"nb_converter": "script",
"pygments_lexer": "typescript",
"version": "5.4.3"
}
},
"nbformat": 4,
Expand Down
12 changes: 10 additions & 2 deletions docs/core_docs/docs/use_cases/graph/quickstart.ipynb
Expand Up @@ -225,8 +225,16 @@
"\n",
"* [Prompting strategies](/docs/use_cases/graph/prompting): Advanced prompt engineering techniques.\n",
"* [Mapping values](/docs/use_cases/graph/mapping): Techniques for mapping values from questions to database.\n",
"* [Semantic layer](/docs/use_cases/graph/semantic): Techniques for implementing semantic layers."
"* [Semantic layer](/docs/use_cases/graph/semantic): Techniques for implementing semantic layers.\n",
"* [Constructing graphs](/docs/use_cases/graph/construction): Techniques for constructing knowledge graphs.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand All @@ -241,7 +249,7 @@
"name": "typescript",
"nb_converter": "script",
"pygments_lexer": "typescript",
"version": "5.3.3"
"version": "5.4.3"
}
},
"nbformat": 4,
Expand Down
Binary file added docs/core_docs/static/img/graph_construction2.png
7 changes: 7 additions & 0 deletions examples/src/embeddings/baidu_qianfan.ts
@@ -0,0 +1,7 @@
import { BaiduQianfanEmbeddings } from "@langchain/community/embeddings/baidu_qianfan";

const embeddings = new BaiduQianfanEmbeddings();
const res = await embeddings.embedQuery(
  "What would be a good company name for a company that makes colorful socks?"
);
console.log({ res });
2 changes: 1 addition & 1 deletion langchain-core/package.json
@@ -1,6 +1,6 @@
{
"name": "@langchain/core",
"version": "0.1.54",
"version": "0.1.55",
"description": "Core LangChain.js abstractions and schemas",
"type": "module",
"engines": {
Expand Down
2 changes: 1 addition & 1 deletion langchain/package.json
@@ -1,6 +1,6 @@
{
"name": "langchain",
"version": "0.1.31",
"version": "0.1.32",
"description": "Typescript bindings for langchain",
"type": "module",
"engines": {
Expand Down
0 comments on commit 79bb8c1