Merge branch 'master' into #28628

langchain-ai · Dec 15, 2024 · 6322604 · 6322604
2 parents f47a319 + a0534ae
commit 6322604
Show file tree

Hide file tree

Showing 300 changed files with 19,588 additions and 7,567 deletions.
diff --git a/.github/scripts/check_diff.py b/.github/scripts/check_diff.py
@@ -272,15 +272,19 @@ def _get_configs_for_multi_dirs(
             # TODO: update to include all packages that rely on standard-tests (all partner packages)
             # note: won't run on external repo partners
             dirs_to_run["lint"].add("libs/standard-tests")
+            dirs_to_run["test"].add("libs/standard-tests")
+            dirs_to_run["lint"].add("libs/cli")
+            dirs_to_run["test"].add("libs/cli")
             dirs_to_run["test"].add("libs/partners/mistralai")
             dirs_to_run["test"].add("libs/partners/openai")
             dirs_to_run["test"].add("libs/partners/anthropic")
             dirs_to_run["test"].add("libs/partners/fireworks")
             dirs_to_run["test"].add("libs/partners/groq")
 
         elif file.startswith("libs/cli"):
-            # todo: add cli makefile
-            pass
+            dirs_to_run["lint"].add("libs/cli")
+            dirs_to_run["test"].add("libs/cli")
+
         elif file.startswith("libs/partners"):
             partner_dir = file.split("/")[2]
             if os.path.isdir(f"libs/partners/{partner_dir}") and [

diff --git a/.github/workflows/check_diffs.yml b/.github/workflows/check_diffs.yml
@@ -5,6 +5,7 @@ on:
   push:
     branches: [master]
   pull_request:
+  merge_group:
 
 # If another push to the same PR or branch happens while this workflow is still running,
 # cancel the earlier run in favor of the next run.

diff --git a/.github/workflows/scheduled_test.yml b/.github/workflows/scheduled_test.yml
@@ -14,17 +14,7 @@ on:
 
 env:
   POETRY_VERSION: "1.8.4"
-  DEFAULT_LIBS: >
-    [
-      "libs/partners/openai",
-      "libs/partners/anthropic",
-      "libs/partners/fireworks",
-      "libs/partners/groq",
-      "libs/partners/mistralai",
-      "libs/partners/google-vertexai",
-      "libs/partners/google-genai",
-      "libs/partners/aws"
-    ]
+  DEFAULT_LIBS: '["libs/partners/openai", "libs/partners/anthropic", "libs/partners/fireworks", "libs/partners/groq", "libs/partners/mistralai", "libs/partners/google-vertexai", "libs/partners/google-genai", "libs/partners/aws"]'
 
 jobs:
   compute-matrix:

diff --git a/README.md b/README.md
@@ -38,26 +38,29 @@ conda install langchain -c conda-forge
 
 For these applications, LangChain simplifies the entire application lifecycle:
 
-- **Open-source libraries**: Build your applications using LangChain's open-source [building blocks](https://python.langchain.com/docs/concepts/#langchain-expression-language-lcel), [components](https://python.langchain.com/docs/concepts/), and [third-party integrations](https://python.langchain.com/docs/integrations/providers/).
+
+- **Open-source libraries**: Build your applications using LangChain's open-source
+[components](https://python.langchain.com/docs/concepts/) and
+[third-party integrations](https://python.langchain.com/docs/integrations/providers/).
   Use [LangGraph](https://langchain-ai.github.io/langgraph/) to build stateful agents with first-class streaming and human-in-the-loop support.
 - **Productionization**: Inspect, monitor, and evaluate your apps with [LangSmith](https://docs.smith.langchain.com/) so that you can constantly optimize and deploy with confidence.
-- **Deployment**: Turn your LangGraph applications into production-ready APIs and Assistants with [LangGraph Cloud](https://langchain-ai.github.io/langgraph/cloud/).
+- **Deployment**: Turn your LangGraph applications into production-ready APIs and Assistants with [LangGraph Platform](https://langchain-ai.github.io/langgraph/cloud/).
 
 ### Open-source libraries
 
-- **`langchain-core`**: Base abstractions and LangChain Expression Language.
-- **`langchain-community`**: Third party integrations.
-  - Some integrations have been further split into **partner packages** that only rely on **`langchain-core`**. Examples include **`langchain_openai`** and **`langchain_anthropic`**.
+- **`langchain-core`**: Base abstractions.
+- **Integration packages** (e.g. **`langchain-openai`**, **`langchain-anthropic`**, etc.): Important integrations have been split into lightweight packages that are co-maintained by the LangChain team and the integration developers.
 - **`langchain`**: Chains, agents, and retrieval strategies that make up an application's cognitive architecture.
-- **[`LangGraph`](https://langchain-ai.github.io/langgraph/)**: A library for building robust and stateful multi-actor applications with LLMs by modeling steps as edges and nodes in a graph. Integrates smoothly with LangChain, but can be used without it. To learn more about LangGraph, check out our first LangChain Academy course, *Introduction to LangGraph*, available [here](https://academy.langchain.com/courses/intro-to-langgraph).
+- **`langchain-community`**: Third-party integrations that are community maintained.
+- **[LangGraph](https://langchain-ai.github.io/langgraph)**: Build robust and stateful multi-actor applications with LLMs by modeling steps as edges and nodes in a graph. Integrates smoothly with LangChain, but can be used without it. To learn more about LangGraph, check out our first LangChain Academy course, *Introduction to LangGraph*, available [here](https://academy.langchain.com/courses/intro-to-langgraph).
 
 ### Productionization:
 
 - **[LangSmith](https://docs.smith.langchain.com/)**: A developer platform that lets you debug, test, evaluate, and monitor chains built on any LLM framework and seamlessly integrates with LangChain.
 
 ### Deployment:
 
-- **[LangGraph Cloud](https://langchain-ai.github.io/langgraph/cloud/)**: Turn your LangGraph applications into production-ready APIs and Assistants.
+- **[LangGraph Platform](https://langchain-ai.github.io/langgraph/cloud/)**: Turn your LangGraph applications into production-ready APIs and Assistants.
 
 ![Diagram outlining the hierarchical organization of the LangChain framework, displaying the interconnected parts across multiple layers.](docs/static/svg/langchain_stack_112024.svg#gh-light-mode-only "LangChain Architecture Overview")
 ![Diagram outlining the hierarchical organization of the LangChain framework, displaying the interconnected parts across multiple layers.](docs/static/svg/langchain_stack_112024_dark.svg#gh-dark-mode-only "LangChain Architecture Overview")
@@ -85,35 +88,32 @@ And much more! Head to the [Tutorials](https://python.langchain.com/docs/tutoria
 
 The main value props of the LangChain libraries are:
 
-1. **Components**: composable building blocks, tools and integrations for working with language models. Components are modular and easy-to-use, whether you are using the rest of the LangChain framework or not
-2. **Off-the-shelf chains**: built-in assemblages of components for accomplishing higher-level tasks
-
-Off-the-shelf chains make it easy to get started. Components make it easy to customize existing chains and build new ones.
-
-## LangChain Expression Language (LCEL)
-
-LCEL is a key part of LangChain, allowing you to build and organize chains of processes in a straightforward, declarative manner. It was designed to support taking prototypes directly into production without needing to alter any code. This means you can use LCEL to set up everything from basic "prompt + LLM" setups to intricate, multi-step workflows.
-
-- **[Overview](https://python.langchain.com/docs/concepts/#langchain-expression-language-lcel)**: LCEL and its benefits
-- **[Interface](https://python.langchain.com/docs/concepts/#runnable-interface)**: The standard Runnable interface for LCEL objects
-- **[Primitives](https://python.langchain.com/docs/how_to/#langchain-expression-language-lcel)**: More on the primitives LCEL includes
-- **[Cheatsheet](https://python.langchain.com/docs/how_to/lcel_cheatsheet/)**: Quick overview of the most common usage patterns
+1. **Components**: composable building blocks, tools and integrations for working with language models. Components are modular and easy-to-use, whether you are using the rest of the LangChain framework or not.
+2. **Easy orchestration with LangGraph**: [LangGraph](https://langchain-ai.github.io/langgraph/),
+built on top of `langchain-core`, has built-in support for [messages](https://python.langchain.com/docs/concepts/messages/), [tools](https://python.langchain.com/docs/concepts/tools/),
+and other LangChain abstractions. This makes it easy to combine components into
+production-ready applications with persistence, streaming, and other key features.
+Check out the LangChain [tutorials page](https://python.langchain.com/docs/tutorials/#orchestration) for examples.
 
 ## Components
 
 Components fall into the following **modules**:
 
 **📃 Model I/O**
 
-This includes [prompt management](https://python.langchain.com/docs/concepts/#prompt-templates), [prompt optimization](https://python.langchain.com/docs/concepts/#example-selectors), a generic interface for [chat models](https://python.langchain.com/docs/concepts/#chat-models) and [LLMs](https://python.langchain.com/docs/concepts/#llms), and common utilities for working with [model outputs](https://python.langchain.com/docs/concepts/#output-parsers).
+This includes [prompt management](https://python.langchain.com/docs/concepts/prompt_templates/)
+and a generic interface for [chat models](https://python.langchain.com/docs/concepts/chat_models/), including a consistent interface for [tool-calling](https://python.langchain.com/docs/concepts/tool_calling/) and [structured output](https://python.langchain.com/docs/concepts/structured_outputs/) across model providers.
 
 **📚 Retrieval**
 
-Retrieval Augmented Generation involves [loading data](https://python.langchain.com/docs/concepts/#document-loaders) from a variety of sources, [preparing it](https://python.langchain.com/docs/concepts/#text-splitters), then [searching over (a.k.a. retrieving from)](https://python.langchain.com/docs/concepts/#retrievers) it for use in the generation step.
+Retrieval Augmented Generation involves [loading data](https://python.langchain.com/docs/concepts/document_loaders/) from a variety of sources, [preparing it](https://python.langchain.com/docs/concepts/text_splitters/), then [searching over (a.k.a. retrieving from)](https://python.langchain.com/docs/concepts/retrievers/) it for use in the generation step.
 
 **🤖 Agents**
 
-Agents allow an LLM autonomy over how a task is accomplished. Agents make decisions about which Actions to take, then take that Action, observe the result, and repeat until the task is complete. LangChain provides a [standard interface for agents](https://python.langchain.com/docs/concepts/#agents), along with [LangGraph](https://github.com/langchain-ai/langgraph) for building custom agents.
+Agents allow an LLM autonomy over how a task is accomplished. Agents make decisions about which Actions to take, then take that Action, observe the result, and repeat until the task is complete. [LangGraph](https://langchain-ai.github.io/langgraph/) makes it easy to use
+LangChain components to build both [custom](https://langchain-ai.github.io/langgraph/tutorials/)
+and [built-in](https://langchain-ai.github.io/langgraph/how-tos/create-react-agent/)
+LLM agents.
 
 ## 📖 Documentation
 

diff --git a/SECURITY.md b/SECURITY.md
@@ -1,5 +1,30 @@
 # Security Policy
 
+LangChain has a large ecosystem of integrations with various external resources like local and remote file systems, APIs and databases. These integrations allow developers to create versatile applications that combine the power of LLMs with the ability to access, interact with and manipulate external resources.
+
+## Best practices
+
+When building such applications, developers should remember to follow good security practices:
+
+* [**Limit Permissions**](https://en.wikipedia.org/wiki/Principle_of_least_privilege): Scope permissions specifically to the application's need. Granting broad or excessive permissions can introduce significant security vulnerabilities. To avoid such vulnerabilities, consider using read-only credentials, disallowing access to sensitive resources, using sandboxing techniques (such as running inside a container), specifying proxy configurations to control external requests, etc. as appropriate for your application.
+* **Anticipate Potential Misuse**: Just as humans can err, so can Large Language Models (LLMs). Always assume that any system access or credentials may be used in any way allowed by the permissions they are assigned. For example, if a pair of database credentials allows deleting data, it’s safest to assume that any LLM able to use those credentials may in fact delete data.
+* [**Defense in Depth**](https://en.wikipedia.org/wiki/Defense_in_depth_(computing)): No security technique is perfect. Fine-tuning and good chain design can reduce, but not eliminate, the odds that a Large Language Model (LLM) may make a mistake. It’s best to combine multiple layered security approaches rather than relying on any single layer of defense to ensure security. For example: use both read-only permissions and sandboxing to ensure that LLMs are only able to access data that is explicitly meant for them to use.
+
+Risks of not doing so include, but are not limited to:
+* Data corruption or loss.
+* Unauthorized access to confidential information.
+* Compromised performance or availability of critical resources.
+
+Example scenarios with mitigation strategies:
+
+* A user may ask an agent with access to the file system to delete files that should not be deleted or read the content of files that contain sensitive information. To mitigate, limit the agent to only use a specific directory and only allow it to read or write files that are safe to read or write. Consider further sandboxing the agent by running it in a container.
+* A user may ask an agent with write access to an external API to write malicious data to the API, or delete data from that API. To mitigate, give the agent read-only API keys, or limit it to only use endpoints that are already resistant to such misuse.
+* A user may ask an agent with access to a database to drop a table or mutate the schema. To mitigate, scope the credentials to only the tables that the agent needs to access and consider issuing READ-ONLY credentials.
+
+If you're building applications that access external resources like file systems, APIs
+or databases, consider speaking with your company's security team to determine how to best
+design and secure your applications.
+
 ## Reporting OSS Vulnerabilities
 
 LangChain is partnered with [huntr by Protect AI](https://huntr.com/) to provide 
@@ -14,7 +39,7 @@ Before reporting a vulnerability, please review:
 
 1) In-Scope Targets and Out-of-Scope Targets below.
 2) The [langchain-ai/langchain](https://python.langchain.com/docs/contributing/repo_structure) monorepo structure.
-3) LangChain [security guidelines](https://python.langchain.com/docs/security) to
+3) The [Best practices](#best-practices) above to
    understand what we consider to be a security vulnerability vs. developer
    responsibility.
 
@@ -33,21 +58,21 @@ The following packages and repositories are eligible for bug bounties:
 All out of scope targets defined by huntr as well as:
 
 - **langchain-experimental**: This repository is for experimental code and is not
-  eligible for bug bounties, bug reports to it will be marked as interesting or waste of
+  eligible for bug bounties (see [package warning](https://pypi.org/project/langchain-experimental/)); bug reports to it will be marked as interesting or waste of
   time and published with no bounty attached.
 - **tools**: Tools in either langchain or langchain-community are not eligible for bug
   bounties. This includes the following directories
-  - langchain/tools
-  - langchain-community/tools
-  - Please review our [security guidelines](https://python.langchain.com/docs/security)
+  - libs/langchain/langchain/tools
+  - libs/community/langchain_community/tools
+  - Please review the [best practices](#best-practices)
     for more details, but generally tools interact with the real world. Developers are
     expected to understand the security implications of their code and are responsible
     for the security of their tools.
 - Code documented with security notices. This will be decided on a case by
   case basis, but likely will not be eligible for a bounty as the code is already
   documented with guidelines for developers that should be followed for making their
   application secure.
-- Any LangSmith related repositories or APIs see below.
+- Any LangSmith related repositories or APIs (see [Reporting LangSmith Vulnerabilities](#reporting-langsmith-vulnerabilities)).
 
 ## Reporting LangSmith Vulnerabilities
 

diff --git a/docs/Makefile b/docs/Makefile
@@ -13,28 +13,21 @@ OUTPUT_NEW_DOCS_DIR = $(OUTPUT_NEW_DIR)/docs
 
 PYTHON = .venv/bin/python
 
-PARTNER_DEPS_LIST := $(shell find ../libs/partners -mindepth 1 -maxdepth 1 -type d -exec sh -c ' \
-for dir; do \
-    if find "$$dir" -maxdepth 1 -type f \( -name "pyproject.toml" -o -name "setup.py" \) | grep -q .; then \
-        echo "$$dir"; \
-    fi \
-done' sh {} + | grep -vE "airbyte|ibm|databricks" | tr '\n' ' ')
-
 PORT ?= 3001
 
 clean:
 	rm -rf build
 
 install-vercel-deps:
-	yum -y update
-	yum install gcc bzip2-devel libffi-devel zlib-devel wget tar gzip rsync -y
+	yum -y -q update
+	yum -y -q install gcc bzip2-devel libffi-devel zlib-devel wget tar gzip rsync -y
 
 install-py-deps:
 	python3 -m venv .venv
-	$(PYTHON) -m pip install --upgrade pip
-	$(PYTHON) -m pip install --upgrade uv
-	$(PYTHON) -m uv pip install --pre -r vercel_requirements.txt
-	$(PYTHON) -m uv pip install --pre --editable $(PARTNER_DEPS_LIST)
+	$(PYTHON) -m pip install -q --upgrade pip
+	$(PYTHON) -m pip install -q --upgrade uv
+	$(PYTHON) -m uv pip install -q --pre -r vercel_requirements.txt
+	$(PYTHON) -m uv pip install -q --pre $$($(PYTHON) scripts/partner_deps_list.py)
 
 generate-files:
 	mkdir -p $(INTERMEDIATE_DIR)
@@ -47,6 +40,7 @@ generate-files:
 	$(PYTHON) scripts/partner_pkg_table.py $(INTERMEDIATE_DIR)
 
 	curl https://raw.githubusercontent.com/langchain-ai/langserve/main/README.md | sed 's/<=/\&lt;=/g' > $(INTERMEDIATE_DIR)/langserve.md
+	cp ../SECURITY.md $(INTERMEDIATE_DIR)/security.md
 	$(PYTHON) scripts/resolve_local_links.py $(INTERMEDIATE_DIR)/langserve.md https://github.com/langchain-ai/langserve/tree/main/
 
 copy-infra:
@@ -59,6 +53,7 @@ copy-infra:
 	cp package.json $(OUTPUT_NEW_DIR)
 	cp sidebars.js $(OUTPUT_NEW_DIR)
 	cp -r static $(OUTPUT_NEW_DIR)
+	cp -r ../libs/cli/langchain_cli/integration_template $(OUTPUT_NEW_DIR)/src/theme
 	cp yarn.lock $(OUTPUT_NEW_DIR)
 
 render:
@@ -80,6 +75,7 @@ build: install-py-deps generate-files copy-infra render md-sync append-related
 vercel-build: install-vercel-deps build generate-references
 	rm -rf docs
 	mv $(OUTPUT_NEW_DOCS_DIR) docs
+	cp -r ../libs/cli/langchain_cli/integration_template src/theme
 	rm -rf build
 	mkdir static/api_reference
 	git clone --depth=1 https://github.com/langchain-ai/langchain-api-docs-html.git