Merge branch 'master' into eugene/check_links_2

langchain-ai · Oct 31, 2024 · e48f154 · e48f154
2 parents cb89f82 + 71f590d
commit e48f154
Show file tree

Hide file tree

Showing 54 changed files with 2,124 additions and 1,442 deletions.
diff --git a/.github/scripts/prep_api_docs_build.py b/.github/scripts/prep_api_docs_build.py
@@ -14,13 +14,20 @@ def load_packages_yaml() -> Dict[str, Any]:
         return yaml.safe_load(f)
 
 
+def get_target_dir(package_name: str) -> Path:
+    """Get the target directory for a given package."""
+    package_name_short = package_name.replace("langchain-", "")
+    base_path = Path("langchain/libs")
+    if package_name_short == "experimental":
+        return base_path / "experimental"
+    return base_path / "partners" / package_name_short
+
+
 def clean_target_directories(packages: Dict[str, Any]) -> None:
     """Remove old directories that will be replaced."""
-    base_path = Path("langchain/libs/partners")
     for package in packages["packages"]:
         if package["repo"] != "langchain-ai/langchain":
-            package_name = package["name"].replace("langchain-", "")
-            target_dir = base_path / package_name
+            target_dir = get_target_dir(package["name"])
             if target_dir.exists():
                 print(f"Removing {target_dir}")
                 shutil.rmtree(target_dir)
@@ -36,9 +43,8 @@ def move_libraries(packages: Dict[str, Any]) -> None:
             continue
 
         repo_name = package["repo"].split("/")[1]
-        package_name = package["name"].replace("langchain-", "")
         source_path = package["path"]
-        target_dir = f"langchain/libs/partners/{package_name}"
+        target_dir = get_target_dir(package["name"])
 
         # Handle root path case
         if source_path == ".":

diff --git a/.github/workflows/api_doc_build.yml b/.github/workflows/api_doc_build.yml
@@ -23,15 +23,19 @@ jobs:
           path: langchain-api-docs-html
           token: ${{ secrets.TOKEN_GITHUB_API_DOCS_HTML }}
 
-      - name: Install yq
-        run: |
-          sudo wget -qO /usr/local/bin/yq https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64
-          sudo chmod a+x /usr/local/bin/yq
+      - name: Get repos with yq
+        id: get-unsorted-repos
+        uses: mikefarah/yq@master
+        with:
+          cmd: yq '.packages[].repo' langchain/libs/packages.yml
 
       - name: Parse YAML and checkout repos
+        env:
+          REPOS_UNSORTED: ${{ steps.get-unsorted-repos.outputs.result }}
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
         run: |
           # Get unique repositories
-          REPOS=$(yq '.packages[].repo' langchain/libs/packages.yml | sort -u)
+          REPOS=$(echo "$REPOS_UNSORTED" | sort -u)
           
           # Checkout each unique repository
           for repo in $REPOS; do
@@ -41,8 +45,6 @@ jobs:
               git clone --depth 1 https://github.com/$repo.git $REPO_NAME
             fi
           done
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
 
       - name: Set up Python ${{ env.PYTHON_VERSION }} + Poetry ${{ env.POETRY_VERSION }}
         uses: "./langchain/.github/actions/poetry_setup"

diff --git a/docs/api_reference/create_api_rst.py b/docs/api_reference/create_api_rst.py
@@ -601,9 +601,11 @@ def _build_index(dirs: List[str]) -> None:
         ]
         for header_name, dir_ in sorted(
             zip(integration_headers, integrations),
-            key=lambda h_d: integrations_to_show.index(h_d[1])
-            if h_d[1] in integrations_to_show
-            else len(integrations_to_show),
+            key=lambda h_d: (
+                integrations_to_show.index(h_d[1])
+                if h_d[1] in integrations_to_show
+                else len(integrations_to_show)
+            ),
         )[: len(integrations_to_show)]:
             integration_grid += f'\n- header: "**{header_name}**"\n  content: {_package_namespace(dir_).replace("_", "-")} {_get_package_version(_package_dir(dir_))}\n  link: {dir_.replace("-", "_")}/index.html'
         doc += f"""## Integrations
@@ -648,7 +650,7 @@ def main(dirs: Optional[list] = None) -> None:
         dirs = [
             dir_
             for dir_ in os.listdir(ROOT_DIR / "libs")
-            if dir_ not in ("cli", "partners", "standard-tests")
+            if dir_ not in ("cli", "partners", "standard-tests", "packages.yml")
         ]
         dirs += [
             dir_

diff --git a/docs/docs/concepts/chat_models.mdx b/docs/docs/concepts/chat_models.mdx
@@ -152,7 +152,7 @@ A semantic cache introduces a dependency on another model on the critical path o
 
 However, there might be situations where caching chat model responses is beneficial. For example, if you have a chat model that is used to answer frequently asked questions, caching responses can help reduce the load on the model provider and improve response times.
 
-Please see the [how to cache chat model responses](/docs/how_to/#chat-model-caching) guide for more details.
+Please see the [how to cache chat model responses](/docs/how_to/chat_model_caching/) guide for more details.
 
 ## Related resources
 
@@ -165,4 +165,4 @@ Please see the [how to cache chat model responses](/docs/how_to/#chat-model-cach
 * [Tool calling](/docs/concepts#tool-calling)
 * [Multimodality](/docs/concepts/multimodality)
 * [Structured outputs](/docs/concepts#structured_output)
-* [Tokens](/docs/concepts/tokens)
+* [Tokens](/docs/concepts/tokens)
diff --git a/docs/docs/concepts/runnables.mdx b/docs/docs/concepts/runnables.mdx
@@ -15,7 +15,7 @@ This guide covers the main concepts and methods of the Runnable interface, which
 The Runnable way defines a standard interface that allows a Runnable component to be:
 
 * [Invoked](/docs/how_to/lcel_cheatsheet/#invoke-a-runnable): A single input is transformed into an output.
-* [Batched](/docs/how_to/lcel_cheatsheet/#batch-a-runnable/): Multiple inputs are efficiently transformed into outputs.
+* [Batched](/docs/how_to/lcel_cheatsheet/#batch-a-runnable): Multiple inputs are efficiently transformed into outputs.
 * [Streamed](/docs/how_to/lcel_cheatsheet/#stream-a-runnable): Outputs are streamed as they are produced.
 * Inspected: Schematic information about Runnable's input, output, and configuration can be accessed.
 * Composed: Multiple Runnables can be composed to work together using [the LangChain Expression Language (LCEL)](/docs/concepts/lcel) to create complex pipelines.

diff --git a/docs/docs/concepts/tools.mdx b/docs/docs/concepts/tools.mdx
@@ -141,7 +141,7 @@ See [how to pass run time values to tools](/docs/how_to/tool_runtime/) for more
 
 You can use the `RunnableConfig` object to pass custom run time values to tools.
 
-If you need to access the [RunnableConfig](/docs/concepts/runnables/#RunnableConfig) object from within a tool. This can be done by using the `RunnableConfig` annotation in the tool's function signature.
+If you need to access the [RunnableConfig](/docs/concepts/runnables/#runnableconfig) object from within a tool, this can be done by using the `RunnableConfig` annotation in the tool's function signature.
 
 ```python
 from langchain_core.runnables import RunnableConfig

diff --git a/docs/docs/concepts/vectorstores.mdx b/docs/docs/concepts/vectorstores.mdx
@@ -186,6 +186,6 @@ See this [how-to guide on hybrid search](/docs/how_to/hybrid/) for more details.
 | Name                                                                                                              | When to use                                           | Description                                                                                                                                  |
 |-------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------|
 | [Hybrid search](/docs/integrations/retrievers/pinecone_hybrid_search/)                                            | When combining keyword-based and semantic similarity. | Hybrid search combines keyword and semantic similarity, marrying the benefits of both approaches. [Paper](https://arxiv.org/abs/2210.11934). |
-| [Maximal Marginal Relevance (MMR)](/docs/integrations/vectorstores/pinecone/#maximal-marginal-relevance-searches) | When needing to diversify search results.             | MMR attempts to diversify the results of a search to avoid returning similar and redundant documents.                                        |
+| [Maximal Marginal Relevance (MMR)](https://python.langchain.com/api_reference/pinecone/vectorstores/langchain_pinecone.vectorstores.PineconeVectorStore.html#langchain_pinecone.vectorstores.PineconeVectorStore.max_marginal_relevance_search) | When needing to diversify search results.             | MMR attempts to diversify the results of a search to avoid returning similar and redundant documents.                                        |
 
 
diff --git a/docs/docs/how_to/agent_executor.ipynb b/docs/docs/how_to/agent_executor.ipynb
@@ -18,7 +18,7 @@
     "# Build an Agent with AgentExecutor (Legacy)\n",
     "\n",
     ":::important\n",
-    "This section will cover building with the legacy LangChain AgentExecutor. These are fine for getting started, but past a certain point, you will likely want flexibility and control that they do not offer. For working with more advanced agents, we'd recommend checking out [LangGraph Agents](/docs/concepts/#langgraph) or the [migration guide](/docs/how_to/migrate_agent/)\n",
+    "This section will cover building with the legacy LangChain AgentExecutor. These are fine for getting started, but past a certain point, you will likely want flexibility and control that they do not offer. For working with more advanced agents, we'd recommend checking out [LangGraph Agents](/docs/concepts/architecture/#langgraph) or the [migration guide](/docs/how_to/migrate_agent/)\n",
     ":::\n",
     "\n",
     "By themselves, language models can't take actions - they just output text.\n",
@@ -802,7 +802,7 @@
     "That's a wrap! In this quick start we covered how to create a simple agent. Agents are a complex topic, and there's lot to learn! \n",
     "\n",
     ":::important\n",
-    "This section covered building with LangChain Agents. LangChain Agents are fine for getting started, but past a certain point you will likely want flexibility and control that they do not offer. For working with more advanced agents, we'd reccommend checking out [LangGraph](/docs/concepts/#langgraph)\n",
+    "This section covered building with LangChain Agents. LangChain Agents are fine for getting started, but past a certain point you will likely want flexibility and control that they do not offer. For working with more advanced agents, we'd recommend checking out [LangGraph](/docs/concepts/architecture/#langgraph)\n",
     ":::\n",
     "\n",
     "If you want to continue using LangChain agents, some good advanced guides are:\n",

diff --git a/docs/docs/how_to/qa_chat_history_how_to.ipynb b/docs/docs/how_to/qa_chat_history_how_to.ipynb
@@ -686,7 +686,7 @@
    "source": [
     "### Agent constructor\n",
     "\n",
-    "Now that we have defined the tools and the LLM, we can create the agent. We will be using [LangGraph](/docs/concepts/#langgraph) to construct the agent. \n",
+    "Now that we have defined the tools and the LLM, we can create the agent. We will be using [LangGraph](/docs/concepts/architecture/#langgraph) to construct the agent. \n",
     "Currently we are using a high level interface to construct the agent, but the nice thing about LangGraph is that this high-level interface is backed by a low-level, highly controllable API in case you want to modify the agent logic."
    ]
   },

diff --git a/docs/docs/how_to/structured_output.ipynb b/docs/docs/how_to/structured_output.ipynb
@@ -556,7 +556,7 @@
    "id": "498d893b-ceaa-47ff-a9d8-4faa60702715",
    "metadata": {},
    "source": [
-    "For more on few shot prompting when using tool calling, see [here](/docs/how_to/function_calling/#Few-shot-prompting)."
+    "For more on few shot prompting when using tool calling, see [here](/docs/how_to/tools_few_shot/)."
    ]
   },
   {

diff --git a/docs/docs/integrations/chat/naver.ipynb b/docs/docs/integrations/chat/naver.ipynb
@@ -17,7 +17,7 @@
    "source": [
     "# ChatClovaX\n",
     "\n",
-    "This notebook provides a quick overview for getting started with Naver’s HyperCLOVA X [chat models](https://python.langchain.com/docs/concepts/#chat-models) via CLOVA Studio. For detailed documentation of all ChatClovaX features and configurations head to the [API reference](https://python.langchain.com/api_reference/community/chat_models/langchain_community.chat_models.naver.ChatClovaX.html).\n",
+    "This notebook provides a quick overview for getting started with Naver’s HyperCLOVA X [chat models](https://python.langchain.com/docs/concepts/chat_models) via CLOVA Studio. For detailed documentation of all ChatClovaX features and configurations head to the [API reference](https://python.langchain.com/api_reference/community/chat_models/langchain_community.chat_models.naver.ChatClovaX.html).\n",
     "\n",
     "[CLOVA Studio](http://clovastudio.ncloud.com/) has several chat models. You can find information about latest models and their costs, context windows, and supported input types in the CLOVA Studio API Guide [documentation](https://api.ncloud-docs.com/docs/clovastudio-chatcompletions).\n",
     "\n",

diff --git a/docs/docs/integrations/chat/writer.ipynb b/docs/docs/integrations/chat/writer.ipynb
@@ -17,7 +17,7 @@
    "source": [
     "# ChatWriter\n",
     "\n",
-    "This notebook provides a quick overview for getting started with Writer [chat models](/docs/concepts/#chat-models).\n",
+    "This notebook provides a quick overview for getting started with Writer [chat models](/docs/concepts/chat_models).\n",
     "\n",
     "Writer has several chat models. You can find information about their latest models and their costs, context windows, and supported input types in the [Writer docs](https://dev.writer.com/home/models).\n",
     "\n",

diff --git a/docs/docs/integrations/document_loaders/parsers/azure_openai_whisper_parser.ipynb b/docs/docs/integrations/document_loaders/parsers/azure_openai_whisper_parser.ipynb
@@ -0,0 +1,192 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Azure OpenAI Whisper Parser"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    ">[Azure OpenAI Whisper Parser](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/whisper-overview) is a wrapper around the Azure OpenAI Whisper API which utilizes machine learning to transcribe audio files to English text. \n",
+    ">\n",
+    ">The Parser supports `.mp3`, `.mp4`, `.mpeg`, `.mpga`, `.m4a`, `.wav`, and `.webm`.\n",
+    "\n",
+    "The current implementation follows LangChain core principles and can be used with other loaders to handle both audio downloading and parsing. As a result of this the parser will `yield` an `Iterator[Document]`."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Prerequisites"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The service requires Azure credentials, Azure endpoint and Whisper Model deployment, which can be set up by following the guide [here](https://learn.microsoft.com/en-us/azure/ai-services/openai/whisper-quickstart?tabs=command-line%2Cpython-new%2Cjavascript&pivots=programming-language-python). Furthermore, the required dependencies must be installed.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%pip install -Uq  langchain langchain-community openai"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Example 1"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The `AzureOpenAIWhisperParser`'s method, `.lazy_parse`, accepts a `Blob` object as a parameter containing the file path of the file to be transcribed."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from langchain_core.documents.base import Blob\n",
+    "\n",
+    "audio_path = \"path/to/your/audio/file\"\n",
+    "audio_blob = Blob(path=audio_path)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from langchain_community.document_loaders.parsers.audio import AzureOpenAIWhisperParser\n",
+    "\n",
+    "endpoint = \"<your_endpoint>\"\n",
+    "key = \"<your_api_key>\"\n",
+    "version = \"<your_api_version>\"\n",
+    "name = \"<your_deployment_name>\"\n",
+    "\n",
+    "parser = AzureOpenAIWhisperParser(\n",
+    "    api_key=key, azure_endpoint=endpoint, api_version=version, deployment_name=name\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "documents = parser.lazy_parse(blob=audio_blob)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "for doc in documents:\n",
+    "    print(doc.page_content)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Example 2"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The `AzureOpenAIWhisperParser` can also be used in conjunction with audio loaders, like the `YoutubeAudioLoader` with a `GenericLoader`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from langchain_community.document_loaders.blob_loaders.youtube_audio import (\n",
+    "    YoutubeAudioLoader,\n",
+    ")\n",
+    "from langchain_community.document_loaders.generic import GenericLoader"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Must be a list\n",
+    "url = [\"www.youtube.url.com\"]\n",
+    "\n",
+    "save_dir = \"save/directory/\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "name = \"<your_deployment_name>\"\n",
+    "\n",
+    "loader = GenericLoader(\n",
+    "    YoutubeAudioLoader(url, save_dir), AzureOpenAIWhisperParser(deployment_name=name)\n",
+    ")\n",
+    "\n",
+    "docs = loader.load()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "for doc in docs:\n",
+    "    print(doc.page_content)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}