Skip to content

Commit

Permalink
Merge branch 'master' into eugene/check_links_2
Browse files Browse the repository at this point in the history
  • Loading branch information
eyurtsev authored Oct 31, 2024
2 parents cb89f82 + 71f590d commit e48f154
Show file tree
Hide file tree
Showing 54 changed files with 2,124 additions and 1,442 deletions.
16 changes: 11 additions & 5 deletions .github/scripts/prep_api_docs_build.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,20 @@ def load_packages_yaml() -> Dict[str, Any]:
return yaml.safe_load(f)


def get_target_dir(package_name: str) -> Path:
    """Get the target directory for a given package.

    The ``"langchain-"`` substring is removed from ``package_name`` to form
    the short name. The ``experimental`` package lives directly under
    ``langchain/libs``; every other package is placed under
    ``langchain/libs/partners/<short name>``.

    Args:
        package_name: Package name, e.g. ``"langchain-openai"``.

    Returns:
        Relative path of the directory the package should occupy.
    """
    package_name_short = package_name.replace("langchain-", "")
    base_path = Path("langchain/libs")
    # "experimental" is the one package that is not a partner integration.
    if package_name_short == "experimental":
        return base_path / "experimental"
    return base_path / "partners" / package_name_short


def clean_target_directories(packages: Dict[str, Any]) -> None:
"""Remove old directories that will be replaced."""
base_path = Path("langchain/libs/partners")
for package in packages["packages"]:
if package["repo"] != "langchain-ai/langchain":
package_name = package["name"].replace("langchain-", "")
target_dir = base_path / package_name
target_dir = get_target_dir(package["name"])
if target_dir.exists():
print(f"Removing {target_dir}")
shutil.rmtree(target_dir)
Expand All @@ -36,9 +43,8 @@ def move_libraries(packages: Dict[str, Any]) -> None:
continue

repo_name = package["repo"].split("/")[1]
package_name = package["name"].replace("langchain-", "")
source_path = package["path"]
target_dir = f"langchain/libs/partners/{package_name}"
target_dir = get_target_dir(package["name"])

# Handle root path case
if source_path == ".":
Expand Down
16 changes: 9 additions & 7 deletions .github/workflows/api_doc_build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,15 +23,19 @@ jobs:
path: langchain-api-docs-html
token: ${{ secrets.TOKEN_GITHUB_API_DOCS_HTML }}

- name: Install yq
run: |
sudo wget -qO /usr/local/bin/yq https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64
sudo chmod a+x /usr/local/bin/yq
- name: Get repos with yq
id: get-unsorted-repos
uses: mikefarah/yq@master
with:
cmd: yq '.packages[].repo' langchain/libs/packages.yml

- name: Parse YAML and checkout repos
env:
REPOS_UNSORTED: ${{ steps.get-unsorted-repos.outputs.result }}
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
# Get unique repositories
REPOS=$(yq '.packages[].repo' langchain/libs/packages.yml | sort -u)
REPOS=$(echo "$REPOS_UNSORTED" | sort -u)
# Checkout each unique repository
for repo in $REPOS; do
Expand All @@ -41,8 +45,6 @@ jobs:
git clone --depth 1 https://github.com/$repo.git $REPO_NAME
fi
done
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Set up Python ${{ env.PYTHON_VERSION }} + Poetry ${{ env.POETRY_VERSION }}
uses: "./langchain/.github/actions/poetry_setup"
Expand Down
10 changes: 6 additions & 4 deletions docs/api_reference/create_api_rst.py
Original file line number Diff line number Diff line change
Expand Up @@ -601,9 +601,11 @@ def _build_index(dirs: List[str]) -> None:
]
for header_name, dir_ in sorted(
zip(integration_headers, integrations),
key=lambda h_d: integrations_to_show.index(h_d[1])
if h_d[1] in integrations_to_show
else len(integrations_to_show),
key=lambda h_d: (
integrations_to_show.index(h_d[1])
if h_d[1] in integrations_to_show
else len(integrations_to_show)
),
)[: len(integrations_to_show)]:
integration_grid += f'\n- header: "**{header_name}**"\n content: {_package_namespace(dir_).replace("_", "-")} {_get_package_version(_package_dir(dir_))}\n link: {dir_.replace("-", "_")}/index.html'
doc += f"""## Integrations
Expand Down Expand Up @@ -648,7 +650,7 @@ def main(dirs: Optional[list] = None) -> None:
dirs = [
dir_
for dir_ in os.listdir(ROOT_DIR / "libs")
if dir_ not in ("cli", "partners", "standard-tests")
if dir_ not in ("cli", "partners", "standard-tests", "packages.yml")
]
dirs += [
dir_
Expand Down
4 changes: 2 additions & 2 deletions docs/docs/concepts/chat_models.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ A semantic cache introduces a dependency on another model on the critical path o

However, there might be situations where caching chat model responses is beneficial. For example, if you have a chat model that is used to answer frequently asked questions, caching responses can help reduce the load on the model provider and improve response times.

Please see the [how to cache chat model responses](/docs/how_to/#chat-model-caching) guide for more details.
Please see the [how to cache chat model responses](/docs/how_to/chat_model_caching/) guide for more details.

## Related resources

Expand All @@ -165,4 +165,4 @@ Please see the [how to cache chat model responses](/docs/how_to/#chat-model-cach
* [Tool calling](/docs/concepts#tool-calling)
* [Multimodality](/docs/concepts/multimodality)
* [Structured outputs](/docs/concepts#structured_output)
* [Tokens](/docs/concepts/tokens)
* [Tokens](/docs/concepts/tokens)
2 changes: 1 addition & 1 deletion docs/docs/concepts/runnables.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ This guide covers the main concepts and methods of the Runnable interface, which
The Runnable way defines a standard interface that allows a Runnable component to be:

* [Invoked](/docs/how_to/lcel_cheatsheet/#invoke-a-runnable): A single input is transformed into an output.
* [Batched](/docs/how_to/lcel_cheatsheet/#batch-a-runnable/): Multiple inputs are efficiently transformed into outputs.
* [Batched](/docs/how_to/lcel_cheatsheet/#batch-a-runnable): Multiple inputs are efficiently transformed into outputs.
* [Streamed](/docs/how_to/lcel_cheatsheet/#stream-a-runnable): Outputs are streamed as they are produced.
* Inspected: Schematic information about Runnable's input, output, and configuration can be accessed.
* Composed: Multiple Runnables can be composed to work together using [the LangChain Expression Language (LCEL)](/docs/concepts/lcel) to create complex pipelines.
Expand Down
2 changes: 1 addition & 1 deletion docs/docs/concepts/tools.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ See [how to pass run time values to tools](/docs/how_to/tool_runtime/) for more

You can use the `RunnableConfig` object to pass custom run time values to tools.

If you need to access the [RunnableConfig](/docs/concepts/runnables/#RunnableConfig) object from within a tool. This can be done by using the `RunnableConfig` annotation in the tool's function signature.
If you need to access the [RunnableConfig](/docs/concepts/runnables/#runnableconfig) object from within a tool, you can do so by using the `RunnableConfig` annotation in the tool's function signature.

```python
from langchain_core.runnables import RunnableConfig
Expand Down
2 changes: 1 addition & 1 deletion docs/docs/concepts/vectorstores.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,6 @@ See this [how-to guide on hybrid search](/docs/how_to/hybrid/) for more details.
| Name | When to use | Description |
|-------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------|
| [Hybrid search](/docs/integrations/retrievers/pinecone_hybrid_search/) | When combining keyword-based and semantic similarity. | Hybrid search combines keyword and semantic similarity, marrying the benefits of both approaches. [Paper](https://arxiv.org/abs/2210.11934). |
| [Maximal Marginal Relevance (MMR)](/docs/integrations/vectorstores/pinecone/#maximal-marginal-relevance-searches) | When needing to diversify search results. | MMR attempts to diversify the results of a search to avoid returning similar and redundant documents. |
| [Maximal Marginal Relevance (MMR)](https://python.langchain.com/api_reference/pinecone/vectorstores/langchain_pinecone.vectorstores.PineconeVectorStore.html#langchain_pinecone.vectorstores.PineconeVectorStore.max_marginal_relevance_search) | When needing to diversify search results. | MMR attempts to diversify the results of a search to avoid returning similar and redundant documents. |


4 changes: 2 additions & 2 deletions docs/docs/how_to/agent_executor.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
"# Build an Agent with AgentExecutor (Legacy)\n",
"\n",
":::important\n",
"This section will cover building with the legacy LangChain AgentExecutor. These are fine for getting started, but past a certain point, you will likely want flexibility and control that they do not offer. For working with more advanced agents, we'd recommend checking out [LangGraph Agents](/docs/concepts/#langgraph) or the [migration guide](/docs/how_to/migrate_agent/)\n",
"This section will cover building with the legacy LangChain AgentExecutor. These are fine for getting started, but past a certain point, you will likely want flexibility and control that they do not offer. For working with more advanced agents, we'd recommend checking out [LangGraph Agents](/docs/concepts/architecture/#langgraph) or the [migration guide](/docs/how_to/migrate_agent/)\n",
":::\n",
"\n",
"By themselves, language models can't take actions - they just output text.\n",
Expand Down Expand Up @@ -802,7 +802,7 @@
"That's a wrap! In this quick start we covered how to create a simple agent. Agents are a complex topic, and there's a lot to learn! \n",
"\n",
":::important\n",
"This section covered building with LangChain Agents. LangChain Agents are fine for getting started, but past a certain point you will likely want flexibility and control that they do not offer. For working with more advanced agents, we'd reccommend checking out [LangGraph](/docs/concepts/#langgraph)\n",
"This section covered building with LangChain Agents. LangChain Agents are fine for getting started, but past a certain point you will likely want flexibility and control that they do not offer. For working with more advanced agents, we'd recommend checking out [LangGraph](/docs/concepts/architecture/#langgraph)\n",
":::\n",
"\n",
"If you want to continue using LangChain agents, some good advanced guides are:\n",
Expand Down
2 changes: 1 addition & 1 deletion docs/docs/how_to/qa_chat_history_how_to.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -686,7 +686,7 @@
"source": [
"### Agent constructor\n",
"\n",
"Now that we have defined the tools and the LLM, we can create the agent. We will be using [LangGraph](/docs/concepts/#langgraph) to construct the agent. \n",
"Now that we have defined the tools and the LLM, we can create the agent. We will be using [LangGraph](/docs/concepts/architecture/#langgraph) to construct the agent. \n",
"Currently we are using a high level interface to construct the agent, but the nice thing about LangGraph is that this high-level interface is backed by a low-level, highly controllable API in case you want to modify the agent logic."
]
},
Expand Down
2 changes: 1 addition & 1 deletion docs/docs/how_to/structured_output.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -556,7 +556,7 @@
"id": "498d893b-ceaa-47ff-a9d8-4faa60702715",
"metadata": {},
"source": [
"For more on few shot prompting when using tool calling, see [here](/docs/how_to/function_calling/#Few-shot-prompting)."
"For more on few shot prompting when using tool calling, see [here](/docs/how_to/tools_few_shot/)."
]
},
{
Expand Down
2 changes: 1 addition & 1 deletion docs/docs/integrations/chat/naver.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
"source": [
"# ChatClovaX\n",
"\n",
"This notebook provides a quick overview for getting started with Naver’s HyperCLOVA X [chat models](https://python.langchain.com/docs/concepts/#chat-models) via CLOVA Studio. For detailed documentation of all ChatClovaX features and configurations head to the [API reference](https://python.langchain.com/api_reference/community/chat_models/langchain_community.chat_models.naver.ChatClovaX.html).\n",
"This notebook provides a quick overview for getting started with Naver’s HyperCLOVA X [chat models](https://python.langchain.com/docs/concepts/chat_models) via CLOVA Studio. For detailed documentation of all ChatClovaX features and configurations head to the [API reference](https://python.langchain.com/api_reference/community/chat_models/langchain_community.chat_models.naver.ChatClovaX.html).\n",
"\n",
"[CLOVA Studio](http://clovastudio.ncloud.com/) has several chat models. You can find information about latest models and their costs, context windows, and supported input types in the CLOVA Studio API Guide [documentation](https://api.ncloud-docs.com/docs/clovastudio-chatcompletions).\n",
"\n",
Expand Down
2 changes: 1 addition & 1 deletion docs/docs/integrations/chat/writer.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
"source": [
"# ChatWriter\n",
"\n",
"This notebook provides a quick overview for getting started with Writer [chat models](/docs/concepts/#chat-models).\n",
"This notebook provides a quick overview for getting started with Writer [chat models](/docs/concepts/chat_models).\n",
"\n",
"Writer has several chat models. You can find information about their latest models and their costs, context windows, and supported input types in the [Writer docs](https://dev.writer.com/home/models).\n",
"\n",
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,192 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Azure OpenAI Whisper Parser"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
">[Azure OpenAI Whisper Parser](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/whisper-overview) is a wrapper around the Azure OpenAI Whisper API which utilizes machine learning to transcribe audio files to English text. \n",
">\n",
">The Parser supports `.mp3`, `.mp4`, `.mpeg`, `.mpga`, `.m4a`, `.wav`, and `.webm`.\n",
"\n",
"The current implementation follows LangChain core principles and can be used with other loaders to handle both audio downloading and parsing. As a result of this the parser will `yield` an `Iterator[Document]`."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Prerequisites"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The service requires Azure credentials, Azure endpoint and Whisper Model deployment, which can be set up by following the guide [here](https://learn.microsoft.com/en-us/azure/ai-services/openai/whisper-quickstart?tabs=command-line%2Cpython-new%2Cjavascript&pivots=programming-language-python). Furthermore, the required dependencies must be installed.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%pip install -Uq langchain langchain-community openai"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Example 1"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The `AzureOpenAIWhisperParser`'s method, `.lazy_parse`, accepts a `Blob` object as a parameter containing the file path of the file to be transcribed."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from langchain_core.documents.base import Blob\n",
"\n",
"audio_path = \"path/to/your/audio/file\"\n",
"audio_blob = Blob(path=audio_path)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from langchain_community.document_loaders.parsers.audio import AzureOpenAIWhisperParser\n",
"\n",
"endpoint = \"<your_endpoint>\"\n",
"key = \"<your_api_key>\"\n",
"version = \"<your_api_version>\"\n",
"name = \"<your_deployment_name>\"\n",
"\n",
"parser = AzureOpenAIWhisperParser(\n",
" api_key=key, azure_endpoint=endpoint, api_version=version, deployment_name=name\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"documents = parser.lazy_parse(blob=audio_blob)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"for doc in documents:\n",
" print(doc.page_content)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Example 2"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The `AzureOpenAIWhisperParser` can also be used in conjunction with audio loaders, like the `YoutubeAudioLoader` with a `GenericLoader`."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from langchain_community.document_loaders.blob_loaders.youtube_audio import (\n",
" YoutubeAudioLoader,\n",
")\n",
"from langchain_community.document_loaders.generic import GenericLoader"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Must be a list\n",
"url = [\"www.youtube.url.com\"]\n",
"\n",
"save_dir = \"save/directory/\""
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"name = \"<your_deployment_name>\"\n",
"\n",
"loader = GenericLoader(\n",
" YoutubeAudioLoader(url, save_dir), AzureOpenAIWhisperParser(deployment_name=name)\n",
")\n",
"\n",
"docs = loader.load()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"for doc in docs:\n",
" print(doc.page_content)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Loading

0 comments on commit e48f154

Please sign in to comment.