From 50aee687c60ec3ca5be0887a696e30ca3200ec31 Mon Sep 17 00:00:00 2001 From: geret1 <75851744+geret1@users.noreply.github.com> Date: Mon, 4 Dec 2023 18:26:32 +0100 Subject: [PATCH 01/49] langchain[patch]: Cerebrium model_api_request deprecation (#12704) - **Description:** As part of my conversation with Cerebrium team, `model_api_request` will be no longer available in cerebrium lib so it needs to be replaced. - **Issue:** #12705 12705, - **Dependencies:** Cerebrium team (agreed) - **Tag maintainer:** @eyurtsev - **Twitter handle:** No official Twitter account sorry :D --------- Co-authored-by: Bagatur --- libs/langchain/langchain/llms/cerebriumai.py | 38 ++++++++++---------- 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/libs/langchain/langchain/llms/cerebriumai.py b/libs/langchain/langchain/llms/cerebriumai.py index 00fe2c1683f16..0a162f5dfeaa4 100644 --- a/libs/langchain/langchain/llms/cerebriumai.py +++ b/libs/langchain/langchain/llms/cerebriumai.py @@ -1,6 +1,7 @@ import logging from typing import Any, Dict, List, Mapping, Optional +import requests from langchain_core.pydantic_v1 import Extra, Field, root_validator from langchain.callbacks.manager import CallbackManagerForLLMRun @@ -89,24 +90,21 @@ def _call( run_manager: Optional[CallbackManagerForLLMRun] = None, **kwargs: Any, ) -> str: - """Call to CerebriumAI endpoint.""" - try: - from cerebrium import model_api_request - except ImportError: - raise ValueError( - "Could not import cerebrium python package. " - "Please install it with `pip install cerebrium`." 
- ) - + headers: Dict = { + "Authorization": self.cerebriumai_api_key, + "Content-Type": "application/json", + } params = self.model_kwargs or {} - response = model_api_request( - self.endpoint_url, - {"prompt": prompt, **params, **kwargs}, - self.cerebriumai_api_key, - ) - text = response["data"]["result"] - if stop is not None: - # I believe this is required since the stop tokens - # are not enforced by the model parameters - text = enforce_stop_tokens(text, stop) - return text + payload = {"prompt": prompt, **params, **kwargs} + response = requests.post(self.endpoint_url, json=payload, headers=headers) + if response.status_code == 200: + data = response.json() + text = data["result"] + if stop is not None: + # I believe this is required since the stop tokens + # are not enforced by the model parameters + text = enforce_stop_tokens(text, stop) + return text + else: + response.raise_for_status() + return "" From 6ce5dab38c349682f5537cc3e0d95f9ddb4f6b41 Mon Sep 17 00:00:00 2001 From: James Braza Date: Mon, 4 Dec 2023 14:33:22 -0500 Subject: [PATCH 02/49] Clarifying descriptions in `GuardrailsOutputParser` (#14228) Upstreaming knowledge from https://github.com/guardrails-ai/guardrails/discussions/473 to LangChain --- libs/langchain/langchain/output_parsers/rail_parser.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/libs/langchain/langchain/output_parsers/rail_parser.py b/libs/langchain/langchain/output_parsers/rail_parser.py index 2f6c7b7f0a75b..f0cabc13eb553 100644 --- a/libs/langchain/langchain/output_parsers/rail_parser.py +++ b/libs/langchain/langchain/output_parsers/rail_parser.py @@ -11,11 +11,11 @@ class GuardrailsOutputParser(BaseOutputParser): guard: Any """The Guardrails object.""" api: Optional[Callable] - """The API to use for the Guardrails object.""" + """The LLM API passed to Guardrails during parsing. 
An example is `openai.completions.create`.""" # noqa: E501 args: Any - """The arguments to pass to the API.""" + """Positional arguments to pass to the above LLM API callable.""" kwargs: Any - """The keyword arguments to pass to the API.""" + """Keyword arguments to pass to the above LLM API callable.""" @property def _type(self) -> str: From 6826feea147ce7761f9d27fe0fe7c3566e19ed95 Mon Sep 17 00:00:00 2001 From: Unai Garay Maestre Date: Mon, 4 Dec 2023 20:34:01 +0100 Subject: [PATCH 03/49] Adds `llm_chain_kwargs` to `BaseRetrievalQA.from_llm` (#14224) - **Description:** Adds `llm_chain_kwargs` to `BaseRetrievalQA.from_llm` so these can be passed to the LLM at runtime, - **Issue:** https://github.com/langchain-ai/langchain/issues/14216, --------- Signed-off-by: ugm2 --- libs/langchain/langchain/chains/retrieval_qa/base.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/libs/langchain/langchain/chains/retrieval_qa/base.py b/libs/langchain/langchain/chains/retrieval_qa/base.py index a7558b87375e6..185900ca32c2b 100644 --- a/libs/langchain/langchain/chains/retrieval_qa/base.py +++ b/libs/langchain/langchain/chains/retrieval_qa/base.py @@ -68,11 +68,14 @@ def from_llm( llm: BaseLanguageModel, prompt: Optional[PromptTemplate] = None, callbacks: Callbacks = None, + llm_chain_kwargs: Optional[dict] = None, **kwargs: Any, ) -> BaseRetrievalQA: """Initialize from LLM.""" _prompt = prompt or PROMPT_SELECTOR.get_prompt(llm) - llm_chain = LLMChain(llm=llm, prompt=_prompt, callbacks=callbacks) + llm_chain = LLMChain( + llm=llm, prompt=_prompt, callbacks=callbacks, **(llm_chain_kwargs or {}) + ) document_prompt = PromptTemplate( input_variables=["page_content"], template="Context:\n{page_content}" ) From ca8a022cd937ba398bb5544f4428f6ceafe56b84 Mon Sep 17 00:00:00 2001 From: Hieu Lam Date: Tue, 5 Dec 2023 02:43:04 +0700 Subject: [PATCH 04/49] Fixed OpenAIFunctionsAgent not returning when receiving AgentFinish (#14236) **Description:** The way the condition 
is checked in the `return_stopped_response` function of `OpenAIAgent` may not be correct, when the value returned is `AgentFinish` from the tools it does not work properly. Thanks for review, @baskaryan, @eyurtsev, @hwchase17. --- libs/langchain/langchain/agents/openai_functions_agent/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/langchain/langchain/agents/openai_functions_agent/base.py b/libs/langchain/langchain/agents/openai_functions_agent/base.py index 9045d836c8feb..1af9ff39ac0d6 100644 --- a/libs/langchain/langchain/agents/openai_functions_agent/base.py +++ b/libs/langchain/langchain/agents/openai_functions_agent/base.py @@ -162,7 +162,7 @@ def return_stopped_response( agent_decision = self.plan( intermediate_steps, with_functions=False, **kwargs ) - if type(agent_decision) == AgentFinish: + if isinstance(agent_decision, AgentFinish): return agent_decision else: raise ValueError( From 8504ec56e4fc25308ba5baa4beaca944d9ff3371 Mon Sep 17 00:00:00 2001 From: umair mehmood Date: Tue, 5 Dec 2023 00:53:34 +0500 Subject: [PATCH 05/49] fixed: ModuleNotFoundError: No module named 'clarifai.auth' (#14215) Updated the clarifai imports fixed: #14175 @efriis @baskaryan --- libs/langchain/langchain/embeddings/clarifai.py | 2 +- libs/langchain/langchain/llms/clarifai.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/libs/langchain/langchain/embeddings/clarifai.py b/libs/langchain/langchain/embeddings/clarifai.py index 805cb04e6aade..2f54bf5138fe9 100644 --- a/libs/langchain/langchain/embeddings/clarifai.py +++ b/libs/langchain/langchain/embeddings/clarifai.py @@ -64,8 +64,8 @@ def validate_environment(cls, values: Dict) -> Dict: raise ValueError("Please provide a model_id.") try: - from clarifai.auth.helper import ClarifaiAuthHelper from clarifai.client import create_stub + from clarifai.client.auth.helper import ClarifaiAuthHelper except ImportError: raise ImportError( "Could not import clarifai python package. 
" diff --git a/libs/langchain/langchain/llms/clarifai.py b/libs/langchain/langchain/llms/clarifai.py index 632d45fe06c45..40fe0c536fcd9 100644 --- a/libs/langchain/langchain/llms/clarifai.py +++ b/libs/langchain/langchain/llms/clarifai.py @@ -71,8 +71,8 @@ def validate_environment(cls, values: Dict) -> Dict: raise ValueError("Please provide a model_id.") try: - from clarifai.auth.helper import ClarifaiAuthHelper from clarifai.client import create_stub + from clarifai.client.auth.helper import ClarifaiAuthHelper except ImportError: raise ImportError( "Could not import clarifai python package. " From e32185193e36cdab63f4fb9f8ccdde99e2f9a155 Mon Sep 17 00:00:00 2001 From: Harrison Chase Date: Mon, 4 Dec 2023 11:58:52 -0800 Subject: [PATCH 06/49] Harrison/embass (#14242) Co-authored-by: Julius Lipp --- .../document_loaders/embaas.ipynb | 167 ------------ .../langchain/document_loaders/__init__.py | 3 - .../langchain/document_loaders/embaas.py | 244 ------------------ .../document_loaders/test_embaas.py | 59 ----- .../document_loaders/test_imports.py | 2 - 5 files changed, 475 deletions(-) delete mode 100644 docs/docs/integrations/document_loaders/embaas.ipynb delete mode 100644 libs/langchain/langchain/document_loaders/embaas.py delete mode 100644 libs/langchain/tests/integration_tests/document_loaders/test_embaas.py diff --git a/docs/docs/integrations/document_loaders/embaas.ipynb b/docs/docs/integrations/document_loaders/embaas.ipynb deleted file mode 100644 index 26129822674ce..0000000000000 --- a/docs/docs/integrations/document_loaders/embaas.ipynb +++ /dev/null @@ -1,167 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "# Embaas\n", - "[embaas](https://embaas.io) is a fully managed NLP API service that offers features like embedding generation, document text extraction, document to embeddings and more. 
You can choose a [variety of pre-trained models](https://embaas.io/docs/models/embeddings).\n", - "\n", - "### Prerequisites\n", - "Create a free embaas account at [https://embaas.io/register](https://embaas.io/register) and generate an [API key](https://embaas.io/dashboard/api-keys)\n", - "\n", - "### Document Text Extraction API\n", - "The document text extraction API allows you to extract the text from a given document. The API supports a variety of document formats, including PDF, mp3, mp4 and more. For a full list of supported formats, check out the API docs (link below)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "# Set API key\n", - "embaas_api_key = \"YOUR_API_KEY\"\n", - "# or set environment variable\n", - "os.environ[\"EMBAAS_API_KEY\"] = \"YOUR_API_KEY\"" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "#### Using a blob (bytes)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "from langchain.document_loaders.blob_loaders import Blob\n", - "from langchain.document_loaders.embaas import EmbaasBlobLoader" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "blob_loader = EmbaasBlobLoader()\n", - "blob = Blob.from_path(\"example.pdf\")\n", - "documents = blob_loader.load(blob)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2023-06-12T22:19:48.380467Z", - "start_time": "2023-06-12T22:19:48.366886Z" - }, - "collapsed": false - }, - "outputs": [], - "source": [ - "# You can also directly create embeddings with your preferred embeddings model\n", - "blob_loader = EmbaasBlobLoader(params={\"model\": \"e5-large-v2\", \"should_embed\": True})\n", - "blob = 
Blob.from_path(\"example.pdf\")\n", - "documents = blob_loader.load(blob)\n", - "\n", - "print(documents[0][\"metadata\"][\"embedding\"])" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "#### Using a file" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "from langchain.document_loaders.embaas import EmbaasLoader" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "file_loader = EmbaasLoader(file_path=\"example.pdf\")\n", - "documents = file_loader.load()" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": { - "ExecuteTime": { - "end_time": "2023-06-12T22:24:31.894665Z", - "start_time": "2023-06-12T22:24:31.880857Z" - }, - "collapsed": false - }, - "outputs": [], - "source": [ - "# Disable automatic text splitting\n", - "file_loader = EmbaasLoader(file_path=\"example.mp3\", params={\"should_chunk\": False})\n", - "documents = file_loader.load()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "For more detailed information about the embaas document text extraction API, please refer to [the official embaas API documentation](https://embaas.io/api-reference)." 
- ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 2 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.6" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} diff --git a/libs/langchain/langchain/document_loaders/__init__.py b/libs/langchain/langchain/document_loaders/__init__.py index 119496f9c66e5..d52d3b955b072 100644 --- a/libs/langchain/langchain/document_loaders/__init__.py +++ b/libs/langchain/langchain/document_loaders/__init__.py @@ -77,7 +77,6 @@ OutlookMessageLoader, UnstructuredEmailLoader, ) -from langchain.document_loaders.embaas import EmbaasBlobLoader, EmbaasLoader from langchain.document_loaders.epub import UnstructuredEPubLoader from langchain.document_loaders.etherscan import EtherscanLoader from langchain.document_loaders.evernote import EverNoteLoader @@ -259,8 +258,6 @@ "Docx2txtLoader", "DropboxLoader", "DuckDBLoader", - "EmbaasBlobLoader", - "EmbaasLoader", "EtherscanLoader", "EverNoteLoader", "FacebookChatLoader", diff --git a/libs/langchain/langchain/document_loaders/embaas.py b/libs/langchain/langchain/document_loaders/embaas.py deleted file mode 100644 index 6f4fc800a1266..0000000000000 --- a/libs/langchain/langchain/document_loaders/embaas.py +++ /dev/null @@ -1,244 +0,0 @@ -import base64 -import warnings -from typing import Any, Dict, Iterator, List, Optional - -import requests -from langchain_core.documents import Document -from langchain_core.pydantic_v1 import BaseModel, root_validator, validator -from typing_extensions import NotRequired, TypedDict - -from langchain.document_loaders.base import BaseBlobParser, BaseLoader -from langchain.document_loaders.blob_loaders import Blob -from langchain.text_splitter import TextSplitter -from langchain.utils import 
get_from_dict_or_env - -EMBAAS_DOC_API_URL = "https://api.embaas.io/v1/document/extract-text/bytes/" - - -class EmbaasDocumentExtractionParameters(TypedDict): - """Parameters for the embaas document extraction API.""" - - mime_type: NotRequired[str] - """The mime type of the document.""" - file_extension: NotRequired[str] - """The file extension of the document.""" - file_name: NotRequired[str] - """The file name of the document.""" - - should_chunk: NotRequired[bool] - """Whether to chunk the document into pages.""" - chunk_size: NotRequired[int] - """The maximum size of the text chunks.""" - chunk_overlap: NotRequired[int] - """The maximum overlap allowed between chunks.""" - chunk_splitter: NotRequired[str] - """The text splitter class name for creating chunks.""" - separators: NotRequired[List[str]] - """The separators for chunks.""" - - should_embed: NotRequired[bool] - """Whether to create embeddings for the document in the response.""" - model: NotRequired[str] - """The model to pass to the Embaas document extraction API.""" - instruction: NotRequired[str] - """The instruction to pass to the Embaas document extraction API.""" - - -class EmbaasDocumentExtractionPayload(EmbaasDocumentExtractionParameters): - """Payload for the Embaas document extraction API.""" - - bytes: str - """The base64 encoded bytes of the document to extract text from.""" - - -class BaseEmbaasLoader(BaseModel): - """Base loader for `Embaas` document extraction API.""" - - embaas_api_key: Optional[str] = None - """The API key for the Embaas document extraction API.""" - api_url: str = EMBAAS_DOC_API_URL - """The URL of the Embaas document extraction API.""" - params: EmbaasDocumentExtractionParameters = EmbaasDocumentExtractionParameters() - """Additional parameters to pass to the Embaas document extraction API.""" - - @root_validator(pre=True) - def validate_environment(cls, values: Dict) -> Dict: - """Validate that api key and python package exists in environment.""" - embaas_api_key = 
get_from_dict_or_env( - values, "embaas_api_key", "EMBAAS_API_KEY" - ) - values["embaas_api_key"] = embaas_api_key - return values - - -class EmbaasBlobLoader(BaseEmbaasLoader, BaseBlobParser): - """Load `Embaas` blob. - - To use, you should have the - environment variable ``EMBAAS_API_KEY`` set with your API key, or pass - it as a named parameter to the constructor. - - Example: - .. code-block:: python - - # Default parsing - from langchain.document_loaders.embaas import EmbaasBlobLoader - loader = EmbaasBlobLoader() - blob = Blob.from_path(path="example.mp3") - documents = loader.parse(blob=blob) - - # Custom api parameters (create embeddings automatically) - from langchain.document_loaders.embaas import EmbaasBlobLoader - loader = EmbaasBlobLoader( - params={ - "should_embed": True, - "model": "e5-large-v2", - "chunk_size": 256, - "chunk_splitter": "CharacterTextSplitter" - } - ) - blob = Blob.from_path(path="example.pdf") - documents = loader.parse(blob=blob) - """ - - def lazy_parse(self, blob: Blob) -> Iterator[Document]: - """Parses the blob lazily. - - Args: - blob: The blob to parse. 
- """ - yield from self._get_documents(blob=blob) - - @staticmethod - def _api_response_to_documents(chunks: List[Dict[str, Any]]) -> List[Document]: - """Convert the API response to a list of documents.""" - docs = [] - for chunk in chunks: - metadata = chunk["metadata"] - if chunk.get("embedding", None) is not None: - metadata["embedding"] = chunk["embedding"] - doc = Document(page_content=chunk["text"], metadata=metadata) - docs.append(doc) - - return docs - - def _generate_payload(self, blob: Blob) -> EmbaasDocumentExtractionPayload: - """Generates payload for the API request.""" - base64_byte_str = base64.b64encode(blob.as_bytes()).decode() - payload: EmbaasDocumentExtractionPayload = EmbaasDocumentExtractionPayload( - bytes=base64_byte_str, - # Workaround for mypy issue: https://github.com/python/mypy/issues/9408 - # type: ignore - **self.params, - ) - - if blob.mimetype is not None and payload.get("mime_type", None) is None: - payload["mime_type"] = blob.mimetype - - return payload - - def _handle_request( - self, payload: EmbaasDocumentExtractionPayload - ) -> List[Document]: - """Sends a request to the embaas API and handles the response.""" - headers = { - "Authorization": f"Bearer {self.embaas_api_key}", - "Content-Type": "application/json", - } - - response = requests.post(self.api_url, headers=headers, json=payload) - response.raise_for_status() - - parsed_response = response.json() - return EmbaasBlobLoader._api_response_to_documents( - chunks=parsed_response["data"]["chunks"] - ) - - def _get_documents(self, blob: Blob) -> Iterator[Document]: - """Get the documents from the blob.""" - payload = self._generate_payload(blob=blob) - - try: - documents = self._handle_request(payload=payload) - except requests.exceptions.RequestException as e: - if e.response is None or not e.response.text: - raise ValueError( - f"Error raised by Embaas document text extraction API: {e}" - ) - - parsed_response = e.response.json() - if "message" in parsed_response: - 
raise ValueError( - f"Validation Error raised by Embaas document text extraction API:" - f" {parsed_response['message']}" - ) - raise - - yield from documents - - -class EmbaasLoader(BaseEmbaasLoader, BaseLoader): - """Load from `Embaas`. - - To use, you should have the - environment variable ``EMBAAS_API_KEY`` set with your API key, or pass - it as a named parameter to the constructor. - - Example: - .. code-block:: python - - # Default parsing - from langchain.document_loaders.embaas import EmbaasLoader - loader = EmbaasLoader(file_path="example.mp3") - documents = loader.load() - - # Custom api parameters (create embeddings automatically) - from langchain.document_loaders.embaas import EmbaasBlobLoader - loader = EmbaasBlobLoader( - file_path="example.pdf", - params={ - "should_embed": True, - "model": "e5-large-v2", - "chunk_size": 256, - "chunk_splitter": "CharacterTextSplitter" - } - ) - documents = loader.load() - """ - - file_path: str - """The path to the file to load.""" - blob_loader: Optional[EmbaasBlobLoader] - """The blob loader to use. If not provided, a default one will be created.""" - - @validator("blob_loader", always=True) - def validate_blob_loader( - cls, v: EmbaasBlobLoader, values: Dict - ) -> EmbaasBlobLoader: - return v or EmbaasBlobLoader( - embaas_api_key=values["embaas_api_key"], - api_url=values["api_url"], - params=values["params"], - ) - - def lazy_load(self) -> Iterator[Document]: - """Load the documents from the file path lazily.""" - blob = Blob.from_path(path=self.file_path) - - assert self.blob_loader is not None - # Should never be None, but mypy doesn't know that. - yield from self.blob_loader.lazy_parse(blob=blob) - - def load(self) -> List[Document]: - return list(self.lazy_load()) - - def load_and_split( - self, text_splitter: Optional[TextSplitter] = None - ) -> List[Document]: - if self.params.get("should_embed", False): - warnings.warn( - "Embeddings are not supported with load_and_split." 
- " Use the API splitter to properly generate embeddings." - " For more information see embaas.io docs." - ) - return super().load_and_split(text_splitter=text_splitter) diff --git a/libs/langchain/tests/integration_tests/document_loaders/test_embaas.py b/libs/langchain/tests/integration_tests/document_loaders/test_embaas.py deleted file mode 100644 index 2170a143c66ac..0000000000000 --- a/libs/langchain/tests/integration_tests/document_loaders/test_embaas.py +++ /dev/null @@ -1,59 +0,0 @@ -from typing import Any -from unittest.mock import MagicMock, patch - -import responses - -from langchain.document_loaders import EmbaasBlobLoader, EmbaasLoader -from langchain.document_loaders.blob_loaders import Blob -from langchain.document_loaders.embaas import EMBAAS_DOC_API_URL - - -@responses.activate -def test_handle_request() -> None: - responses.add( - responses.POST, - EMBAAS_DOC_API_URL, - json={ - "data": { - "chunks": [ - { - "text": "Hello", - "metadata": {"start_page": 1, "end_page": 2}, - "embeddings": [0.0], - } - ] - } - }, - status=200, - ) - - loader = EmbaasBlobLoader(embaas_api_key="api_key", params={"should_embed": True}) - documents = loader.parse(blob=Blob.from_data(data="Hello")) - assert len(documents) == 1 - assert documents[0].page_content == "Hello" - assert documents[0].metadata["start_page"] == 1 - assert documents[0].metadata["end_page"] == 2 - assert documents[0].metadata["embeddings"] == [0.0] - - -@responses.activate -def test_handle_request_exception() -> None: - responses.add( - responses.POST, - EMBAAS_DOC_API_URL, - json={"message": "Invalid request"}, - status=400, - ) - loader = EmbaasBlobLoader(embaas_api_key="api_key") - try: - loader.parse(blob=Blob.from_data(data="Hello")) - except Exception as e: - assert "Invalid request" in str(e) - - -@patch.object(EmbaasBlobLoader, "_handle_request") -def test_load(mock_handle_request: Any) -> None: - mock_handle_request.return_value = [MagicMock()] - loader = 
EmbaasLoader(file_path="test_embaas.py", embaas_api_key="api_key") - documents = loader.load() - assert len(documents) == 1 diff --git a/libs/langchain/tests/unit_tests/document_loaders/test_imports.py b/libs/langchain/tests/unit_tests/document_loaders/test_imports.py index db754275234ba..5da2e800ab87b 100644 --- a/libs/langchain/tests/unit_tests/document_loaders/test_imports.py +++ b/libs/langchain/tests/unit_tests/document_loaders/test_imports.py @@ -52,8 +52,6 @@ "Docx2txtLoader", "DropboxLoader", "DuckDBLoader", - "EmbaasBlobLoader", - "EmbaasLoader", "EtherscanLoader", "EverNoteLoader", "FacebookChatLoader", From 641e401ba857e7f6f895b54fbb02c0560d2283a5 Mon Sep 17 00:00:00 2001 From: Perry Lee Date: Mon, 4 Dec 2023 12:03:47 -0800 Subject: [PATCH 07/49] Shorten wget commands (#14211) - **Description:** The commands can be more efficient if the output name is set to the destined filename instead of renaming in the second command. --- docs/docs/integrations/toolkits/openapi.ipynb | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/docs/docs/integrations/toolkits/openapi.ipynb b/docs/docs/integrations/toolkits/openapi.ipynb index fa7e5e98304c4..3460495be8c3d 100644 --- a/docs/docs/integrations/toolkits/openapi.ipynb +++ b/docs/docs/integrations/toolkits/openapi.ipynb @@ -93,12 +93,9 @@ } ], "source": [ - "!wget https://raw.githubusercontent.com/openai/openai-openapi/master/openapi.yaml\n", - "!mv openapi.yaml openai_openapi.yaml\n", - "!wget https://www.klarna.com/us/shopping/public/openai/v0/api-docs\n", - "!mv api-docs klarna_openapi.yaml\n", - "!wget https://raw.githubusercontent.com/APIs-guru/openapi-directory/main/APIs/spotify.com/1.0.0/openapi.yaml\n", - "!mv openapi.yaml spotify_openapi.yaml" + "!wget https://raw.githubusercontent.com/openai/openai-openapi/master/openapi.yaml -O openai_openapi.yaml\n", + "!wget https://www.klarna.com/us/shopping/public/openai/v0/api-docs -O klarna_openapi.yaml\n", + "!wget 
https://raw.githubusercontent.com/APIs-guru/openapi-directory/main/APIs/spotify.com/1.0.0/openapi.yaml -O spotify_openapi.yaml" ] }, { From 702a6d7044b01b82730e32dd61c83fd99c6272ab Mon Sep 17 00:00:00 2001 From: Bob Lin Date: Mon, 4 Dec 2023 14:06:04 -0600 Subject: [PATCH 08/49] Closed #14159 (#14165) ### Description Fix: #14159 Use `from pydantic.v1 import BaseModel, Field` instead of `from pydantic import BaseModel, Field` ### [lin_bob57617](https://twitter.com/lin_bob57617) --- docs/docs/use_cases/tagging.ipynb | 32 +++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/docs/docs/use_cases/tagging.ipynb b/docs/docs/use_cases/tagging.ipynb index cffa8bebfd32d..6d820e331b661 100644 --- a/docs/docs/use_cases/tagging.ipynb +++ b/docs/docs/use_cases/tagging.ipynb @@ -58,7 +58,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "id": "bafb496a", "metadata": {}, "outputs": [], @@ -77,7 +77,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 2, "id": "39f3ce3e", "metadata": {}, "outputs": [], @@ -98,7 +98,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "id": "5509b6a6", "metadata": {}, "outputs": [ @@ -108,7 +108,7 @@ "{'sentiment': 'positive', 'language': 'Spanish'}" ] }, - "execution_count": 5, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -120,7 +120,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 5, "id": "9154474c", "metadata": {}, "outputs": [ @@ -130,7 +130,7 @@ "{'sentiment': 'enojado', 'aggressiveness': 1, 'language': 'es'}" ] }, - "execution_count": 6, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -178,7 +178,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 6, "id": "6a5f7961", "metadata": {}, "outputs": [], @@ -201,7 +201,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 7, "id": "e5a5881f", 
"metadata": {}, "outputs": [], @@ -318,17 +318,17 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 5, "id": "bf1f367e", "metadata": {}, "outputs": [], "source": [ - "from pydantic import BaseModel, Field" + "from langchain.pydantic_v1 import BaseModel, Field" ] }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 7, "id": "83a2e826", "metadata": {}, "outputs": [], @@ -347,7 +347,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 8, "id": "6e404892", "metadata": {}, "outputs": [], @@ -357,7 +357,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 9, "id": "b5fc43c4", "metadata": {}, "outputs": [], @@ -368,7 +368,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 10, "id": "5074bcc3", "metadata": {}, "outputs": [ @@ -378,7 +378,7 @@ "Tags(sentiment='sad', aggressiveness=5, language='spanish')" ] }, - "execution_count": 17, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -415,7 +415,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.1" + "version": "3.11.5" } }, "nbformat": 4, From aaabc1574fbeb8e0b4e2d4d9d0660df68b4a4e47 Mon Sep 17 00:00:00 2001 From: Dmitrii Rashchenko Date: Mon, 4 Dec 2023 22:08:51 +0200 Subject: [PATCH 09/49] Support of custom hugging face inference endpoints url (#14125) - **Description:** to support not only publicly available Hugging Face endpoints, but also protected ones (created with "Inference Endpoints" Hugging Face feature), I have added ability to specify custom api_url. 
But if not specified, default behaviour won't change - **Issue:** #9181, - **Dependencies:** no extra dependencies --- libs/langchain/langchain/embeddings/huggingface.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/libs/langchain/langchain/embeddings/huggingface.py b/libs/langchain/langchain/embeddings/huggingface.py index 823e1902a3c15..a835da8775b7d 100644 --- a/libs/langchain/langchain/embeddings/huggingface.py +++ b/libs/langchain/langchain/embeddings/huggingface.py @@ -279,9 +279,15 @@ class HuggingFaceInferenceAPIEmbeddings(BaseModel, Embeddings): """Your API key for the HuggingFace Inference API.""" model_name: str = "sentence-transformers/all-MiniLM-L6-v2" """The name of the model to use for text embeddings.""" + api_url: Optional[str] = None + """Custom inference endpoint url. None for using default public url.""" @property def _api_url(self) -> str: + return self.api_url or self._default_api_url + + @property + def _default_api_url(self) -> str: return ( "https://api-inference.huggingface.co" "/pipeline" From 0f020813922727febd856368cf328e8e91925b83 Mon Sep 17 00:00:00 2001 From: billytrend-cohere <144115527+billytrend-cohere@users.noreply.github.com> Date: Mon, 4 Dec 2023 20:10:24 +0000 Subject: [PATCH 10/49] Add input_type override (#14068) Add option to override input_type for cohere's v3 embeddings models --------- Co-authored-by: Bagatur --- .../integrations/text_embedding/cohere.ipynb | 40 ++++++++---- libs/langchain/langchain/embeddings/cohere.py | 64 +++++++++---------- 2 files changed, 60 insertions(+), 44 deletions(-) diff --git a/docs/docs/integrations/text_embedding/cohere.ipynb b/docs/docs/integrations/text_embedding/cohere.ipynb index a23ffb5995823..5b0fbbdae1ba0 100644 --- a/docs/docs/integrations/text_embedding/cohere.ipynb +++ b/docs/docs/integrations/text_embedding/cohere.ipynb @@ -12,7 +12,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "id": "6b82f59f", "metadata": {}, "outputs": [], @@ -22,17 
+22,17 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "id": "26895c60", "metadata": {}, "outputs": [], "source": [ - "embeddings = CohereEmbeddings(cohere_api_key=cohere_api_key)" + "embeddings = CohereEmbeddings(model=\"embed-english-light-v3.0\")" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "id": "eea52814", "metadata": {}, "outputs": [], @@ -42,22 +42,40 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "id": "fbe167bf", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[-0.072631836, 0.06921387, -0.02658081, 0.022705078, 0.027328491, 0.046905518, -0.01838684, -0.029525757, 0.0041046143, -0.028198242, 0.0496521, 0.026901245, 0.03274536, 0.01574707, -0.081726074, -0.022369385, 0.049591064, 0.06549072, -0.015083313, -0.053863525, 0.098083496, 0.034698486, -0.08557129, -0.0024662018, -0.07519531, 0.03265381, 0.006046295, -0.0060691833, 0.032196045, 0.07537842, 9.024143e-05, -0.00869751, 0.022735596, 0.06329346, 0.068481445, -0.006778717, -0.07885742, 0.049560547, -0.008811951, 0.025253296, 0.050750732, -0.05343628, 0.051361084, -0.02319336, 0.026382446, 0.088378906, 0.03567505, -0.0736084, 0.039215088, -0.020584106, -0.03112793, -0.071777344, 0.018218994, -0.01876831, 0.040863037, 0.080078125, 0.046020508, -0.030792236, -0.011779785, -0.024871826, -0.06652832, 0.04748535, -0.038116455, 0.08453369, 0.08746338, 0.059509277, -0.037628174, -0.045410156, -0.054626465, -0.0036334991, -0.035949707, -0.011070251, 0.054534912, 0.0803833, 0.052734375, 0.06689453, 0.0074310303, 0.018249512, -0.023773193, 0.03845215, -0.113220215, 0.014251709, 0.028289795, -0.03942871, 0.029525757, 0.03036499, 0.035095215, 0.031829834, -0.0015306473, 0.027252197, 0.005088806, -0.035858154, -0.113220215, 0.021606445, 0.012046814, -0.06137085, 0.0057640076, -0.06994629, 0.02532959, 0.016952515, -0.010398865, -0.0066184998, -0.020904541, 
-0.12030029, 0.0036029816, -0.061553955, 0.023956299, -0.07330322, 0.013053894, -0.009613037, -0.062683105, 0.00013184547, 0.12030029, 0.028167725, 0.048614502, -0.09301758, -0.020324707, 0.022369385, -0.14025879, -0.052764893, 0.07220459, 0.028198242, 0.01499939, -0.029449463, 0.004711151, -0.05947876, 0.1640625, -0.09240723, 0.019500732, -0.0031089783, 0.0032081604, -0.0049934387, -0.01676941, 0.002691269, 0.02848816, 0.013504028, -0.057800293, 0.049041748, -0.022384644, 0.05517578, -0.031982422, 0.055389404, 0.0859375, 0.019866943, -0.052978516, 0.030929565, -0.15979004, 0.068481445, -0.020080566, -0.033477783, 0.07922363, -0.020736694, -0.025680542, 0.054016113, -0.028839111, -0.016189575, 0.03564453, 0.0001078248, 0.06304932, -0.022781372, 0.06555176, 0.010093689, 0.03286743, 0.14111328, -0.008468628, -0.04849243, 0.04525757, 0.065979004, -0.012138367, -0.017044067, 0.059509277, 0.035339355, -0.017807007, -0.027267456, -0.0034656525, -0.02078247, -0.033477783, 0.05041504, -0.043518066, -0.064208984, 0.034942627, -0.009300232, -0.08148193, 0.007774353, -0.03540039, -0.008255005, -0.1060791, -0.0703125, 0.091308594, 0.10095215, -0.081970215, 0.02355957, -0.026382446, -0.0070610046, -0.051208496, -0.014961243, 0.07269287, -0.033721924, 0.017669678, -0.08972168, 0.035339355, 0.03579712, -0.07299805, -0.014144897, -0.008850098, 0.023742676, -0.05847168, -0.07873535, -0.015388489, -0.039642334, -0.028930664, 0.008926392, -0.040283203, -0.02897644, -0.013557434, -0.006088257, 0.024169922, -0.10217285, 0.014526367, 0.007381439, -0.0005607605, -0.058410645, -0.008399963, -0.08001709, 0.05065918, 0.01727295, 0.012191772, -0.016571045, 0.03717041, -0.02607727, 0.060760498, 0.057678223, -0.06585693, 0.059173584, 0.023117065, -0.034118652, -0.03189087, 0.010429382, 0.010368347, -0.011230469, -0.020980835, -0.04019165, 0.048187256, -0.019638062, -0.024414062, -0.0019989014, 0.04336548, 0.117248535, 0.00033903122, -0.0014419556, 0.013946533, -0.11541748, 0.030059814, 
-0.06500244, 0.05441284, 0.021759033, 0.030380249, 0.080566406, 0.02331543, -0.04586792, 0.037322998, 0.011390686, -0.01374054, 0.1459961, -0.050964355, 0.081970215, -0.061645508, 0.07067871, -0.036956787, 0.060455322, 0.051361084, -0.05831909, 0.05328369, -0.008628845, 0.054534912, -0.047332764, 0.030578613, -0.048828125, -0.018112183, 0.022979736, -0.07318115, -0.0423584, -0.094177246, -0.04071045, 0.054260254, 0.0423584, 0.075805664, -0.06365967, 0.009269714, -0.054779053, -0.007637024, -0.01876831, 0.08453369, 0.058898926, -0.07727051, 0.04360962, 0.010574341, -0.027694702, 0.024917603, -0.0463562, 0.040222168, -0.05496216, -0.048461914, 0.013710022, -0.1038208, 0.027954102, 0.031951904, -0.05618286, 0.0025730133, -0.06549072, -0.049957275, 0.01499939, -0.11090088, -0.009017944, 0.021835327, 0.03503418, 0.058746338, -0.12756348, -0.0345459, -0.04699707, -0.029830933, -0.06726074, 0.010612488, -0.024108887, 0.016464233, 0.013076782, -0.06298828, -0.0657959, -0.0025234222, -0.0625, 0.013420105, 0.05810547, -0.006362915, -0.028625488, 0.06085205, 0.12310791, 0.04751587, -0.027740479, -0.02029419, -0.02293396, 0.048858643, -0.006793976, -0.0061073303, 0.029067993, -0.0076942444, -0.00088596344, -0.007446289, 0.12756348, 0.082092285, -0.0037841797, 0.03866577, 0.040374756, 0.019104004, -0.0345459, 0.019042969, -0.038116455, 0.045410156, 0.062683105, -0.024963379, 0.085632324, 0.005897522, 0.008285522, 0.008811951, 0.026504517, 0.025558472, -0.005554199, -0.017822266, -0.112854004, -0.03768921, -0.00097227097, -0.061401367, 0.050567627, -0.010734558, 0.07220459, 0.03643799, 0.0007662773, -0.020980835, -0.04711914, -0.03488159, -0.09655762, 0.0048561096, 0.028030396, 0.04586792, -0.014915466]\n" + ] + } + ], "source": [ - "query_result = embeddings.embed_query(text)" + "query_result = embeddings.embed_query(text)\n", + "print(query_result)" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "id": "38ad3b20", "metadata": {}, - "outputs": [], + 
"outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[-0.072631836, 0.06921387, -0.02658081, 0.022705078, 0.027328491, 0.046905518, -0.01838684, -0.029525757, 0.0041046143, -0.028198242, 0.0496521, 0.026901245, 0.03274536, 0.01574707, -0.081726074, -0.022369385, 0.049591064, 0.06549072, -0.015083313, -0.053863525, 0.098083496, 0.034698486, -0.08557129, -0.0024662018, -0.07519531, 0.03265381, 0.006046295, -0.0060691833, 0.032196045, 0.07537842, 9.024143e-05, -0.00869751, 0.022735596, 0.06329346, 0.068481445, -0.006778717, -0.07885742, 0.049560547, -0.008811951, 0.025253296, 0.050750732, -0.05343628, 0.051361084, -0.02319336, 0.026382446, 0.088378906, 0.03567505, -0.0736084, 0.039215088, -0.020584106, -0.03112793, -0.071777344, 0.018218994, -0.01876831, 0.040863037, 0.080078125, 0.046020508, -0.030792236, -0.011779785, -0.024871826, -0.06652832, 0.04748535, -0.038116455, 0.08453369, 0.08746338, 0.059509277, -0.037628174, -0.045410156, -0.054626465, -0.0036334991, -0.035949707, -0.011070251, 0.054534912, 0.0803833, 0.052734375, 0.06689453, 0.0074310303, 0.018249512, -0.023773193, 0.03845215, -0.113220215, 0.014251709, 0.028289795, -0.03942871, 0.029525757, 0.03036499, 0.035095215, 0.031829834, -0.0015306473, 0.027252197, 0.005088806, -0.035858154, -0.113220215, 0.021606445, 0.012046814, -0.06137085, 0.0057640076, -0.06994629, 0.02532959, 0.016952515, -0.010398865, -0.0066184998, -0.020904541, -0.12030029, 0.0036029816, -0.061553955, 0.023956299, -0.07330322, 0.013053894, -0.009613037, -0.062683105, 0.00013184547, 0.12030029, 0.028167725, 0.048614502, -0.09301758, -0.020324707, 0.022369385, -0.14025879, -0.052764893, 0.07220459, 0.028198242, 0.01499939, -0.029449463, 0.004711151, -0.05947876, 0.1640625, -0.09240723, 0.019500732, -0.0031089783, 0.0032081604, -0.0049934387, -0.01676941, 0.002691269, 0.02848816, 0.013504028, -0.057800293, 0.049041748, -0.022384644, 0.05517578, -0.031982422, 0.055389404, 0.0859375, 0.019866943, -0.052978516, 
0.030929565, -0.15979004, 0.068481445, -0.020080566, -0.033477783, 0.07922363, -0.020736694, -0.025680542, 0.054016113, -0.028839111, -0.016189575, 0.03564453, 0.0001078248, 0.06304932, -0.022781372, 0.06555176, 0.010093689, 0.03286743, 0.14111328, -0.008468628, -0.04849243, 0.04525757, 0.065979004, -0.012138367, -0.017044067, 0.059509277, 0.035339355, -0.017807007, -0.027267456, -0.0034656525, -0.02078247, -0.033477783, 0.05041504, -0.043518066, -0.064208984, 0.034942627, -0.009300232, -0.08148193, 0.007774353, -0.03540039, -0.008255005, -0.1060791, -0.0703125, 0.091308594, 0.10095215, -0.081970215, 0.02355957, -0.026382446, -0.0070610046, -0.051208496, -0.014961243, 0.07269287, -0.033721924, 0.017669678, -0.08972168, 0.035339355, 0.03579712, -0.07299805, -0.014144897, -0.008850098, 0.023742676, -0.05847168, -0.07873535, -0.015388489, -0.039642334, -0.028930664, 0.008926392, -0.040283203, -0.02897644, -0.013557434, -0.006088257, 0.024169922, -0.10217285, 0.014526367, 0.007381439, -0.0005607605, -0.058410645, -0.008399963, -0.08001709, 0.05065918, 0.01727295, 0.012191772, -0.016571045, 0.03717041, -0.02607727, 0.060760498, 0.057678223, -0.06585693, 0.059173584, 0.023117065, -0.034118652, -0.03189087, 0.010429382, 0.010368347, -0.011230469, -0.020980835, -0.04019165, 0.048187256, -0.019638062, -0.024414062, -0.0019989014, 0.04336548, 0.117248535, 0.00033903122, -0.0014419556, 0.013946533, -0.11541748, 0.030059814, -0.06500244, 0.05441284, 0.021759033, 0.030380249, 0.080566406, 0.02331543, -0.04586792, 0.037322998, 0.011390686, -0.01374054, 0.1459961, -0.050964355, 0.081970215, -0.061645508, 0.07067871, -0.036956787, 0.060455322, 0.051361084, -0.05831909, 0.05328369, -0.008628845, 0.054534912, -0.047332764, 0.030578613, -0.048828125, -0.018112183, 0.022979736, -0.07318115, -0.0423584, -0.094177246, -0.04071045, 0.054260254, 0.0423584, 0.075805664, -0.06365967, 0.009269714, -0.054779053, -0.007637024, -0.01876831, 0.08453369, 0.058898926, -0.07727051, 0.04360962, 
0.010574341, -0.027694702, 0.024917603, -0.0463562, 0.040222168, -0.05496216, -0.048461914, 0.013710022, -0.1038208, 0.027954102, 0.031951904, -0.05618286, 0.0025730133, -0.06549072, -0.049957275, 0.01499939, -0.11090088, -0.009017944, 0.021835327, 0.03503418, 0.058746338, -0.12756348, -0.0345459, -0.04699707, -0.029830933, -0.06726074, 0.010612488, -0.024108887, 0.016464233, 0.013076782, -0.06298828, -0.0657959, -0.0025234222, -0.0625, 0.013420105, 0.05810547, -0.006362915, -0.028625488, 0.06085205, 0.12310791, 0.04751587, -0.027740479, -0.02029419, -0.02293396, 0.048858643, -0.006793976, -0.0061073303, 0.029067993, -0.0076942444, -0.00088596344, -0.007446289, 0.12756348, 0.082092285, -0.0037841797, 0.03866577, 0.040374756, 0.019104004, -0.0345459, 0.019042969, -0.038116455, 0.045410156, 0.062683105, -0.024963379, 0.085632324, 0.005897522, 0.008285522, 0.008811951, 0.026504517, 0.025558472, -0.005554199, -0.017822266, -0.112854004, -0.03768921, -0.00097227097, -0.061401367, 0.050567627, -0.010734558, 0.07220459, 0.03643799, 0.0007662773, -0.020980835, -0.04711914, -0.03488159, -0.09655762, 0.0048561096, 0.028030396, 0.04586792, -0.014915466]]\n" + ] + } + ], "source": [ - "doc_result = embeddings.embed_documents([text])" + "doc_result = embeddings.embed_documents([text])\n", + "print(doc_result)" ] }, { @@ -85,7 +103,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.1" + "version": "3.11.5" }, "vscode": { "interpreter": { diff --git a/libs/langchain/langchain/embeddings/cohere.py b/libs/langchain/langchain/embeddings/cohere.py index 0f2f778827ded..08328825348e5 100644 --- a/libs/langchain/langchain/embeddings/cohere.py +++ b/libs/langchain/langchain/embeddings/cohere.py @@ -1,8 +1,7 @@ from typing import Any, Dict, List, Optional -from langchain_core.embeddings import Embeddings -from langchain_core.pydantic_v1 import BaseModel, Extra, root_validator - +from langchain.pydantic_v1 import BaseModel, Extra, 
root_validator +from langchain.schema.embeddings import Embeddings from langchain.utils import get_from_dict_or_env @@ -18,7 +17,8 @@ class CohereEmbeddings(BaseModel, Embeddings): from langchain.embeddings import CohereEmbeddings cohere = CohereEmbeddings( - model="embed-english-light-v3.0", cohere_api_key="my-api-key" + model="embed-english-light-v3.0", + cohere_api_key="my-api-key" ) """ @@ -78,8 +78,30 @@ def validate_environment(cls, values: Dict) -> Dict: ) return values + def embed( + self, texts: List[str], *, input_type: Optional[str] = None + ) -> List[List[float]]: + embeddings = self.client.embed( + model=self.model, + texts=texts, + input_type=input_type, + truncate=self.truncate, + ).embeddings + return [list(map(float, e)) for e in embeddings] + + async def aembed( + self, texts: List[str], *, input_type: Optional[str] = None + ) -> List[List[float]]: + embeddings = await self.async_client.embed( + model=self.model, + texts=texts, + input_type=input_type, + truncate=self.truncate, + ).embeddings + return [list(map(float, e)) for e in embeddings] + def embed_documents(self, texts: List[str]) -> List[List[float]]: - """Call out to Cohere's embedding endpoint. + """Embed a list of document texts. Args: texts: The list of texts to embed. @@ -87,13 +109,7 @@ def embed_documents(self, texts: List[str]) -> List[List[float]]: Returns: List of embeddings, one for each text. """ - embeddings = self.client.embed( - model=self.model, - texts=texts, - input_type="search_document", - truncate=self.truncate, - ).embeddings - return [list(map(float, e)) for e in embeddings] + return self.embed(texts, input_type="search_document") async def aembed_documents(self, texts: List[str]) -> List[List[float]]: """Async call out to Cohere's embedding endpoint. @@ -104,13 +120,7 @@ async def aembed_documents(self, texts: List[str]) -> List[List[float]]: Returns: List of embeddings, one for each text. 
""" - embeddings = await self.async_client.embed( - model=self.model, - texts=texts, - input_type="search_document", - truncate=self.truncate, - ) - return [list(map(float, e)) for e in embeddings.embeddings] + return await self.aembed(texts, input_type="search_document") def embed_query(self, text: str) -> List[float]: """Call out to Cohere's embedding endpoint. @@ -121,13 +131,7 @@ def embed_query(self, text: str) -> List[float]: Returns: Embeddings for the text. """ - embeddings = self.client.embed( - model=self.model, - texts=[text], - input_type="search_query", - truncate=self.truncate, - ).embeddings - return [list(map(float, e)) for e in embeddings][0] + return self.embed([text], input_type="search_query")[0] async def aembed_query(self, text: str) -> List[float]: """Async call out to Cohere's embedding endpoint. @@ -138,10 +142,4 @@ async def aembed_query(self, text: str) -> List[float]: Returns: Embeddings for the text. """ - embeddings = await self.async_client.embed( - model=self.model, - texts=[text], - input_type="search_query", - truncate=self.truncate, - ) - return [list(map(float, e)) for e in embeddings.embeddings][0] + return (await self.aembed([text], input_type="search_query"))[0] From cd2028288e882f905931a639fa4380353de90b5c Mon Sep 17 00:00:00 2001 From: Bob Lin Date: Mon, 4 Dec 2023 14:12:30 -0600 Subject: [PATCH 11/49] Add openai v2 adapter (#14063) ### Description Starting from [openai version 1.0.0](https://github.com/openai/openai-python/tree/17ac6779958b2b74999c634c4ea4c7b74906027a#module-level-client), the camel case form of `openai.ChatCompletion` is no longer supported and has been changed to lowercase `openai.chat.completions`. In addition, the returned object only accepts attribute access instead of index access: ```python import openai # optional; defaults to `os.environ['OPENAI_API_KEY']` openai.api_key = '...' # all client options can be configured just like the `OpenAI` instantiation counterpart openai.base_url = "https://..." 
openai.default_headers = {"x-foo": "true"} completion = openai.chat.completions.create( model="gpt-4", messages=[ { "role": "user", "content": "How do I output all files in a directory using Python?", }, ], ) print(completion.choices[0].message.content) ``` So I implemented a compatible adapter that supports both attribute access and index access: ```python In [1]: from langchain.adapters import openai as lc_openai ...: messages = [{"role": "user", "content": "hi"}] In [2]: result = lc_openai.chat.completions.create( ...: messages=messages, model="gpt-3.5-turbo", temperature=0 ...: ) In [3]: result.choices[0].message Out[3]: {'role': 'assistant', 'content': 'Hello! How can I assist you today?'} In [4]: result["choices"][0]["message"] Out[4]: {'role': 'assistant', 'content': 'Hello! How can I assist you today?'} In [5]: result = await lc_openai.chat.completions.acreate( ...: messages=messages, model="gpt-3.5-turbo", temperature=0 ...: ) In [6]: result.choices[0].message Out[6]: {'role': 'assistant', 'content': 'Hello! How can I assist you today?'} In [7]: result["choices"][0]["message"] Out[7]: {'role': 'assistant', 'content': 'Hello! How can I assist you today?'} In [8]: for rs in lc_openai.chat.completions.create( ...: messages=messages, model="gpt-3.5-turbo", temperature=0, stream=True ...: ): ...: print(rs.choices[0].delta) ...: print(rs["choices"][0]["delta"]) ...: {'role': 'assistant', 'content': ''} {'role': 'assistant', 'content': ''} {'content': 'Hello'} {'content': 'Hello'} {'content': '!'} {'content': '!'} In [20]: async for rs in await lc_openai.chat.completions.acreate( ...: messages=messages, model="gpt-3.5-turbo", temperature=0, stream=True ...: ): ...: print(rs.choices[0].delta) ...: print(rs["choices"][0]["delta"]) ...: {'role': 'assistant', 'content': ''} {'role': 'assistant', 'content': ''} {'content': 'Hello'} {'content': 'Hello'} {'content': '!'} {'content': '!'} ... 
``` ### Twitter handle [lin_bob57617](https://twitter.com/lin_bob57617) --- .../integrations/adapters/openai-old.ipynb | 285 ++++++++++++++++++ docs/docs/integrations/adapters/openai.ipynb | 113 ++++--- libs/langchain/langchain/adapters/openai.py | 137 ++++++++- 3 files changed, 495 insertions(+), 40 deletions(-) create mode 100644 docs/docs/integrations/adapters/openai-old.ipynb diff --git a/docs/docs/integrations/adapters/openai-old.ipynb b/docs/docs/integrations/adapters/openai-old.ipynb new file mode 100644 index 0000000000000..fee3ab5a50169 --- /dev/null +++ b/docs/docs/integrations/adapters/openai-old.ipynb @@ -0,0 +1,285 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "700a516b", + "metadata": {}, + "source": [ + "# OpenAI Adapter(Old)\n", + "\n", + "**Please ensure the OpenAI library version is less than 1.0.0; otherwise, refer to the newer doc [OpenAI Adapter](./openai.ipynb).**\n", + "\n", + "A lot of people get started with OpenAI but want to explore other models. LangChain's integrations with many model providers make this easy to do. While LangChain has its own message and model APIs, we've also made it as easy as possible to explore other models by exposing an adapter to adapt LangChain models to the OpenAI API.\n", + "\n", + "At the moment this only deals with output and does not return other information (token counts, stop reasons, etc)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "6017f26a", + "metadata": {}, + "outputs": [], + "source": [ + "import openai\n", + "from langchain.adapters import openai as lc_openai" + ] + }, + { + "cell_type": "markdown", + "id": "b522ceda", + "metadata": {}, + "source": [ + "## ChatCompletion.create" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "1d22eb61", + "metadata": {}, + "outputs": [], + "source": [ + "messages = [{\"role\": \"user\", \"content\": \"hi\"}]" + ] + }, + { + "cell_type": "markdown", + "id": "d550d3ad", + "metadata": {}, + "source": [ + "Original OpenAI call" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "012d81ae", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'role': 'assistant', 'content': 'Hello! How can I assist you today?'}" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "result = openai.ChatCompletion.create(\n", + " messages=messages, model=\"gpt-3.5-turbo\", temperature=0\n", + ")\n", + "result[\"choices\"][0][\"message\"].to_dict_recursive()" + ] + }, + { + "cell_type": "markdown", + "id": "db5b5500", + "metadata": {}, + "source": [ + "LangChain OpenAI wrapper call" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "c67a5ac8", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'role': 'assistant', 'content': 'Hello! 
How can I assist you today?'}" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "lc_result = lc_openai.ChatCompletion.create(\n", + " messages=messages, model=\"gpt-3.5-turbo\", temperature=0\n", + ")\n", + "lc_result[\"choices\"][0][\"message\"]" + ] + }, + { + "cell_type": "markdown", + "id": "034ba845", + "metadata": {}, + "source": [ + "Swapping out model providers" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "f7c94827", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'role': 'assistant', 'content': ' Hello!'}" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "lc_result = lc_openai.ChatCompletion.create(\n", + " messages=messages, model=\"claude-2\", temperature=0, provider=\"ChatAnthropic\"\n", + ")\n", + "lc_result[\"choices\"][0][\"message\"]" + ] + }, + { + "cell_type": "markdown", + "id": "cb3f181d", + "metadata": {}, + "source": [ + "## ChatCompletion.stream" + ] + }, + { + "cell_type": "markdown", + "id": "f7b8cd18", + "metadata": {}, + "source": [ + "Original OpenAI call" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "fd8cb1ea", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'role': 'assistant', 'content': ''}\n", + "{'content': 'Hello'}\n", + "{'content': '!'}\n", + "{'content': ' How'}\n", + "{'content': ' can'}\n", + "{'content': ' I'}\n", + "{'content': ' assist'}\n", + "{'content': ' you'}\n", + "{'content': ' today'}\n", + "{'content': '?'}\n", + "{}\n" + ] + } + ], + "source": [ + "for c in openai.ChatCompletion.create(\n", + " messages=messages, model=\"gpt-3.5-turbo\", temperature=0, stream=True\n", + "):\n", + " print(c[\"choices\"][0][\"delta\"].to_dict_recursive())" + ] + }, + { + "cell_type": "markdown", + "id": "0b2a076b", + "metadata": {}, + "source": [ + "LangChain OpenAI wrapper call" + ] + }, 
+ { + "cell_type": "code", + "execution_count": 30, + "id": "9521218c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'role': 'assistant', 'content': ''}\n", + "{'content': 'Hello'}\n", + "{'content': '!'}\n", + "{'content': ' How'}\n", + "{'content': ' can'}\n", + "{'content': ' I'}\n", + "{'content': ' assist'}\n", + "{'content': ' you'}\n", + "{'content': ' today'}\n", + "{'content': '?'}\n", + "{}\n" + ] + } + ], + "source": [ + "for c in lc_openai.ChatCompletion.create(\n", + " messages=messages, model=\"gpt-3.5-turbo\", temperature=0, stream=True\n", + "):\n", + " print(c[\"choices\"][0][\"delta\"])" + ] + }, + { + "cell_type": "markdown", + "id": "0fc39750", + "metadata": {}, + "source": [ + "Swapping out model providers" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "68f0214e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'role': 'assistant', 'content': ' Hello'}\n", + "{'content': '!'}\n", + "{}\n" + ] + } + ], + "source": [ + "for c in lc_openai.ChatCompletion.create(\n", + " messages=messages,\n", + " model=\"claude-2\",\n", + " temperature=0,\n", + " stream=True,\n", + " provider=\"ChatAnthropic\",\n", + "):\n", + " print(c[\"choices\"][0][\"delta\"])" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.5" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/docs/integrations/adapters/openai.ipynb b/docs/docs/integrations/adapters/openai.ipynb index 8fd2c5214c767..0db8c7dbfbccf 100644 --- a/docs/docs/integrations/adapters/openai.ipynb +++ 
b/docs/docs/integrations/adapters/openai.ipynb @@ -7,6 +7,8 @@ "source": [ "# OpenAI Adapter\n", "\n", + "**Please ensure OpenAI library is version 1.0.0 or higher; otherwise, refer to the older doc [OpenAI Adapter(Old)](./openai-old.ipynb).**\n", + "\n", "A lot of people get started with OpenAI but want to explore other models. LangChain's integrations with many model providers make this easy to do so. While LangChain has it's own message and model APIs, we've also made it as easy as possible to explore other models by exposing an adapter to adapt LangChain models to the OpenAI api.\n", "\n", "At the moment this only deals with output and does not return other information (token counts, stop reasons, etc)." @@ -28,12 +30,12 @@ "id": "b522ceda", "metadata": {}, "source": [ - "## ChatCompletion.create" + "## chat.completions.create" ] }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 2, "id": "1d22eb61", "metadata": {}, "outputs": [], @@ -51,26 +53,29 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 3, "id": "012d81ae", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "{'role': 'assistant', 'content': 'Hello! How can I assist you today?'}" + "{'content': 'Hello! How can I assist you today?',\n", + " 'role': 'assistant',\n", + " 'function_call': None,\n", + " 'tool_calls': None}" ] }, - "execution_count": 15, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "result = openai.ChatCompletion.create(\n", + "result = openai.chat.completions.create(\n", " messages=messages, model=\"gpt-3.5-turbo\", temperature=0\n", ")\n", - "result[\"choices\"][0][\"message\"].to_dict_recursive()" + "result.choices[0].message.model_dump()" ] }, { @@ -83,26 +88,48 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 4, "id": "c67a5ac8", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "{'role': 'assistant', 'content': 'Hello! 
How can I assist you today?'}" + "{'role': 'assistant', 'content': 'Hello! How can I help you today?'}" ] }, - "execution_count": 17, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "lc_result = lc_openai.ChatCompletion.create(\n", + "lc_result = lc_openai.chat.completions.create(\n", " messages=messages, model=\"gpt-3.5-turbo\", temperature=0\n", ")\n", - "lc_result[\"choices\"][0][\"message\"]" + "\n", + "lc_result.choices[0].message # Attribute access" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "37a6e461-8608-47f6-ac45-12ad753c062a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'role': 'assistant', 'content': 'Hello! How can I help you today?'}" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "lc_result[\"choices\"][0][\"message\"] # Also compatible with index access" ] }, { @@ -115,26 +142,26 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 6, "id": "f7c94827", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "{'role': 'assistant', 'content': ' Hello!'}" + "{'role': 'assistant', 'content': 'Hello! 
How can I assist you today?'}" ] }, - "execution_count": 19, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "lc_result = lc_openai.ChatCompletion.create(\n", + "lc_result = lc_openai.chat.completions.create(\n", " messages=messages, model=\"claude-2\", temperature=0, provider=\"ChatAnthropic\"\n", ")\n", - "lc_result[\"choices\"][0][\"message\"]" + "lc_result.choices[0].message" ] }, { @@ -142,7 +169,7 @@ "id": "cb3f181d", "metadata": {}, "source": [ - "## ChatCompletion.stream" + "## chat.completions.stream" ] }, { @@ -155,7 +182,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 7, "id": "fd8cb1ea", "metadata": {}, "outputs": [ @@ -163,25 +190,25 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'role': 'assistant', 'content': ''}\n", - "{'content': 'Hello'}\n", - "{'content': '!'}\n", - "{'content': ' How'}\n", - "{'content': ' can'}\n", - "{'content': ' I'}\n", - "{'content': ' assist'}\n", - "{'content': ' you'}\n", - "{'content': ' today'}\n", - "{'content': '?'}\n", - "{}\n" + "{'content': '', 'function_call': None, 'role': 'assistant', 'tool_calls': None}\n", + "{'content': 'Hello', 'function_call': None, 'role': None, 'tool_calls': None}\n", + "{'content': '!', 'function_call': None, 'role': None, 'tool_calls': None}\n", + "{'content': ' How', 'function_call': None, 'role': None, 'tool_calls': None}\n", + "{'content': ' can', 'function_call': None, 'role': None, 'tool_calls': None}\n", + "{'content': ' I', 'function_call': None, 'role': None, 'tool_calls': None}\n", + "{'content': ' assist', 'function_call': None, 'role': None, 'tool_calls': None}\n", + "{'content': ' you', 'function_call': None, 'role': None, 'tool_calls': None}\n", + "{'content': ' today', 'function_call': None, 'role': None, 'tool_calls': None}\n", + "{'content': '?', 'function_call': None, 'role': None, 'tool_calls': None}\n", + "{'content': None, 'function_call': None, 'role': None, 'tool_calls': None}\n" 
] } ], "source": [ - "for c in openai.ChatCompletion.create(\n", + "for c in openai.chat.completions.create(\n", " messages=messages, model=\"gpt-3.5-turbo\", temperature=0, stream=True\n", "):\n", - " print(c[\"choices\"][0][\"delta\"].to_dict_recursive())" + " print(c.choices[0].delta.model_dump())" ] }, { @@ -194,7 +221,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 8, "id": "9521218c", "metadata": {}, "outputs": [ @@ -217,10 +244,10 @@ } ], "source": [ - "for c in lc_openai.ChatCompletion.create(\n", + "for c in lc_openai.chat.completions.create(\n", " messages=messages, model=\"gpt-3.5-turbo\", temperature=0, stream=True\n", "):\n", - " print(c[\"choices\"][0][\"delta\"])" + " print(c.choices[0].delta)" ] }, { @@ -233,7 +260,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 9, "id": "68f0214e", "metadata": {}, "outputs": [ @@ -241,14 +268,22 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'role': 'assistant', 'content': ' Hello'}\n", + "{'role': 'assistant', 'content': ''}\n", + "{'content': 'Hello'}\n", "{'content': '!'}\n", + "{'content': ' How'}\n", + "{'content': ' can'}\n", + "{'content': ' I'}\n", + "{'content': ' assist'}\n", + "{'content': ' you'}\n", + "{'content': ' today'}\n", + "{'content': '?'}\n", "{}\n" ] } ], "source": [ - "for c in lc_openai.ChatCompletion.create(\n", + "for c in lc_openai.chat.completions.create(\n", " messages=messages,\n", " model=\"claude-2\",\n", " temperature=0,\n", @@ -275,7 +310,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.12" + "version": "3.11.5" } }, "nbformat": 4, diff --git a/libs/langchain/langchain/adapters/openai.py b/libs/langchain/langchain/adapters/openai.py index 8607468b81d23..0af759ebf5b08 100644 --- a/libs/langchain/langchain/adapters/openai.py +++ b/libs/langchain/langchain/adapters/openai.py @@ -25,6 +25,7 @@ SystemMessage, ToolMessage, ) +from langchain_core.pydantic_v1 
import BaseModel from typing_extensions import Literal @@ -38,6 +39,29 @@ async def aenumerate( i += 1 +class IndexableBaseModel(BaseModel): + """Allows a BaseModel to return its fields by string variable indexing""" + + def __getitem__(self, item: str) -> Any: + return getattr(self, item) + + +class Choice(IndexableBaseModel): + message: dict + + +class ChatCompletions(IndexableBaseModel): + choices: List[Choice] + + +class ChoiceChunk(IndexableBaseModel): + delta: dict + + +class ChatCompletionChunk(IndexableBaseModel): + choices: List[ChoiceChunk] + + def convert_dict_to_message(_dict: Mapping[str, Any]) -> BaseMessage: """Convert a dictionary to a LangChain message. @@ -129,7 +153,7 @@ def convert_openai_messages(messages: Sequence[Dict[str, Any]]) -> List[BaseMess return [convert_dict_to_message(m) for m in messages] -def _convert_message_chunk_to_delta(chunk: BaseMessageChunk, i: int) -> Dict[str, Any]: +def _convert_message_chunk(chunk: BaseMessageChunk, i: int) -> dict: _dict: Dict[str, Any] = {} if isinstance(chunk, AIMessageChunk): if i == 0: @@ -148,6 +172,11 @@ def _convert_message_chunk_to_delta(chunk: BaseMessageChunk, i: int) -> Dict[str # This only happens at the end of streams, and OpenAI returns as empty dict if _dict == {"content": ""}: _dict = {} + return _dict + + +def _convert_message_chunk_to_delta(chunk: BaseMessageChunk, i: int) -> Dict[str, Any]: + _dict = _convert_message_chunk(chunk, i) return {"choices": [{"delta": _dict}]} @@ -262,3 +291,109 @@ def convert_messages_for_finetuning( for session in sessions if _has_assistant_message(session) ] + + +class Completions: + """Completion.""" + + @overload + @staticmethod + def create( + messages: Sequence[Dict[str, Any]], + *, + provider: str = "ChatOpenAI", + stream: Literal[False] = False, + **kwargs: Any, + ) -> ChatCompletions: + ... 
+ + @overload + @staticmethod + def create( + messages: Sequence[Dict[str, Any]], + *, + provider: str = "ChatOpenAI", + stream: Literal[True], + **kwargs: Any, + ) -> Iterable: + ... + + @staticmethod + def create( + messages: Sequence[Dict[str, Any]], + *, + provider: str = "ChatOpenAI", + stream: bool = False, + **kwargs: Any, + ) -> Union[ChatCompletions, Iterable]: + models = importlib.import_module("langchain.chat_models") + model_cls = getattr(models, provider) + model_config = model_cls(**kwargs) + converted_messages = convert_openai_messages(messages) + if not stream: + result = model_config.invoke(converted_messages) + return ChatCompletions( + choices=[Choice(message=convert_message_to_dict(result))] + ) + else: + return ( + ChatCompletionChunk( + choices=[ChoiceChunk(delta=_convert_message_chunk(c, i))] + ) + for i, c in enumerate(model_config.stream(converted_messages)) + ) + + @overload + @staticmethod + async def acreate( + messages: Sequence[Dict[str, Any]], + *, + provider: str = "ChatOpenAI", + stream: Literal[False] = False, + **kwargs: Any, + ) -> ChatCompletions: + ... + + @overload + @staticmethod + async def acreate( + messages: Sequence[Dict[str, Any]], + *, + provider: str = "ChatOpenAI", + stream: Literal[True], + **kwargs: Any, + ) -> AsyncIterator: + ... 
+ + @staticmethod + async def acreate( + messages: Sequence[Dict[str, Any]], + *, + provider: str = "ChatOpenAI", + stream: bool = False, + **kwargs: Any, + ) -> Union[ChatCompletions, AsyncIterator]: + models = importlib.import_module("langchain.chat_models") + model_cls = getattr(models, provider) + model_config = model_cls(**kwargs) + converted_messages = convert_openai_messages(messages) + if not stream: + result = await model_config.ainvoke(converted_messages) + return ChatCompletions( + choices=[Choice(message=convert_message_to_dict(result))] + ) + else: + return ( + ChatCompletionChunk( + choices=[ChoiceChunk(delta=_convert_message_chunk(c, i))] + ) + async for i, c in aenumerate(model_config.astream(converted_messages)) + ) + + +class Chat: + def __init__(self) -> None: + self.completions = Completions() + + +chat = Chat() From 25f72944a0930482ca1fe62db938a2744d5bd54e Mon Sep 17 00:00:00 2001 From: Muntaqa Mahmood <69706702+muntaqamahmood@users.noreply.github.com> Date: Mon, 4 Dec 2023 15:27:38 -0500 Subject: [PATCH 12/49] Add: Steam API tool (#14008) - **Description:** Our PR is an integration of a Steam API Tool that makes recommendations on steam games based on user's Steam profile and provides information on games based on user provided queries. - **Issue:** the issue # our PR implements: https://github.com/langchain-ai/langchain/issues/12120 - **Dependencies:** python-steam-api library, steamspypi library and decouple library - **Tag maintainer:** @baskaryan, @hwchase17 - **Twitter handle:** N/A Hello langchain Maintainers, We are a team of 4 University of Toronto students contributing to langchain as part of our course [CSCD01 (link to course page)](https://cscd01.com/work/open-source-project). We hope our changes help the community. We have run make format, make lint and make test locally before submitting the PR. To our knowledge, our changes do not introduce any new errors. Our PR integrates the python-steam-api, steamspypi and decouple packages. 
We have added integration tests to test our python API integration into langchain and an example notebook is also provided. Our amazing team that contributed to this PR: @JohnY2002, @shenceyang, @andrewqian2001 and @muntaqamahmood Thank you in advance to all the maintainers for reviewing our PR! --------- Co-authored-by: Shence Co-authored-by: JohnY2002 Co-authored-by: Andrew Qian Co-authored-by: Harrison Chase Co-authored-by: JohnY <94477598+JohnY2002@users.noreply.github.com> --- docs/docs/integrations/toolkits/steam.ipynb | 105 +++++++++++ .../agents/agent_toolkits/__init__.py | 2 + .../agents/agent_toolkits/steam/__init__.py | 1 + .../agents/agent_toolkits/steam/toolkit.py | 48 +++++ libs/langchain/langchain/tools/__init__.py | 9 + .../langchain/tools/steam/__init__.py | 1 + .../langchain/langchain/tools/steam/prompt.py | 26 +++ libs/langchain/langchain/tools/steam/tool.py | 29 ++++ .../langchain/langchain/utilities/__init__.py | 9 + libs/langchain/langchain/utilities/steam.py | 164 ++++++++++++++++++ .../utilities/test_steam_api.py | 22 +++ .../tests/unit_tests/tools/test_imports.py | 1 + .../tests/unit_tests/tools/test_public_api.py | 1 + .../unit_tests/utilities/test_imports.py | 1 + 14 files changed, 419 insertions(+) create mode 100644 docs/docs/integrations/toolkits/steam.ipynb create mode 100644 libs/langchain/langchain/agents/agent_toolkits/steam/__init__.py create mode 100644 libs/langchain/langchain/agents/agent_toolkits/steam/toolkit.py create mode 100644 libs/langchain/langchain/tools/steam/__init__.py create mode 100644 libs/langchain/langchain/tools/steam/prompt.py create mode 100644 libs/langchain/langchain/tools/steam/tool.py create mode 100644 libs/langchain/langchain/utilities/steam.py create mode 100644 libs/langchain/tests/integration_tests/utilities/test_steam_api.py diff --git a/docs/docs/integrations/toolkits/steam.ipynb b/docs/docs/integrations/toolkits/steam.ipynb new file mode 100644 index 0000000000000..365ce8639bdaf --- /dev/null +++ 
b/docs/docs/integrations/toolkits/steam.ipynb @@ -0,0 +1,105 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Steam Game Recommendation & Game Details Tool" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "from langchain.agents import AgentType, initialize_agent\n", + "from langchain.agents.agent_toolkits.steam.toolkit import SteamToolkit\n", + "from langchain.llms import OpenAI\n", + "from langchain.utilities.steam import SteamWebAPIWrapper" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "os.environ[\"STEAM_KEY\"] = \"xyz\"\n", + "os.environ[\"STEAM_ID\"] = \"123\"\n", + "os.environ[\"OPENAI_API_KEY\"] = \"abc\"" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "llm = OpenAI(temperature=0)\n", + "Steam = SteamWebAPIWrapper()\n", + "toolkit = SteamToolkit.from_steam_api_wrapper(Steam)\n", + "agent = initialize_agent(\n", + " toolkit.get_tools(), llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n", + "\u001b[32;1m\u001b[1;3m I need to find the game details\n", + "Action: Get Games Details\n", + "Action Input: Terraria\u001b[0m\n", + "Observation: \u001b[36;1m\u001b[1;3mThe id is: 105600\n", + "The link is: https://store.steampowered.com/app/105600/Terraria/?snr=1_7_15__13\n", + "The price is: $9.99\n", + "The summary of the game is: Dig, Fight, Explore, Build: The very world is at your fingertips as you fight for survival, fortune, and glory. 
Will you delve deep into cavernous expanses in search of treasure and raw materials with which to craft ever-evolving gear, machinery, and aesthetics? Perhaps you will choose instead to seek out ever-greater foes to test your mettle in combat? Maybe you will decide to construct your own city to house the host of mysterious allies you may encounter along your travels? In the World of Terraria, the choice is yours!Blending elements of classic action games with the freedom of sandbox-style creativity, Terraria is a unique gaming experience where both the journey and the destination are completely in the player’s control. The Terraria adventure is truly as unique as the players themselves! Are you up for the monumental task of exploring, creating, and defending a world of your own? Key features: Sandbox Play Randomly generated worlds Free Content Updates \n", + "The supported languages of the game are: English, French, Italian, German, Spanish - Spain, Polish, Portuguese - Brazil, Russian, Simplified Chinese\n", + "\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n", + "Final Answer: Terraria is a game with an id of 105600, a link of https://store.steampowered.com/app/105600/Terraria/?snr=1_7_15__13, a price of $9.99, a summary of \"Dig, Fight, Explore, Build: The very world is at your fingertips as you fight for survival, fortune, and glory. Will you delve deep into cavernous expanses in search of treasure and raw materials with which to craft ever-evolving gear, machinery, and aesthetics? Perhaps you will choose instead to seek out ever-greater foes to test your mettle in combat? Maybe you will decide to construct your own city to house the host of mysterious allies you may encounter along your travels? 
In the World of Terraria, the choice is yours!Blending elements of classic action games with the freedom of sandbox-style creativity, Terraria is a unique gaming experience where both the journey and the destination are completely in the player’s control. The Terraria adventure is truly as unique as the players themselves! Are you up for the monumental task of exploring, creating, and defending a\u001b[0m\n", + "\n", + "\u001b[1m> Finished chain.\u001b[0m\n", + "{'input': 'can you give the information about the game Terraria', 'output': 'Terraria is a game with an id of 105600, a link of https://store.steampowered.com/app/105600/Terraria/?snr=1_7_15__13, a price of $9.99, a summary of \"Dig, Fight, Explore, Build: The very world is at your fingertips as you fight for survival, fortune, and glory. Will you delve deep into cavernous expanses in search of treasure and raw materials with which to craft ever-evolving gear, machinery, and aesthetics? Perhaps you will choose instead to seek out ever-greater foes to test your mettle in combat? Maybe you will decide to construct your own city to house the host of mysterious allies you may encounter along your travels? In the World of Terraria, the choice is yours!Blending elements of classic action games with the freedom of sandbox-style creativity, Terraria is a unique gaming experience where both the journey and the destination are completely in the player’s control. The Terraria adventure is truly as unique as the players themselves! 
Are you up for the monumental task of exploring, creating, and defending a'}\n" + ] + } + ], + "source": [ + "out = agent(\"can you give the information about the game Terraria\")\n", + "print(out)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.1" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/libs/langchain/langchain/agents/agent_toolkits/__init__.py b/libs/langchain/langchain/agents/agent_toolkits/__init__.py index 2266b2413bbf8..b40975cc73299 100644 --- a/libs/langchain/langchain/agents/agent_toolkits/__init__.py +++ b/libs/langchain/langchain/agents/agent_toolkits/__init__.py @@ -47,6 +47,7 @@ from langchain.agents.agent_toolkits.spark_sql.toolkit import SparkSQLToolkit from langchain.agents.agent_toolkits.sql.base import create_sql_agent from langchain.agents.agent_toolkits.sql.toolkit import SQLDatabaseToolkit +from langchain.agents.agent_toolkits.steam.toolkit import SteamToolkit from langchain.agents.agent_toolkits.vectorstore.base import ( create_vectorstore_agent, create_vectorstore_router_agent, @@ -98,6 +99,7 @@ def __getattr__(name: str) -> Any: "PlayWrightBrowserToolkit", "PowerBIToolkit", "SlackToolkit", + "SteamToolkit", "SQLDatabaseToolkit", "SparkSQLToolkit", "VectorStoreInfo", diff --git a/libs/langchain/langchain/agents/agent_toolkits/steam/__init__.py b/libs/langchain/langchain/agents/agent_toolkits/steam/__init__.py new file mode 100644 index 0000000000000..f99981082424e --- /dev/null +++ b/libs/langchain/langchain/agents/agent_toolkits/steam/__init__.py @@ -0,0 +1 @@ +"""Steam Toolkit.""" diff --git a/libs/langchain/langchain/agents/agent_toolkits/steam/toolkit.py 
b/libs/langchain/langchain/agents/agent_toolkits/steam/toolkit.py new file mode 100644 index 0000000000000..49625cf9e1a8d --- /dev/null +++ b/libs/langchain/langchain/agents/agent_toolkits/steam/toolkit.py @@ -0,0 +1,48 @@ +"""Steam Toolkit.""" +from typing import List + +from langchain.agents.agent_toolkits.base import BaseToolkit +from langchain.tools import BaseTool +from langchain.tools.steam.prompt import ( + STEAM_GET_GAMES_DETAILS, + STEAM_GET_RECOMMENDED_GAMES, +) +from langchain.tools.steam.tool import SteamWebAPIQueryRun +from langchain.utilities.steam import SteamWebAPIWrapper + + +class SteamToolkit(BaseToolkit): + """Steam Toolkit.""" + + tools: List[BaseTool] = [] + + @classmethod + def from_steam_api_wrapper( + cls, steam_api_wrapper: SteamWebAPIWrapper + ) -> "SteamToolkit": + operations: List[dict] = [ + { + "mode": "get_games_details", + "name": "Get Games Details", + "description": STEAM_GET_GAMES_DETAILS, + }, + { + "mode": "get_recommended_games", + "name": "Get Recommended Games", + "description": STEAM_GET_RECOMMENDED_GAMES, + }, + ] + tools = [ + SteamWebAPIQueryRun( + name=action["name"], + description=action["description"], + mode=action["mode"], + api_wrapper=steam_api_wrapper, + ) + for action in operations + ] + return cls(tools=tools) + + def get_tools(self) -> List[BaseTool]: + """Get the tools in the toolkit.""" + return self.tools diff --git a/libs/langchain/langchain/tools/__init__.py b/libs/langchain/langchain/tools/__init__.py index 19c91b10230a4..7422c13662095 100644 --- a/libs/langchain/langchain/tools/__init__.py +++ b/libs/langchain/langchain/tools/__init__.py @@ -534,6 +534,12 @@ def _import_requests_tool_RequestsPutTool() -> Any: return RequestsPutTool +def _import_steam_webapi_tool() -> Any: + from langchain.tools.steam.tool import SteamWebAPIQueryRun + + return SteamWebAPIQueryRun + + def _import_scenexplain_tool() -> Any: from langchain.tools.scenexplain.tool import SceneXplainTool @@ -887,6 +893,8 @@ def 
__getattr__(name: str) -> Any: return _import_requests_tool_RequestsPostTool() elif name == "RequestsPutTool": return _import_requests_tool_RequestsPutTool() + elif name == "SteamWebAPIQueryRun": + return _import_steam_webapi_tool() elif name == "SceneXplainTool": return _import_scenexplain_tool() elif name == "SearxSearchResults": @@ -1044,6 +1052,7 @@ def __getattr__(name: str) -> Any: "RequestsPatchTool", "RequestsPostTool", "RequestsPutTool", + "SteamWebAPIQueryRun", "SceneXplainTool", "SearxSearchResults", "SearxSearchRun", diff --git a/libs/langchain/langchain/tools/steam/__init__.py b/libs/langchain/langchain/tools/steam/__init__.py new file mode 100644 index 0000000000000..9367fd95b3089 --- /dev/null +++ b/libs/langchain/langchain/tools/steam/__init__.py @@ -0,0 +1 @@ +"""Steam API toolkit""" diff --git a/libs/langchain/langchain/tools/steam/prompt.py b/libs/langchain/langchain/tools/steam/prompt.py new file mode 100644 index 0000000000000..6f82e2ff4f2f1 --- /dev/null +++ b/libs/langchain/langchain/tools/steam/prompt.py @@ -0,0 +1,26 @@ +STEAM_GET_GAMES_DETAILS = """ + This tool is a wrapper around python-steam-api's steam.apps.search_games API and + steam.apps.get_app_details API, useful when you need to search for a game. + The input to this tool is a string specifying the name of the game you want to + search for. For example, to search for a game called "Counter-Strike: Global + Offensive", you would input "Counter-Strike: Global Offensive" as the game name. + This input will be passed into steam.apps.search_games to find the game id, link + and price, and then the game id will be passed into steam.apps.get_app_details to + get the detailed description and supported languages of the game. Finally the + results are combined and returned as a string. 
+""" + +STEAM_GET_RECOMMENDED_GAMES = """ + This tool is a wrapper around python-steam-api's steam.users.get_owned_games API + and steamspypi's steamspypi.download API, useful when you need to get a list of + recommended games. The input to this tool is a string specifying the steam id of + the user you want to get recommended games for. For example, to get recommended + games for a user with steam id 76561197960435530, you would input + "76561197960435530" as the steam id. This steamid is then utilized to form a + data_request sent to steamspypi's steamspypi.download to retrieve genres of user's + owned games. Then, calculates the frequency of each genre, identifying the most + popular one, and stored it in a dictionary. Subsequently, use steamspypi.download + to returns all games in this genre and return 5 most-played games that is not owned + by the user. + +""" diff --git a/libs/langchain/langchain/tools/steam/tool.py b/libs/langchain/langchain/tools/steam/tool.py new file mode 100644 index 0000000000000..1556f7ae599d0 --- /dev/null +++ b/libs/langchain/langchain/tools/steam/tool.py @@ -0,0 +1,29 @@ +"""Tool for Steam Web API""" + +from typing import Optional + +from langchain.callbacks.manager import CallbackManagerForToolRun +from langchain.tools.base import BaseTool +from langchain.utilities.steam import SteamWebAPIWrapper + + +class SteamWebAPIQueryRun(BaseTool): + """Tool that searches the Steam Web API.""" + + mode: str + name: str = "Steam" + description: str = ( + "A wrapper around Steam Web API." + "Steam Tool is useful for fetching User profiles and stats, Game data and more!" + "Input should be the User or Game you want to query." 
+ ) + + api_wrapper: SteamWebAPIWrapper + + def _run( + self, + query: str, + run_manager: Optional[CallbackManagerForToolRun] = None, + ) -> str: + """Use the Steam-WebAPI tool.""" + return self.api_wrapper.run(self.mode, query) diff --git a/libs/langchain/langchain/utilities/__init__.py b/libs/langchain/langchain/utilities/__init__.py index 817a5475ab3ff..8c8242e3a6a9c 100644 --- a/libs/langchain/langchain/utilities/__init__.py +++ b/libs/langchain/langchain/utilities/__init__.py @@ -218,6 +218,12 @@ def _import_sql_database() -> Any: return SQLDatabase +def _import_steam_webapi() -> Any: + from langchain.utilities.steam import SteamWebAPIWrapper + + return SteamWebAPIWrapper + + def _import_stackexchange() -> Any: from langchain.utilities.stackexchange import StackExchangeAPIWrapper @@ -327,6 +333,8 @@ def __getattr__(name: str) -> Any: return _import_stackexchange() elif name == "SQLDatabase": return _import_sql_database() + elif name == "SteamWebAPIWrapper": + return _import_steam_webapi() elif name == "TensorflowDatasets": return _import_tensorflow_datasets() elif name == "TwilioAPIWrapper": @@ -373,6 +381,7 @@ def __getattr__(name: str) -> Any: "PythonREPL", "Requests", "RequestsWrapper", + "SteamWebAPIWrapper", "SQLDatabase", "SceneXplainAPIWrapper", "SearchApiAPIWrapper", diff --git a/libs/langchain/langchain/utilities/steam.py b/libs/langchain/langchain/utilities/steam.py new file mode 100644 index 0000000000000..48754c7ce4d00 --- /dev/null +++ b/libs/langchain/langchain/utilities/steam.py @@ -0,0 +1,164 @@ +"""Util that calls Steam-WebAPI.""" + +from typing import Any, List + +from langchain.pydantic_v1 import BaseModel, Extra, root_validator + + +class SteamWebAPIWrapper(BaseModel): + """Wrapper for Steam API.""" + + steam: Any # for python-steam-api + + from langchain.tools.steam.prompt import ( + STEAM_GET_GAMES_DETAILS, + STEAM_GET_RECOMMENDED_GAMES, + ) + + # operations: a list of dictionaries, each representing a specific operation that + # can be 
performed with the API + operations: List[dict] = [ + { + "mode": "get_game_details", + "name": "Get Game Details", + "description": STEAM_GET_GAMES_DETAILS, + }, + { + "mode": "get_recommended_games", + "name": "Get Recommended Games", + "description": STEAM_GET_RECOMMENDED_GAMES, + }, + ] + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + + def get_operations(self) -> List[dict]: + """Return a list of operations.""" + return self.operations + + @root_validator + def validate_environment(cls, values: dict) -> dict: + """Validate api key and python package has been configured.""" + + # check if the python package is installed + try: + from steam import Steam + except ImportError: + raise ImportError("python-steam-api library is not installed. ") + + try: + from decouple import config + except ImportError: + raise ImportError("decouple library is not installed. ") + + # initialize the steam attribute for python-steam-api usage + KEY = config("STEAM_KEY") + steam = Steam(KEY) + values["steam"] = steam + return values + + def parse_to_str(self, details: dict) -> str: # For later parsing + """Parse the details result.""" + result = "" + for key, value in details.items(): + result += "The " + str(key) + " is: " + str(value) + "\n" + return result + + def get_id_link_price(self, games: dict) -> dict: + """The response may contain more than one game, so we need to choose the right + one and return the id.""" + + game_info = {} + for app in games["apps"]: + game_info["id"] = app["id"] + game_info["link"] = app["link"] + game_info["price"] = app["price"] + break + return game_info + + def remove_html_tags(self, html_string: str) -> str: + from bs4 import BeautifulSoup + + soup = BeautifulSoup(html_string, "html.parser") + return soup.get_text() + + def details_of_games(self, name: str) -> str: + games = self.steam.apps.search_games(name) + info_partOne_dict = self.get_id_link_price(games) + info_partOne = 
self.parse_to_str(info_partOne_dict) + id = str(info_partOne_dict.get("id")) + info_dict = self.steam.apps.get_app_details(id) + data = info_dict.get(id).get("data") + detailed_description = data.get("detailed_description") + + # detailed_description contains

  • some other html tags, so we need to + # remove them + detailed_description = self.remove_html_tags(detailed_description) + supported_languages = info_dict.get(id).get("data").get("supported_languages") + info_partTwo = ( + "The summary of the game is: " + + detailed_description + + "\n" + + "The supported languages of the game are: " + + supported_languages + + "\n" + ) + info = info_partOne + info_partTwo + return info + + def get_steam_id(self, name: str) -> str: + user = self.steam.users.search_user(name) + steam_id = user["player"]["steamid"] + return steam_id + + def get_users_games(self, steam_id: str) -> List[str]: + return self.steam.users.get_owned_games(steam_id, False, False) + + def recommended_games(self, steam_id: str) -> str: + try: + import steamspypi + except ImportError: + raise ImportError("steamspypi library is not installed.") + users_games = self.get_users_games(steam_id) + result = {} # type: ignore + most_popular_genre = "" + most_popular_genre_count = 0 + for game in users_games["games"]: # type: ignore + appid = game["appid"] + data_request = {"request": "appdetails", "appid": appid} + genreStore = steamspypi.download(data_request) + genreList = genreStore.get("genre", "").split(", ") + + for genre in genreList: + if genre in result: + result[genre] += 1 + else: + result[genre] = 1 + if result[genre] > most_popular_genre_count: + most_popular_genre_count = result[genre] + most_popular_genre = genre + + data_request = dict() + data_request["request"] = "genre" + data_request["genre"] = most_popular_genre + data = steamspypi.download(data_request) + sorted_data = sorted( + data.values(), key=lambda x: x.get("average_forever", 0), reverse=True + ) + owned_games = [game["appid"] for game in users_games["games"]] # type: ignore + remaining_games = [ + game for game in sorted_data if game["appid"] not in owned_games + ] + top_5_popular_not_owned = [game["name"] for game in remaining_games[:5]] + return str(top_5_popular_not_owned) + + def 
run(self, mode: str, game: str) -> str: + if mode == "get_games_details": + return self.details_of_games(game) + elif mode == "get_recommended_games": + return self.recommended_games(game) + else: + raise ValueError(f"Invalid mode {mode} for Steam API.") diff --git a/libs/langchain/tests/integration_tests/utilities/test_steam_api.py b/libs/langchain/tests/integration_tests/utilities/test_steam_api.py new file mode 100644 index 0000000000000..24664b3943852 --- /dev/null +++ b/libs/langchain/tests/integration_tests/utilities/test_steam_api.py @@ -0,0 +1,22 @@ +import ast + +from langchain.utilities.steam import SteamWebAPIWrapper + + +def test_get_game_details() -> None: + """Test for getting game details on Steam""" + steam = SteamWebAPIWrapper() + output = steam.run("get_game_details", "Terraria") + assert "id" in output + assert "link" in output + assert "detailed description" in output + assert "supported languages" in output + assert "price" in output + + +def test_get_recommended_games() -> None: + """Test for getting recommended games on Steam""" + steam = SteamWebAPIWrapper() + output = steam.run("get_recommended_games", "76561198362745711") + output = ast.literal_eval(output) + assert len(output) == 5 diff --git a/libs/langchain/tests/unit_tests/tools/test_imports.py b/libs/langchain/tests/unit_tests/tools/test_imports.py index 58bd210e9bb00..fb30a00393c0c 100644 --- a/libs/langchain/tests/unit_tests/tools/test_imports.py +++ b/libs/langchain/tests/unit_tests/tools/test_imports.py @@ -101,6 +101,7 @@ "SleepTool", "StackExchangeTool", "StdInInquireTool", + "SteamWebAPIQueryRun", "SteamshipImageGenerationTool", "StructuredTool", "Tool", diff --git a/libs/langchain/tests/unit_tests/tools/test_public_api.py b/libs/langchain/tests/unit_tests/tools/test_public_api.py index 4db38fd13e13d..b0b3844c96489 100644 --- a/libs/langchain/tests/unit_tests/tools/test_public_api.py +++ b/libs/langchain/tests/unit_tests/tools/test_public_api.py @@ -105,6 +105,7 @@ 
"StackExchangeTool", "SteamshipImageGenerationTool", "StructuredTool", + "SteamWebAPIQueryRun", "Tool", "VectorStoreQATool", "VectorStoreQAWithSourcesTool", diff --git a/libs/langchain/tests/unit_tests/utilities/test_imports.py b/libs/langchain/tests/unit_tests/utilities/test_imports.py index f1e6a27eda96f..9650889869f55 100644 --- a/libs/langchain/tests/unit_tests/utilities/test_imports.py +++ b/libs/langchain/tests/unit_tests/utilities/test_imports.py @@ -38,6 +38,7 @@ "SerpAPIWrapper", "SparkSQL", "StackExchangeAPIWrapper", + "SteamWebAPIWrapper", "TensorflowDatasets", "TextRequestsWrapper", "TwilioAPIWrapper", From 805e9bfc244abae39deec00d40dca8a5a97ac4e5 Mon Sep 17 00:00:00 2001 From: Bob Lin Date: Mon, 4 Dec 2023 14:27:57 -0600 Subject: [PATCH 13/49] Add doc for the development of core and experimental sections (#13966) ### **Description** Hi, I just started learning the source code of `langchain` and hope to contribute code. However, according to the instructions in the [CONTRIBUTING.md](https://github.com/langchain-ai/langchain/blob/master/.github/CONTRIBUTING.md) document, I could not run the test command `make test` to run normally. I found that many modules did not exist after [splitting `langchain_core`](https://github.com/langchain-ai/langchain/discussions/13823), so I updated the document. ### **Twitter handle** lin_bob57617 --- .github/CONTRIBUTING.md | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 766458c414ea8..8a39944be096c 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -72,9 +72,10 @@ tell Poetry to use the virtualenv python environment (`poetry config virtualenvs ### Core vs. Experimental -This repository contains two separate projects: +This repository contains three separate projects: - `langchain`: core langchain code, abstractions, and use cases. 
-- `langchain.experimental`: see the [Experimental README](https://github.com/langchain-ai/langchain/tree/master/libs/experimental/README.md) for more information. +- `langchain_core`: contain interfaces for key abstractions as well as logic for combining them in chains (LCEL). +- `langchain_experimental`: see the [Experimental README](https://github.com/langchain-ai/langchain/tree/master/libs/experimental/README.md) for more information. Each of these has its own development environment. Docs are run from the top-level makefile, but development is split across separate test & release flows. @@ -128,6 +129,24 @@ make docker_tests There are also [integration tests and code-coverage](https://github.com/langchain-ai/langchain/tree/master/libs/langchain/tests/README.md) available. +### Only develop langchain_core or langchain_experimental + +If you are only developing `langchain_core` or `langchain_experimental`, you can simply install the dependencies for the respective projects and run tests: + +```bash +cd libs/core +poetry install --with test +make test +``` + +Or: + +```bash +cd libs/experimental +poetry install --with test +make test +``` + ### Formatting and Linting Run these locally before submitting a PR; the CI system will check also. From eecfa3f9e5daab7e082bd01c9f2cff06a4fa9116 Mon Sep 17 00:00:00 2001 From: Nithish Raghunandanan Date: Tue, 5 Dec 2023 01:58:12 +0530 Subject: [PATCH 14/49] Add Couchbase document loader (#13979) **Description:** Adds the document loader for [Couchbase](http://couchbase.com/), a distributed NoSQL database. **Dependencies:** Added the Couchbase SDK as an optional dependency. 
**Twitter handle:** nithishr --------- Co-authored-by: Bagatur --- .../document_loaders/couchbase.ipynb | 203 ++++++++++++++++++ .../langchain/document_loaders/__init__.py | 2 + .../langchain/document_loaders/couchbase.py | 100 +++++++++ libs/langchain/poetry.lock | 38 +++- libs/langchain/pyproject.toml | 2 + .../document_loaders/test_couchbase.py | 44 ++++ .../document_loaders/test_couchbase.py | 6 + .../document_loaders/test_imports.py | 1 + 8 files changed, 394 insertions(+), 2 deletions(-) create mode 100644 docs/docs/integrations/document_loaders/couchbase.ipynb create mode 100644 libs/langchain/langchain/document_loaders/couchbase.py create mode 100644 libs/langchain/tests/integration_tests/document_loaders/test_couchbase.py create mode 100644 libs/langchain/tests/unit_tests/document_loaders/test_couchbase.py diff --git a/docs/docs/integrations/document_loaders/couchbase.ipynb b/docs/docs/integrations/document_loaders/couchbase.ipynb new file mode 100644 index 0000000000000..f4f0d5c36cda3 --- /dev/null +++ b/docs/docs/integrations/document_loaders/couchbase.ipynb @@ -0,0 +1,203 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "2e895699-5aef-4d4a-aefc-c5e9cd4f4c79", + "metadata": {}, + "source": [ + "# Couchbase\n", + "[Couchbase](http://couchbase.com/) is an award-winning distributed NoSQL cloud database that delivers unmatched versatility, performance, scalability, and financial value for all of your cloud, mobile, AI, and edge computing applications.\n" + ] + }, + { + "cell_type": "markdown", + "id": "635ed775-2eb6-4e2f-ba76-cc1ada020b06", + "metadata": {}, + "source": [ + "## Installation" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "63156d35-0557-42f0-a9f6-f3d3e24670d0", + "metadata": {}, + "outputs": [], + "source": [ + "#!pip install couchbase" + ] + }, + { + "cell_type": "markdown", + "id": "e78ff273-c636-4c1d-8f8d-832b66bd1489", + "metadata": {}, + "source": [ + "## Querying for Documents from Couchbase\n", + "For more 
details on connecting to a Couchbase cluster, please check the [Python SDK documentation](https://docs.couchbase.com/python-sdk/current/howtos/managing-connections.html#connection-strings).\n", + "\n", + "For help with querying for documents using SQL++ (SQL for JSON), please check the [documentation](https://docs.couchbase.com/server/current/n1ql/n1ql-language-reference/index.html)." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "1889164a-9ab7-4551-aaba-ac687b712c17", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.document_loaders.couchbase import CouchbaseLoader\n", + "\n", + "connection_string = \"couchbase://localhost\" # valid Couchbase connection string\n", + "db_username = (\n", + " \"Administrator\" # valid database user with read access to the bucket being queried\n", + ")\n", + "db_password = \"Password\" # password for the database user\n", + "\n", + "# query is a valid SQL++ query\n", + "query = \"\"\"\n", + " SELECT h.* FROM `travel-sample`.inventory.hotel h \n", + " WHERE h.country = 'United States'\n", + " LIMIT 1\n", + " \"\"\"" + ] + }, + { + "cell_type": "markdown", + "id": "8f0c1dd5-1b35-4274-9f0f-a83e78925ff8", + "metadata": {}, + "source": [ + "## Create the Loader" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "68fe7115-d687-437a-a01f-71dcd4c49cae", + "metadata": {}, + "outputs": [], + "source": [ + "loader = CouchbaseLoader(\n", + " connection_string,\n", + " db_username,\n", + " db_password,\n", + " query,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "f34edaf2-fa9c-4395-97da-1465bb424fc7", + "metadata": {}, + "source": [ + "You can fetch the documents by calling the `load` method of the loader. It will return a list with all the documents. If you want to avoid this blocking call, you can call the `lazy_load` method, which returns an Iterator."
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "3c9cca7e-736c-4a32-91cb-00f0a6b9d043", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[Document(page_content='address: 8301 Hollister Ave\\nalias: None\\ncheckin: 12PM\\ncheckout: 4PM\\ncity: Santa Barbara\\ncountry: United States\\ndescription: Located on 78 acres of oceanfront property, this resort is an upscale experience that caters to luxury travelers. There are 354 guest rooms in 19 separate villas, each in a Spanish style. Property amenities include saline infinity pools, a private beach, clay tennis courts, a 42,000 foot spa and fitness center, and nature trails through the adjoining wetland and forest. The onsite Miro restaurant provides great views of the coast with excellent food and service. With all that said, you pay for the experience, and this resort is not for the budget traveler. In addition to quoted rates there is a $25 per day resort fee that includes a bottle of wine in your room, two bottles of water, access to fitness center and spa, and internet access.\\ndirections: None\\nemail: None\\nfax: None\\nfree_breakfast: True\\nfree_internet: False\\nfree_parking: False\\ngeo: {\\'accuracy\\': \\'ROOFTOP\\', \\'lat\\': 34.43429, \\'lon\\': -119.92137}\\nid: 10180\\nname: Bacara Resort & Spa\\npets_ok: False\\nphone: None\\nprice: $300-$1000+\\npublic_likes: [\\'Arnoldo Towne\\', \\'Olaf Turcotte\\', \\'Ruben Volkman\\', \\'Adella Aufderhar\\', \\'Elwyn Franecki\\']\\nreviews: [{\\'author\\': \\'Delmer Cole\\', \\'content\\': \"Jane and Joyce make every effort to see to your personal needs and comfort. The rooms take one back in time to the original styles and designs of the 1800\\'s. A real connection to local residents, the 905 is a regular tour stop and the oldest hotel in the French Quarter. My wife and I prefer to stay in the first floor rooms where there is a sitting room with TV, bedroom, bath and kitchen. 
The kitchen has a stove and refrigerator, sink, coffeemaker, etc. Plus there is a streetside private entrance (very good security system) and a covered balcony area with seating so you can watch passersby. Quaint, cozy, and most of all: ORIGINAL. No plastic remods. Feels like my great Grandmother\\'s place. While there are more luxurious places to stay, if you want the real flavor and eclectic style of N.O. you have to stay here. It just FEELS like New Orleans. The location is one block towards the river from Bourbon Street and smack dab in the middle of everything. Royal street is one of the nicest residential streets in the Quarter and you can walk back to your room and get some peace and quiet whenever you like. The French Quarter is always busy so we bring a small fan to turn on to make some white noise so we can sleep more soundly. Works great. You might not need it at the 905 but it\\'s a necessity it if you stay on or near Bourbon Street, which is very loud all the time. Parking tips: You can park right in front to unload and it\\'s only a couple blocks to the secure riverfront parking area. Plus there are several public parking lots nearby. My strategy is to get there early, unload, and drive around for a while near the hotel. It\\'s not too hard to find a parking place but be careful about where it is. Stay away from corner spots since streets are narrow and delivery trucks don\\'t have the room to turn and they will hit your car. Take note of the signs. Tuesday and Thursday they clean the streets and you can\\'t park in many areas when they do or they will tow your car. Once you find a spot don\\'t move it since everything is walking distance. If you find a good spot and get a ticket it will cost $20, which is cheaper than the daily rate at most parking garages. Even if you don\\'t get a ticket make sure to go online to N.O. traffic ticket site to check your license number for violations. 
Some local kids think it\\'s funny to take your ticket and throw it away since the fine doubles every month it\\'s not paid. You don\\'t know you got a ticket but your fine is getting bigger. We\\'ve been coming to the French Quarter for years and have stayed at many of the local hotels. The 905 Royal is our favorite.\", \\'date\\': \\'2013-12-05 09:27:07 +0300\\', \\'ratings\\': {\\'Cleanliness\\': 5, \\'Location\\': 5, \\'Overall\\': 5, \\'Rooms\\': 5, \\'Service\\': 5, \\'Sleep Quality\\': 5, \\'Value\\': 5}}, {\\'author\\': \\'Orval Lebsack\\', \\'content\\': \\'I stayed there with a friend for a girls trip around St. Patricks Day. This was my third time to NOLA, my first at Chateau Lemoyne. The location is excellent....very easy walking distance to everything, without the chaos of staying right on Bourbon Street. Even though its a Holiday Inn, it still has the historical feel and look of NOLA. The pool looked nice too, even though we never used it. The staff was friendly and helpful. Chateau Lemoyne would be hard to top, considering the price.\\', \\'date\\': \\'2013-10-26 15:01:39 +0300\\', \\'ratings\\': {\\'Cleanliness\\': 5, \\'Location\\': 5, \\'Overall\\': 4, \\'Rooms\\': 4, \\'Service\\': 4, \\'Sleep Quality\\': 5, \\'Value\\': 4}}, {\\'author\\': \\'Hildegard Larkin\\', \\'content\\': \\'This hotel is a safe bet for a value stay in French Quarter. Close enough to all sites and action but just out of the real loud & noisy streets. Check in is quick and friendly and room ( king side balcony) while dated was good size and clean. Small balcony with table & chairs is a nice option for evening drink & passing sites below. Down side is no mimi bar fridge ( they are available upon request on a first come basis apparently, so book one when you make initial reservation if necessary) Bathroom is adequate with ok shower pressure and housekeeping is quick and efficient. 
TIP; forget paying high price for conducted local tours, just take the red trams to end of line and back and then next day the green tram to cross town garden district and zoo and museums. cost for each ride $2.00 each way!! fantastic. Tip: If you stay during hot weather make sure you top up on ice early as later guests can \"run the machine dry\" for short time. Overall experience met expectations and would recommend for value stay.\\', \\'date\\': \\'2012-01-01 18:48:30 +0300\\', \\'ratings\\': {\\'Cleanliness\\': 4, \\'Location\\': 4, \\'Overall\\': 4, \\'Rooms\\': 3, \\'Service\\': 4, \\'Sleep Quality\\': 3, \\'Value\\': 4}}, {\\'author\\': \\'Uriah Rohan\\', \\'content\\': \\'The Chateau Le Moyne Holiday Inn is in a perfect location in the French Quarter, a block away from the craziness on Bourbon St. We got a fantastic deal on Priceline and were expecting a standard room for the price. The pleasant hotel clerk upgraded our room much to our delight, without us asking and the concierge also went above an beyond to assist us with information and suggestions for places to dine and possessed an \"can do\" attitude. Nice pool area to cool off in during the midday NOLA heat. It is definitely a three star establishment, not super luxurious but the beds were comfy and the location superb! 
If you can get a deal on Priceline, etc, it\\\\\\'s a great value.\\', \\'date\\': \\'2014-08-04 15:17:49 +0300\\', \\'ratings\\': {\\'Cleanliness\\': 4, \\'Location\\': 5, \\'Overall\\': 4, \\'Rooms\\': 3, \\'Service\\': 5, \\'Sleep Quality\\': 4, \\'Value\\': 4}}]\\nstate: California\\ntitle: Goleta\\ntollfree: None\\ntype: hotel\\nurl: http://www.bacararesort.com/\\nvacancy: True')]\n" + ] + } + ], + "source": [ + "docs = loader.load()\n", + "print(docs)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "c1cd6e4b-c38f-4ef8-8795-94d59a3320f9", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "page_content='address: 8301 Hollister Ave\\nalias: None\\ncheckin: 12PM\\ncheckout: 4PM\\ncity: Santa Barbara\\ncountry: United States\\ndescription: Located on 78 acres of oceanfront property, this resort is an upscale experience that caters to luxury travelers. There are 354 guest rooms in 19 separate villas, each in a Spanish style. Property amenities include saline infinity pools, a private beach, clay tennis courts, a 42,000 foot spa and fitness center, and nature trails through the adjoining wetland and forest. The onsite Miro restaurant provides great views of the coast with excellent food and service. With all that said, you pay for the experience, and this resort is not for the budget traveler. 
In addition to quoted rates there is a $25 per day resort fee that includes a bottle of wine in your room, two bottles of water, access to fitness center and spa, and internet access.\\ndirections: None\\nemail: None\\nfax: None\\nfree_breakfast: True\\nfree_internet: False\\nfree_parking: False\\ngeo: {\\'accuracy\\': \\'ROOFTOP\\', \\'lat\\': 34.43429, \\'lon\\': -119.92137}\\nid: 10180\\nname: Bacara Resort & Spa\\npets_ok: False\\nphone: None\\nprice: $300-$1000+\\npublic_likes: [\\'Arnoldo Towne\\', \\'Olaf Turcotte\\', \\'Ruben Volkman\\', \\'Adella Aufderhar\\', \\'Elwyn Franecki\\']\\nreviews: [{\\'author\\': \\'Delmer Cole\\', \\'content\\': \"Jane and Joyce make every effort to see to your personal needs and comfort. The rooms take one back in time to the original styles and designs of the 1800\\'s. A real connection to local residents, the 905 is a regular tour stop and the oldest hotel in the French Quarter. My wife and I prefer to stay in the first floor rooms where there is a sitting room with TV, bedroom, bath and kitchen. The kitchen has a stove and refrigerator, sink, coffeemaker, etc. Plus there is a streetside private entrance (very good security system) and a covered balcony area with seating so you can watch passersby. Quaint, cozy, and most of all: ORIGINAL. No plastic remods. Feels like my great Grandmother\\'s place. While there are more luxurious places to stay, if you want the real flavor and eclectic style of N.O. you have to stay here. It just FEELS like New Orleans. The location is one block towards the river from Bourbon Street and smack dab in the middle of everything. Royal street is one of the nicest residential streets in the Quarter and you can walk back to your room and get some peace and quiet whenever you like. The French Quarter is always busy so we bring a small fan to turn on to make some white noise so we can sleep more soundly. Works great. 
You might not need it at the 905 but it\\'s a necessity it if you stay on or near Bourbon Street, which is very loud all the time. Parking tips: You can park right in front to unload and it\\'s only a couple blocks to the secure riverfront parking area. Plus there are several public parking lots nearby. My strategy is to get there early, unload, and drive around for a while near the hotel. It\\'s not too hard to find a parking place but be careful about where it is. Stay away from corner spots since streets are narrow and delivery trucks don\\'t have the room to turn and they will hit your car. Take note of the signs. Tuesday and Thursday they clean the streets and you can\\'t park in many areas when they do or they will tow your car. Once you find a spot don\\'t move it since everything is walking distance. If you find a good spot and get a ticket it will cost $20, which is cheaper than the daily rate at most parking garages. Even if you don\\'t get a ticket make sure to go online to N.O. traffic ticket site to check your license number for violations. Some local kids think it\\'s funny to take your ticket and throw it away since the fine doubles every month it\\'s not paid. You don\\'t know you got a ticket but your fine is getting bigger. We\\'ve been coming to the French Quarter for years and have stayed at many of the local hotels. The 905 Royal is our favorite.\", \\'date\\': \\'2013-12-05 09:27:07 +0300\\', \\'ratings\\': {\\'Cleanliness\\': 5, \\'Location\\': 5, \\'Overall\\': 5, \\'Rooms\\': 5, \\'Service\\': 5, \\'Sleep Quality\\': 5, \\'Value\\': 5}}, {\\'author\\': \\'Orval Lebsack\\', \\'content\\': \\'I stayed there with a friend for a girls trip around St. Patricks Day. This was my third time to NOLA, my first at Chateau Lemoyne. The location is excellent....very easy walking distance to everything, without the chaos of staying right on Bourbon Street. Even though its a Holiday Inn, it still has the historical feel and look of NOLA. 
The pool looked nice too, even though we never used it. The staff was friendly and helpful. Chateau Lemoyne would be hard to top, considering the price.\\', \\'date\\': \\'2013-10-26 15:01:39 +0300\\', \\'ratings\\': {\\'Cleanliness\\': 5, \\'Location\\': 5, \\'Overall\\': 4, \\'Rooms\\': 4, \\'Service\\': 4, \\'Sleep Quality\\': 5, \\'Value\\': 4}}, {\\'author\\': \\'Hildegard Larkin\\', \\'content\\': \\'This hotel is a safe bet for a value stay in French Quarter. Close enough to all sites and action but just out of the real loud & noisy streets. Check in is quick and friendly and room ( king side balcony) while dated was good size and clean. Small balcony with table & chairs is a nice option for evening drink & passing sites below. Down side is no mimi bar fridge ( they are available upon request on a first come basis apparently, so book one when you make initial reservation if necessary) Bathroom is adequate with ok shower pressure and housekeeping is quick and efficient. TIP; forget paying high price for conducted local tours, just take the red trams to end of line and back and then next day the green tram to cross town garden district and zoo and museums. cost for each ride $2.00 each way!! fantastic. Tip: If you stay during hot weather make sure you top up on ice early as later guests can \"run the machine dry\" for short time. Overall experience met expectations and would recommend for value stay.\\', \\'date\\': \\'2012-01-01 18:48:30 +0300\\', \\'ratings\\': {\\'Cleanliness\\': 4, \\'Location\\': 4, \\'Overall\\': 4, \\'Rooms\\': 3, \\'Service\\': 4, \\'Sleep Quality\\': 3, \\'Value\\': 4}}, {\\'author\\': \\'Uriah Rohan\\', \\'content\\': \\'The Chateau Le Moyne Holiday Inn is in a perfect location in the French Quarter, a block away from the craziness on Bourbon St. We got a fantastic deal on Priceline and were expecting a standard room for the price. 
The pleasant hotel clerk upgraded our room much to our delight, without us asking and the concierge also went above an beyond to assist us with information and suggestions for places to dine and possessed an \"can do\" attitude. Nice pool area to cool off in during the midday NOLA heat. It is definitely a three star establishment, not super luxurious but the beds were comfy and the location superb! If you can get a deal on Priceline, etc, it\\\\\\'s a great value.\\', \\'date\\': \\'2014-08-04 15:17:49 +0300\\', \\'ratings\\': {\\'Cleanliness\\': 4, \\'Location\\': 5, \\'Overall\\': 4, \\'Rooms\\': 3, \\'Service\\': 5, \\'Sleep Quality\\': 4, \\'Value\\': 4}}]\\nstate: California\\ntitle: Goleta\\ntollfree: None\\ntype: hotel\\nurl: http://www.bacararesort.com/\\nvacancy: True'\n" + ] + } + ], + "source": [ + "docs_iterator = loader.lazy_load()\n", + "for doc in docs_iterator:\n", + " print(doc)\n", + " break" + ] + }, + { + "cell_type": "markdown", + "id": "9e48f5c1-d573-4859-846a-09d127340a3c", + "metadata": {}, + "source": [ + "## Specifying Fields with Content and Metadata\n", + "The fields that are part of the Document content can be specified using the `page_content_fields` parameter.\n", + "The metadata fields for the Document can be specified using the `metadata_fields` parameter." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "9312bf1c-a32b-458d-9407-0e204c3e08f0", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[Document(page_content='address: 8301 Hollister Ave\\ncity: Santa Barbara\\ncountry: United States\\ndescription: Located on 78 acres of oceanfront property, this resort is an upscale experience that caters to luxury travelers. There are 354 guest rooms in 19 separate villas, each in a Spanish style. 
Property amenities include saline infinity pools, a private beach, clay tennis courts, a 42,000 foot spa and fitness center, and nature trails through the adjoining wetland and forest. The onsite Miro restaurant provides great views of the coast with excellent food and service. With all that said, you pay for the experience, and this resort is not for the budget traveler. In addition to quoted rates there is a $25 per day resort fee that includes a bottle of wine in your room, two bottles of water, access to fitness center and spa, and internet access.\\ngeo: {\\'accuracy\\': \\'ROOFTOP\\', \\'lat\\': 34.43429, \\'lon\\': -119.92137}\\nname: Bacara Resort & Spa\\nphone: None\\nreviews: [{\\'author\\': \\'Delmer Cole\\', \\'content\\': \"Jane and Joyce make every effort to see to your personal needs and comfort. The rooms take one back in time to the original styles and designs of the 1800\\'s. A real connection to local residents, the 905 is a regular tour stop and the oldest hotel in the French Quarter. My wife and I prefer to stay in the first floor rooms where there is a sitting room with TV, bedroom, bath and kitchen. The kitchen has a stove and refrigerator, sink, coffeemaker, etc. Plus there is a streetside private entrance (very good security system) and a covered balcony area with seating so you can watch passersby. Quaint, cozy, and most of all: ORIGINAL. No plastic remods. Feels like my great Grandmother\\'s place. While there are more luxurious places to stay, if you want the real flavor and eclectic style of N.O. you have to stay here. It just FEELS like New Orleans. The location is one block towards the river from Bourbon Street and smack dab in the middle of everything. Royal street is one of the nicest residential streets in the Quarter and you can walk back to your room and get some peace and quiet whenever you like. The French Quarter is always busy so we bring a small fan to turn on to make some white noise so we can sleep more soundly. 
Works great. You might not need it at the 905 but it\\'s a necessity it if you stay on or near Bourbon Street, which is very loud all the time. Parking tips: You can park right in front to unload and it\\'s only a couple blocks to the secure riverfront parking area. Plus there are several public parking lots nearby. My strategy is to get there early, unload, and drive around for a while near the hotel. It\\'s not too hard to find a parking place but be careful about where it is. Stay away from corner spots since streets are narrow and delivery trucks don\\'t have the room to turn and they will hit your car. Take note of the signs. Tuesday and Thursday they clean the streets and you can\\'t park in many areas when they do or they will tow your car. Once you find a spot don\\'t move it since everything is walking distance. If you find a good spot and get a ticket it will cost $20, which is cheaper than the daily rate at most parking garages. Even if you don\\'t get a ticket make sure to go online to N.O. traffic ticket site to check your license number for violations. Some local kids think it\\'s funny to take your ticket and throw it away since the fine doubles every month it\\'s not paid. You don\\'t know you got a ticket but your fine is getting bigger. We\\'ve been coming to the French Quarter for years and have stayed at many of the local hotels. The 905 Royal is our favorite.\", \\'date\\': \\'2013-12-05 09:27:07 +0300\\', \\'ratings\\': {\\'Cleanliness\\': 5, \\'Location\\': 5, \\'Overall\\': 5, \\'Rooms\\': 5, \\'Service\\': 5, \\'Sleep Quality\\': 5, \\'Value\\': 5}}, {\\'author\\': \\'Orval Lebsack\\', \\'content\\': \\'I stayed there with a friend for a girls trip around St. Patricks Day. This was my third time to NOLA, my first at Chateau Lemoyne. The location is excellent....very easy walking distance to everything, without the chaos of staying right on Bourbon Street. Even though its a Holiday Inn, it still has the historical feel and look of NOLA. 
The pool looked nice too, even though we never used it. The staff was friendly and helpful. Chateau Lemoyne would be hard to top, considering the price.\\', \\'date\\': \\'2013-10-26 15:01:39 +0300\\', \\'ratings\\': {\\'Cleanliness\\': 5, \\'Location\\': 5, \\'Overall\\': 4, \\'Rooms\\': 4, \\'Service\\': 4, \\'Sleep Quality\\': 5, \\'Value\\': 4}}, {\\'author\\': \\'Hildegard Larkin\\', \\'content\\': \\'This hotel is a safe bet for a value stay in French Quarter. Close enough to all sites and action but just out of the real loud & noisy streets. Check in is quick and friendly and room ( king side balcony) while dated was good size and clean. Small balcony with table & chairs is a nice option for evening drink & passing sites below. Down side is no mimi bar fridge ( they are available upon request on a first come basis apparently, so book one when you make initial reservation if necessary) Bathroom is adequate with ok shower pressure and housekeeping is quick and efficient. TIP; forget paying high price for conducted local tours, just take the red trams to end of line and back and then next day the green tram to cross town garden district and zoo and museums. cost for each ride $2.00 each way!! fantastic. Tip: If you stay during hot weather make sure you top up on ice early as later guests can \"run the machine dry\" for short time. Overall experience met expectations and would recommend for value stay.\\', \\'date\\': \\'2012-01-01 18:48:30 +0300\\', \\'ratings\\': {\\'Cleanliness\\': 4, \\'Location\\': 4, \\'Overall\\': 4, \\'Rooms\\': 3, \\'Service\\': 4, \\'Sleep Quality\\': 3, \\'Value\\': 4}}, {\\'author\\': \\'Uriah Rohan\\', \\'content\\': \\'The Chateau Le Moyne Holiday Inn is in a perfect location in the French Quarter, a block away from the craziness on Bourbon St. We got a fantastic deal on Priceline and were expecting a standard room for the price. 
The pleasant hotel clerk upgraded our room much to our delight, without us asking and the concierge also went above an beyond to assist us with information and suggestions for places to dine and possessed an \"can do\" attitude. Nice pool area to cool off in during the midday NOLA heat. It is definitely a three star establishment, not super luxurious but the beds were comfy and the location superb! If you can get a deal on Priceline, etc, it\\\\\\'s a great value.\\', \\'date\\': \\'2014-08-04 15:17:49 +0300\\', \\'ratings\\': {\\'Cleanliness\\': 4, \\'Location\\': 5, \\'Overall\\': 4, \\'Rooms\\': 3, \\'Service\\': 5, \\'Sleep Quality\\': 4, \\'Value\\': 4}}]', metadata={'id': 10180})]\n" + ] + } + ], + "source": [ + "loader_with_selected_fields = CouchbaseLoader(\n", + " connection_string,\n", + " db_username,\n", + " db_password,\n", + " query,\n", + " page_content_fields=[\n", + " \"address\",\n", + " \"name\",\n", + " \"city\",\n", + " \"phone\",\n", + " \"country\",\n", + " \"geo\",\n", + " \"description\",\n", + " \"reviews\",\n", + " ],\n", + " metadata_fields=[\"id\"],\n", + ")\n", + "docs_with_selected_fields = loader_with_selected_fields.load()\n", + "print(docs_with_selected_fields)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/libs/langchain/langchain/document_loaders/__init__.py b/libs/langchain/langchain/document_loaders/__init__.py index d52d3b955b072..ba3867ffcb43c 100644 --- a/libs/langchain/langchain/document_loaders/__init__.py +++ b/libs/langchain/langchain/document_loaders/__init__.py @@ -62,6 +62,7 @@ from 
langchain.document_loaders.concurrent import ConcurrentLoader from langchain.document_loaders.confluence import ConfluenceLoader from langchain.document_loaders.conllu import CoNLLULoader +from langchain.document_loaders.couchbase import CouchbaseLoader from langchain.document_loaders.csv_loader import CSVLoader, UnstructuredCSVLoader from langchain.document_loaders.cube_semantic import CubeSemanticLoader from langchain.document_loaders.datadog_logs import DatadogLogsLoader @@ -247,6 +248,7 @@ "CollegeConfidentialLoader", "ConcurrentLoader", "ConfluenceLoader", + "CouchbaseLoader", "CubeSemanticLoader", "DataFrameLoader", "DatadogLogsLoader", diff --git a/libs/langchain/langchain/document_loaders/couchbase.py b/libs/langchain/langchain/document_loaders/couchbase.py new file mode 100644 index 0000000000000..8f8659a0c60b8 --- /dev/null +++ b/libs/langchain/langchain/document_loaders/couchbase.py @@ -0,0 +1,100 @@ +import logging +from typing import Iterator, List, Optional + +from langchain_core.documents import Document + +from langchain.document_loaders.base import BaseLoader + +logger = logging.getLogger(__name__) + + +class CouchbaseLoader(BaseLoader): + """Load documents from `Couchbase`. + + Each document represents one row of the result. The `page_content_fields` are + written into the `page_content` of the document. The `metadata_fields` are written + into the `metadata` of the document. By default, all columns are written into + the `page_content` and none into the `metadata`. + """ + + def __init__( + self, + connection_string: str, + db_username: str, + db_password: str, + query: str, + *, + page_content_fields: Optional[List[str]] = None, + metadata_fields: Optional[List[str]] = None, + ) -> None: + """Initialize Couchbase document loader. + + Args: + connection_string (str): The connection string to the Couchbase cluster. + db_username (str): The username to connect to the Couchbase cluster.
+ db_password (str): The password to connect to the Couchbase cluster. + query (str): The SQL++ query to execute. + page_content_fields (Optional[List[str]]): The columns to write into the + `page_content` field of the document. By default, all columns are + written. + metadata_fields (Optional[List[str]]): The columns to write into the + `metadata` field of the document. By default, no columns are written. + """ + try: + from couchbase.auth import PasswordAuthenticator + from couchbase.cluster import Cluster + from couchbase.options import ClusterOptions + except ImportError as e: + raise ImportError( + "Could not import couchbase package. " + "Please install couchbase SDK with `pip install couchbase`." + ) from e + if not connection_string: + raise ValueError("connection_string must be provided.") + + if not db_username: + raise ValueError("db_username must be provided.") + + if not db_password: + raise ValueError("db_password must be provided.") + + auth = PasswordAuthenticator( + db_username, + db_password, + ) + + self.cluster: Cluster = Cluster(connection_string, ClusterOptions(auth)) + self.query = query + self.page_content_fields = page_content_fields + self.metadata_fields = metadata_fields + + def load(self) -> List[Document]: + """Load Couchbase data into Document objects.""" + return list(self.lazy_load()) + + def lazy_load(self) -> Iterator[Document]: + """Load Couchbase data into Document objects lazily.""" + from datetime import timedelta + + # Ensure connection to Couchbase cluster + self.cluster.wait_until_ready(timedelta(seconds=5)) + + # Run SQL++ Query + result = self.cluster.query(self.query) + for row in result: + metadata_fields = self.metadata_fields + page_content_fields = self.page_content_fields + + if not page_content_fields: + page_content_fields = list(row.keys()) + + if not metadata_fields: + metadata_fields = [] + + metadata = {field: row[field] for field in metadata_fields} + + document = "\n".join( + f"{k}: {v}" for k, v in
row.items() if k in page_content_fields + ) + + yield (Document(page_content=document, metadata=metadata)) diff --git a/libs/langchain/poetry.lock b/libs/langchain/poetry.lock index 717a022829d74..4c983673e8c00 100644 --- a/libs/langchain/poetry.lock +++ b/libs/langchain/poetry.lock @@ -1662,6 +1662,40 @@ lint = ["black (>=22.6.0)", "mdformat (>0.7)", "mdformat-gfm (>=0.3.5)", "ruff ( test = ["pytest"] typing = ["mypy (>=0.990)"] +[[package]] +name = "couchbase" +version = "4.1.9" +description = "Python Client for Couchbase" +optional = true +python-versions = ">=3.7" +files = [ + {file = "couchbase-4.1.9-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:f36f65d5ea66ebebe8f9055feb44c72b60b64b8c466ee177c7eaf6d97b71b41a"}, + {file = "couchbase-4.1.9-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b750cb641a44084137444e86ba2cf596e713dceaaa8dcd4a09c370ddd5e3bca2"}, + {file = "couchbase-4.1.9-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:371f4c5e00965d6579e98cd6e49eb8543e3aeabb64d9ac41dae5b85c831faed4"}, + {file = "couchbase-4.1.9-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:cfe53bfa29d72d5fa921554408ff7fada301e4641b652f2551060ebd3d1cc096"}, + {file = "couchbase-4.1.9-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:d675d0d862eae34ebedd09e4f40e26ac0092ea0dca93520616cd68d195a1fb3a"}, + {file = "couchbase-4.1.9-cp310-cp310-win_amd64.whl", hash = "sha256:c8adc08a70cbe5e1b1e0e45ebbb4ea5879b3f1aba64d09770d6e35a760201609"}, + {file = "couchbase-4.1.9-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:8f3e16fedb2dd79dba81df5eb1fb6e493ee720ef12be5a2699ac540955775647"}, + {file = "couchbase-4.1.9-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8bb93e17304499fb9b6972efe8a75ea156a097eed983b4802a478ad6cef500b3"}, + {file = "couchbase-4.1.9-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:85da68da9efd5ed35d031a5725744ee36653f940ad16c252d9927f481581366c"}, + {file = 
"couchbase-4.1.9-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:e04f014a8990c89195689af4d332028a6769b45221d861778c079e9f67184e6e"}, + {file = "couchbase-4.1.9-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:50db238605620ca1a2f4ed36f7820a2d61323a8a425986fd3caf1d9be4eb7f46"}, + {file = "couchbase-4.1.9-cp311-cp311-win_amd64.whl", hash = "sha256:ba9312755c88d39d86cae7ba11c15a6255d8afe5c552bbc1e2f6b66c880bd08e"}, + {file = "couchbase-4.1.9-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:17bdf8db3721e4f7c54b7e50db16fa6c65733d45cfd6c3bf50cd80a7f1672ea8"}, + {file = "couchbase-4.1.9-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:a2fb14336b78843691a9f007fbbd0c33959ea4ae4e323112614673601772fb84"}, + {file = "couchbase-4.1.9-cp38-cp38-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:3af36a4b25f948a4dd1a349ba5ddfa87a228cbdfbb8228a5045e187849392857"}, + {file = "couchbase-4.1.9-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f1a6d03fb4fc76aedeede7a55f957936863256b654ce38f05a508925cbd1c713"}, + {file = "couchbase-4.1.9-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:454c46c9fb6e485f1aba53f64a4b794e2146db480ccd32eaa80b2bba0f53895e"}, + {file = "couchbase-4.1.9-cp38-cp38-win_amd64.whl", hash = "sha256:4c35c2ef600677121b95540c8e78bb43ce5d18cafd49036ea256643ed00ac042"}, + {file = "couchbase-4.1.9-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:40bebe809042efceae95fba8d2a1f0bfecd144c090cf638d8283e038ffea6f19"}, + {file = "couchbase-4.1.9-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f9e956b6580baf4365c4a1b4e22622dc0948447f5ce106d24ed59532302b164f"}, + {file = "couchbase-4.1.9-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:150916388ee2813d242de014fb3ad5e259103e5cd0f1ce600280cc1c11732980"}, + {file = "couchbase-4.1.9-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:bf2d1fc8fe22f6e3e4b5e41c7fc367a3a4537dd272a26859f01796724d2ae977"}, + {file 
= "couchbase-4.1.9-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:9d9ffbb6897a3e68193a8611032230e5d520ae07ae74923305acf8670eb5281b"}, + {file = "couchbase-4.1.9-cp39-cp39-win_amd64.whl", hash = "sha256:b11ff93f4b5da9437fdfb384943dfbf0dac054394d30d21b5e50852dc1d27d2a"}, + {file = "couchbase-4.1.9.tar.gz", hash = "sha256:ee476c5e5b420610e5f4ce778b8c6c7a513f9f4dd4b57fe25000e94ad6eefb9e"}, +] + [[package]] name = "coverage" version = "7.3.2" @@ -11475,7 +11509,7 @@ cli = ["typer"] cohere = ["cohere"] docarray = ["docarray"] embeddings = ["sentence-transformers"] -extended-testing = ["aiosqlite", "aleph-alpha-client", "anthropic", "arxiv", "assemblyai", "atlassian-python-api", "beautifulsoup4", "bibtexparser", "cassio", "chardet", "cohere", "dashvector", "databricks-vectorsearch", "datasets", "dgml-utils", "esprima", "faiss-cpu", "feedparser", "fireworks-ai", "geopandas", "gitpython", "google-cloud-documentai", "gql", "hologres-vector", "html2text", "javelin-sdk", "jinja2", "jq", "jsonschema", "lxml", "markdownify", "motor", "msal", "mwparserfromhell", "mwxml", "newspaper3k", "numexpr", "openai", "openai", "openapi-pydantic", "pandas", "pdfminer-six", "pgvector", "praw", "psychicapi", "py-trello", "pymupdf", "pypdf", "pypdfium2", "pyspark", "rank-bm25", "rapidfuzz", "rapidocr-onnxruntime", "requests-toolbelt", "rspace_client", "scikit-learn", "sqlite-vss", "streamlit", "sympy", "telethon", "timescale-vector", "tqdm", "upstash-redis", "xata", "xmltodict"] +extended-testing = ["aiosqlite", "aleph-alpha-client", "anthropic", "arxiv", "assemblyai", "atlassian-python-api", "beautifulsoup4", "bibtexparser", "cassio", "chardet", "cohere", "couchbase", "dashvector", "databricks-vectorsearch", "datasets", "dgml-utils", "esprima", "faiss-cpu", "feedparser", "fireworks-ai", "geopandas", "gitpython", "google-cloud-documentai", "gql", "hologres-vector", "html2text", "javelin-sdk", "jinja2", "jq", "jsonschema", "lxml", "markdownify", "motor", "msal", "mwparserfromhell", "mwxml", 
"newspaper3k", "numexpr", "openai", "openai", "openapi-pydantic", "pandas", "pdfminer-six", "pgvector", "praw", "psychicapi", "py-trello", "pymupdf", "pypdf", "pypdfium2", "pyspark", "rank-bm25", "rapidfuzz", "rapidocr-onnxruntime", "requests-toolbelt", "rspace_client", "scikit-learn", "sqlite-vss", "streamlit", "sympy", "telethon", "timescale-vector", "tqdm", "upstash-redis", "xata", "xmltodict"] javascript = ["esprima"] llms = ["clarifai", "cohere", "huggingface_hub", "manifest-ml", "nlpcloud", "openai", "openlm", "torch", "transformers"] openai = ["openai", "tiktoken"] @@ -11485,4 +11519,4 @@ text-helpers = ["chardet"] [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<4.0" -content-hash = "0cd9769243ade0dc1df941e902aa66c18a57333ae50309f004b4f60e6e27b5cf" +content-hash = "f4791327aca4bf3db1b46731d987347b537e638a1be85b2a6a771e52f95d3f29" diff --git a/libs/langchain/pyproject.toml b/libs/langchain/pyproject.toml index 31e0a60f89a6a..7377756009ab5 100644 --- a/libs/langchain/pyproject.toml +++ b/libs/langchain/pyproject.toml @@ -147,6 +147,7 @@ hologres-vector = {version = "^0.0.6", optional = true} praw = {version = "^7.7.1", optional = true} msal = {version = "^1.25.0", optional = true} databricks-vectorsearch = {version = "^0.21", optional = true} +couchbase = {version = "^4.1.9", optional = true} dgml-utils = {version = "^0.3.0", optional = true} datasets = {version = "^2.15.0", optional = true} @@ -391,6 +392,7 @@ extended_testing = [ "hologres-vector", "praw", "databricks-vectorsearch", + "couchbase", "dgml-utils", "cohere", ] diff --git a/libs/langchain/tests/integration_tests/document_loaders/test_couchbase.py b/libs/langchain/tests/integration_tests/document_loaders/test_couchbase.py new file mode 100644 index 0000000000000..d4585d0796b54 --- /dev/null +++ b/libs/langchain/tests/integration_tests/document_loaders/test_couchbase.py @@ -0,0 +1,44 @@ +import unittest + +from langchain.document_loaders.couchbase import CouchbaseLoader + +try: + import 
couchbase # noqa: F401 + + couchbase_installed = True +except ImportError: + couchbase_installed = False + + +@unittest.skipIf(not couchbase_installed, "couchbase not installed") +class TestCouchbaseLoader(unittest.TestCase): + def setUp(self) -> None: + self.conn_string = "" + self.database_user = "" + self.database_password = "" + self.valid_query = "select h.* from `travel-sample`.inventory.hotel h limit 10" + self.valid_page_content_fields = ["country", "name", "description"] + self.valid_metadata_fields = ["id"] + + def test_couchbase_loader(self) -> None: + """Test Couchbase loader.""" + loader = CouchbaseLoader( + connection_string=self.conn_string, + db_username=self.database_user, + db_password=self.database_password, + query=self.valid_query, + page_content_fields=self.valid_page_content_fields, + metadata_fields=self.valid_metadata_fields, + ) + docs = loader.load() + print(docs) + + assert len(docs) > 0 # assuming the query returns at least one document + for doc in docs: + print(doc) + assert ( + doc.page_content != "" + ) # assuming that every document has page_content + assert ( + "id" in doc.metadata and doc.metadata["id"] != "" + ) # assuming that every document has 'id' diff --git a/libs/langchain/tests/unit_tests/document_loaders/test_couchbase.py b/libs/langchain/tests/unit_tests/document_loaders/test_couchbase.py new file mode 100644 index 0000000000000..ec05691cbb0ec --- /dev/null +++ b/libs/langchain/tests/unit_tests/document_loaders/test_couchbase.py @@ -0,0 +1,6 @@ +"""Test importing the Couchbase document loader.""" + + +def test_couchbase_import() -> None: + """Test that the Couchbase document loader can be imported.""" + from langchain.document_loaders import CouchbaseLoader # noqa: F401 diff --git a/libs/langchain/tests/unit_tests/document_loaders/test_imports.py b/libs/langchain/tests/unit_tests/document_loaders/test_imports.py index 5da2e800ab87b..18f6f22a5f00d 100644 --- 
a/libs/langchain/tests/unit_tests/document_loaders/test_imports.py +++ b/libs/langchain/tests/unit_tests/document_loaders/test_imports.py @@ -41,6 +41,7 @@ "CollegeConfidentialLoader", "ConcurrentLoader", "ConfluenceLoader", + "CouchbaseLoader", "CubeSemanticLoader", "DataFrameLoader", "DatadogLogsLoader", From f6d68d78f38cb0874432331d29892728a066229b Mon Sep 17 00:00:00 2001 From: Erick Friis Date: Mon, 4 Dec 2023 12:50:56 -0800 Subject: [PATCH 15/49] nbdoc -> quarto (#14156) Switches to a more maintained solution for building ipynb -> md files (`quarto`) Also bumps us down to python3.8 because it's significantly faster in the vercel build step. Uses default openssl version instead of upgrading as well. --- docs/.local_build.sh | 10 ++- docs/docs/expression_language/why.ipynb | 47 +++++++------- docs/docs/guides/debugging.md | 2 +- docs/docs/guides/local_llms.ipynb | 6 +- .../modules/chains/document/map_reduce.ipynb | 2 +- .../modules/chains/document/map_rerank.ipynb | 2 +- .../docs/modules/chains/document/refine.ipynb | 2 +- docs/docs/modules/chains/document/stuff.ipynb | 2 +- docs/docs/use_cases/apis.ipynb | 8 +-- docs/docs/use_cases/chatbots.ipynb | 4 +- docs/docs/use_cases/extraction.ipynb | 14 +++-- docs/docs/use_cases/qa_structured/sql.ipynb | 8 +-- .../code_understanding.ipynb | 4 +- .../use_cases/question_answering/index.ipynb | 4 +- docs/docs/use_cases/summarization.ipynb | 6 +- docs/docs/use_cases/tagging.ipynb | 4 +- docs/docs/use_cases/web_scraping.ipynb | 6 +- docs/vercel_build.sh | 61 +++++-------------- docs/vercel_requirements.txt | 2 +- 19 files changed, 84 insertions(+), 110 deletions(-) diff --git a/docs/.local_build.sh b/docs/.local_build.sh index ee1bd67b30c73..0c67d35024a70 100755 --- a/docs/.local_build.sh +++ b/docs/.local_build.sh @@ -12,14 +12,12 @@ mkdir -p ../_dist rsync -ruv . 
../_dist cd ../_dist poetry run python scripts/model_feat_table.py -poetry run nbdoc_build --srcdir docs --pause 0 -mkdir docs/templates -cp ../templates/docs/INDEX.md docs/templates/index.md cp ../cookbook/README.md src/pages/cookbook.mdx cp ../.github/CONTRIBUTING.md docs/contributing.md mkdir -p docs/templates cp ../templates/docs/INDEX.md docs/templates/index.md wget https://raw.githubusercontent.com/langchain-ai/langserve/main/README.md -O docs/langserve.md -poetry run python scripts/generate_api_reference_links.py -yarn install -yarn start + +yarn + +quarto preview docs diff --git a/docs/docs/expression_language/why.ipynb b/docs/docs/expression_language/why.ipynb index aac03fed562df..a1e17a1cbddae 100644 --- a/docs/docs/expression_language/why.ipynb +++ b/docs/docs/expression_language/why.ipynb @@ -10,7 +10,7 @@ "title: Why use LCEL\n", "---\n", "\n", - "import { ColumnContainer, Column } from '@theme/Columns';" + "{ import { ColumnContainer, Column } from \"@theme/Columns\"; }" ] }, { @@ -18,7 +18,8 @@ "id": "919a5ae2-ed21-4923-b98f-723c111bac67", "metadata": {}, "source": [ - ":::tip We recommend reading the LCEL [Get started](/docs/expression_language/get_started) section first.\n", + ":::tip \n", + "We recommend reading the LCEL [Get started](/docs/expression_language/get_started) section first.\n", ":::" ] }, @@ -62,11 +63,12 @@ "In the simplest case, we just want to pass in a topic string and get back a joke string:\n", "\n", "\n", + "\n", "\n", "\n", "#### Without LCEL\n", "\n", - "
    " + "
    " ] }, { @@ -76,6 +78,7 @@ "metadata": {}, "outputs": [], "source": [ + "\n", "from typing import List\n", "\n", "import openai\n", @@ -111,7 +114,7 @@ "\n", "#### LCEL\n", "\n", - "
    " + "
    " ] }, { @@ -156,7 +159,7 @@ "\n", "#### Without LCEL\n", "\n", - "
    " + "
    " ] }, { @@ -201,7 +204,7 @@ "\n", "#### LCEL\n", "\n", - "
    " + "
    " ] }, { @@ -233,7 +236,7 @@ "\n", "#### Without LCEL\n", "\n", - "
    " + "
    " ] }, { @@ -265,7 +268,7 @@ "\n", "#### LCEL\n", "\n", - "
    " + "
    " ] }, { @@ -296,7 +299,7 @@ "\n", "#### Without LCEL\n", "\n", - "
    " + "
    " ] }, { @@ -337,7 +340,7 @@ "\n", "#### LCEL\n", "\n", - "
    \n", + "
    \n", "\n", "```python\n", "chain.ainvoke(\"ice cream\")\n", @@ -362,7 +365,7 @@ "\n", "#### Without LCEL\n", "\n", - "
    " + "
    " ] }, { @@ -398,7 +401,7 @@ "\n", "#### LCEL\n", "\n", - "
    " + "
    " ] }, { @@ -439,7 +442,7 @@ "\n", "#### Without LCEL\n", "\n", - "
    " + "
    " ] }, { @@ -481,7 +484,7 @@ "\n", "#### LCEL\n", "\n", - "
    " + "
    " ] }, { @@ -522,7 +525,7 @@ "\n", "#### Without LCEL\n", "\n", - "
    " + "
    " ] }, { @@ -607,7 +610,7 @@ "\n", "#### With LCEL\n", "\n", - "
    " + "
    " ] }, { @@ -677,7 +680,7 @@ "\n", "We'll `print` intermediate steps for illustrative purposes\n", "\n", - "
    " + "
    " ] }, { @@ -711,7 +714,7 @@ "#### LCEL\n", "Every component has built-in integrations with LangSmith. If we set the following two environment variables, all chain traces are logged to LangSmith.\n", "\n", - "
    " + "
    " ] }, { @@ -757,7 +760,7 @@ "#### Without LCEL\n", "\n", "\n", - "
    " + "
    " ] }, { @@ -804,7 +807,7 @@ "\n", "#### LCEL\n", "\n", - "
    " + "
    " ] }, { @@ -845,7 +848,7 @@ "\n", "#### Without LCEL\n", "\n", - "
    " + "
    " ] }, { @@ -1029,7 +1032,7 @@ "\n", "#### LCEL\n", "\n", - "
    " + "
    " ] }, { diff --git a/docs/docs/guides/debugging.md b/docs/docs/guides/debugging.md index 6fefa9fa5bf26..a0ac5a5e894bf 100644 --- a/docs/docs/guides/debugging.md +++ b/docs/docs/guides/debugging.md @@ -12,7 +12,7 @@ Platforms with tracing capabilities like [LangSmith](/docs/langsmith/) and [Wand For anyone building production-grade LLM applications, we highly recommend using a platform like this. -![LangSmith run](/img/run_details.png) +![LangSmith run](../../static/img/run_details.png) ## `set_debug` and `set_verbose` diff --git a/docs/docs/guides/local_llms.ipynb b/docs/docs/guides/local_llms.ipynb index 60e271463d598..5541d1d608f99 100644 --- a/docs/docs/guides/local_llms.ipynb +++ b/docs/docs/guides/local_llms.ipynb @@ -32,7 +32,7 @@ "1. `Base model`: What is the base-model and how was it trained?\n", "2. `Fine-tuning approach`: Was the base-model fine-tuned and, if so, what [set of instructions](https://cameronrwolfe.substack.com/p/beyond-llama-the-power-of-open-llms#%C2%A7alpaca-an-instruction-following-llama-model) was used?\n", "\n", - "![Image description](/img/OSS_LLM_overview.png)\n", + "![Image description](../../static/img/OSS_LLM_overview.png)\n", "\n", "The relative performance of these models can be assessed using several leaderboards, including:\n", "\n", @@ -55,7 +55,7 @@ "\n", "In particular, see [this excellent post](https://finbarr.ca/how-is-llama-cpp-possible/) on the importance of quantization.\n", "\n", - "![Image description](/img/llama-memory-weights.png)\n", + "![Image description](../../static/img/llama-memory-weights.png)\n", "\n", "With less precision, we radically decrease the memory needed to store the LLM in memory.\n", "\n", @@ -63,7 +63,7 @@ "\n", "A Mac M2 Max is 5-6x faster than a M1 for inference due to the larger GPU memory bandwidth.\n", "\n", - "![Image description](/img/llama_t_put.png)\n", + "![Image description](../../static/img/llama_t_put.png)\n", "\n", "## Quickstart\n", "\n", diff --git 
a/docs/docs/modules/chains/document/map_reduce.ipynb b/docs/docs/modules/chains/document/map_reduce.ipynb index 2214fa75c4438..126ce5c8fa847 100644 --- a/docs/docs/modules/chains/document/map_reduce.ipynb +++ b/docs/docs/modules/chains/document/map_reduce.ipynb @@ -9,7 +9,7 @@ "\n", "The map reduce documents chain first applies an LLM chain to each document individually (the Map step), treating the chain output as a new document. It then passes all the new documents to a separate combine documents chain to get a single output (the Reduce step). It can optionally first compress, or collapse, the mapped documents to make sure that they fit in the combine documents chain (which will often pass them to an LLM). This compression step is performed recursively if necessary.\n", "\n", - "![map_reduce_diagram](/img/map_reduce.jpg)" + "![map_reduce_diagram](../../../../static/img/map_reduce.jpg)" ] }, { diff --git a/docs/docs/modules/chains/document/map_rerank.ipynb b/docs/docs/modules/chains/document/map_rerank.ipynb index 1367b7abeedbf..dd6445decae5a 100644 --- a/docs/docs/modules/chains/document/map_rerank.ipynb +++ b/docs/docs/modules/chains/document/map_rerank.ipynb @@ -9,7 +9,7 @@ "\n", "The map re-rank documents chain runs an initial prompt on each document, that not only tries to complete a task but also gives a score for how certain it is in its answer. 
The highest scoring response is returned.\n", "\n", - "![map_rerank_diagram](/img/map_rerank.jpg)" + "![map_rerank_diagram](../../../../static/img/map_rerank.jpg)" ] }, { diff --git a/docs/docs/modules/chains/document/refine.ipynb b/docs/docs/modules/chains/document/refine.ipynb index 6869336021036..d84724def4df3 100644 --- a/docs/docs/modules/chains/document/refine.ipynb +++ b/docs/docs/modules/chains/document/refine.ipynb @@ -24,7 +24,7 @@ "The obvious tradeoff is that this chain will make far more LLM calls than, for example, the Stuff documents chain.\n", "There are also certain tasks which are difficult to accomplish iteratively. For example, the Refine chain can perform poorly when documents frequently cross-reference one another or when a task requires detailed information from many documents.\n", "\n", - "![refine_diagram](/img/refine.jpg)\n" + "![refine_diagram](../../../../static/img/refine.jpg)\n" ] }, { diff --git a/docs/docs/modules/chains/document/stuff.ipynb b/docs/docs/modules/chains/document/stuff.ipynb index e97da5bceb630..92798440fcbc8 100644 --- a/docs/docs/modules/chains/document/stuff.ipynb +++ b/docs/docs/modules/chains/document/stuff.ipynb @@ -20,7 +20,7 @@ "\n", "This chain is well-suited for applications where documents are small and only a few are passed in for most calls.\n", "\n", - "![stuff_diagram](/img/stuff.jpg)" + "![stuff_diagram](../../../../static/img/stuff.jpg)" ] }, { diff --git a/docs/docs/use_cases/apis.ipynb b/docs/docs/use_cases/apis.ipynb index 0c8d3cb239440..a051fc250f73a 100644 --- a/docs/docs/use_cases/apis.ipynb +++ b/docs/docs/use_cases/apis.ipynb @@ -34,7 +34,7 @@ "* `Functions`: For example, [OpenAI functions](https://platform.openai.com/docs/guides/gpt/function-calling) is one popular means of doing this.\n", "* `LLM-generated interface`: Use an LLM with access to API documentation to create an interface.\n", "\n", - "![Image description](/img/api_use_case.png)" + "![Image 
description](../../static/img/api_use_case.png)" ] }, { @@ -188,7 +188,7 @@ " }\n", " ```\n", " \n", - "![Image description](/img/api_function_call.png)\n", + "![Image description](../../static/img/api_function_call.png)\n", " \n", "* This `Dict` above split and the [API is called here](https://github.com/langchain-ai/langchain/blob/7fc07ba5df99b9fa8bef837b0fafa220bc5c932c/libs/langchain/langchain/chains/openai_functions/openapi.py#L215)." ] @@ -293,12 +293,12 @@ "\n", "* The `api_request_chain` produces the API url from our question and the API documentation:\n", "\n", - "![Image description](/img/api_chain.png)\n", + "![Image description](../../static/img/api_chain.png)\n", "\n", "* [Here](https://github.com/langchain-ai/langchain/blob/bbd22b9b761389a5e40fc45b0570e1830aabb707/libs/langchain/langchain/chains/api/base.py#L82) we make the API request with the API url.\n", "* The `api_answer_chain` takes the response from the API and provides us with a natural language response:\n", "\n", - "![Image description](/img/api_chain_response.png)" + "![Image description](../../static/img/api_chain_response.png)" ] }, { diff --git a/docs/docs/use_cases/chatbots.ipynb b/docs/docs/use_cases/chatbots.ipynb index 68a1ffc6de19c..711dedd7a44c6 100644 --- a/docs/docs/use_cases/chatbots.ipynb +++ b/docs/docs/use_cases/chatbots.ipynb @@ -30,7 +30,7 @@ "id": "56615b45", "metadata": {}, "source": [ - "![Image description](/img/chat_use_case.png)" + "![Image description](../../static/img/chat_use_case.png)" ] }, { @@ -546,7 +546,7 @@ "source": [ "We can see the chat history preserved in the prompt using the [LangSmith trace](https://smith.langchain.com/public/dce34c57-21ca-4283-9020-a8e0d78a59de/r).\n", "\n", - "![Image description](/img/chat_use_case_2.png)" + "![Image description](../../static/img/chat_use_case_2.png)" ] }, { diff --git a/docs/docs/use_cases/extraction.ipynb b/docs/docs/use_cases/extraction.ipynb index 4b5d580b5d905..7fdf12694defd 100644 --- 
a/docs/docs/use_cases/extraction.ipynb +++ b/docs/docs/use_cases/extraction.ipynb @@ -34,7 +34,7 @@ "id": "178dbc59", "metadata": {}, "source": [ - "![Image description](/img/extraction.png)" + "![Image description](../../static/img/extraction.png)" ] }, { @@ -139,7 +139,7 @@ "\n", "The [LangSmith trace](https://smith.langchain.com/public/72bc3205-7743-4ca6-929a-966a9d4c2a77/r) shows that we call the function `information_extraction` on the input string, `inp`.\n", "\n", - "![Image description](/img/extraction_trace_function.png)\n", + "![Image description](../../static/img/extraction_trace_function.png)\n", "\n", "This `information_extraction` function is defined [here](https://github.com/langchain-ai/langchain/blob/master/libs/langchain/langchain/chains/openai_functions/extraction.py) and returns a dict.\n", "\n", @@ -497,7 +497,7 @@ "source": [ "We can see from the [LangSmith trace](https://smith.langchain.com/public/8e3aa858-467e-46a5-aa49-5db65f0a2b9a/r) that we get the same output as above.\n", "\n", - "![Image description](/img/extraction_trace_function_2.png)\n", + "![Image description](../../static/img/extraction_trace_function_2.png)\n", "\n", "We can see that we provide a two-shot prompt in order to instruct the LLM to output in our desired format.\n", "\n", @@ -577,7 +577,7 @@ "\n", "We can look at the [LangSmith trace](https://smith.langchain.com/public/69f11d41-41be-4319-93b0-6d0eda66e969/r) to see exactly what is going on under the hood.\n", "\n", - "![Image description](/img/extraction_trace_joke.png)\n", + "![Image description](../../static/img/extraction_trace_joke.png)\n", "\n", "### Going deeper\n", "\n", @@ -587,6 +587,12 @@ "* [JSONFormer](/docs/integrations/llms/jsonformer_experimental) offers another way for structured decoding of a subset of the JSON Schema.\n", "* [Kor](https://eyurtsev.github.io/kor/) is another library for extraction where schema and examples can be provided to the LLM." 
] + }, + { + "cell_type": "markdown", + "id": "aab95ecf", + "metadata": {}, + "source": [] } ], "metadata": { diff --git a/docs/docs/use_cases/qa_structured/sql.ipynb b/docs/docs/use_cases/qa_structured/sql.ipynb index 32718d4e1a757..3f5b0981d0504 100644 --- a/docs/docs/use_cases/qa_structured/sql.ipynb +++ b/docs/docs/use_cases/qa_structured/sql.ipynb @@ -40,7 +40,7 @@ "2. `Query a SQL database` using chains for query creation and execution\n", "3. `Interact with a SQL database` using agents for robust and flexible querying \n", "\n", - "![sql_usecase.png](/img/sql_usecase.png)\n", + "![sql_usecase.png](../../../static/img/sql_usecase.png)\n", "\n", "## Quickstart\n", "\n", @@ -240,7 +240,7 @@ "* Followed by three example rows in a `SELECT` statement\n", "\n", "`create_sql_query_chain` adopts this the best practice (see more in this [blog](https://blog.langchain.dev/llms-and-sql/)). \n", - "![sql_usecase.png](/img/create_sql_query_chain.png)\n", + "![sql_usecase.png](../../../static/img/create_sql_query_chain.png)\n", "\n", "**Improvements**\n", "\n", @@ -397,7 +397,7 @@ "\n", "* Then, it executes the query and passes the results to an LLM for synthesis.\n", "\n", - "![sql_usecase.png](/img/sqldbchain_trace.png)\n", + "![sql_usecase.png](../../../static/img/sqldbchain_trace.png)\n", "\n", "**Improvements**\n", "\n", @@ -661,7 +661,7 @@ "\n", "* It finally executes the generated query using tool `sql_db_query`\n", "\n", - "![sql_usecase.png](/img/SQLDatabaseToolkit.png)" + "![sql_usecase.png](../../../static/img/SQLDatabaseToolkit.png)" ] }, { diff --git a/docs/docs/use_cases/question_answering/code_understanding.ipynb b/docs/docs/use_cases/question_answering/code_understanding.ipynb index 9d1823abafab2..d848db2c9d57c 100644 --- a/docs/docs/use_cases/question_answering/code_understanding.ipynb +++ b/docs/docs/use_cases/question_answering/code_understanding.ipynb @@ -24,7 +24,7 @@ "- Using LLMs for suggesting refactors or improvements\n", "- Using LLMs for 
documenting the code\n", "\n", - "![Image description](/img/code_understanding.png)\n", + "![Image description](../../../static/img/code_understanding.png)\n", "\n", "## Overview\n", "\n", @@ -339,7 +339,7 @@ "* In particular, the code well structured and kept together in the retrieval output\n", "* The retrieved code and chat history are passed to the LLM for answer distillation\n", "\n", - "![Image description](/img/code_retrieval.png)" + "![Image description](../../../static/img/code_retrieval.png)" ] }, { diff --git a/docs/docs/use_cases/question_answering/index.ipynb b/docs/docs/use_cases/question_answering/index.ipynb index b1264e11952ad..8e5fbe2a2c14d 100644 --- a/docs/docs/use_cases/question_answering/index.ipynb +++ b/docs/docs/use_cases/question_answering/index.ipynb @@ -58,13 +58,13 @@ "2. **Split**: [Text splitters](/docs/modules/data_connection/document_transformers/) break large `Documents` into smaller chunks. This is useful both for indexing data and for passing it in to a model, since large chunks are harder to search over and won't in a model's finite context window.\n", "3. **Store**: We need somewhere to store and index our splits, so that they can later be searched over. This is often done using a [VectorStore](/docs/modules/data_connection/vectorstores/) and [Embeddings](/docs/modules/data_connection/text_embedding/) model.\n", "\n", - "![index_diagram](/img/rag_indexing.png)\n", + "![index_diagram](../../../static/img/rag_indexing.png)\n", "\n", "#### Retrieval and generation\n", "4. **Retrieve**: Given a user input, relevant splits are retrieved from storage using a [Retriever](/docs/modules/data_connection/retrievers/).\n", "5. 
**Generate**: A [ChatModel](/docs/modules/model_io/chat_models) / [LLM](/docs/modules/model_io/llms/) produces an answer using a prompt that includes the question and the retrieved data\n", "\n", - "![retrieval_diagram](/img/rag_retrieval_generation.png)" + "![retrieval_diagram](../../../static/img/rag_retrieval_generation.png)" ] }, { diff --git a/docs/docs/use_cases/summarization.ipynb b/docs/docs/use_cases/summarization.ipynb index 72583aaf6bde7..d025bdd612415 100644 --- a/docs/docs/use_cases/summarization.ipynb +++ b/docs/docs/use_cases/summarization.ipynb @@ -32,7 +32,7 @@ "id": "8e233997", "metadata": {}, "source": [ - "![Image description](/img/summarization_use_case_1.png)" + "![Image description](../../static/img/summarization_use_case_1.png)" ] }, { @@ -56,7 +56,7 @@ "id": "08ec66bc", "metadata": {}, "source": [ - "![Image description](/img/summarization_use_case_2.png)" + "![Image description](../../static/img/summarization_use_case_2.png)" ] }, { @@ -514,7 +514,7 @@ "* The blog post and associated [repo](https://github.com/mendableai/QA_clustering) also introduce clustering as a means of summarization.\n", "* This opens up a third path beyond the `stuff` or `map-reduce` approaches that is worth considering.\n", "\n", - "![Image description](/img/summarization_use_case_3.png)" + "![Image description](../../static/img/summarization_use_case_3.png)" ] }, { diff --git a/docs/docs/use_cases/tagging.ipynb b/docs/docs/use_cases/tagging.ipynb index 6d820e331b661..248b33b9f0b62 100644 --- a/docs/docs/use_cases/tagging.ipynb +++ b/docs/docs/use_cases/tagging.ipynb @@ -28,7 +28,7 @@ "- covered topics\n", "- political tendency\n", "\n", - "![Image description](/img/tagging.png)\n", + "![Image description](../../static/img/tagging.png)\n", "\n", "## Overview\n", "\n", @@ -293,7 +293,7 @@ "* As with [extraction](/docs/use_cases/extraction), we call the `information_extraction` function 
[here](https://github.com/langchain-ai/langchain/blob/269f85b7b7ffd74b38cd422d4164fc033388c3d0/libs/langchain/langchain/chains/openai_functions/extraction.py#L20) on the input string.\n", "* This OpenAI function extraction information based upon the provided schema.\n", "\n", - "![Image description](/img/tagging_trace.png)" + "![Image description](../../static/img/tagging_trace.png)" ] }, { diff --git a/docs/docs/use_cases/web_scraping.ipynb b/docs/docs/use_cases/web_scraping.ipynb index 62d64d1b33325..c536e5136f30c 100644 --- a/docs/docs/use_cases/web_scraping.ipynb +++ b/docs/docs/use_cases/web_scraping.ipynb @@ -25,7 +25,7 @@ "* Users have [highlighted it](https://twitter.com/GregKamradt/status/1679913813297225729?s=20) as one of his top desired AI tools. \n", "* OSS repos like [gpt-researcher](https://github.com/assafelovic/gpt-researcher) are growing in popularity. \n", " \n", - "![Image description](/img/web_scraping.png)\n", + "![Image description](../../static/img/web_scraping.png)\n", " \n", "## Overview\n", "\n", @@ -443,7 +443,7 @@ "source": [ "We can compare the headlines scraped to the page:\n", "\n", - "![Image description](/img/wsj_page.png)\n", + "![Image description](../../static/img/wsj_page.png)\n", "\n", "Looking at the [LangSmith trace](https://smith.langchain.com/public/c3070198-5b13-419b-87bf-3821cdf34fa6/r), we can see what is going on under the hood:\n", "\n", @@ -463,7 +463,7 @@ "\n", "We can automate the process of [web research](https://blog.langchain.dev/automating-web-research/) using a retriever, such as the `WebResearchRetriever` ([docs](https://python.langchain.com/docs/modules/data_connection/retrievers/web_research)).\n", "\n", - "![Image description](/img/web_research.png)\n", + "![Image description](../../static/img/web_research.png)\n", "\n", "Copy requirements [from here](https://github.com/langchain-ai/web-explorer/blob/main/requirements.txt):\n", "\n", diff --git a/docs/vercel_build.sh b/docs/vercel_build.sh index 
3793a77123ce1..145a7e9acbc62 100755 --- a/docs/vercel_build.sh +++ b/docs/vercel_build.sh @@ -1,57 +1,24 @@ #!/bin/bash -version_compare() { - local v1=(${1//./ }) - local v2=(${2//./ }) - for i in {0..2}; do - if (( ${v1[i]} < ${v2[i]} )); then - return 1 - fi - done - return 0 -} +yum -y update +yum install gcc bzip2-devel libffi-devel zlib-devel wget tar gzip -y +amazon-linux-extras install python3.8 -y -openssl_version=$(openssl version | awk '{print $2}') -required_openssl_version="1.1.1" +# install quarto +wget -q https://github.com/quarto-dev/quarto-cli/releases/download/v1.3.450/quarto-1.3.450-linux-amd64.tar.gz +tar -xzf quarto-1.3.450-linux-amd64.tar.gz +export PATH=$PATH:$(pwd)/quarto-1.3.450/bin/ -python_version=$(python3 --version 2>&1 | awk '{print $2}') -required_python_version="3.10" -echo "OpenSSL Version" -echo $openssl_version -echo "Python Version" -echo $python_version -# If openssl version is less than 1.1.1 AND python version is less than 3.10 -if ! version_compare $openssl_version $required_openssl_version && ! version_compare $python_version $required_python_version; then -### See: https://github.com/urllib3/urllib3/issues/2168 -# Requests lib breaks for old SSL versions, -# which are defaults on Amazon Linux 2 (which Vercel uses for builds) - yum -y update - yum remove openssl-devel -y - yum install gcc bzip2-devel libffi-devel zlib-devel wget tar -y - yum install openssl11 -y - yum install openssl11-devel -y - - wget https://www.python.org/ftp/python/3.11.4/Python-3.11.4.tgz - tar xzf Python-3.11.4.tgz - cd Python-3.11.4 - ./configure - make altinstall - echo "Python Version" - python3.11 --version - cd .. 
-fi - -python3.11 -m venv .venv +python3.8 -m venv .venv source .venv/bin/activate -python3.11 -m pip install --upgrade pip -python3.11 -m pip install -r vercel_requirements.txt -python3.11 scripts/model_feat_table.py +python3.8 -m pip install --upgrade pip +python3.8 -m pip install -r vercel_requirements.txt +python3.8 scripts/model_feat_table.py mkdir docs/templates cp ../templates/docs/INDEX.md docs/templates/index.md -python3.11 scripts/copy_templates.py +python3.8 scripts/copy_templates.py cp ../cookbook/README.md src/pages/cookbook.mdx cp ../.github/CONTRIBUTING.md docs/contributing.md -wget https://raw.githubusercontent.com/langchain-ai/langserve/main/README.md -O docs/langserve.md -nbdoc_build --srcdir docs --pause 0 -python3.11 scripts/generate_api_reference_links.py +wget -q https://raw.githubusercontent.com/langchain-ai/langserve/main/README.md -O docs/langserve.md +quarto render docs/ diff --git a/docs/vercel_requirements.txt b/docs/vercel_requirements.txt index 6b44d4069b798..bffe000f94e35 100644 --- a/docs/vercel_requirements.txt +++ b/docs/vercel_requirements.txt @@ -1,3 +1,3 @@ -e ../libs/langchain -e ../libs/core -nbdoc \ No newline at end of file +urllib3==1.26.18 From e09b876863cc6663b5dd58ede34693b3b6f4a89a Mon Sep 17 00:00:00 2001 From: ealt Date: Mon, 4 Dec 2023 20:55:37 +0000 Subject: [PATCH 16/49] Fixes error loading Obsidian templates (#13888) - **Description:** Obsidian templates can include [variables](https://help.obsidian.md/Plugins/Templates#Template+variables) using double curly braces. `ObsidianLoader` uses PyYaml to parse the frontmatter of documents. This parsing throws an error when encountering variables' curly braces. This is avoided by temporarily substituting safe strings before parsing. 
- **Issue:** #13887 - **Tag maintainer:** @hwchase17 --- .../langchain/document_loaders/obsidian.py | 36 +++++++++++++++++-- .../obsidian/template_var_frontmatter.md | 12 +++++++ .../document_loaders/test_obsidian.py | 22 ++++++++++-- 3 files changed, 66 insertions(+), 4 deletions(-) create mode 100644 libs/langchain/tests/unit_tests/document_loaders/sample_documents/obsidian/template_var_frontmatter.md diff --git a/libs/langchain/langchain/document_loaders/obsidian.py b/libs/langchain/langchain/document_loaders/obsidian.py index 85a64860d11dd..7d6bf6e89e3e9 100644 --- a/libs/langchain/langchain/document_loaders/obsidian.py +++ b/libs/langchain/langchain/document_loaders/obsidian.py @@ -1,7 +1,8 @@ +import functools import logging import re from pathlib import Path -from typing import List +from typing import Any, Dict, List import yaml from langchain_core.documents import Document @@ -15,6 +16,7 @@ class ObsidianLoader(BaseLoader): """Load `Obsidian` files from directory.""" FRONT_MATTER_REGEX = re.compile(r"^---\n(.*?)\n---\n", re.DOTALL) + TEMPLATE_VARIABLE_REGEX = re.compile(r"{{(.*?)}}", re.DOTALL) TAG_REGEX = re.compile(r"[^\S\/]#([a-zA-Z_]+[-_/\w]*)") DATAVIEW_LINE_REGEX = re.compile(r"^\s*(\w+)::\s*(.*)$", re.MULTILINE) DATAVIEW_INLINE_BRACKET_REGEX = re.compile(r"\[(\w+)::\s*(.*)\]", re.MULTILINE) @@ -35,6 +37,27 @@ def __init__( self.encoding = encoding self.collect_metadata = collect_metadata + def _replace_template_var( + self, placeholders: Dict[str, str], match: re.Match + ) -> str: + """Replace a template variable with a placeholder.""" + placeholder = f"__TEMPLATE_VAR_{len(placeholders)}__" + placeholders[placeholder] = match.group(1) + return placeholder + + def _restore_template_vars(self, obj: Any, placeholders: Dict[str, str]) -> Any: + """Restore template variables replaced with placeholders to original values.""" + if isinstance(obj, str): + for placeholder, value in placeholders.items(): + obj = obj.replace(placeholder, f"{{{{{value}}}}}") + 
elif isinstance(obj, dict): + for key, value in obj.items(): + obj[key] = self._restore_template_vars(value, placeholders) + elif isinstance(obj, list): + for i, item in enumerate(obj): + obj[i] = self._restore_template_vars(item, placeholders) + return obj + def _parse_front_matter(self, content: str) -> dict: """Parse front matter metadata from the content and return it as a dict.""" if not self.collect_metadata: @@ -44,8 +67,17 @@ def _parse_front_matter(self, content: str) -> dict: if not match: return {} + placeholders: Dict[str, str] = {} + replace_template_var = functools.partial( + self._replace_template_var, placeholders + ) + front_matter_text = self.TEMPLATE_VARIABLE_REGEX.sub( + replace_template_var, match.group(1) + ) + try: - front_matter = yaml.safe_load(match.group(1)) + front_matter = yaml.safe_load(front_matter_text) + front_matter = self._restore_template_vars(front_matter, placeholders) # If tags are a string, split them into a list if "tags" in front_matter and isinstance(front_matter["tags"], str): diff --git a/libs/langchain/tests/unit_tests/document_loaders/sample_documents/obsidian/template_var_frontmatter.md b/libs/langchain/tests/unit_tests/document_loaders/sample_documents/obsidian/template_var_frontmatter.md new file mode 100644 index 0000000000000..7bab90737c31f --- /dev/null +++ b/libs/langchain/tests/unit_tests/document_loaders/sample_documents/obsidian/template_var_frontmatter.md @@ -0,0 +1,12 @@ +--- +aString: {{var}} +anArray: +- element +- {{varElement}} +aDict: + dictId1: 'val' + dictId2: '{{varVal}}' +tags: [ 'tag', '{{varTag}}' ] +--- + +Frontmatter contains template variables. 
diff --git a/libs/langchain/tests/unit_tests/document_loaders/test_obsidian.py b/libs/langchain/tests/unit_tests/document_loaders/test_obsidian.py index 50f29d849e17b..e25bf80199d82 100644 --- a/libs/langchain/tests/unit_tests/document_loaders/test_obsidian.py +++ b/libs/langchain/tests/unit_tests/document_loaders/test_obsidian.py @@ -17,7 +17,7 @@ def test_page_content_loaded() -> None: """Verify that all docs have page_content""" - assert len(docs) == 5 + assert len(docs) == 6 assert all(doc.page_content for doc in docs) @@ -27,7 +27,7 @@ def test_disable_collect_metadata() -> None: str(OBSIDIAN_EXAMPLE_PATH), collect_metadata=False ) docs_wo = loader_without_metadata.load() - assert len(docs_wo) == 5 + assert len(docs_wo) == 6 assert all(doc.page_content for doc in docs_wo) assert all(set(doc.metadata) == STANDARD_METADATA_FIELDS for doc in docs_wo) @@ -45,6 +45,24 @@ def test_metadata_with_frontmatter() -> None: assert set(doc.metadata["tags"].split(",")) == {"journal/entry", "obsidian"} +def test_metadata_with_template_vars_in_frontmatter() -> None: + """Verify frontmatter fields with template variables are loaded.""" + doc = next( + doc for doc in docs if doc.metadata["source"] == "template_var_frontmatter.md" + ) + FRONTMATTER_FIELDS = { + "aString", + "anArray", + "aDict", + "tags", + } + assert set(doc.metadata) == FRONTMATTER_FIELDS | STANDARD_METADATA_FIELDS + assert doc.metadata["aString"] == "{{var}}" + assert doc.metadata["anArray"] == "['element', '{{varElement}}']" + assert doc.metadata["aDict"] == "{'dictId1': 'val', 'dictId2': '{{varVal}}'}" + assert set(doc.metadata["tags"].split(",")) == {"tag", "{{varTag}}"} + + def test_metadata_with_bad_frontmatter() -> None: """Verify a doc with non-yaml frontmatter.""" doc = next(doc for doc in docs if doc.metadata["source"] == "bad_frontmatter.md") From 5fea63327b8f27ca601cbe015c9cf3fcc4f5a1ce Mon Sep 17 00:00:00 2001 From: nceccarelli <41022718+nceccarelli@users.noreply.github.com> Date: Mon, 4 Dec 2023 
15:56:35 -0500 Subject: [PATCH 17/49] Support Azure gov cloud in Azure Cognitive Search retriever (#13695) --------- Co-authored-by: Nicholas Ceccarelli Co-authored-by: Harrison Chase --- .../langchain/retrievers/azure_cognitive_search.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/libs/langchain/langchain/retrievers/azure_cognitive_search.py b/libs/langchain/langchain/retrievers/azure_cognitive_search.py index 27cdd91815715..8824d986cc872 100644 --- a/libs/langchain/langchain/retrievers/azure_cognitive_search.py +++ b/libs/langchain/langchain/retrievers/azure_cognitive_search.py @@ -13,7 +13,10 @@ AsyncCallbackManagerForRetrieverRun, CallbackManagerForRetrieverRun, ) -from langchain.utils import get_from_dict_or_env +from langchain.utils import get_from_dict_or_env, get_from_env + +DEFAULT_URL_SUFFIX = "search.windows.net" +"""Default URL Suffix for endpoint connection - commercial cloud""" class AzureCognitiveSearchRetriever(BaseRetriever): @@ -54,7 +57,10 @@ def validate_environment(cls, values: Dict) -> Dict: return values def _build_search_url(self, query: str) -> str: - base_url = f"https://{self.service_name}.search.windows.net/" + url_suffix = get_from_env( + "", "AZURE_COGNITIVE_SEARCH_URL_SUFFIX", DEFAULT_URL_SUFFIX + ) + base_url = f"https://{self.service_name}.{url_suffix}/" endpoint_path = f"indexes/{self.index_name}/docs?api-version={self.api_version}" top_param = f"&$top={self.top_k}" if self.top_k else "" return base_url + endpoint_path + f"&search={query}" + top_param From 411aa9a41e5279e912589f33a143629be15f45e7 Mon Sep 17 00:00:00 2001 From: Harrison Chase Date: Mon, 4 Dec 2023 13:43:11 -0800 Subject: [PATCH 18/49] Harrison/nasa tool (#14245) Co-authored-by: Jacob Matias <88005863+matiasjacob25@users.noreply.github.com> Co-authored-by: Karam Daid Co-authored-by: Jumana Co-authored-by: KaramDaid <38271127+KaramDaid@users.noreply.github.com> Co-authored-by: Anna Chester 
<74325334+CodeMakesMeSmile@users.noreply.github.com> Co-authored-by: Jumana <144748640+jfanous@users.noreply.github.com> --- docs/docs/integrations/toolkits/nasa.ipynb | 108 ++++++++++++++++++ .../agents/agent_toolkits/__init__.py | 2 + .../agents/agent_toolkits/nasa/__init__.py | 1 + .../agents/agent_toolkits/nasa/toolkit.py | 57 +++++++++ libs/langchain/langchain/tools/__init__.py | 9 ++ .../langchain/tools/nasa/__init__.py | 0 libs/langchain/langchain/tools/nasa/prompt.py | 82 +++++++++++++ libs/langchain/langchain/tools/nasa/tool.py | 28 +++++ .../langchain/langchain/utilities/__init__.py | 9 ++ libs/langchain/langchain/utilities/nasa.py | 52 +++++++++ .../integration_tests/utilities/test_nasa.py | 32 ++++++ .../tests/unit_tests/tools/test_imports.py | 1 + .../tests/unit_tests/tools/test_public_api.py | 1 + .../unit_tests/utilities/test_imports.py | 1 + 14 files changed, 383 insertions(+) create mode 100644 docs/docs/integrations/toolkits/nasa.ipynb create mode 100644 libs/langchain/langchain/agents/agent_toolkits/nasa/__init__.py create mode 100644 libs/langchain/langchain/agents/agent_toolkits/nasa/toolkit.py create mode 100644 libs/langchain/langchain/tools/nasa/__init__.py create mode 100644 libs/langchain/langchain/tools/nasa/prompt.py create mode 100644 libs/langchain/langchain/tools/nasa/tool.py create mode 100644 libs/langchain/langchain/utilities/nasa.py create mode 100644 libs/langchain/tests/integration_tests/utilities/test_nasa.py diff --git a/docs/docs/integrations/toolkits/nasa.ipynb b/docs/docs/integrations/toolkits/nasa.ipynb new file mode 100644 index 0000000000000..09580d2396e36 --- /dev/null +++ b/docs/docs/integrations/toolkits/nasa.ipynb @@ -0,0 +1,108 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "e6fd05db-21c2-4227-9900-0840bc62cb31", + "metadata": {}, + "source": [ + "# NASA\n", + "\n", + "This notebook shows how to use agents to interact with the NASA toolkit. 
The toolkit provides access to the NASA Image and Video Library API, with potential to expand and include other accessible NASA APIs in future iterations.\n", + "\n", + "**Note: NASA Image and Video Library search queries can result in large responses when the number of desired media results is not specified. Consider this prior to using the agent with LLM token credits.**" + ] + }, + { + "cell_type": "markdown", + "id": "7d93e6bd-03d7-4d3c-b915-8b73164e2ad8", + "metadata": {}, + "source": [ + "## Example Use:\n", + "---\n", + "### Initializing the agent" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "648a2cb2-308e-4b2e-9b73-37109be4e258", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.agents import AgentType, initialize_agent\n", + "from langchain.agents.agent_toolkits.nasa.toolkit import NasaToolkit\n", + "from langchain.llms import OpenAI\n", + "from langchain.utilities.nasa import NasaAPIWrapper\n", + "\n", + "llm = OpenAI(temperature=0, openai_api_key=\"\")\n", + "nasa = NasaAPIWrapper()\n", + "toolkit = NasaToolkit.from_nasa_api_wrapper(nasa)\n", + "agent = initialize_agent(\n", + " toolkit.get_tools(), llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "71f05fc9-d80d-4614-b9a3-e0a5e43cbbbb", + "metadata": {}, + "source": [ + "### Querying media assets" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b97409f3-dc87-425d-b555-406cf8466a28", + "metadata": {}, + "outputs": [], + "source": [ + "agent.run(\n", + " \"Can you find three pictures of the moon published between the years 2014 and 2020?\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "a86ce5ff-de45-4206-86ca-07ae03f36bdf", + "metadata": {}, + "source": [ + "### Querying details about media assets" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "80e86b49-749e-4026-b025-db32ed0bcc7e", + "metadata": {}, + "outputs": [], + "source": [ + 
"output = agent.run(\n", + " \"I've just queried an image of the moon with the NASA id NHQ_2019_0311_Go Forward to the Moon.\"\n", + " \" Where can I find the metadata manifest for this asset?\"\n", + ")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.5" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/libs/langchain/langchain/agents/agent_toolkits/__init__.py b/libs/langchain/langchain/agents/agent_toolkits/__init__.py index b40975cc73299..062ab1f317633 100644 --- a/libs/langchain/langchain/agents/agent_toolkits/__init__.py +++ b/libs/langchain/langchain/agents/agent_toolkits/__init__.py @@ -34,6 +34,7 @@ from langchain.agents.agent_toolkits.json.base import create_json_agent from langchain.agents.agent_toolkits.json.toolkit import JsonToolkit from langchain.agents.agent_toolkits.multion.toolkit import MultionToolkit +from langchain.agents.agent_toolkits.nasa.toolkit import NasaToolkit from langchain.agents.agent_toolkits.nla.toolkit import NLAToolkit from langchain.agents.agent_toolkits.office365.toolkit import O365Toolkit from langchain.agents.agent_toolkits.openapi.base import create_openapi_agent @@ -93,6 +94,7 @@ def __getattr__(name: str) -> Any: "JiraToolkit", "JsonToolkit", "MultionToolkit", + "NasaToolkit", "NLAToolkit", "O365Toolkit", "OpenAPIToolkit", diff --git a/libs/langchain/langchain/agents/agent_toolkits/nasa/__init__.py b/libs/langchain/langchain/agents/agent_toolkits/nasa/__init__.py new file mode 100644 index 0000000000000..a13c3ec706c6d --- /dev/null +++ b/libs/langchain/langchain/agents/agent_toolkits/nasa/__init__.py @@ -0,0 +1 @@ +"""NASA Toolkit""" diff --git 
a/libs/langchain/langchain/agents/agent_toolkits/nasa/toolkit.py b/libs/langchain/langchain/agents/agent_toolkits/nasa/toolkit.py new file mode 100644 index 0000000000000..0410507e0f7f5 --- /dev/null +++ b/libs/langchain/langchain/agents/agent_toolkits/nasa/toolkit.py @@ -0,0 +1,57 @@ +from typing import Dict, List + +from langchain.agents.agent_toolkits.base import BaseToolkit +from langchain.tools import BaseTool +from langchain.tools.nasa.prompt import ( + NASA_CAPTIONS_PROMPT, + NASA_MANIFEST_PROMPT, + NASA_METADATA_PROMPT, + NASA_SEARCH_PROMPT, +) +from langchain.tools.nasa.tool import NasaAction +from langchain.utilities.nasa import NasaAPIWrapper + + +class NasaToolkit(BaseToolkit): + """Nasa Toolkit.""" + + tools: List[BaseTool] = [] + + @classmethod + def from_nasa_api_wrapper(cls, nasa_api_wrapper: NasaAPIWrapper) -> "NasaToolkit": + operations: List[Dict] = [ + { + "mode": "search_media", + "name": "Search NASA Image and Video Library media", + "description": NASA_SEARCH_PROMPT, + }, + { + "mode": "get_media_metadata_manifest", + "name": "Get NASA Image and Video Library media metadata manifest", + "description": NASA_MANIFEST_PROMPT, + }, + { + "mode": "get_media_metadata_location", + "name": "Get NASA Image and Video Library media metadata location", + "description": NASA_METADATA_PROMPT, + }, + { + "mode": "get_video_captions_location", + "name": "Get NASA Image and Video Library video captions location", + "description": NASA_CAPTIONS_PROMPT, + }, + ] + tools = [ + NasaAction( + name=action["name"], + description=action["description"], + mode=action["mode"], + api_wrapper=nasa_api_wrapper, + ) + for action in operations + ] + return cls(tools=tools) + + def get_tools(self) -> List[BaseTool]: + """Get the tools in the toolkit.""" + return self.tools diff --git a/libs/langchain/langchain/tools/__init__.py b/libs/langchain/langchain/tools/__init__.py index 7422c13662095..cc8c37ee6a632 100644 --- a/libs/langchain/langchain/tools/__init__.py +++ 
b/libs/langchain/langchain/tools/__init__.py @@ -338,6 +338,12 @@ def _import_metaphor_search() -> Any: return MetaphorSearchResults +def _import_nasa_tool() -> Any: + from langchain.tools.nasa.tool import NasaAction + + return NasaAction + + def _import_office365_create_draft_message() -> Any: from langchain.tools.office365.create_draft_message import O365CreateDraftMessage @@ -831,6 +837,8 @@ def __getattr__(name: str) -> Any: return _import_merriam_webster_tool() elif name == "MetaphorSearchResults": return _import_metaphor_search() + elif name == "NasaAction": + return _import_nasa_tool() elif name == "O365CreateDraftMessage": return _import_office365_create_draft_message() elif name == "O365SearchEvents": @@ -1030,6 +1038,7 @@ def __getattr__(name: str) -> Any: "MerriamWebsterQueryRun", "MetaphorSearchResults", "MoveFileTool", + "NasaAction", "NavigateBackTool", "NavigateTool", "O365CreateDraftMessage", diff --git a/libs/langchain/langchain/tools/nasa/__init__.py b/libs/langchain/langchain/tools/nasa/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/libs/langchain/langchain/tools/nasa/prompt.py b/libs/langchain/langchain/tools/nasa/prompt.py new file mode 100644 index 0000000000000..4c7a3846a7e83 --- /dev/null +++ b/libs/langchain/langchain/tools/nasa/prompt.py @@ -0,0 +1,82 @@ +# flake8: noqa +NASA_SEARCH_PROMPT = """ + This tool is a wrapper around NASA's search API, useful when you need to search through NASA's Image and Video Library. + The input to this tool is a query specified by the user, and will be passed into NASA's `search` function. + + At least one parameter must be provided. + + There are optional parameters that can be passed by the user based on their query + specifications. 
Each item in this list contains pound sign (#) separated values, the first value is the parameter name, + the second value is the datatype and the third value is the description: {{ + + - q#string#Free text search terms to compare to all indexed metadata. + - center#string#NASA center which published the media. + - description#string#Terms to search for in “Description” fields. + - description_508#string#Terms to search for in “508 Description” fields. + - keywords #string#Terms to search for in “Keywords” fields. Separate multiple values with commas. + - location #string#Terms to search for in “Location” fields. + - media_type#string#Media types to restrict the search to. Available types: [“image”,“video”, “audio”]. Separate multiple values with commas. + - nasa_id #string#The media asset’s NASA ID. + - page#integer#Page number, starting at 1, of results to get.- + - page_size#integer#Number of results per page. Default: 100. + - photographer#string#The primary photographer’s name. + - secondary_creator#string#A secondary photographer/videographer’s name. + - title #string#Terms to search for in “Title” fields. + - year_start#string#The start year for results. Format: YYYY. + - year_end #string#The end year for results. Format: YYYY. + + }} + + Below are several task descriptions along with their respective input examples. 
+ Task: get the 2nd page of image and video content starting from the year 2002 to 2010 + Example Input: {{"year_start": "2002", "year_end": "2010", "page": 2}} + + Task: get the image and video content of saturn photographed by John Appleseed + Example Input: {{"q": "saturn", "photographer": "John Appleseed"}} + + Task: search for Meteor Showers with description "Search Description" with media type image + Example Input: {{"q": "Meteor Shower", "description": "Search Description", "media_type": "image"}} + + Task: get the image and video content from year 2008 to 2010 from Kennedy Center + Example Input: {{"year_start": "2008", "year_end": "2010", "location": "Kennedy Center"}} + """ + + +NASA_MANIFEST_PROMPT = """ + This tool is a wrapper around NASA's media asset manifest API, useful when you need to retrieve a media + asset's manifest. The input to this tool should include a string representing a NASA ID for a media asset that the user is trying to get the media asset manifest data for. The NASA ID will be passed as a string into NASA's `get_media_metadata_manifest` function. + + The following list are some examples of NASA IDs for a media asset that you can use to better extract the NASA ID from the input string to the tool. + - GSFC_20171102_Archive_e000579 + - Launch-Sound_Delta-PAM-Random-Commentary + - iss066m260341519_Expedition_66_Education_Inflight_with_Random_Lake_School_District_220203 + - 6973610 + - GRC-2020-CM-0167.4 + - Expedition_55_Inflight_Japan_VIP_Event_May_31_2018_659970 + - NASA 60th_SEAL_SLIVER_150DPI +""" + +NASA_METADATA_PROMPT = """ + This tool is a wrapper around NASA's media asset metadata location API, useful when you need to retrieve the media asset's metadata. The input to this tool should include a string representing a NASA ID for a media asset that the user is trying to get the media asset metadata location for. The NASA ID will be passed as a string into NASA's `get_media_metadata_location` function.
+ + The following list are some examples of NASA IDs for a media asset that you can use to better extract the NASA ID from the input string to the tool. + - GSFC_20171102_Archive_e000579 + - Launch-Sound_Delta-PAM-Random-Commentary + - iss066m260341519_Expedition_66_Education_Inflight_with_Random_Lake_School_District_220203 + - 6973610 + - GRC-2020-CM-0167.4 + - Expedition_55_Inflight_Japan_VIP_Event_May_31_2018_659970 + - NASA 60th_SEAL_SLIVER_150DPI +""" + +NASA_CAPTIONS_PROMPT = """ + This tool is a wrapper around NASA's video assets caption location API, useful when you need + to retrieve the location of the captions of a specific video. The input to this tool should include a string representing a NASA ID for a video media asset that the user is trying to get the location of the captions for. The NASA ID will be passed as a string into NASA's `get_video_captions_location` function. + + The following list are some examples of NASA IDs for a video asset that you can use to better extract the NASA ID from the input string to the tool.
+ - 2017-08-09 - Video File RS-25 Engine Test + - 20180415-TESS_Social_Briefing + - 201_TakingWildOutOfWildfire + - 2022-H1_V_EuropaClipper-4 + - 2022_0429_Recientemente +""" diff --git a/libs/langchain/langchain/tools/nasa/tool.py b/libs/langchain/langchain/tools/nasa/tool.py new file mode 100644 index 0000000000000..058944858cbeb --- /dev/null +++ b/libs/langchain/langchain/tools/nasa/tool.py @@ -0,0 +1,28 @@ +""" +This tool allows agents to interact with the NASA API, specifically +the NASA Image & Video Library and Exoplanet +""" + +from typing import Optional + +from langchain.callbacks.manager import CallbackManagerForToolRun +from langchain.pydantic_v1 import Field +from langchain.tools.base import BaseTool +from langchain.utilities.nasa import NasaAPIWrapper + + +class NasaAction(BaseTool): + """Tool that queries the NASA API.""" + + api_wrapper: NasaAPIWrapper = Field(default_factory=NasaAPIWrapper) + mode: str + name: str = "" + description: str = "" + + def _run( + self, + instructions: str, + run_manager: Optional[CallbackManagerForToolRun] = None, + ) -> str: + """Use the NASA API to run an operation.""" + return self.api_wrapper.run(self.mode, instructions) diff --git a/libs/langchain/langchain/utilities/__init__.py b/libs/langchain/langchain/utilities/__init__.py index 8c8242e3a6a9c..986f99de295bf 100644 --- a/libs/langchain/langchain/utilities/__init__.py +++ b/libs/langchain/langchain/utilities/__init__.py @@ -260,6 +260,12 @@ def _import_zapier() -> Any: return ZapierNLAWrapper +def _import_nasa() -> Any: + from langchain.utilities.nasa import NasaAPIWrapper + + return NasaAPIWrapper + + def __getattr__(name: str) -> Any: if name == "AlphaVantageAPIWrapper": return _import_alpha_vantage() @@ -307,6 +313,8 @@ def __getattr__(name: str) -> Any: return _import_merriam_webster() elif name == "MetaphorSearchAPIWrapper": return _import_metaphor_search() + elif name == "NasaAPIWrapper": + return _import_nasa() elif name ==
"OpenWeatherMapAPIWrapper": return _import_openweathermap() elif name == "OutlineAPIWrapper": @@ -373,6 +381,7 @@ def __getattr__(name: str) -> Any: "MaxComputeAPIWrapper", "MerriamWebsterAPIWrapper", "MetaphorSearchAPIWrapper", + "NasaAPIWrapper", "OpenWeatherMapAPIWrapper", "OutlineAPIWrapper", "Portkey", diff --git a/libs/langchain/langchain/utilities/nasa.py b/libs/langchain/langchain/utilities/nasa.py new file mode 100644 index 0000000000000..609a854f17db9 --- /dev/null +++ b/libs/langchain/langchain/utilities/nasa.py @@ -0,0 +1,52 @@ +"""Util that calls several NASA APIs.""" +import json + +import requests + +from langchain.pydantic_v1 import BaseModel + +IMAGE_AND_VIDEO_LIBRARY_URL = "https://images-api.nasa.gov" + + +class NasaAPIWrapper(BaseModel): + def get_media(self, query: str) -> str: + params = json.loads(query) + if params.get("q"): + queryText = params["q"] + params.pop("q") + else: + queryText = "" + response = requests.get( + IMAGE_AND_VIDEO_LIBRARY_URL + "/search?q=" + queryText, params=params + ) + data = response.json() + return data + + def get_media_metadata_manifest(self, query: str) -> str: + response = requests.get(IMAGE_AND_VIDEO_LIBRARY_URL + "/asset/" + query) + return response.json() + + def get_media_metadata_location(self, query: str) -> str: + response = requests.get(IMAGE_AND_VIDEO_LIBRARY_URL + "/metadata/" + query) + return response.json() + + def get_video_captions_location(self, query: str) -> str: + response = requests.get(IMAGE_AND_VIDEO_LIBRARY_URL + "/captions/" + query) + return response.json() + + def run(self, mode: str, query: str) -> str: + if mode == "search_media": + output = self.get_media(query) + elif mode == "get_media_metadata_manifest": + output = self.get_media_metadata_manifest(query) + elif mode == "get_media_metadata_location": + output = self.get_media_metadata_location(query) + elif mode == "get_video_captions_location": + output = self.get_video_captions_location(query) + else: + output = f"ModeError: 
Got unexpected mode {mode}." + + try: + return json.dumps(output) + except Exception: + return str(output) diff --git a/libs/langchain/tests/integration_tests/utilities/test_nasa.py b/libs/langchain/tests/integration_tests/utilities/test_nasa.py new file mode 100644 index 0000000000000..c605626afd865 --- /dev/null +++ b/libs/langchain/tests/integration_tests/utilities/test_nasa.py @@ -0,0 +1,32 @@ +"""Integration test for NASA API Wrapper.""" +from langchain.utilities.nasa import NasaAPIWrapper + + +def test_media_search() -> None: + """Test for NASA Image and Video Library media search""" + nasa = NasaAPIWrapper() + query = '{"q": "saturn", + "year_start": "2002", "year_end": "2010", "page": 2}' + output = nasa.run("search_media", query) + assert output is not None + assert "collection" in output + + +def test_get_media_metadata_manifest() -> None: + """Test for retrieving media metadata manifest from NASA Image and Video Library""" + nasa = NasaAPIWrapper() + output = nasa.run("get_media_metadata_manifest", "2022_0707_Recientemente") + assert output is not None + + +def test_get_media_metadata_location() -> None: + """Test for retrieving media metadata location from NASA Image and Video Library""" + nasa = NasaAPIWrapper() + output = nasa.run("get_media_metadata_location", "as11-40-5874") + assert output is not None + + +def test_get_video_captions_location() -> None: + """Test for retrieving video captions location from NASA Image and Video Library""" + nasa = NasaAPIWrapper() + output = nasa.run("get_video_captions_location", "172_ISS-Slosh.sr") + assert output is not None diff --git a/libs/langchain/tests/unit_tests/tools/test_imports.py b/libs/langchain/tests/unit_tests/tools/test_imports.py index fb30a00393c0c..dbd9bd48555ff 100644 --- a/libs/langchain/tests/unit_tests/tools/test_imports.py +++ b/libs/langchain/tests/unit_tests/tools/test_imports.py @@ -68,6 +68,7 @@ "ListSparkSQLTool", "MetaphorSearchResults", "MoveFileTool", + "NasaAction", 
"NavigateBackTool", "NavigateTool", "O365CreateDraftMessage", diff --git a/libs/langchain/tests/unit_tests/tools/test_public_api.py b/libs/langchain/tests/unit_tests/tools/test_public_api.py index b0b3844c96489..4d7cf8f4d0a86 100644 --- a/libs/langchain/tests/unit_tests/tools/test_public_api.py +++ b/libs/langchain/tests/unit_tests/tools/test_public_api.py @@ -70,6 +70,7 @@ "MerriamWebsterQueryRun", "MetaphorSearchResults", "MoveFileTool", + "NasaAction", "NavigateBackTool", "NavigateTool", "O365CreateDraftMessage", diff --git a/libs/langchain/tests/unit_tests/utilities/test_imports.py b/libs/langchain/tests/unit_tests/utilities/test_imports.py index 9650889869f55..49fddcff65522 100644 --- a/libs/langchain/tests/unit_tests/utilities/test_imports.py +++ b/libs/langchain/tests/unit_tests/utilities/test_imports.py @@ -23,6 +23,7 @@ "LambdaWrapper", "MaxComputeAPIWrapper", "MetaphorSearchAPIWrapper", + "NasaAPIWrapper", "OpenWeatherMapAPIWrapper", "OutlineAPIWrapper", "Portkey", From de86b84a70363fb327f7fd318b67bfa7bb7710db Mon Sep 17 00:00:00 2001 From: Jacob Lee Date: Mon, 4 Dec 2023 14:17:33 -0800 Subject: [PATCH 19/49] Prefer byte store interface for Upstash BaseStore to match other Redis (#14201) If we are not going to make the existing Docstore class also implement `BaseStore[str, Document]`, IMO all base store implementations should always be `[str, bytes]` so that they are more interchangeable. 
CC @rlancemartin @eyurtsev --- .../langchain/storage/upstash_redis.py | 57 ++++++++++++++++++- .../storage/test_upstash_redis.py | 18 +++--- 2 files changed, 65 insertions(+), 10 deletions(-) diff --git a/libs/langchain/langchain/storage/upstash_redis.py b/libs/langchain/langchain/storage/upstash_redis.py index 193b8c7c4b5c3..7dc436ce33e2c 100644 --- a/libs/langchain/langchain/storage/upstash_redis.py +++ b/libs/langchain/langchain/storage/upstash_redis.py @@ -1,9 +1,10 @@ from typing import Any, Iterator, List, Optional, Sequence, Tuple, cast +from langchain_core._api.deprecation import deprecated from langchain_core.stores import BaseStore -class UpstashRedisStore(BaseStore[str, str]): +class _UpstashRedisStore(BaseStore[str, str]): """BaseStore implementation using Upstash Redis as the underlying store.""" def __init__( @@ -117,3 +118,57 @@ def yield_keys(self, *, prefix: Optional[str] = None) -> Iterator[str]: yield relative_key else: yield key + + +@deprecated("0.0.335", alternative="UpstashRedisByteStore") +class UpstashRedisStore(_UpstashRedisStore): + """ + BaseStore implementation using Upstash Redis + as the underlying store to store strings. + + Deprecated in favor of the more generic UpstashRedisByteStore. + """ + + +class UpstashRedisByteStore(BaseStore[str, bytes]): + """ + BaseStore implementation using Upstash Redis + as the underlying store to store raw bytes. 
+ """ + + def __init__( + self, + *, + client: Any = None, + url: Optional[str] = None, + token: Optional[str] = None, + ttl: Optional[int] = None, + namespace: Optional[str] = None, + ) -> None: + self.underlying_store = _UpstashRedisStore( + client=client, url=url, token=token, ttl=ttl, namespace=namespace + ) + + def mget(self, keys: Sequence[str]) -> List[Optional[bytes]]: + """Get the values associated with the given keys.""" + return [ + value.encode("utf-8") if value is not None else None + for value in self.underlying_store.mget(keys) + ] + + def mset(self, key_value_pairs: Sequence[Tuple[str, bytes]]) -> None: + """Set the given key-value pairs.""" + self.underlying_store.mset( + [ + (k, v.decode("utf-8")) if v is not None else None + for k, v in key_value_pairs + ] + ) + + def mdelete(self, keys: Sequence[str]) -> None: + """Delete the given keys.""" + self.underlying_store.mdelete(keys) + + def yield_keys(self, *, prefix: Optional[str] = None) -> Iterator[str]: + """Yield keys in the store.""" + yield from self.underlying_store.yield_keys(prefix=prefix) diff --git a/libs/langchain/tests/integration_tests/storage/test_upstash_redis.py b/libs/langchain/tests/integration_tests/storage/test_upstash_redis.py index 183e09515b6a4..01ab5831fe3ed 100644 --- a/libs/langchain/tests/integration_tests/storage/test_upstash_redis.py +++ b/libs/langchain/tests/integration_tests/storage/test_upstash_redis.py @@ -5,7 +5,7 @@ import pytest -from langchain.storage.upstash_redis import UpstashRedisStore +from langchain.storage.upstash_redis import UpstashRedisByteStore if TYPE_CHECKING: from upstash_redis import Redis @@ -34,16 +34,16 @@ def redis_client() -> Redis: def test_mget(redis_client: Redis) -> None: - store = UpstashRedisStore(client=redis_client, ttl=None) + store = UpstashRedisByteStore(client=redis_client, ttl=None) keys = ["key1", "key2"] redis_client.mset({"key1": "value1", "key2": "value2"}) result = store.mget(keys) - assert result == ["value1", "value2"] + 
assert result == [b"value1", b"value2"] def test_mset(redis_client: Redis) -> None: - store = UpstashRedisStore(client=redis_client, ttl=None) - key_value_pairs = [("key1", "value1"), ("key2", "value2")] + store = UpstashRedisByteStore(client=redis_client, ttl=None) + key_value_pairs = [("key1", b"value1"), ("key2", b"value2")] store.mset(key_value_pairs) result = redis_client.mget("key1", "key2") assert result == ["value1", "value2"] @@ -51,7 +51,7 @@ def test_mset(redis_client: Redis) -> None: def test_mdelete(redis_client: Redis) -> None: """Test that deletion works as expected.""" - store = UpstashRedisStore(client=redis_client, ttl=None) + store = UpstashRedisByteStore(client=redis_client, ttl=None) keys = ["key1", "key2"] redis_client.mset({"key1": "value1", "key2": "value2"}) store.mdelete(keys) @@ -60,7 +60,7 @@ def test_mdelete(redis_client: Redis) -> None: def test_yield_keys(redis_client: Redis) -> None: - store = UpstashRedisStore(client=redis_client, ttl=None) + store = UpstashRedisByteStore(client=redis_client, ttl=None) redis_client.mset({"key1": "value2", "key2": "value2"}) assert sorted(store.yield_keys()) == ["key1", "key2"] assert sorted(store.yield_keys(prefix="key*")) == ["key1", "key2"] @@ -68,8 +68,8 @@ def test_yield_keys(redis_client: Redis) -> None: def test_namespace(redis_client: Redis) -> None: - store = UpstashRedisStore(client=redis_client, ttl=None, namespace="meow") - key_value_pairs = [("key1", "value1"), ("key2", "value2")] + store = UpstashRedisByteStore(client=redis_client, ttl=None, namespace="meow") + key_value_pairs = [("key1", b"value1"), ("key2", b"value2")] store.mset(key_value_pairs) cursor, all_keys = redis_client.scan(0) From 67662564f3457e43a3615df140633c00c655942a Mon Sep 17 00:00:00 2001 From: Vincent Brouwers Date: Mon, 4 Dec 2023 23:18:30 +0100 Subject: [PATCH 20/49] langchain[patch]: Fix `config` arg detection for wrapped lambdarunnable (#14230) **Description:** When a RunnableLambda only receives a synchronous 
callback, this callback is wrapped into an async one since #13408. However, this wrapping with `(*args, **kwargs)` causes the `accepts_config` check at [/libs/core/langchain_core/runnables/config.py#L342](https://github.com/langchain-ai/langchain/blob/ee94ef55ee6ab064da08340817955f821dfa6261/libs/core/langchain_core/runnables/config.py#L342) to fail, as this checks for the presence of a "config" argument in the method signature. Adding a `functools.wraps` around it, resolves it. --- libs/core/langchain_core/runnables/base.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/libs/core/langchain_core/runnables/base.py b/libs/core/langchain_core/runnables/base.py index 1943de8285541..d0d1d8d06f0f3 100644 --- a/libs/core/langchain_core/runnables/base.py +++ b/libs/core/langchain_core/runnables/base.py @@ -6,7 +6,7 @@ from abc import ABC, abstractmethod from concurrent.futures import FIRST_COMPLETED, wait from copy import deepcopy -from functools import partial +from functools import partial, wraps from itertools import tee from operator import itemgetter from typing import ( @@ -2518,6 +2518,7 @@ async def _ainvoke( afunc = self.afunc else: + @wraps(self.func) async def f(*args, **kwargs): # type: ignore[no-untyped-def] return await asyncio.get_running_loop().run_in_executor( None, partial(self.func, **kwargs), *args From a26c4a09302e971d54b133b540935a2fe99b2a24 Mon Sep 17 00:00:00 2001 From: Jacob Lee Date: Mon, 4 Dec 2023 14:43:32 -0800 Subject: [PATCH 21/49] Allow base_store to be used directly with MultiVectorRetriever (#14202) Allow users to pass a generic `BaseStore[str, bytes]` to MultiVectorRetriever, removing the need to use the `create_kv_docstore` method. This encoding will now happen internally. 
@rlancemartin @eyurtsev --------- Co-authored-by: Eugene Yurtsev --- .../retrievers/multi_vector.ipynb | 78 +++++++++---------- .../langchain/retrievers/multi_vector.py | 33 ++++++-- 2 files changed, 63 insertions(+), 48 deletions(-) diff --git a/docs/docs/modules/data_connection/retrievers/multi_vector.ipynb b/docs/docs/modules/data_connection/retrievers/multi_vector.ipynb index 9d6aea1a13d10..58d888057a41b 100644 --- a/docs/docs/modules/data_connection/retrievers/multi_vector.ipynb +++ b/docs/docs/modules/data_connection/retrievers/multi_vector.ipynb @@ -88,7 +88,7 @@ "# The retriever (empty to start)\n", "retriever = MultiVectorRetriever(\n", " vectorstore=vectorstore,\n", - " docstore=store,\n", + " base_store=store,\n", " id_key=id_key,\n", ")\n", "import uuid\n", @@ -143,7 +143,7 @@ { "data": { "text/plain": [ - "Document(page_content='Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court.', metadata={'doc_id': '455205f7-bb7d-4c36-b442-d1d6f9f701ed', 'source': '../../state_of_the_union.txt'})" + "Document(page_content='Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. 
\\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court.', metadata={'doc_id': '59899493-92a0-41cb-b6ba-a854730ad74a', 'source': '../../state_of_the_union.txt'})" ] }, "execution_count": 8, @@ -188,7 +188,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 10, "id": "36739460-a737-4a8e-b70f-50bf8c8eaae7", "metadata": {}, "outputs": [ @@ -198,7 +198,7 @@ "9875" ] }, - "execution_count": 15, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -223,7 +223,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 11, "id": "1433dff4", "metadata": {}, "outputs": [], @@ -238,7 +238,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 12, "id": "35b30390", "metadata": {}, "outputs": [], @@ -253,7 +253,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 13, "id": "41a2a738", "metadata": {}, "outputs": [], @@ -263,7 +263,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 14, "id": "7ac5e4b1", "metadata": {}, "outputs": [], @@ -276,7 +276,7 @@ "# The retriever (empty to start)\n", "retriever = MultiVectorRetriever(\n", " vectorstore=vectorstore,\n", - " docstore=store,\n", + " base_store=store,\n", " id_key=id_key,\n", ")\n", "doc_ids = [str(uuid.uuid4()) for _ in docs]" @@ -338,7 +338,7 @@ { "data": { "text/plain": [ - "Document(page_content=\"The document is a transcript of a speech given by the President of the United States. 
The President discusses several important issues and initiatives, including the nomination of a Supreme Court Justice, border security and immigration reform, protecting women's rights, advancing LGBTQ+ equality, bipartisan legislation, addressing the opioid epidemic and mental health, supporting veterans, investigating the health effects of burn pits on military personnel, ending cancer, and the strength and resilience of the American people.\", metadata={'doc_id': '79fa2e9f-28d9-4372-8af3-2caf4f1de312'})" + "Document(page_content=\"The document is a speech given by the President of the United States. The President discusses various important issues and goals for the country, including nominating a Supreme Court Justice, securing the border and fixing the immigration system, protecting women's rights, supporting veterans, addressing the opioid epidemic, improving mental health care, and ending cancer. The President emphasizes the unity and strength of the American people and expresses optimism for the future of the nation.\", metadata={'doc_id': '8fdf4009-628c-400d-949c-1d3f4daf1e66'})" ] }, "execution_count": 19, @@ -393,7 +393,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 22, "id": "5219b085", "metadata": {}, "outputs": [], @@ -418,7 +418,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 23, "id": "523deb92", "metadata": {}, "outputs": [], @@ -429,7 +429,7 @@ " {\"doc\": lambda x: x.page_content}\n", " # Only asking for 3 hypothetical questions, but this could be adjusted\n", " | ChatPromptTemplate.from_template(\n", - " \"Generate a list of 3 hypothetical questions that the below document could be used to answer:\\n\\n{doc}\"\n", + " \"Generate a list of exactly 3 hypothetical questions that the below document could be used to answer:\\n\\n{doc}\"\n", " )\n", " | ChatOpenAI(max_retries=0, model=\"gpt-4\").bind(\n", " functions=functions, function_call={\"name\": \"hypothetical_questions\"}\n", @@ -440,19 
+440,19 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 24, "id": "11d30554", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "[\"What was the author's initial impression of philosophy as a field of study, and how did it change when they got to college?\",\n", - " 'Why did the author decide to switch their focus to Artificial Intelligence (AI)?',\n", - " \"What led to the author's disillusionment with the field of AI as it was practiced at the time?\"]" + "[\"What were the author's initial areas of interest before college?\",\n", + " \"What was the author's experience with programming in his early years?\",\n", + " 'Why did the author switch his focus from AI to Lisp?']" ] }, - "execution_count": 33, + "execution_count": 24, "metadata": {}, "output_type": "execute_result" } @@ -463,7 +463,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 25, "id": "3eb2e48c", "metadata": {}, "outputs": [], @@ -473,7 +473,7 @@ }, { "cell_type": "code", - "execution_count": 67, + "execution_count": 26, "id": "b2cd6e75", "metadata": {}, "outputs": [], @@ -488,7 +488,7 @@ "# The retriever (empty to start)\n", "retriever = MultiVectorRetriever(\n", " vectorstore=vectorstore,\n", - " docstore=store,\n", + " base_store=store,\n", " id_key=id_key,\n", ")\n", "doc_ids = [str(uuid.uuid4()) for _ in docs]" @@ -496,7 +496,7 @@ }, { "cell_type": "code", - "execution_count": 68, + "execution_count": 27, "id": "18831b3b", "metadata": {}, "outputs": [], @@ -510,7 +510,7 @@ }, { "cell_type": "code", - "execution_count": 69, + "execution_count": 28, "id": "224b24c5", "metadata": {}, "outputs": [], @@ -521,7 +521,7 @@ }, { "cell_type": "code", - "execution_count": 70, + "execution_count": 29, "id": "7b442b90", "metadata": {}, "outputs": [], @@ -531,20 +531,20 @@ }, { "cell_type": "code", - "execution_count": 71, + "execution_count": 30, "id": "089b5ad0", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - 
"[Document(page_content=\"What is the President's stance on immigration reform?\", metadata={'doc_id': '505d73e3-8350-46ec-a58e-3af032f04ab3'}),\n", - " Document(page_content=\"What is the President's stance on immigration reform?\", metadata={'doc_id': '1c9618f0-7660-4b4f-a37c-509cbbbf6dba'}),\n", - " Document(page_content=\"What is the President's stance on immigration reform?\", metadata={'doc_id': '82c08209-b904-46a8-9532-edd2380950b7'}),\n", - " Document(page_content='What measures is the President proposing to protect the rights of LGBTQ+ Americans?', metadata={'doc_id': '82c08209-b904-46a8-9532-edd2380950b7'})]" + "[Document(page_content='What made Robert Morris advise the author to leave Y Combinator?', metadata={'doc_id': '740e484e-d67c-45f7-989d-9928aaf51c28'}),\n", + " Document(page_content=\"How did the author's mother's illness affect his decision to leave Y Combinator?\", metadata={'doc_id': '740e484e-d67c-45f7-989d-9928aaf51c28'}),\n", + " Document(page_content='What led the author to start publishing essays online?', metadata={'doc_id': '675ccee3-ce0b-4d5d-892c-b8942370babd'}),\n", + " Document(page_content='What measures are being taken to secure the border and fix the immigration system?', metadata={'doc_id': '2d51f010-969e-48a9-9e82-6b12bc7ab3d4'})]" ] }, - "execution_count": 71, + "execution_count": 30, "metadata": {}, "output_type": "execute_result" } @@ -555,7 +555,7 @@ }, { "cell_type": "code", - "execution_count": 72, + "execution_count": 31, "id": "7594b24e", "metadata": {}, "outputs": [], @@ -565,17 +565,17 @@ }, { "cell_type": "code", - "execution_count": 73, + "execution_count": 32, "id": "4c120c65", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "9194" + "9844" ] }, - "execution_count": 73, + "execution_count": 32, "metadata": {}, "output_type": "execute_result" } @@ -583,14 +583,6 @@ "source": [ "len(retrieved_docs[0].page_content)" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "616cfeeb", - 
"metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { @@ -609,7 +601,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.16" + "version": "3.10.5" } }, "nbformat": 4, diff --git a/libs/langchain/langchain/retrievers/multi_vector.py b/libs/langchain/langchain/retrievers/multi_vector.py index f05e0859eb15b..dcc81b554c363 100644 --- a/libs/langchain/langchain/retrievers/multi_vector.py +++ b/libs/langchain/langchain/retrievers/multi_vector.py @@ -1,13 +1,13 @@ from enum import Enum -from typing import List +from typing import List, Optional from langchain_core.documents import Document -from langchain_core.pydantic_v1 import Field from langchain_core.retrievers import BaseRetriever from langchain_core.stores import BaseStore from langchain_core.vectorstores import VectorStore from langchain.callbacks.manager import CallbackManagerForRetrieverRun +from langchain.storage._lc_store import create_kv_docstore class SearchType(str, Enum): @@ -27,12 +27,35 @@ class MultiVectorRetriever(BaseRetriever): and their embedding vectors""" docstore: BaseStore[str, Document] """The storage layer for the parent documents""" - id_key: str = "doc_id" - search_kwargs: dict = Field(default_factory=dict) + id_key: str + search_kwargs: dict """Keyword arguments to pass to the search function.""" - search_type: SearchType = SearchType.similarity + search_type: SearchType """Type of search to perform (similarity / mmr)""" + def __init__( + self, + *, + vectorstore: VectorStore, + docstore: Optional[BaseStore[str, Document]] = None, + base_store: Optional[BaseStore[str, bytes]] = None, + id_key: str = "doc_id", + search_kwargs: Optional[dict] = None, + search_type: SearchType = SearchType.similarity, + ): + if base_store is not None: + docstore = create_kv_docstore(base_store) + elif docstore is None: + raise Exception("You must pass a `base_store` parameter.") + + super().__init__( + vectorstore=vectorstore, + docstore=docstore, 
+ id_key=id_key, + search_kwargs=search_kwargs if search_kwargs is not None else {}, + search_type=search_type, + ) + def _get_relevant_documents( self, query: str, *, run_manager: CallbackManagerForRetrieverRun ) -> List[Document]: From 1750cc464d925f942d942db4a1e65465afd543a7 Mon Sep 17 00:00:00 2001 From: Leonid Ganeline Date: Mon, 4 Dec 2023 14:44:06 -0800 Subject: [PATCH 22/49] docs[patch]: moved `vectorstore` notebook file (#14181) The `/docs/integrations/toolkits/vectorstore` page is not the Integration page. The best place is in `/docs/modules/agents/how_to/` - Moved the file - Rerouted the page URL --- .../toolkits => modules/agents/how_to}/vectorstore.ipynb | 0 docs/vercel.json | 4 ++++ 2 files changed, 4 insertions(+) rename docs/docs/{integrations/toolkits => modules/agents/how_to}/vectorstore.ipynb (100%) diff --git a/docs/docs/integrations/toolkits/vectorstore.ipynb b/docs/docs/modules/agents/how_to/vectorstore.ipynb similarity index 100% rename from docs/docs/integrations/toolkits/vectorstore.ipynb rename to docs/docs/modules/agents/how_to/vectorstore.ipynb diff --git a/docs/vercel.json b/docs/vercel.json index a7dd36d288f45..ee9f02ad1ad4b 100644 --- a/docs/vercel.json +++ b/docs/vercel.json @@ -1056,6 +1056,10 @@ "source": "/en/latest/modules/agents/agents/custom_agent.html", "destination": "/docs/modules/agents/how_to/custom_agent" }, + { + "source": "/docs/integrations/toolkits/vectorstore", + "destination": "/docs/modules/agents/how_to/vectorstore" + }, { "source": "/en/latest/modules/agents/agents/custom_agent_with_tool_retrieval.html", "destination": "/docs/modules/agents/how_to/custom_agent_with_tool_retrieval" From aa8ae31e5b6fe45cfc970740957f228bf44cdced Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hynek=20Kydl=C3=AD=C4=8Dek?= Date: Tue, 5 Dec 2023 00:04:48 +0100 Subject: [PATCH 23/49] core[patch]: add response kwarg to on_llm_error # Dependencies None # Twitter handle @HKydlicek --------- Co-authored-by: Erick Friis --- 
libs/core/langchain_core/callbacks/base.py | 16 +++++++- libs/core/langchain_core/callbacks/manager.py | 9 +++++ .../language_models/chat_models.py | 28 ++++++++----- .../langchain_core/language_models/llms.py | 25 +++++++++--- libs/core/tests/unit_tests/fake/callbacks.py | 8 ++-- libs/core/tests/unit_tests/fake/chat_model.py | 16 +++++++- libs/core/tests/unit_tests/fake/llm.py | 18 ++++++++- .../language_models/chat_models/test_base.py | 37 +++++++++++++++++ .../language_models/llms/test_base.py | 40 ++++++++++++++++++- 9 files changed, 172 insertions(+), 25 deletions(-) diff --git a/libs/core/langchain_core/callbacks/base.py b/libs/core/langchain_core/callbacks/base.py index 14078755f4fc5..ed30e50ff14a4 100644 --- a/libs/core/langchain_core/callbacks/base.py +++ b/libs/core/langchain_core/callbacks/base.py @@ -75,7 +75,13 @@ def on_llm_error( parent_run_id: Optional[UUID] = None, **kwargs: Any, ) -> Any: - """Run when LLM errors.""" + """Run when LLM errors. + Args: + error (BaseException): The error that occurred. + kwargs (Any): Additional keyword arguments. + - response (LLMResult): The response which was generated before + the error occurred. + """ class ChainManagerMixin: @@ -351,7 +357,13 @@ async def on_llm_error( tags: Optional[List[str]] = None, **kwargs: Any, ) -> None: - """Run when LLM errors.""" + """Run when LLM errors. + Args: + error (BaseException): The error that occurred. + kwargs (Any): Additional keyword arguments. + - response (LLMResult): The response which was generated before + the error occurred. + """ async def on_chain_start( self, diff --git a/libs/core/langchain_core/callbacks/manager.py b/libs/core/langchain_core/callbacks/manager.py index 402900c632140..b1bb0119279f3 100644 --- a/libs/core/langchain_core/callbacks/manager.py +++ b/libs/core/langchain_core/callbacks/manager.py @@ -623,6 +623,9 @@ def on_llm_error( Args: error (Exception or KeyboardInterrupt): The error. + kwargs (Any): Additional keyword arguments. 
+ - response (LLMResult): The response which was generated before + the error occurred. """ handle_event( self.handlers, @@ -689,6 +692,12 @@ async def on_llm_error( Args: error (Exception or KeyboardInterrupt): The error. + kwargs (Any): Additional keyword arguments. + - response (LLMResult): The response which was generated before + the error occurred. + + + """ await ahandle_event( self.handlers, diff --git a/libs/core/langchain_core/language_models/chat_models.py b/libs/core/langchain_core/language_models/chat_models.py index 69bede4a12a50..24bb4114cb4b4 100644 --- a/libs/core/langchain_core/language_models/chat_models.py +++ b/libs/core/langchain_core/language_models/chat_models.py @@ -223,8 +223,8 @@ def stream( name=config.get("run_name"), batch_size=1, ) + generation: Optional[ChatGenerationChunk] = None try: - generation: Optional[ChatGenerationChunk] = None for chunk in self._stream( messages, stop=stop, run_manager=run_manager, **kwargs ): @@ -235,12 +235,15 @@ def stream( generation += chunk assert generation is not None except BaseException as e: - run_manager.on_llm_error(e) + run_manager.on_llm_error( + e, + response=LLMResult( + generations=[[generation]] if generation else [] + ), + ) raise e else: - run_manager.on_llm_end( - LLMResult(generations=[[generation]]), - ) + run_manager.on_llm_end(LLMResult(generations=[[generation]])) async def astream( self, @@ -277,8 +280,8 @@ async def astream( name=config.get("run_name"), batch_size=1, ) + generation: Optional[ChatGenerationChunk] = None try: - generation: Optional[ChatGenerationChunk] = None async for chunk in self._astream( messages, stop=stop, run_manager=run_manager, **kwargs ): @@ -289,7 +292,12 @@ async def astream( generation += chunk assert generation is not None except BaseException as e: - await run_manager.on_llm_error(e) + await run_manager.on_llm_error( + e, + response=LLMResult( + generations=[[generation]] if generation else [] + ), + ) raise e else: await run_manager.on_llm_end( @@ 
-366,7 +374,7 @@ def generate( ) except BaseException as e: if run_managers: - run_managers[i].on_llm_error(e) + run_managers[i].on_llm_error(e, response=LLMResult(generations=[])) raise e flattened_outputs = [ LLMResult(generations=[res.generations], llm_output=res.llm_output) @@ -433,7 +441,9 @@ async def agenerate( for i, res in enumerate(results): if isinstance(res, BaseException): if run_managers: - await run_managers[i].on_llm_error(res) + await run_managers[i].on_llm_error( + res, response=LLMResult(generations=[]) + ) exceptions.append(res) if exceptions: if run_managers: diff --git a/libs/core/langchain_core/language_models/llms.py b/libs/core/langchain_core/language_models/llms.py index dea4375b7f468..e0e830d10be7e 100644 --- a/libs/core/langchain_core/language_models/llms.py +++ b/libs/core/langchain_core/language_models/llms.py @@ -384,8 +384,8 @@ def stream( name=config.get("run_name"), batch_size=1, ) + generation: Optional[GenerationChunk] = None try: - generation: Optional[GenerationChunk] = None for chunk in self._stream( prompt, stop=stop, run_manager=run_manager, **kwargs ): @@ -396,7 +396,12 @@ def stream( generation += chunk assert generation is not None except BaseException as e: - run_manager.on_llm_error(e) + run_manager.on_llm_error( + e, + response=LLMResult( + generations=[[generation]] if generation else [] + ), + ) raise e else: run_manager.on_llm_end(LLMResult(generations=[[generation]])) @@ -436,8 +441,8 @@ async def astream( name=config.get("run_name"), batch_size=1, ) + generation: Optional[GenerationChunk] = None try: - generation: Optional[GenerationChunk] = None async for chunk in self._astream( prompt, stop=stop, run_manager=run_manager, **kwargs ): @@ -448,7 +453,12 @@ async def astream( generation += chunk assert generation is not None except BaseException as e: - await run_manager.on_llm_error(e) + await run_manager.on_llm_error( + e, + response=LLMResult( + generations=[[generation]] if generation else [] + ), + ) raise e 
else: await run_manager.on_llm_end(LLMResult(generations=[[generation]])) @@ -539,7 +549,7 @@ def _generate_helper( ) except BaseException as e: for run_manager in run_managers: - run_manager.on_llm_error(e) + run_manager.on_llm_error(e, response=LLMResult(generations=[])) raise e flattened_outputs = output.flatten() for manager, flattened_output in zip(run_managers, flattened_outputs): @@ -707,7 +717,10 @@ async def _agenerate_helper( ) except BaseException as e: await asyncio.gather( - *[run_manager.on_llm_error(e) for run_manager in run_managers] + *[ + run_manager.on_llm_error(e, response=LLMResult(generations=[])) + for run_manager in run_managers + ] ) raise e flattened_outputs = output.flatten() diff --git a/libs/core/tests/unit_tests/fake/callbacks.py b/libs/core/tests/unit_tests/fake/callbacks.py index 2a2af92269fe7..b2bef343fff88 100644 --- a/libs/core/tests/unit_tests/fake/callbacks.py +++ b/libs/core/tests/unit_tests/fake/callbacks.py @@ -14,6 +14,7 @@ class BaseFakeCallbackHandler(BaseModel): starts: int = 0 ends: int = 0 errors: int = 0 + errors_args: List[Any] = [] text: int = 0 ignore_llm_: bool = False ignore_chain_: bool = False @@ -52,8 +53,9 @@ def on_llm_end_common(self) -> None: self.llm_ends += 1 self.ends += 1 - def on_llm_error_common(self) -> None: + def on_llm_error_common(self, *args: Any, **kwargs: Any) -> None: self.errors += 1 + self.errors_args.append({"args": args, "kwargs": kwargs}) def on_llm_new_token_common(self) -> None: self.llm_streams += 1 @@ -160,7 +162,7 @@ def on_llm_error( *args: Any, **kwargs: Any, ) -> Any: - self.on_llm_error_common() + self.on_llm_error_common(*args, **kwargs) def on_retry( self, @@ -322,7 +324,7 @@ async def on_llm_error( *args: Any, **kwargs: Any, ) -> None: - self.on_llm_error_common() + self.on_llm_error_common(*args, **kwargs) async def on_chain_start( self, diff --git a/libs/core/tests/unit_tests/fake/chat_model.py b/libs/core/tests/unit_tests/fake/chat_model.py index 
e1268ad4fd3dd..717ab02533f37 100644 --- a/libs/core/tests/unit_tests/fake/chat_model.py +++ b/libs/core/tests/unit_tests/fake/chat_model.py @@ -45,6 +45,7 @@ class FakeListChatModel(SimpleChatModel): responses: List sleep: Optional[float] = None i: int = 0 + error_on_chunk_number: Optional[int] = None @property def _llm_type(self) -> str: @@ -77,9 +78,15 @@ def _stream( self.i += 1 else: self.i = 0 - for c in response: + for i_c, c in enumerate(response): if self.sleep is not None: time.sleep(self.sleep) + if ( + self.error_on_chunk_number is not None + and i_c == self.error_on_chunk_number + ): + raise Exception("Fake error") + yield ChatGenerationChunk(message=AIMessageChunk(content=c)) async def _astream( @@ -94,9 +101,14 @@ async def _astream( self.i += 1 else: self.i = 0 - for c in response: + for i_c, c in enumerate(response): if self.sleep is not None: await asyncio.sleep(self.sleep) + if ( + self.error_on_chunk_number is not None + and i_c == self.error_on_chunk_number + ): + raise Exception("Fake error") yield ChatGenerationChunk(message=AIMessageChunk(content=c)) @property diff --git a/libs/core/tests/unit_tests/fake/llm.py b/libs/core/tests/unit_tests/fake/llm.py index 1ebff8d8ca1b9..165e5b3d2df8d 100644 --- a/libs/core/tests/unit_tests/fake/llm.py +++ b/libs/core/tests/unit_tests/fake/llm.py @@ -60,6 +60,8 @@ def _identifying_params(self) -> Mapping[str, Any]: class FakeStreamingListLLM(FakeListLLM): """Fake streaming list LLM for testing purposes.""" + error_on_chunk_number: Optional[int] = None + def stream( self, input: LanguageModelInput, @@ -69,9 +71,15 @@ def stream( **kwargs: Any, ) -> Iterator[str]: result = self.invoke(input, config) - for c in result: + for i_c, c in enumerate(result): if self.sleep is not None: time.sleep(self.sleep) + + if ( + self.error_on_chunk_number is not None + and i_c == self.error_on_chunk_number + ): + raise Exception("Fake error") yield c async def astream( @@ -83,7 +91,13 @@ async def astream( **kwargs: Any, ) -> 
AsyncIterator[str]: result = await self.ainvoke(input, config) - for c in result: + for i_c, c in enumerate(result): if self.sleep is not None: await asyncio.sleep(self.sleep) + + if ( + self.error_on_chunk_number is not None + and i_c == self.error_on_chunk_number + ): + raise Exception("Fake error") yield c diff --git a/libs/core/tests/unit_tests/language_models/chat_models/test_base.py b/libs/core/tests/unit_tests/language_models/chat_models/test_base.py index 0f406a06aef28..24c49f79a3f49 100644 --- a/libs/core/tests/unit_tests/language_models/chat_models/test_base.py +++ b/libs/core/tests/unit_tests/language_models/chat_models/test_base.py @@ -1,8 +1,15 @@ """Test base chat model.""" + import pytest from langchain_core.messages import HumanMessage, SystemMessage +from langchain_core.outputs.llm_result import LLMResult from langchain_core.tracers.context import collect_runs +from tests.unit_tests.fake.callbacks import ( + BaseFakeCallbackHandler, + FakeAsyncCallbackHandler, + FakeCallbackHandler, +) from tests.unit_tests.fake.chat_model import FakeListChatModel @@ -69,3 +76,33 @@ async def test_async_batch_size(messages: list, messages_2: list) -> None: pass assert len(cb.traced_runs) == 1 assert (cb.traced_runs[0].extra or {}).get("batch_size") == 1 + + +async def test_stream_error_callback() -> None: + message = "test" + + def eval_response(callback: BaseFakeCallbackHandler, i: int) -> None: + assert callback.errors == 1 + assert len(callback.errors_args) == 1 + llm_result: LLMResult = callback.errors_args[0]["kwargs"]["response"] + if i == 0: + assert llm_result.generations == [] + else: + assert llm_result.generations[0][0].text == message[:i] + + for i in range(0, 2): + llm = FakeListChatModel( + responses=[message], + error_on_chunk_number=i, + ) + with pytest.raises(Exception): + cb_async = FakeAsyncCallbackHandler() + async for _ in llm.astream("Dummy message", callbacks=[cb_async]): + pass + eval_response(cb_async, i) + + cb_sync = FakeCallbackHandler() 
+ for _ in llm.stream("Dumy message", callbacks=[cb_sync]): + pass + + eval_response(cb_sync, i) diff --git a/libs/core/tests/unit_tests/language_models/llms/test_base.py b/libs/core/tests/unit_tests/language_models/llms/test_base.py index 37b81a0ed22a9..a6e866cf97627 100644 --- a/libs/core/tests/unit_tests/language_models/llms/test_base.py +++ b/libs/core/tests/unit_tests/language_models/llms/test_base.py @@ -1,5 +1,13 @@ +import pytest + +from langchain_core.outputs.llm_result import LLMResult from langchain_core.tracers.context import collect_runs -from tests.unit_tests.fake.llm import FakeListLLM +from tests.unit_tests.fake.callbacks import ( + BaseFakeCallbackHandler, + FakeAsyncCallbackHandler, + FakeCallbackHandler, +) +from tests.unit_tests.fake.llm import FakeListLLM, FakeStreamingListLLM def test_batch() -> None: @@ -75,3 +83,33 @@ async def test_async_batch_size() -> None: pass assert len(cb.traced_runs) == 1 assert (cb.traced_runs[0].extra or {}).get("batch_size") == 1 + + +async def test_stream_error_callback() -> None: + message = "test" + + def eval_response(callback: BaseFakeCallbackHandler, i: int) -> None: + assert callback.errors == 1 + assert len(callback.errors_args) == 1 + llm_result: LLMResult = callback.errors_args[0]["kwargs"]["response"] + if i == 0: + assert llm_result.generations == [] + else: + assert llm_result.generations[0][0].text == message[:i] + + for i in range(0, 2): + llm = FakeStreamingListLLM( + responses=[message], + error_on_chunk_number=i, + ) + with pytest.raises(Exception): + cb_async = FakeAsyncCallbackHandler() + async for _ in llm.astream("Dummy message", callbacks=[cb_async]): + pass + eval_response(cb_async, i) + + cb_sync = FakeCallbackHandler() + for _ in llm.stream("Dumy message", callbacks=[cb_sync]): + pass + + eval_response(cb_sync, i) From 65faba91ad673485b82bdf44f8028d5e28e77257 Mon Sep 17 00:00:00 2001 From: Kastan Day Date: Mon, 4 Dec 2023 15:53:36 -0800 Subject: [PATCH 24/49] langchain[patch]: Adding new 
Github functions for reading pull requests (#9027) The Github utilities are fantastic, so I'm adding support for deeper interaction with pull requests. Agents should read "regular" comments and review comments, and the content of PR files (with summarization or `ctags` abbreviations). Progress: - [x] Add functions to read pull requests and the full content of modified files. - [x] Function to use Github's built in code / issues search. Out of scope: - Smarter summarization of file contents of large pull requests (`tree` output, or ctags). - Smarter functions to checkout PRs and edit the files incrementally before bulk committing all changes. - Docs example for creating two agents: - One watches issues: For every new issue, open a PR with your best attempt at fixing it. - The other watches PRs: For every new PR && every new comment on a PR, check the status and try to finish the job. --------- Co-authored-by: Erick Friis --- docs/docs/integrations/toolkits/github.ipynb | 490 +++++++++++++- .../agents/agent_toolkits/github/toolkit.py | 194 ++++++ .../langchain/tools/github/prompt.py | 66 +- libs/langchain/langchain/tools/github/tool.py | 10 +- libs/langchain/langchain/utilities/github.py | 600 ++++++++++++++++-- 5 files changed, 1278 insertions(+), 82 deletions(-) diff --git a/docs/docs/integrations/toolkits/github.ipynb b/docs/docs/integrations/toolkits/github.ipynb index a14a5542cf20f..783b83c48408a 100644 --- a/docs/docs/integrations/toolkits/github.ipynb +++ b/docs/docs/integrations/toolkits/github.ipynb @@ -76,18 +76,17 @@ "* Pull requests (read and write)\n", "\n", "\n", - "\n", - "Once the app has been registered, add it to the repository you wish the bot to act upon.\n", + "Once the app has been registered, you must give your app permission to access each of the repositories you wish it to act upon. Use the App settings on [github.com here](https://github.com/settings/installations).\n", "\n", "### 3.
Set Environmental Variables\n", "\n", "Before initializing your agent, the following environmental variables need to be set:\n", "\n", "* **GITHUB_APP_ID**- A six digit number found in your app's general settings\n", - "* **GITHUB_APP_PRIVATE_KEY**- The location of your app's private key .pem file\n", - "* **GITHUB_REPOSITORY**- The name of the Github repository you want your bot to act upon. Must follow the format {username}/{repo-name}. Make sure the app has been added to this repository first!\n", - "* **GITHUB_BRANCH**- The branch where the bot will make its commits. Defaults to 'master.'\n", - "* **GITHUB_BASE_BRANCH**- The base branch of your repo, usually either 'main' or 'master.' This is where pull requests will base from. Defaults to 'master.'\n" + "* **GITHUB_APP_PRIVATE_KEY**- The location of your app's private key .pem file, or the full text of that file as a string.\n", + "* **GITHUB_REPOSITORY**- The name of the Github repository you want your bot to act upon. Must follow the format {username}/{repo-name}. *Make sure the app has been added to this repository first!*\n", + "* Optional: **GITHUB_BRANCH**- The branch where the bot will make its commits. Defaults to `repo.default_branch`.\n", + "* Optional: **GITHUB_BASE_BRANCH**- The base branch of your repo upon which PRs will be based.
Defaults to `repo.default_branch`.\n" ] }, { @@ -99,7 +98,7 @@ }, { "cell_type": "code", - "execution_count": 47, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -107,13 +106,13 @@ "\n", "from langchain.agents import AgentType, initialize_agent\n", "from langchain.agents.agent_toolkits.github.toolkit import GitHubToolkit\n", - "from langchain.llms import OpenAI\n", + "from langchain.chat_models import ChatOpenAI\n", "from langchain.utilities.github import GitHubAPIWrapper" ] }, { "cell_type": "code", - "execution_count": 53, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -130,16 +129,54 @@ }, { "cell_type": "code", - "execution_count": 54, + "execution_count": 5, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Available tools:\n", + "\tGet Issues\n", + "\tGet Issue\n", + "\tComment on Issue\n", + "\tList open pull requests (PRs)\n", + "\tGet Pull Request\n", + "\tOverview of files included in PR\n", + "\tCreate Pull Request\n", + "\tList Pull Requests' Files\n", + "\tCreate File\n", + "\tRead File\n", + "\tUpdate File\n", + "\tDelete File\n", + "\tOverview of existing files in Main branch\n", + "\tOverview of files in current working branch\n", + "\tList branches in this repository\n", + "\tSet active branch\n", + "\tCreate a new branch\n", + "\tGet files from a directory\n", + "\tSearch issues and pull requests\n", + "\tSearch code\n", + "\tCreate review request\n" + ] + } + ], "source": [ - "llm = OpenAI(temperature=0)\n", + "llm = ChatOpenAI(temperature=0, model=\"gpt-4-1106-preview\")\n", "github = GitHubAPIWrapper()\n", "toolkit = GitHubToolkit.from_github_api_wrapper(github)\n", + "tools = toolkit.get_tools()\n", + "\n", + "# STRUCTURED_CHAT includes args_schema for each tool, helps tool args parsing errors.\n", "agent = initialize_agent(\n", - " toolkit.get_tools(), llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True\n", - ")" + " 
tools,\n", + " llm,\n", + " agent=AgentType.STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION,\n", + " verbose=True,\n", + ")\n", + "print(\"Available tools:\")\n", + "for tool in tools:\n", + " print(\"\\t\" + tool.name)" ] }, { @@ -219,7 +256,428 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Example: Advanced Agent\n", + "## Example: Read an issue, open a pull request\n", + "\n", + "Workflow: \n", + "1. Read issues, either a specific one or just ask it to look at recent ones. \n", + "2. Write code, commit it to a new branch.\n", + "3. Open a PR\n", + "4. \"Request review\" on the PR from the original author of the issue.\n", + "\n", + "\n", + "### Input data and LangSmith Trace\n", + "* LangSmith trace for this run: https://smith.langchain.com/public/fee6643c-b214-42d0-967b-d24dcdd690fe/r\n", + "* Input issue: https://github.com/KastanDay/ML4Bio/issues/33\n", + "* Final PR created by bot: https://github.com/KastanDay/ML4Bio/pull/40" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Please implement these changes by creating or editing the necessary files. \n", + "\n", + "1. First use read_file to read any files in the repo that seem relevant. \n", + "2. Then, when you're ready, start implementing changes by creating and updating files. Implement any and all remaining code to make the project work as the commenter intended. \n", + "2. The last step is to create a PR with a clear and concise title and description, list any concerns or final changes necessary in the PR body.\n", + "3. After opening the PR, comment on the original issue and mention the new PR your just opened, you must comment \"I opened a PR for you to review here #\" (it'll be something like #30). That hashtag syntax will automatically link to the PR, as necessary. Thanks.\n", + "4. 
If you feel the PR is satisfactory for completing your assignment, create a review request for the original user that opened the issue. Use their username to tag them.\n", + "\n", + "Feel free to ask for help or leave a comment on the Issue or PR if you're stuck.\n", + "\n", + "Here's your latest assignment: {issue_description}\n" + ] + } + ], + "source": [ + "from langchain import hub\n", + "\n", + "gh_issue_prompt_template = hub.pull(\"kastanday/new-github-issue\")\n", + "print(gh_issue_prompt_template.template)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Please implement these changes by creating or editing the necessary files. \n", + "\n", + "1. First use read_file to read any files in the repo that seem relevant. \n", + "2. Then, when you're ready, start implementing changes by creating and updating files. Implement any and all remaining code to make the project work as the commenter intended. \n", + "2. The last step is to create a PR with a clear and concise title and description, list any concerns or final changes necessary in the PR body.\n", + "3. After opening the PR, comment on the original issue and mention the new PR your just opened, you must comment \"I opened a PR for you to review here #\" (it'll be something like #30). That hashtag syntax will automatically link to the PR, as necessary. Thanks.\n", + "4. If you feel the PR is satisfactory for completing your assignment, create a review request for the original user that opened the issue. Use their username to tag them.\n", + "\n", + "Feel free to ask for help or leave a comment on the Issue or PR if you're stuck.\n", + "\n", + "Here's your latest assignment: Title: Create a full command line executable workflow for RNA-Seq on PBMC Samples. 
Open a new pull request (on a separate branch) and comment the PR number here when you're done..\n", + "Opened by user: KastanDay\n", + "Body: Experiment Type:\n", + "RNA-Seq\n", + "Sequencing of total cellular RNA\n", + "\n", + "Workflow Management:\n", + "Bash/SLURM\n", + "Scripting and job scheduling\n", + "\n", + "Software Stack:\n", + "FastQC\n", + "MultiQC\n", + "STAR\n", + "RSEM\n", + "samtools\n", + "DESeq2\n", + "\n", + "What else to know about the pipeline?\n", + "I am working PBMC samples collected from patients that are undergoing immunotherapy.\n", + "\n", + "Use the data files existing in [Report_WholeBrain](https://github.com/KastanDay/ML4Bio/tree/main/Report_WholeBrain) as input for this workflow.\n", + "\n", + "You should write a series of bash scripts and R scripts that can accomplish this task. Open a PR with those scripts when you're done.\n" + ] + } + ], + "source": [ + "def format_issue(issue):\n", + " title = f\"Title: {issue.get('title')}.\"\n", + " opened_by = f\"Opened by user: {issue.get('opened_by')}\"\n", + " body = f\"Body: {issue.get('body')}\"\n", + " comments = issue.get(\"comments\") # often too long\n", + " return \"\\n\".join([title, opened_by, body])\n", + "\n", + "\n", + "issue = github.get_issue(33) # task to implement a RNA-seq pipeline (bioinformatics)\n", + "final_gh_issue_prompt = gh_issue_prompt_template.format(\n", + " issue_description=format_issue(issue)\n", + ")\n", + "print(final_gh_issue_prompt)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.memory.summary_buffer import ConversationSummaryBufferMemory\n", + "from langchain_core.prompts.chat import MessagesPlaceholder\n", + "\n", + "summarizer_llm = ChatOpenAI(temperature=0, model=\"gpt-3.5-turbo\") # type: ignore\n", + "chat_history = MessagesPlaceholder(variable_name=\"chat_history\")\n", + "memory = ConversationSummaryBufferMemory(\n", + " memory_key=\"chat_history\",\n", + " 
return_messages=True,\n", + " llm=summarizer_llm,\n", + " max_token_limit=2_000,\n", + ")\n", + "\n", + "agent = initialize_agent(\n", + " tools,\n", + " llm,\n", + " agent=AgentType.STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION,\n", + " verbose=True,\n", + " handle_parsing_errors=True, # or pass a function that accepts the error and returns a string\n", + " max_iterations=30,\n", + " max_execution_time=None,\n", + " early_stopping_method=\"generate\",\n", + " memory=memory,\n", + " # trim_intermediate_steps=fancier_trim_intermediate_steps,\n", + " agent_kwargs={\n", + " \"memory_prompts\": [chat_history],\n", + " \"input_variables\": [\"input\", \"agent_scratchpad\", \"chat_history\"],\n", + " \"prefix\": final_gh_issue_prompt,\n", + " },\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n", + "\u001b[32;1m\u001b[1;3m```json\n", + "{\n", + " \"action\": \"Get files from a directory\",\n", + " \"action_input\": \"ML4Bio/tree/main/Report_WholeBrain\"\n", + "}\n", + "```\u001b[0m\n", + "Observation: \u001b[38;5;200m\u001b[1;3mError: status code 404, None\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3mThe previous action to get files from a directory failed because the path provided does not exist or is not accessible. I need to correct the path to access the files in the `Report_WholeBrain` directory. Let's try to fetch the list of files from the correct directory path. 
\n", + "\n", + "Action:\n", + "```json\n", + "{\n", + " \"action\": \"Get files from a directory\",\n", + " \"action_input\": \"Report_WholeBrain\"\n", + "}\n", + "```\u001b[0m\n", + "Observation: \u001b[38;5;200m\u001b[1;3m['Report_WholeBrain/MDSclustering_WholeBrain.html', 'Report_WholeBrain/MDSclustering_WholeBrain_RUVremoved.html', 'Report_WholeBrain/Report_Antonson_WholeBrain_2022Mar.Rmd', 'Report_WholeBrain/Report_WholeBrain_files/figure-html/Figure 1-1.png', 'Report_WholeBrain/Report_WholeBrain_files/figure-html/Figure 2-1.png', 'Report_WholeBrain/Report_WholeBrain_files/figure-html/Figure 3-1.png', 'Report_WholeBrain/Report_WholeBrain_files/figure-html/Figure 4-1.png', 'Report_WholeBrain/Report_WholeBrain_files/figure-html/Figure 6-1.png', 'Report_WholeBrain/Report_WholeBrain_files/figure-html/Figure 7-1.png', 'Report_WholeBrain/Report_WholeBrain_files/figure-html/Figure 8-1.png', 'Report_WholeBrain/Report_WholeBrain_files/figure-html/Figure 9-1.png', 'Report_WholeBrain/SalmonSummarizedOutput.RData', 'Report_WholeBrain/SampleInfo_RUVvariables_WholeBrain_2022-05-12.csv', 'Report_WholeBrain/Targets_Final.txt', 'Report_WholeBrain/WholeBrain_GeneResults_2022-05-12.xlsx', 'Report_WholeBrain/WholeBrain_GeneResults_RUV_2022-05-12.xlsx', 'Report_WholeBrain/WholeBrain_Gene_level_counts_2022-05-12.xlsx', 'Report_WholeBrain/WholeBrain_RUV_FDR0.1.html', 'Report_WholeBrain/WholeBrain_logCPMValues_RUVcorrected_2022-05-12.xlsx', 'Report_WholeBrain/WholeBrain_logCPMvalues_2022-05-12.xlsx', 'Report_WholeBrain/WholeBrain_rawP05.html', 'Report_WholeBrain/getGO.R', 'Report_WholeBrain/getPath.R', 'Report_WholeBrain/interactive_plots/css/glimma.min.css', 'Report_WholeBrain/interactive_plots/css/src/images/animated-overlay.gif', 'Report_WholeBrain/interactive_plots/css/src/images/favicon.ico', 'Report_WholeBrain/interactive_plots/css/src/images/sort_asc.png', 'Report_WholeBrain/interactive_plots/css/src/images/sort_asc_disabled.png', 
'Report_WholeBrain/interactive_plots/css/src/images/sort_both.png', 'Report_WholeBrain/interactive_plots/css/src/images/sort_desc.png', 'Report_WholeBrain/interactive_plots/css/src/images/sort_desc_disabled.png', 'Report_WholeBrain/interactive_plots/css/src/images/ui-bg_flat_0_aaaaaa_40x100.png', 'Report_WholeBrain/interactive_plots/css/src/images/ui-bg_flat_75_ffffff_40x100.png', 'Report_WholeBrain/interactive_plots/css/src/images/ui-bg_glass_55_fbf9ee_1x400.png', 'Report_WholeBrain/interactive_plots/css/src/images/ui-bg_glass_65_ffffff_1x400.png', 'Report_WholeBrain/interactive_plots/css/src/images/ui-bg_glass_75_dadada_1x400.png', 'Report_WholeBrain/interactive_plots/css/src/images/ui-bg_glass_75_e6e6e6_1x400.png', 'Report_WholeBrain/interactive_plots/css/src/images/ui-bg_glass_95_fef1ec_1x400.png', 'Report_WholeBrain/interactive_plots/css/src/images/ui-bg_highlight-soft_75_cccccc_1x100.png', 'Report_WholeBrain/interactive_plots/css/src/images/ui-icons_222222_256x240.png', 'Report_WholeBrain/interactive_plots/css/src/images/ui-icons_2e83ff_256x240.png', 'Report_WholeBrain/interactive_plots/css/src/images/ui-icons_454545_256x240.png', 'Report_WholeBrain/interactive_plots/css/src/images/ui-icons_888888_256x240.png', 'Report_WholeBrain/interactive_plots/css/src/images/ui-icons_cd0a0a_256x240.png', 'Report_WholeBrain/interactive_plots/js/glimma.min.js', 'Report_WholeBrain/interactive_plots/js/old_MDSclustering_Microglia.js', 'Report_WholeBrain/interactive_plots/js/old_MDSclustering_Microglia_RUV.js', 'Report_WholeBrain/interactive_plots/js/old_MDSclustering_WholeBrain.js', 'Report_WholeBrain/interactive_plots/js/old_MDSclustering_WholeBrain_RUV.js', 'Report_WholeBrain/interactive_plots/js/old_MDSclustering_WholeBrain_noOUT.js', 'Report_WholeBrain/interactive_plots/js/old_Microglia_rawP05.js', 'Report_WholeBrain/interactive_plots/js/old_WholeBrain_RUV_FDR0.1.js', 'Report_WholeBrain/interactive_plots/js/old_WholeBrain_rawP05.js', 
'Report_WholeBrain/interactive_plots/old_MDSclustering_Microglia.html', 'Report_WholeBrain/interactive_plots/old_MDSclustering_Microglia_RUV.html', 'Report_WholeBrain/interactive_plots/old_MDSclustering_WholeBrain.html', 'Report_WholeBrain/interactive_plots/old_MDSclustering_WholeBrain_RUV.html', 'Report_WholeBrain/interactive_plots/old_MDSclustering_WholeBrain_noOUT.html', 'Report_WholeBrain/interactive_plots/old_Microglia_rawP05.html', 'Report_WholeBrain/interactive_plots/old_WholeBrain_RUV_FDR0.1.html', 'Report_WholeBrain/interactive_plots/old_WholeBrain_rawP05.html', 'Report_WholeBrain/trx_EGids_combined.txt', 'Report_WholeBrain/⭐️____Report_Antonson_WholeBrain_2022Mar.html']\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3mThe files in the `Report_WholeBrain` directory seem to be mostly reports, images, and some R scripts. However, none of these files appear to be raw RNA-Seq data files or scripts directly related to the RNA-Seq workflow requested by the user. To proceed with creating the workflow, I will need to create new bash and R scripts that will handle the RNA-Seq analysis using the software stack mentioned (FastQC, MultiQC, STAR, RSEM, samtools, DESeq2).\n", + "\n", + "Before creating the scripts, I should create a new branch to work on this feature.\n", + "\n", + "Action:\n", + "```json\n", + "{\n", + " \"action\": \"Create a new branch\",\n", + " \"action_input\": \"rna-seq-workflow\"\n", + "}\n", + "```\u001b[0m\n", + "Observation: \u001b[33;1m\u001b[1;3mBranch 'rna-seq-workflow_v1' created successfully, and set as current active branch.\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3mNow that the new branch `rna-seq-workflow_v1` is created and active, I can start creating the necessary bash and R scripts for the RNA-Seq workflow. 
I will begin by creating a bash script for quality control using FastQC and MultiQC.\n", + "\n", + "Action:\n", + "```json\n", + "{\n", + " \"action\": \"Create File\",\n", + " \"action_input\": \"rna-seq-workflow/01_quality_control.sh\\n#!/bin/bash\\n#PBS -N quality_control\\n#PBS -l walltime=04:00:00\\n#PBS -l nodes=1:ppn=8\\n#PBS -q batch\\n\\n# Load modules for FastQC and MultiQC\\nmodule load FastQC/0.11.8\\nmodule load MultiQC/1.9\\n\\n# Define input and output directories\\nINPUT_DIR='/path/to/raw_data'\\nOUTPUT_DIR='/path/to/output/quality_control'\\n\\n# Create output directory if it doesn't exist\\nmkdir -p $OUTPUT_DIR\\n\\n# Run FastQC on all files in the input directory\\nfastqc -o $OUTPUT_DIR -t 8 $INPUT_DIR/*.fastq.gz\\n\\n# Run MultiQC to aggregate results\\nmultiqc -o $OUTPUT_DIR $OUTPUT_DIR\\n\"\n", + "}\n", + "```\u001b[0m\n", + "Observation: \u001b[38;5;200m\u001b[1;3mCreated file rna-seq-workflow/01_quality_control.sh\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3mThe quality control bash script has been created successfully. 
Next, I will create a bash script for the alignment step using STAR.\n", + "\n", + "Action:\n", + "```json\n", + "{\n", + " \"action\": \"Create File\",\n", + " \"action_input\": \"rna-seq-workflow/02_alignment.sh\\n#!/bin/bash\\n#PBS -N alignment\\n#PBS -l walltime=08:00:00\\n#PBS -l nodes=1:ppn=16\\n#PBS -q batch\\n\\n# Load module for STAR\\nmodule load STAR/2.7.3a\\n\\n# Define input and output directories\\nINPUT_DIR='/path/to/quality_control'\\nOUTPUT_DIR='/path/to/output/alignment'\\nGENOME_DIR='/path/to/genome_indices'\\n\\n# Create output directory if it doesn't exist\\nmkdir -p $OUTPUT_DIR\\n\\n# Run STAR for each sample\\nfor sample in $INPUT_DIR/*.fastq.gz\\ndo\\n # Define output file names\\n prefix=$(basename $sample .fastq.gz)\\n output_prefix=$OUTPUT_DIR/$prefix\\n\\n # Run STAR\\n STAR --runThreadN 16 --genomeDir $GENOME_DIR --readFilesIn $sample --readFilesCommand zcat --outFileNamePrefix $output_prefix --outSAMtype BAM SortedByCoordinate\\n\\ndone\\n\"\n", + "}\n", + "```\u001b[0m\n", + "Observation: \u001b[38;5;200m\u001b[1;3mCreated file rna-seq-workflow/02_alignment.sh\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3mThe alignment bash script has been created successfully. 
Next, I will create a bash script for transcript quantification using RSEM.\n", + "\n", + "Action:\n", + "```json\n", + "{\n", + " \"action\": \"Create File\",\n", + " \"action_input\": \"rna-seq-workflow/03_transcript_quantification.sh\\n#!/bin/bash\\n#PBS -N transcript_quantification\\n#PBS -l walltime=12:00:00\\n#PBS -l nodes=1:ppn=8\\n#PBS -q batch\\n\\n# Load module for RSEM\\nmodule load RSEM/1.3.3\\n\\n# Define input and output directories\\nINPUT_DIR='/path/to/alignment'\\nOUTPUT_DIR='/path/to/output/transcript_quantification'\\nREFERENCE_DIR='/path/to/reference'\\n\\n# Create output directory if it doesn't exist\\nmkdir -p $OUTPUT_DIR\\n\\n# Run RSEM for each sample\\nfor bam in $INPUT_DIR/*.bam\\ndo\\n # Define output file names\\n prefix=$(basename $bam .bam)\\n output_prefix=$OUTPUT_DIR/$prefix\\n\\n # Run RSEM\\n rsem-calculate-expression --bam --no-bam-output --estimate-rspd --append-names --output-genome-bam -p 8 $bam $REFERENCE_DIR $output_prefix\\n\\ndone\\n\"\n", + "}\n", + "```\n", + "\u001b[0m\n", + "Observation: \u001b[38;5;200m\u001b[1;3mCreated file rna-seq-workflow/03_transcript_quantification.sh\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3mThe transcript quantification bash script has been created successfully. 
Now, I will create a bash script for sorting and indexing BAM files using samtools, which is a common step before differential expression analysis.\n", + "\n", + "Action:\n", + "```json\n", + "{\n", + " \"action\": \"Create File\",\n", + " \"action_input\": \"rna-seq-workflow/04_sort_index_bam.sh\\n#!/bin/bash\\n#PBS -N sort_index_bam\\n#PBS -l walltime=02:00:00\\n#PBS -l nodes=1:ppn=4\\n#PBS -q batch\\n\\n# Load module for samtools\\nmodule load samtools/1.10\\n\\n# Define input and output directories\\nINPUT_DIR='/path/to/alignment'\\nOUTPUT_DIR='/path/to/output/sorted_bams'\\n\\n# Create output directory if it doesn't exist\\nmkdir -p $OUTPUT_DIR\\n\\n# Sort and index BAM files\\nfor bam in $INPUT_DIR/*.bam\\ndo\\n # Define output file names\\n sorted_bam=$OUTPUT_DIR/$(basename $bam .bam).sorted.bam\\n\\n # Sort BAM file\\n samtools sort -@ 4 -o $sorted_bam $bam\\n\\n # Index sorted BAM file\\n samtools index $sorted_bam\\ndone\\n\"\n", + "}\n", + "```\u001b[0m\n", + "Observation: \u001b[38;5;200m\u001b[1;3mCreated file rna-seq-workflow/04_sort_index_bam.sh\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3mThe sorting and indexing of BAM files script has been created successfully. The next step is to create an R script for differential expression analysis using DESeq2. 
This script will be executed after the previous steps in the workflow have been completed.\n", + "\n", + "Action:\n", + "```json\n", + "{\n", + " \"action\": \"Create File\",\n", + " \"action_input\": \"rna-seq-workflow/05_differential_expression.R\\n# Load DESeq2 library\\nlibrary(DESeq2)\\n\\n# Set input directory\\ninput_dir <- '/path/to/transcript_quantification'\\n\\n# Read in the count matrix and the sample information\\n# Assuming countData is a matrix with count data and colData is a DataFrame with sample information\\ncountData <- as.matrix(read.csv(file.path(input_dir, 'count_matrix.csv'), row.names=1))\\ncolData <- read.csv(file.path(input_dir, 'colData.csv'))\\n\\n# Create a DESeqDataSet object\\ndds <- DESeqDataSetFromMatrix(countData = countData,\\n colData = colData,\\n design = ~ condition)\\n\\n# Run the differential expression analysis\\ndds <- DESeq(dds)\\n\\n# Get the results\\nres <- results(dds)\\n\\n# Order the results by the adjusted p-value\\nresOrdered <- res[order(res$padj),]\\n\\n# Write the results to a CSV file\\nwrite.csv(as.data.frame(resOrdered), file.path(input_dir, 'differential_expression_results.csv'))\\n\"\n", + "}\n", + "```\n", + "\u001b[0m\n", + "Observation: \u001b[38;5;200m\u001b[1;3mCreated file rna-seq-workflow/05_differential_expression.R\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3mThe R script for differential expression analysis using DESeq2 has been created successfully. 
Now that all the necessary scripts for the RNA-Seq workflow have been created, I should create a master bash script that will call each of these scripts in sequence to run the entire workflow.\n", + "\n", + "Action:\n", + "```json\n", + "{\n", + " \"action\": \"Create File\",\n", + " \"action_input\": \"rna-seq-workflow/run_workflow.sh\\n#!/bin/bash\\n# Master script to run the RNA-Seq workflow\\n\\n# Run quality control\\nbash 01_quality_control.sh\\n\\n# Run alignment\\nbash 02_alignment.sh\\n\\n# Run transcript quantification\\nbash 03_transcript_quantification.sh\\n\\n# Sort and index BAM files\\nbash 04_sort_index_bam.sh\\n\\n# Run differential expression analysis\\nRscript 05_differential_expression.R\\n\\n# Workflow completed\\necho \\\"RNA-Seq workflow completed successfully.\\\"\\n\"\n", + "}\n", + "```\u001b[0m\n", + "Observation: \u001b[38;5;200m\u001b[1;3mCreated file rna-seq-workflow/run_workflow.sh\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3mThe master bash script to run the entire RNA-Seq workflow has been created successfully. The next step is to create a pull request with all the scripts that have been created in the `rna-seq-workflow_v1` branch.\n", + "\n", + "Action:\n", + "```json\n", + "{\n", + " \"action\": \"Create Pull Request\",\n", + " \"action_input\": \"Create a full command line executable workflow for RNA-Seq on PBMC Samples\\n\\nThis pull request includes a series of bash and R scripts to perform RNA-Seq analysis on PBMC samples. 
The workflow includes steps for quality control, alignment, transcript quantification, sorting and indexing BAM files, and differential expression analysis using the following software stack: FastQC, MultiQC, STAR, RSEM, samtools, and DESeq2.\\n\\nPlease review the scripts and provide feedback or request any additional changes as necessary.\"\n", + "}\n", + "```\u001b[0m\n", + "Observation: \u001b[36;1m\u001b[1;3mSuccessfully created PR number 40\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3mThe pull request has been successfully created. Now, I need to comment on the original issue to inform the user about the new PR.\n", + "\n", + "Action:\n", + "```json\n", + "{\n", + " \"action\": \"Comment on Issue\",\n", + " \"action_input\": \"2\\n\\nI opened a PR for you to review here #40\"\n", + "}\n", + "```\u001b[0m\n", + "Observation: \u001b[38;5;200m\u001b[1;3mCommented on issue 2\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3mThe comment has been posted on the original issue. Now, I will create a review request for the user KastanDay who opened the issue.\n", + "\n", + "Action:\n", + "```json\n", + "{\n", + " \"action\": \"Create review request\",\n", + " \"action_input\": \"KastanDay\"\n", + "}\n", + "```\u001b[0m\n", + "Observation: \u001b[38;5;200m\u001b[1;3mReview request created for user KastanDay on PR #40\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3mAction:\n", + "```json\n", + "{\n", + " \"action\": \"Final Answer\",\n", + " \"action_input\": \"The tasks have been completed: a new branch was created, the necessary scripts for the RNA-Seq workflow were written, a pull request was opened, the original issue was commented on with the PR number, and a review request was sent to the user KastanDay.\"\n", + "}\n", + "```\u001b[0m\n", + "\n", + "\u001b[1m> Finished chain.\u001b[0m\n" + ] + } + ], + "source": [ + "from langchain_core.tracers.context import tracing_v2_enabled\n", + "\n", + "# To use langsmith (recommended for these long tasks):\n", + 
"os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n", + "os.environ[\"LANGCHAIN_ENDPOINT\"] = \"https://api.smith.langchain.com\"\n", + "os.environ[\"LANGCHAIN_API_KEY\"] = \"ls__......\"\n", + "os.environ[\"LANGCHAIN_PROJECT\"] = \"Github_Demo_PR\"\n", + "os.environ[\"LANGCHAIN_WANDB_TRACING\"] = \"false\"\n", + "\n", + "\n", + "with tracing_v2_enabled(project_name=\"Github_Demo_PR\", tags=[\"PR_bot\"]) as cb:\n", + " agent.run(final_gh_issue_prompt)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Full text of tools\n", + "\n", + "When using or building tools, it's always helpful to inspect what the model sees.\n", + "\n", + "On OpenAI models, tool descriptions are part of the `SystemPrompt`.\n", + "\n", + "The `args` are added to the prompt in structured chats, e.g. `AgentType.STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION`, but not in `AgentType.ZERO_SHOT_REACT_DESCRIPTION`." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Get Issues: \n", + "This tool will fetch a list of the repository's issues. It will return the title, and issue number of 5 issues. It takes no input., args: {'no_input': {'title': 'No Input', 'description': 'No input required, e.g. `` (empty string).', 'default': '', 'type': 'string'}}\n", + "Get Issue: \n", + "This tool will fetch the title, body, and comment thread of a specific issue. **VERY IMPORTANT**: You must specify the issue number as an integer., args: {'issue_number': {'title': 'Issue Number', 'description': 'Issue number as an integer, e.g. `42`', 'default': 0, 'type': 'integer'}}\n", + "Comment on Issue: \n", + "This tool is useful when you need to comment on a GitHub issue. Simply pass in the issue number and the comment you would like to make. Please use this sparingly as we don't want to clutter the comment threads. 
**VERY IMPORTANT**: Your input to this tool MUST strictly follow these rules:\n", + "\n", + "- First you must specify the issue number as an integer\n", + "- Then you must place two newlines\n", + "- Then you must specify your comment, args: {'input': {'title': 'Input', 'description': 'Follow the required formatting.', 'type': 'string'}}\n", + "List open pull requests (PRs): \n", + "This tool will fetch a list of the repository's Pull Requests (PRs). It will return the title, and PR number of 5 PRs. It takes no input., args: {'no_input': {'title': 'No Input', 'description': 'No input required, e.g. `` (empty string).', 'default': '', 'type': 'string'}}\n", + "Get Pull Request: \n", + "This tool will fetch the title, body, comment thread and commit history of a specific Pull Request (by PR number). **VERY IMPORTANT**: You must specify the PR number as an integer., args: {'pr_number': {'title': 'Pr Number', 'description': 'The PR number as an integer, e.g. `12`', 'default': 0, 'type': 'integer'}}\n", + "Overview of files included in PR: \n", + "This tool will fetch the full text of all files in a pull request (PR) given the PR number as an input. This is useful for understanding the code changes in a PR or contributing to it. **VERY IMPORTANT**: You must specify the PR number as an integer input parameter., args: {'pr_number': {'title': 'Pr Number', 'description': 'The PR number as an integer, e.g. `12`', 'default': 0, 'type': 'integer'}}\n", + "Create Pull Request: \n", + "This tool is useful when you need to create a new pull request in a GitHub repository. 
**VERY IMPORTANT**: Your input to this tool MUST strictly follow these rules:\n", + "\n", + "- First you must specify the title of the pull request\n", + "- Then you must place two newlines\n", + "- Then you must write the body or description of the pull request\n", + "\n", + "When appropriate, always reference relevant issues in the body by using the syntax `closes #>>> OLD\n", + "- Then you must specify the new contents which you would like to replace the old contents with wrapped in NEW <<<< and >>>> NEW\n", + "\n", + "For example, if you would like to replace the contents of the file /test/test.txt from \"old contents\" to \"new contents\", you would pass in the following string:\n", + "\n", + "test/test.txt\n", + "\n", + "This is text that will not be changed\n", + "OLD <<<<\n", + "old contents\n", + ">>>> OLD\n", + "NEW <<<<\n", + "new contents\n", + ">>>> NEW, args: {'formatted_file_update': {'title': 'Formatted File Update', 'description': 'Strictly follow the provided rules.', 'type': 'string'}}\n", + "Delete File: \n", + "This tool is a wrapper for the GitHub API, useful when you need to delete a file in a GitHub repository. Simply pass in the full file path of the file you would like to delete. **IMPORTANT**: the path must not start with a slash, args: {'formatted_filepath': {'title': 'Formatted Filepath', 'description': 'The full file path of the file you would like to delete where the path must NOT start with a slash, e.g. `some_dir/my_file.py`. Only input a string, not the param name.', 'type': 'string'}}\n", + "Overview of existing files in Main branch: \n", + "This tool will provide an overview of all existing files in the main branch of the repository. It will list the file names, their respective paths, and a brief summary of their contents. This can be useful for understanding the structure and content of the repository, especially when navigating through large codebases. 
No input parameters are required., args: {'no_input': {'title': 'No Input', 'description': 'No input required, e.g. `` (empty string).', 'default': '', 'type': 'string'}}\n", + "Overview of files in current working branch: \n", + "This tool will provide an overview of all files in your current working branch where you should implement changes. This is great for getting a high level overview of the structure of your code. No input parameters are required., args: {'no_input': {'title': 'No Input', 'description': 'No input required, e.g. `` (empty string).', 'default': '', 'type': 'string'}}\n", + "List branches in this repository: \n", + "This tool will fetch a list of all branches in the repository. It will return the name of each branch. No input parameters are required., args: {'no_input': {'title': 'No Input', 'description': 'No input required, e.g. `` (empty string).', 'default': '', 'type': 'string'}}\n", + "Set active branch: \n", + "This tool will set the active branch in the repository, similar to `git checkout ` and `git switch -c `. **VERY IMPORTANT**: You must specify the name of the branch as a string input parameter., args: {'branch_name': {'title': 'Branch Name', 'description': 'The name of the branch, e.g. `my_branch`.', 'type': 'string'}}\n", + "Create a new branch: \n", + "This tool will create a new branch in the repository. **VERY IMPORTANT**: You must specify the name of the new branch as a string input parameter., args: {'branch_name': {'title': 'Branch Name', 'description': 'The name of the branch, e.g. `my_branch`.', 'type': 'string'}}\n", + "Get files from a directory: \n", + "This tool will fetch a list of all files in a specified directory. **VERY IMPORTANT**: You must specify the path of the directory as a string input parameter., args: {'input': {'title': 'Input', 'description': 'The path of the directory, e.g. `some_dir/inner_dir`. 
Only input a string, do not include the parameter name.', 'default': '', 'type': 'string'}}\n", + "Search issues and pull requests: \n", + "This tool will search for issues and pull requests in the repository. **VERY IMPORTANT**: You must specify the search query as a string input parameter., args: {'search_query': {'title': 'Search Query', 'description': 'Natural language search query, e.g. `My issue title or topic`.', 'type': 'string'}}\n", + "Search code: \n", + "This tool will search for code in the repository. **VERY IMPORTANT**: You must specify the search query as a string input parameter., args: {'search_query': {'title': 'Search Query', 'description': 'A keyword-focused natural language search query for code, e.g. `MyFunctionName()`.', 'type': 'string'}}\n", + "Create review request: \n", + "This tool will create a review request on the open pull request that matches the current active branch. **VERY IMPORTANT**: You must specify the username of the person who is being requested as a string input parameter., args: {'username': {'title': 'Username', 'description': 'GitHub username of the user being requested, e.g. `my_username`.', 'type': 'string'}}\n" + ] + } + ], + "source": [ + "from langchain.tools.render import render_text_description_and_args\n", + "\n", + "print(render_text_description_and_args(tools))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Example: Agent with Search\n", "\n", "If your agent does not need to use all 8 tools, you can build tools individually to use. For this example, we'll make an agent that does not use the create_file, delete_file or create_pull_request tools, but can also use duckduckgo-search." 
] @@ -382,7 +840,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.12" + "version": "3.10.13" } }, "nbformat": 4, diff --git a/libs/langchain/langchain/agents/agent_toolkits/github/toolkit.py b/libs/langchain/langchain/agents/agent_toolkits/github/toolkit.py index 390866ac886b2..fb437b7a4f87a 100644 --- a/libs/langchain/langchain/agents/agent_toolkits/github/toolkit.py +++ b/libs/langchain/langchain/agents/agent_toolkits/github/toolkit.py @@ -1,22 +1,129 @@ """GitHub Toolkit.""" from typing import Dict, List +from langchain_core.pydantic_v1 import BaseModel + from langchain.agents.agent_toolkits.base import BaseToolkit +from langchain.pydantic_v1 import Field from langchain.tools import BaseTool from langchain.tools.github.prompt import ( COMMENT_ON_ISSUE_PROMPT, + CREATE_BRANCH_PROMPT, CREATE_FILE_PROMPT, CREATE_PULL_REQUEST_PROMPT, + CREATE_REVIEW_REQUEST_PROMPT, DELETE_FILE_PROMPT, + GET_FILES_FROM_DIRECTORY_PROMPT, GET_ISSUE_PROMPT, GET_ISSUES_PROMPT, + GET_PR_PROMPT, + LIST_BRANCHES_IN_REPO_PROMPT, + LIST_PRS_PROMPT, + LIST_PULL_REQUEST_FILES, + OVERVIEW_EXISTING_FILES_BOT_BRANCH, + OVERVIEW_EXISTING_FILES_IN_MAIN, READ_FILE_PROMPT, + SEARCH_CODE_PROMPT, + SEARCH_ISSUES_AND_PRS_PROMPT, + SET_ACTIVE_BRANCH_PROMPT, UPDATE_FILE_PROMPT, ) from langchain.tools.github.tool import GitHubAction from langchain.utilities.github import GitHubAPIWrapper +class NoInput(BaseModel): + no_input: str = Field("", description="No input required, e.g. `` (empty string).") + + +class GetIssue(BaseModel): + issue_number: int = Field(0, description="Issue number as an integer, e.g. `42`") + + +class CommentOnIssue(BaseModel): + input: str = Field(..., description="Follow the required formatting.") + + +class GetPR(BaseModel): + pr_number: int = Field(0, description="The PR number as an integer, e.g. 
`12`") + + +class CreatePR(BaseModel): + formatted_pr: str = Field(..., description="Follow the required formatting.") + + +class CreateFile(BaseModel): + formatted_file: str = Field(..., description="Follow the required formatting.") + + +class ReadFile(BaseModel): + formatted_filepath: str = Field( + ..., + description=( + "The full file path of the file you would like to read where the " + "path must NOT start with a slash, e.g. `some_dir/my_file.py`." + ), + ) + + +class UpdateFile(BaseModel): + formatted_file_update: str = Field( + ..., description="Strictly follow the provided rules." + ) + + +class DeleteFile(BaseModel): + formatted_filepath: str = Field( + ..., + description=( + "The full file path of the file you would like to delete" + " where the path must NOT start with a slash, e.g." + " `some_dir/my_file.py`. Only input a string," + " not the param name." + ), + ) + + +class DirectoryPath(BaseModel): + input: str = Field( + "", + description=( + "The path of the directory, e.g. `some_dir/inner_dir`." + " Only input a string, do not include the parameter name." + ), + ) + + +class BranchName(BaseModel): + branch_name: str = Field( + ..., description="The name of the branch, e.g. `my_branch`." + ) + + +class SearchCode(BaseModel): + search_query: str = Field( + ..., + description=( + "A keyword-focused natural language search" + " query for code, e.g. `MyFunctionName()`." + ), + ) + + +class CreateReviewRequest(BaseModel): + username: str = Field( + ..., + description="GitHub username of the user being requested, e.g. `my_username`.", + ) + + +class SearchIssuesAndPRs(BaseModel): + search_query: str = Field( + ..., + description="Natural language search query, e.g. `My issue title or topic`.", + ) + + class GitHubToolkit(BaseToolkit): """GitHub Toolkit. 
@@ -41,41 +148,127 @@ def from_github_api_wrapper( "mode": "get_issues", "name": "Get Issues", "description": GET_ISSUES_PROMPT, + "args_schema": NoInput, }, { "mode": "get_issue", "name": "Get Issue", "description": GET_ISSUE_PROMPT, + "args_schema": GetIssue, }, { "mode": "comment_on_issue", "name": "Comment on Issue", "description": COMMENT_ON_ISSUE_PROMPT, + "args_schema": CommentOnIssue, + }, + { + "mode": "list_open_pull_requests", + "name": "List open pull requests (PRs)", + "description": LIST_PRS_PROMPT, + "args_schema": NoInput, + }, + { + "mode": "get_pull_request", + "name": "Get Pull Request", + "description": GET_PR_PROMPT, + "args_schema": GetPR, + }, + { + "mode": "list_pull_request_files", + "name": "Overview of files included in PR", + "description": LIST_PULL_REQUEST_FILES, + "args_schema": GetPR, }, { "mode": "create_pull_request", "name": "Create Pull Request", "description": CREATE_PULL_REQUEST_PROMPT, + "args_schema": CreatePR, + }, + { + "mode": "list_pull_request_files", + "name": "List Pull Requests' Files", + "description": LIST_PULL_REQUEST_FILES, + "args_schema": GetPR, }, { "mode": "create_file", "name": "Create File", "description": CREATE_FILE_PROMPT, + "args_schema": CreateFile, }, { "mode": "read_file", "name": "Read File", "description": READ_FILE_PROMPT, + "args_schema": ReadFile, }, { "mode": "update_file", "name": "Update File", "description": UPDATE_FILE_PROMPT, + "args_schema": UpdateFile, }, { "mode": "delete_file", "name": "Delete File", "description": DELETE_FILE_PROMPT, + "args_schema": DeleteFile, + }, + { + "mode": "list_files_in_main_branch", + "name": "Overview of existing files in Main branch", + "description": OVERVIEW_EXISTING_FILES_IN_MAIN, + "args_schema": NoInput, + }, + { + "mode": "list_files_in_bot_branch", + "name": "Overview of files in current working branch", + "description": OVERVIEW_EXISTING_FILES_BOT_BRANCH, + "args_schema": NoInput, + }, + { + "mode": "list_branches_in_repo", + "name": "List branches 
in this repository", + "description": LIST_BRANCHES_IN_REPO_PROMPT, + "args_schema": NoInput, + }, + { + "mode": "set_active_branch", + "name": "Set active branch", + "description": SET_ACTIVE_BRANCH_PROMPT, + "args_schema": BranchName, + }, + { + "mode": "create_branch", + "name": "Create a new branch", + "description": CREATE_BRANCH_PROMPT, + "args_schema": BranchName, + }, + { + "mode": "get_files_from_directory", + "name": "Get files from a directory", + "description": GET_FILES_FROM_DIRECTORY_PROMPT, + "args_schema": DirectoryPath, + }, + { + "mode": "search_issues_and_prs", + "name": "Search issues and pull requests", + "description": SEARCH_ISSUES_AND_PRS_PROMPT, + "args_schema": SearchIssuesAndPRs, + }, + { + "mode": "search_code", + "name": "Search code", + "description": SEARCH_CODE_PROMPT, + "args_schema": SearchCode, + }, + { + "mode": "create_review_request", + "name": "Create review request", + "description": CREATE_REVIEW_REQUEST_PROMPT, + "args_schema": CreateReviewRequest, }, ] tools = [ @@ -84,6 +277,7 @@ def from_github_api_wrapper( description=action["description"], mode=action["mode"], api_wrapper=github_api_wrapper, + args_schema=action.get("args_schema", None), ) for action in operations ] diff --git a/libs/langchain/langchain/tools/github/prompt.py b/libs/langchain/langchain/tools/github/prompt.py index e0e72a808b802..3d66713e02b7e 100644 --- a/libs/langchain/langchain/tools/github/prompt.py +++ b/libs/langchain/langchain/tools/github/prompt.py @@ -1,19 +1,17 @@ # flake8: noqa GET_ISSUES_PROMPT = """ -This tool will fetch a list of the repository's issues. It will return the title, and issue number of 5 issues. It takes no input. -""" +This tool will fetch a list of the repository's issues. It will return the title, and issue number of 5 issues. It takes no input.""" GET_ISSUE_PROMPT = """ -This tool will fetch the title, body, and comment thread of a specific issue. **VERY IMPORTANT**: You must specify the issue number as an integer. 
-""" +This tool will fetch the title, body, and comment thread of a specific issue. **VERY IMPORTANT**: You must specify the issue number as an integer.""" COMMENT_ON_ISSUE_PROMPT = """ This tool is useful when you need to comment on a GitHub issue. Simply pass in the issue number and the comment you would like to make. Please use this sparingly as we don't want to clutter the comment threads. **VERY IMPORTANT**: Your input to this tool MUST strictly follow these rules: - First you must specify the issue number as an integer - Then you must place two newlines -- Then you must specify your comment -""" +- Then you must specify your comment""" + CREATE_PULL_REQUEST_PROMPT = """ This tool is useful when you need to create a new pull request in a GitHub repository. **VERY IMPORTANT**: Your input to this tool MUST strictly follow these rules: @@ -21,13 +19,13 @@ - Then you must place two newlines - Then you must write the body or description of the pull request -To reference an issue in the body, put its issue number directly after a #. -For example, if you would like to create a pull request called "README updates" with contents "added contributors' names, closes issue #3", you would pass in the following string: +When appropriate, always reference relevant issues in the body by using the syntax `closes #>>> OLD NEW <<<< new contents ->>>> NEW -""" +>>>> NEW""" DELETE_FILE_PROMPT = """ -This tool is a wrapper for the GitHub API, useful when you need to delete a file in a GitHub repository. Simply pass in the full file path of the file you would like to delete. **IMPORTANT**: the path must not start with a slash -""" +This tool is a wrapper for the GitHub API, useful when you need to delete a file in a GitHub repository. Simply pass in the full file path of the file you would like to delete. 
**IMPORTANT**: the path must not start with a slash""" + +GET_PR_PROMPT = """ +This tool will fetch the title, body, comment thread and commit history of a specific Pull Request (by PR number). **VERY IMPORTANT**: You must specify the PR number as an integer.""" + +LIST_PRS_PROMPT = """ +This tool will fetch a list of the repository's Pull Requests (PRs). It will return the title, and PR number of 5 PRs. It takes no input.""" + +LIST_PULL_REQUEST_FILES = """ +This tool will fetch the full text of all files in a pull request (PR) given the PR number as an input. This is useful for understanding the code changes in a PR or contributing to it. **VERY IMPORTANT**: You must specify the PR number as an integer input parameter.""" + +OVERVIEW_EXISTING_FILES_IN_MAIN = """ +This tool will provide an overview of all existing files in the main branch of the repository. It will list the file names, their respective paths, and a brief summary of their contents. This can be useful for understanding the structure and content of the repository, especially when navigating through large codebases. No input parameters are required.""" + +OVERVIEW_EXISTING_FILES_BOT_BRANCH = """ +This tool will provide an overview of all files in your current working branch where you should implement changes. This is great for getting a high level overview of the structure of your code. No input parameters are required.""" + +SEARCH_ISSUES_AND_PRS_PROMPT = """ +This tool will search for issues and pull requests in the repository. **VERY IMPORTANT**: You must specify the search query as a string input parameter.""" + +SEARCH_CODE_PROMPT = """ +This tool will search for code in the repository. **VERY IMPORTANT**: You must specify the search query as a string input parameter.""" + +CREATE_REVIEW_REQUEST_PROMPT = """ +This tool will create a review request on the open pull request that matches the current active branch. 
**VERY IMPORTANT**: You must specify the username of the person who is being requested as a string input parameter.""" + +LIST_BRANCHES_IN_REPO_PROMPT = """ +This tool will fetch a list of all branches in the repository. It will return the name of each branch. No input parameters are required.""" + +SET_ACTIVE_BRANCH_PROMPT = """ +This tool will set the active branch in the repository, similar to `git checkout ` and `git switch -c `. **VERY IMPORTANT**: You must specify the name of the branch as a string input parameter.""" + +CREATE_BRANCH_PROMPT = """ +This tool will create a new branch in the repository. **VERY IMPORTANT**: You must specify the name of the new branch as a string input parameter.""" + +GET_FILES_FROM_DIRECTORY_PROMPT = """ +This tool will fetch a list of all files in a specified directory. **VERY IMPORTANT**: You must specify the path of the directory as a string input parameter.""" diff --git a/libs/langchain/langchain/tools/github/tool.py b/libs/langchain/langchain/tools/github/tool.py index 6099ea47d8bc8..71761f635875b 100644 --- a/libs/langchain/langchain/tools/github/tool.py +++ b/libs/langchain/langchain/tools/github/tool.py @@ -7,9 +7,9 @@ GITHUB_REPOSITORY -> format: {owner}/{repo} """ -from typing import Optional +from typing import Optional, Type -from langchain_core.pydantic_v1 import Field +from langchain_core.pydantic_v1 import BaseModel, Field from langchain.callbacks.manager import CallbackManagerForToolRun from langchain.tools.base import BaseTool @@ -23,11 +23,15 @@ class GitHubAction(BaseTool): mode: str name: str = "" description: str = "" + args_schema: Optional[Type[BaseModel]] = None def _run( self, - instructions: str, + instructions: Optional[str] = "", run_manager: Optional[CallbackManagerForToolRun] = None, ) -> str: """Use the GitHub API to run an operation.""" + if not instructions or instructions == "{}": + # Catch other forms of empty input that GPT-4 likes to send. 
+ instructions = "" return self.api_wrapper.run(self.mode, instructions) diff --git a/libs/langchain/langchain/utilities/github.py b/libs/langchain/langchain/utilities/github.py index 234ec87b1e979..940708718332b 100644 --- a/libs/langchain/langchain/utilities/github.py +++ b/libs/langchain/langchain/utilities/github.py @@ -4,12 +4,15 @@ import json from typing import TYPE_CHECKING, Any, Dict, List, Optional +import requests +import tiktoken from langchain_core.pydantic_v1 import BaseModel, Extra, root_validator from langchain.utils import get_from_dict_or_env if TYPE_CHECKING: from github.Issue import Issue + from github.PullRequest import PullRequest class GitHubAPIWrapper(BaseModel): @@ -20,7 +23,7 @@ class GitHubAPIWrapper(BaseModel): github_repository: Optional[str] = None github_app_id: Optional[str] = None github_app_private_key: Optional[str] = None - github_branch: Optional[str] = None + active_branch: Optional[str] = None github_base_branch: Optional[str] = None class Config: @@ -41,13 +44,6 @@ def validate_environment(cls, values: Dict) -> Dict: values, "github_app_private_key", "GITHUB_APP_PRIVATE_KEY" ) - github_branch = get_from_dict_or_env( - values, "github_branch", "GITHUB_BRANCH", default="master" - ) - github_base_branch = get_from_dict_or_env( - values, "github_base_branch", "GITHUB_BASE_BRANCH", default="master" - ) - try: from github import Auth, GithubIntegration @@ -57,8 +53,13 @@ def validate_environment(cls, values: Dict) -> Dict: "Please install it with `pip install PyGithub`" ) - with open(github_app_private_key, "r") as f: - private_key = f.read() + try: + # interpret the key as a file path + # fallback to interpreting as the key itself + with open(github_app_private_key, "r") as f: + private_key = f.read() + except Exception: + private_key = github_app_private_key auth = Auth.AppAuth( github_app_id, @@ -69,13 +70,28 @@ def validate_environment(cls, values: Dict) -> Dict: # create a GitHub instance: g = 
installation.get_github_for_installation() + repo = g.get_repo(github_repository) + + github_base_branch = get_from_dict_or_env( + values, + "github_base_branch", + "GITHUB_BASE_BRANCH", + default=repo.default_branch, + ) + + active_branch = get_from_dict_or_env( + values, + "active_branch", + "ACTIVE_BRANCH", + default=repo.default_branch, + ) values["github"] = g - values["github_repo_instance"] = g.get_repo(github_repository) + values["github_repo_instance"] = repo values["github_repository"] = github_repository values["github_app_id"] = github_app_id values["github_app_private_key"] = github_app_private_key - values["github_branch"] = github_branch + values["active_branch"] = active_branch values["github_base_branch"] = github_base_branch return values @@ -92,19 +108,45 @@ def parse_issues(self, issues: List[Issue]) -> List[dict]: for issue in issues: title = issue.title number = issue.number - parsed.append({"title": title, "number": number}) + opened_by = issue.user.login if issue.user else None + issue_dict = {"title": title, "number": number} + if opened_by is not None: + issue_dict["opened_by"] = opened_by + parsed.append(issue_dict) + return parsed + + def parse_pull_requests(self, pull_requests: List[PullRequest]) -> List[dict]: + """ + Extracts title and number from each Issue and puts them in a dictionary + Parameters: + issues(List[Issue]): A list of Github Issue objects + Returns: + List[dict]: A dictionary of issue titles and numbers + """ + parsed = [] + for pr in pull_requests: + parsed.append( + { + "title": pr.title, + "number": pr.number, + "commits": str(pr.commits), + "comments": str(pr.comments), + } + ) return parsed def get_issues(self) -> str: """ - Fetches all open issues from the repo + Fetches all open issues from the repo excluding pull requests Returns: str: A plaintext report containing the number of issues and each issue's title and number. 
""" issues = self.github_repo_instance.get_issues(state="open") - if issues.totalCount > 0: + # Filter out pull requests (part of GH issues object) + issues = [issue for issue in issues if not issue.pull_request] + if issues: parsed_issues = self.parse_issues(issues) parsed_issues_str = ( "Found " + str(len(parsed_issues)) + " issues:\n" + str(parsed_issues) @@ -113,14 +155,201 @@ def get_issues(self) -> str: else: return "No open issues available" + def list_open_pull_requests(self) -> str: + """ + Fetches all open PRs from the repo + + Returns: + str: A plaintext report containing the number of PRs + and each PR's title and number. + """ + # issues = self.github_repo_instance.get_issues(state="open") + pull_requests = self.github_repo_instance.get_pulls(state="open") + if pull_requests.totalCount > 0: + parsed_prs = self.parse_pull_requests(pull_requests) + parsed_prs_str = ( + "Found " + str(len(parsed_prs)) + " pull requests:\n" + str(parsed_prs) + ) + return parsed_prs_str + else: + return "No open pull requests available" + + def list_files_in_main_branch(self) -> str: + """ + Fetches all files in the main branch of the repo. + + Returns: + str: A plaintext report containing the paths and names of the files. + """ + files: List[str] = [] + try: + contents = self.github_repo_instance.get_contents( + "", ref=self.github_base_branch + ) + for content in contents: + if content.type == "dir": + files.extend(self.get_files_from_directory(content.path)) + else: + files.append(content.path) + + if files: + files_str = "\n".join(files) + return f"Found {len(files)} files in the main branch:\n{files_str}" + else: + return "No files found in the main branch" + except Exception as e: + return str(e) + + def set_active_branch(self, branch_name: str) -> str: + """Equivalent to `git checkout branch_name` for this Agent. + Clones formatting from Github. + + Returns an Error (as a string) if branch doesn't exist. 
+ """ + curr_branches = [ + branch.name for branch in self.github_repo_instance.get_branches() + ] + if branch_name in curr_branches: + self.active_branch = branch_name + return f"Switched to branch `{branch_name}`" + else: + return ( + f"Error {branch_name} does not exist," + f"in repo with current branches: {str(curr_branches)}" + ) + + def list_branches_in_repo(self) -> str: + """ + Fetches a list of all branches in the repository. + + Returns: + str: A plaintext report containing the names of the branches. + """ + try: + branches = [ + branch.name for branch in self.github_repo_instance.get_branches() + ] + if branches: + branches_str = "\n".join(branches) + return ( + f"Found {len(branches)} branches in the repository:" + f"\n{branches_str}" + ) + else: + return "No branches found in the repository" + except Exception as e: + return str(e) + + def create_branch(self, proposed_branch_name: str) -> str: + """ + Create a new branch, and set it as the active bot branch. + Equivalent to `git switch -c proposed_branch_name` + If the proposed branch already exists, we append _v1 then _v2... + until a unique name is found. + + Returns: + str: A plaintext success message. + """ + from github import GithubException + + i = 0 + new_branch_name = proposed_branch_name + base_branch = self.github_repo_instance.get_branch( + self.github_repo_instance.default_branch + ) + for i in range(1000): + try: + self.github_repo_instance.create_git_ref( + ref=f"refs/heads/{new_branch_name}", sha=base_branch.commit.sha + ) + self.active_branch = new_branch_name + return ( + f"Branch '{new_branch_name}' " + "created successfully, and set as current active branch." + ) + except GithubException as e: + if e.status == 422 and "Reference already exists" in e.data["message"]: + i += 1 + new_branch_name = f"{proposed_branch_name}_v{i}" + else: + # Handle any other exceptions + print(f"Failed to create branch. 
Error: {e}") + raise Exception( + "Unable to create branch name from proposed_branch_name: " + f"{proposed_branch_name}" + ) + return ( + "Unable to create branch. " + "At least 1000 branches exist with named derived from " + f"proposed_branch_name: `{proposed_branch_name}`" + ) + + def list_files_in_bot_branch(self) -> str: + """ + Fetches all files in the active branch of the repo, + the branch the bot uses to make changes. + + Returns: + str: A plaintext list containing the the filepaths in the branch. + """ + files: List[str] = [] + try: + contents = self.github_repo_instance.get_contents( + "", ref=self.active_branch + ) + for content in contents: + if content.type == "dir": + files.extend(self.get_files_from_directory(content.path)) + else: + files.append(content.path) + + if files: + files_str = "\n".join(files) + return ( + f"Found {len(files)} files in branch `{self.active_branch}`:\n" + f"{files_str}" + ) + else: + return f"No files found in branch: `{self.active_branch}`" + except Exception as e: + return f"Error: {e}" + + def get_files_from_directory(self, directory_path: str) -> str: + """ + Recursively fetches files from a directory in the repo. + + Parameters: + directory_path (str): Path to the directory + + Returns: + str: List of file paths, or an error message. 
+ """ + from github import GithubException + + files: List[str] = [] + try: + contents = self.github_repo_instance.get_contents( + directory_path, ref=self.active_branch + ) + except GithubException as e: + return f"Error: status code {e.status}, {e.message}" + + for content in contents: + if content.type == "dir": + files.extend(self.get_files_from_directory(content.path)) + else: + files.append(content.path) + return str(files) + def get_issue(self, issue_number: int) -> Dict[str, Any]: """ Fetches a specific issue and its first 10 comments Parameters: issue_number(int): The number for the github issue Returns: - dict: A doctionary containing the issue's title, - body, and comments as a string + dict: A dictionary containing the issue's title, + body, comments as a string, and the username of the user + who opened the issue """ issue = self.github_repo_instance.get_issue(number=issue_number) page = 0 @@ -133,12 +362,142 @@ def get_issue(self, issue_number: int) -> Dict[str, Any]: comments.append({"body": comment.body, "user": comment.user.login}) page += 1 + opened_by = None + if issue.user and issue.user.login: + opened_by = issue.user.login + return { + "number": issue_number, "title": issue.title, "body": issue.body, "comments": str(comments), + "opened_by": str(opened_by), } + def list_pull_request_files(self, pr_number: int) -> List[Dict[str, Any]]: + """Fetches the full text of all files in a PR. Truncates after first 3k tokens. + # TODO: Enhancement to summarize files with ctags if they're getting long. 
+ + Args: + pr_number(int): The number of the pull request on Github + + Returns: + dict: A dictionary containing the issue's title, + body, and comments as a string + """ + MAX_TOKENS_FOR_FILES = 3_000 + pr_files = [] + pr = self.github_repo_instance.get_pull(number=int(pr_number)) + total_tokens = 0 + page = 0 + while True: # or while (total_tokens + tiktoken()) < MAX_TOKENS_FOR_FILES: + files_page = pr.get_files().get_page(page) + if len(files_page) == 0: + break + for file in files_page: + try: + file_metadata_response = requests.get(file.contents_url) + if file_metadata_response.status_code == 200: + download_url = json.loads(file_metadata_response.text)[ + "download_url" + ] + else: + print(f"Failed to download file: {file.contents_url}, skipping") + continue + + file_content_response = requests.get(download_url) + if file_content_response.status_code == 200: + # Save the content as a UTF-8 string + file_content = file_content_response.text + else: + print( + "Failed downloading file content " + f"(Error {file_content_response.status_code}). Skipping" + ) + continue + + file_tokens = len( + tiktoken.get_encoding("cl100k_base").encode( + file_content + file.filename + "file_name file_contents" + ) + ) + if (total_tokens + file_tokens) < MAX_TOKENS_FOR_FILES: + pr_files.append( + { + "filename": file.filename, + "contents": file_content, + "additions": file.additions, + "deletions": file.deletions, + } + ) + total_tokens += file_tokens + except Exception as e: + print(f"Error when reading files from a PR on github. {e}") + page += 1 + return pr_files + + def get_pull_request(self, pr_number: int) -> Dict[str, Any]: + """ + Fetches a specific pull request and its first 10 comments, + limited by max_tokens. 
+ + Parameters: + pr_number(int): The number for the Github pull + max_tokens(int): The maximum number of tokens in the response + Returns: + dict: A dictionary containing the pull's title, body, + and comments as a string + """ + max_tokens = 2_000 + pull = self.github_repo_instance.get_pull(number=pr_number) + total_tokens = 0 + + def get_tokens(text: str) -> int: + return len(tiktoken.get_encoding("cl100k_base").encode(text)) + + def add_to_dict(data_dict: Dict[str, Any], key: str, value: str) -> None: + nonlocal total_tokens # Declare total_tokens as nonlocal + tokens = get_tokens(value) + if total_tokens + tokens <= max_tokens: + data_dict[key] = value + total_tokens += tokens # Now this will modify the outer variable + + response_dict: Dict[str, str] = {} + add_to_dict(response_dict, "title", pull.title) + add_to_dict(response_dict, "number", str(pr_number)) + add_to_dict(response_dict, "body", pull.body) + + comments: List[str] = [] + page = 0 + while len(comments) <= 10: + comments_page = pull.get_issue_comments().get_page(page) + if len(comments_page) == 0: + break + for comment in comments_page: + comment_str = str({"body": comment.body, "user": comment.user.login}) + if total_tokens + get_tokens(comment_str) > max_tokens: + break + comments.append(comment_str) + total_tokens += get_tokens(comment_str) + page += 1 + add_to_dict(response_dict, "comments", str(comments)) + + commits: List[str] = [] + page = 0 + while len(commits) <= 10: + commits_page = pull.get_commits().get_page(page) + if len(commits_page) == 0: + break + for commit in commits_page: + commit_str = str({"message": commit.commit.message}) + if total_tokens + get_tokens(commit_str) > max_tokens: + break + commits.append(commit_str) + total_tokens += get_tokens(commit_str) + page += 1 + add_to_dict(response_dict, "commits", str(commits)) + return response_dict + def create_pull_request(self, pr_query: str) -> str: """ Makes a pull request from the bot's branch to the base branch @@ -150,9 
+509,9 @@ def create_pull_request(self, pr_query: str) -> str: Returns: str: A success or failure message """ - if self.github_base_branch == self.github_branch: + if self.github_base_branch == self.active_branch: return """Cannot make a pull request because - commits are already in the master branch""" + commits are already in the main or master branch.""" else: try: title = pr_query.split("\n")[0] @@ -160,7 +519,7 @@ def create_pull_request(self, pr_query: str) -> str: pr = self.github_repo_instance.create_pull( title=title, body=body, - head=self.github_branch, + head=self.active_branch, base=self.github_base_branch, ) return f"Successfully created PR number {pr.number}" @@ -198,33 +557,60 @@ def create_file(self, file_query: str) -> str: Returns: str: A success or failure message """ + if self.active_branch == self.github_base_branch: + return ( + "You're attempting to commit to the directly to the" + f"{self.github_base_branch} branch, which is protected. " + "Please create a new branch and try again." + ) + file_path = file_query.split("\n")[0] file_contents = file_query[len(file_path) + 2 :] + try: - exists = self.github_repo_instance.get_contents(file_path) - if exists is None: - self.github_repo_instance.create_file( - path=file_path, - message="Create " + file_path, - content=file_contents, - branch=self.github_branch, + try: + file = self.github_repo_instance.get_contents( + file_path, ref=self.active_branch ) - return "Created file " + file_path - else: - return f"File already exists at {file_path}. Use update_file instead" + if file: + return ( + f"File already exists at `{file_path}` " + f"on branch `{self.active_branch}`. You must use " + "`update_file` to modify it." 
+ ) + except Exception: + # expected behavior, file shouldn't exist yet + pass + + self.github_repo_instance.create_file( + path=file_path, + message="Create " + file_path, + content=file_contents, + branch=self.active_branch, + ) + return "Created file " + file_path except Exception as e: return "Unable to make file due to error:\n" + str(e) def read_file(self, file_path: str) -> str: """ - Reads a file from the github repo + Read a file from this agent's branch, defined by self.active_branch, + which supports PR branches. Parameters: file_path(str): the file path Returns: - str: The file decoded as a string + str: The file decoded as a string, or an error message if not found """ - file = self.github_repo_instance.get_contents(file_path) - return file.decoded_content.decode("utf-8") + try: + file = self.github_repo_instance.get_contents( + file_path, ref=self.active_branch + ) + return file.decoded_content.decode("utf-8") + except Exception as e: + return ( + f"File not found `{file_path}` on branch" + f"`{self.active_branch}`. Error: {str(e)}" + ) def update_file(self, file_query: str) -> str: """ @@ -244,8 +630,14 @@ def update_file(self, file_query: str) -> str: Returns: A success or failure message """ + if self.active_branch == self.github_base_branch: + return ( + "You're attempting to commit to the directly" + f"to the {self.github_base_branch} branch, which is protected. " + "Please create a new branch and try again." 
+ ) try: - file_path = file_query.split("\n")[0] + file_path: str = file_query.split("\n")[0] old_file_contents = ( file_query.split("OLD <<<<")[1].split(">>>> OLD")[0].strip() ) @@ -267,12 +659,14 @@ def update_file(self, file_query: str) -> str: self.github_repo_instance.update_file( path=file_path, - message="Update " + file_path, + message="Update " + str(file_path), content=updated_file_content, - branch=self.github_branch, - sha=self.github_repo_instance.get_contents(file_path).sha, + branch=self.active_branch, + sha=self.github_repo_instance.get_contents( + file_path, ref=self.active_branch + ).sha, ) - return "Updated file " + file_path + return "Updated file " + str(file_path) except Exception as e: return "Unable to update file due to error:\n" + str(e) @@ -284,23 +678,119 @@ def delete_file(self, file_path: str) -> str: Returns: str: Success or failure message """ + if self.active_branch == self.github_base_branch: + return ( + "You're attempting to commit to the directly" + f"to the {self.github_base_branch} branch, which is protected. " + "Please create a new branch and try again." + ) try: - file = self.github_repo_instance.get_contents(file_path) self.github_repo_instance.delete_file( path=file_path, message="Delete " + file_path, - branch=self.github_branch, - sha=file.sha, + branch=self.active_branch, + sha=self.github_repo_instance.get_contents( + file_path, ref=self.active_branch + ).sha, ) return "Deleted file " + file_path except Exception as e: return "Unable to delete file due to error:\n" + str(e) + def search_issues_and_prs(self, query: str) -> str: + """ + Searches issues and pull requests in the repository. 
+ + Parameters: + query(str): The search query + + Returns: + str: A string containing the first 5 issues and pull requests + """ + search_result = self.github.search_issues(query, repo=self.github_repository) + max_items = min(5, len(search_result)) + results = [f"Top {max_items} results:"] + for issue in search_result[:max_items]: + results.append( + f"Title: {issue.title}, Number: {issue.number}, State: {issue.state}" + ) + return "\n".join(results) + + def search_code(self, query: str) -> str: + """ + Searches code in the repository. + # Todo: limit total tokens returned... + + Parameters: + query(str): The search query + + Returns: + str: A string containing, at most, the top 5 search results + """ + search_result = self.github.search_code( + query=query, repo=self.github_repository + ) + if search_result.totalCount == 0: + return "0 results found." + max_results = min(5, search_result.totalCount) + results = [f"Showing top {max_results} of {search_result.totalCount} results:"] + count = 0 + for code in search_result: + if count >= max_results: + break + # Get the file content using the PyGithub get_contents method + file_content = self.github_repo_instance.get_contents( + code.path, ref=self.active_branch + ).decoded_content.decode() + results.append( + f"Filepath: `{code.path}`\nFile contents: " + f"{file_content}\n" + ) + count += 1 + return "\n".join(results) + + def create_review_request(self, reviewer_username: str) -> str: + """ + Creates a review request on *THE* open pull request + that matches the current active_branch. 
+ + Parameters: + reviewer_username(str): The username of the person who is being requested + + Returns: + str: A message confirming the creation of the review request + """ + pull_requests = self.github_repo_instance.get_pulls( + state="open", sort="created" + ) + # find PR against active_branch + pr = next( + (pr for pr in pull_requests if pr.head.ref == self.active_branch), None + ) + if pr is None: + return ( + "No open pull request found for the " + f"current branch `{self.active_branch}`" + ) + + try: + pr.create_review_request(reviewers=[reviewer_username]) + return ( + f"Review request created for user {reviewer_username} " + f"on PR #{pr.number}" + ) + except Exception as e: + return f"Failed to create a review request with error {e}" + def run(self, mode: str, query: str) -> str: - if mode == "get_issues": - return self.get_issues() - elif mode == "get_issue": + if mode == "get_issue": return json.dumps(self.get_issue(int(query))) + elif mode == "get_pull_request": + return json.dumps(self.get_pull_request(int(query))) + elif mode == "list_pull_request_files": + return json.dumps(self.list_pull_request_files(int(query))) + elif mode == "get_issues": + return self.get_issues() elif mode == "comment_on_issue": return self.comment_on_issue(query) elif mode == "create_file": @@ -313,5 +803,25 @@ def run(self, mode: str, query: str) -> str: return self.update_file(query) elif mode == "delete_file": return self.delete_file(query) + elif mode == "list_open_pull_requests": + return self.list_open_pull_requests() + elif mode == "list_files_in_main_branch": + return self.list_files_in_main_branch() + elif mode == "list_files_in_bot_branch": + return self.list_files_in_bot_branch() + elif mode == "list_branches_in_repo": + return self.list_branches_in_repo() + elif mode == "set_active_branch": + return self.set_active_branch(query) + elif mode == "create_branch": + return self.create_branch(query) + elif mode == "get_files_from_directory": + return 
self.get_files_from_directory(query) + elif mode == "search_issues_and_prs": + return self.search_issues_and_prs(query) + elif mode == "search_code": + return self.search_code(query) + elif mode == "create_review_request": + return self.create_review_request(query) else: raise ValueError("Invalid mode" + mode) From f26d88ca60772374ee8355d9128a03b4839f92ab Mon Sep 17 00:00:00 2001 From: Erick Friis Date: Mon, 4 Dec 2023 16:03:09 -0800 Subject: [PATCH 25/49] docs[patch]: fix columns (#14251) --- docs/docs/expression_language/why.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/docs/expression_language/why.ipynb b/docs/docs/expression_language/why.ipynb index a1e17a1cbddae..2b8c85e6b0117 100644 --- a/docs/docs/expression_language/why.ipynb +++ b/docs/docs/expression_language/why.ipynb @@ -10,7 +10,7 @@ "title: Why use LCEL\n", "---\n", "\n", - "{ import { ColumnContainer, Column } from \"@theme/Columns\"; }" + "import { ColumnContainer, Column } from \\\"@theme/Columns\\\";" ] }, { From 9f9cb71d26f297fa84a364b8f19f4afba807d204 Mon Sep 17 00:00:00 2001 From: Colin Ulin <47982430+pocketcolin@users.noreply.github.com> Date: Mon, 4 Dec 2023 19:21:35 -0500 Subject: [PATCH 26/49] Embaas - added backoff retries for network requests (#13679) Running a large number of requests to Embaas' servers (or any server) can result in intermittent network failures (both from local and external network/service issues). This PR implements exponential backoff retries to help mitigate this issue. 
--- libs/langchain/langchain/embeddings/embaas.py | 27 ++++++++++++++++--- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/libs/langchain/langchain/embeddings/embaas.py b/libs/langchain/langchain/embeddings/embaas.py index df7696467e776..4d8e682ef3bfa 100644 --- a/libs/langchain/langchain/embeddings/embaas.py +++ b/libs/langchain/langchain/embeddings/embaas.py @@ -1,10 +1,11 @@ from typing import Any, Dict, List, Mapping, Optional import requests -from langchain_core.embeddings import Embeddings -from langchain_core.pydantic_v1 import BaseModel, Extra, root_validator +from requests.adapters import HTTPAdapter, Retry from typing_extensions import NotRequired, TypedDict +from langchain.pydantic_v1 import BaseModel, Extra, root_validator +from langchain.schema.embeddings import Embeddings from langchain.utils import get_from_dict_or_env # Currently supported maximum batch size for embedding requests @@ -51,6 +52,10 @@ class EmbaasEmbeddings(BaseModel, Embeddings): api_url: str = EMBAAS_API_URL """The URL for the embaas embeddings API.""" embaas_api_key: Optional[str] = None + """max number of retries for requests""" + max_retries: Optional[int] = 3 + """request timeout in seconds""" + timeout: Optional[int] = 30 class Config: """Configuration for this pydantic object.""" @@ -85,8 +90,22 @@ def _handle_request(self, payload: EmbaasEmbeddingsPayload) -> List[List[float]] "Content-Type": "application/json", } - response = requests.post(self.api_url, headers=headers, json=payload) - response.raise_for_status() + session = requests.Session() + retries = Retry( + total=self.max_retries, + backoff_factor=0.5, + allowed_methods=["POST"], + raise_on_status=True, + ) + + session.mount("http://", HTTPAdapter(max_retries=retries)) + session.mount("https://", HTTPAdapter(max_retries=retries)) + response = session.post( + self.api_url, + headers=headers, + json=payload, + timeout=self.timeout, + ) parsed_response = response.json() embeddings = [item["embedding"] for 
item in parsed_response["data"]] From 224aa5151d03c8c9a3ca5fd4ecb62292ec7c87d1 Mon Sep 17 00:00:00 2001 From: Ravidhu Date: Tue, 5 Dec 2023 01:28:29 +0100 Subject: [PATCH 27/49] Fix Sagemaker Endpoint documentation (#13660) - **Description:** fixed the transform_input method in the example., - **Issue:** example didn't work, - **Dependencies:** None, - **Tag maintainer:** @baskaryan, - **Twitter handle:** @Ravidhu87 --- docs/docs/integrations/llms/sagemaker.ipynb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/docs/integrations/llms/sagemaker.ipynb b/docs/docs/integrations/llms/sagemaker.ipynb index f10e14d72ef1e..101f9d81ab0ec 100644 --- a/docs/docs/integrations/llms/sagemaker.ipynb +++ b/docs/docs/integrations/llms/sagemaker.ipynb @@ -141,7 +141,7 @@ " accepts = \"application/json\"\n", "\n", " def transform_input(self, prompt: str, model_kwargs: Dict) -> bytes:\n", - " input_str = json.dumps({prompt: prompt, **model_kwargs})\n", + " input_str = json.dumps({\"inputs\": prompt, \"parameters\": model_kwargs})\n", " return input_str.encode(\"utf-8\")\n", "\n", " def transform_output(self, output: bytes) -> str:\n", @@ -197,7 +197,7 @@ " accepts = \"application/json\"\n", "\n", " def transform_input(self, prompt: str, model_kwargs: Dict) -> bytes:\n", - " input_str = json.dumps({prompt: prompt, **model_kwargs})\n", + " input_str = json.dumps({\"inputs\": prompt, \"parameters\": model_kwargs})\n", " return input_str.encode(\"utf-8\")\n", "\n", " def transform_output(self, output: bytes) -> str:\n", From 921c4b5597092f28ae23133e1f323fd89fdd8b41 Mon Sep 17 00:00:00 2001 From: Harrison Chase Date: Mon, 4 Dec 2023 16:34:15 -0800 Subject: [PATCH 28/49] Harrison/searchapi (#14252) Co-authored-by: SebastjanPrachovskij <86522260+SebastjanPrachovskij@users.noreply.github.com> --- docs/docs/integrations/platforms/google.mdx | 14 ++++++++++++-- .../modules/agents/how_to/async_agent.ipynb | 2 +- .../agents/self_ask_with_search/base.py | 5 ++++- 
libs/langchain/langchain/tools/__init__.py | 18 ++++++++++++++++++ .../chains/test_self_ask_with_search.py | 6 +++--- .../tests/unit_tests/test_dependencies.py | 5 ++++- .../tests/unit_tests/tools/test_imports.py | 2 ++ .../tests/unit_tests/tools/test_public_api.py | 2 ++ 8 files changed, 46 insertions(+), 8 deletions(-) diff --git a/docs/docs/integrations/platforms/google.mdx b/docs/docs/integrations/platforms/google.mdx index ec0d3003a511e..a1a7b5673ca78 100644 --- a/docs/docs/integrations/platforms/google.mdx +++ b/docs/docs/integrations/platforms/google.mdx @@ -393,14 +393,24 @@ from langchain.chat_loaders.gmail import GMailLoader ## 3rd Party Integrations +### SearchApi + +>[SearchApi](https://www.searchapi.io/) provides a 3rd-party API to access Google search results, YouTube search & transcripts, and other Google-related engines. + +See [usage examples and authorization instructions](/docs/integrations/tools/searchapi). + +```python +from langchain.utilities import SearchApiAPIWrapper +``` + ### SerpAPI >[SerpApi](https://serpapi.com/) provides a 3rd-party API to access Google search results. -See a [usage example and authorization instructions](/docs/integrations/tools/google_serper). +See a [usage example and authorization instructions](/docs/integrations/tools/serpapi). 
```python -from langchain.utilities import GoogleSerperAPIWrapper +from langchain.utilities import SerpAPIWrapper ``` ### YouTube diff --git a/docs/docs/modules/agents/how_to/async_agent.ipynb b/docs/docs/modules/agents/how_to/async_agent.ipynb index e233372f0ee14..716c4e875179c 100644 --- a/docs/docs/modules/agents/how_to/async_agent.ipynb +++ b/docs/docs/modules/agents/how_to/async_agent.ipynb @@ -9,7 +9,7 @@ "\n", "LangChain provides async support for Agents by leveraging the [asyncio](https://docs.python.org/3/library/asyncio.html) library.\n", "\n", - "Async methods are currently supported for the following `Tool`s: [`GoogleSerperAPIWrapper`](https://github.com/langchain-ai/langchain/blob/master/libs/langchain/langchain/utilities/google_serper.py), [`SerpAPIWrapper`](https://github.com/langchain-ai/langchain/blob/master/libs/langchain/langchain/utilities/serpapi.py), [`LLMMathChain`](https://github.com/langchain-ai/langchain/blob/master/libs/langchain/langchain/chains/llm_math/base.py) and [`Qdrant`](https://github.com/langchain-ai/langchain/blob/master/libs/langchain/langchain/vectorstores/qdrant.py). Async support for other agent tools are on the roadmap.\n", + "Async methods are currently supported for the following `Tool`s: [`SearchApiAPIWrapper`](https://github.com/langchain-ai/langchain/blob/master/libs/langchain/langchain/utilities/searchapi.py), [`GoogleSerperAPIWrapper`](https://github.com/langchain-ai/langchain/blob/master/libs/langchain/langchain/utilities/google_serper.py), [`SerpAPIWrapper`](https://github.com/langchain-ai/langchain/blob/master/libs/langchain/langchain/utilities/serpapi.py), [`LLMMathChain`](https://github.com/langchain-ai/langchain/blob/master/libs/langchain/langchain/chains/llm_math/base.py) and [`Qdrant`](https://github.com/langchain-ai/langchain/blob/master/libs/langchain/langchain/vectorstores/qdrant.py). 
Async support for other agent tools are on the roadmap.\n", "\n", "For `Tool`s that have a `coroutine` implemented (the four mentioned above), the `AgentExecutor` will `await` them directly. Otherwise, the `AgentExecutor` will call the `Tool`'s `func` via `asyncio.get_event_loop().run_in_executor` to avoid blocking the main runloop.\n", "\n", diff --git a/libs/langchain/langchain/agents/self_ask_with_search/base.py b/libs/langchain/langchain/agents/self_ask_with_search/base.py index 5a4109a7cf37b..c95e1dd3a28a1 100644 --- a/libs/langchain/langchain/agents/self_ask_with_search/base.py +++ b/libs/langchain/langchain/agents/self_ask_with_search/base.py @@ -13,6 +13,7 @@ from langchain.agents.utils import validate_tools_single_input from langchain.tools.base import BaseTool from langchain.utilities.google_serper import GoogleSerperAPIWrapper +from langchain.utilities.searchapi import SearchApiAPIWrapper from langchain.utilities.serpapi import SerpAPIWrapper @@ -64,7 +65,9 @@ class SelfAskWithSearchChain(AgentExecutor): def __init__( self, llm: BaseLanguageModel, - search_chain: Union[GoogleSerperAPIWrapper, SerpAPIWrapper], + search_chain: Union[ + GoogleSerperAPIWrapper, SearchApiAPIWrapper, SerpAPIWrapper + ], **kwargs: Any, ): """Initialize only with an LLM and a search chain.""" diff --git a/libs/langchain/langchain/tools/__init__.py b/libs/langchain/langchain/tools/__init__.py index cc8c37ee6a632..7cc103753248c 100644 --- a/libs/langchain/langchain/tools/__init__.py +++ b/libs/langchain/langchain/tools/__init__.py @@ -284,6 +284,18 @@ def _import_google_serper_tool_GoogleSerperRun() -> Any: return GoogleSerperRun +def _import_searchapi_tool_SearchAPIResults() -> Any: + from langchain.tools.searchapi.tool import SearchAPIResults + + return SearchAPIResults + + +def _import_searchapi_tool_SearchAPIRun() -> Any: + from langchain.tools.searchapi.tool import SearchAPIRun + + return SearchAPIRun + + def _import_graphql_tool() -> Any: from langchain.tools.graphql.tool 
import BaseGraphQLTool @@ -819,6 +831,10 @@ def __getattr__(name: str) -> Any: return _import_google_serper_tool_GoogleSerperResults() elif name == "GoogleSerperRun": return _import_google_serper_tool_GoogleSerperRun() + elif name == "SearchAPIResults": + return _import_searchapi_tool_SearchAPIResults() + elif name == "SearchAPIRun": + return _import_searchapi_tool_SearchAPIRun() elif name == "BaseGraphQLTool": return _import_graphql_tool() elif name == "HumanInputRun": @@ -1023,6 +1039,8 @@ def __getattr__(name: str) -> Any: "GoogleSearchRun", "GoogleSerperResults", "GoogleSerperRun", + "SearchAPIResults", + "SearchAPIRun", "HumanInputRun", "IFTTTWebhook", "InfoPowerBITool", diff --git a/libs/langchain/tests/integration_tests/chains/test_self_ask_with_search.py b/libs/langchain/tests/integration_tests/chains/test_self_ask_with_search.py index 61ef78d9228d2..f288f23dadf72 100644 --- a/libs/langchain/tests/integration_tests/chains/test_self_ask_with_search.py +++ b/libs/langchain/tests/integration_tests/chains/test_self_ask_with_search.py @@ -1,7 +1,7 @@ """Integration test for self ask with search.""" from langchain.agents.self_ask_with_search.base import SelfAskWithSearchChain from langchain.llms.openai import OpenAI -from langchain.utilities.google_serper import GoogleSerperAPIWrapper +from langchain.utilities.searchapi import SearchApiAPIWrapper def test_self_ask_with_search() -> None: @@ -9,10 +9,10 @@ def test_self_ask_with_search() -> None: question = "What is the hometown of the reigning men's U.S. Open champion?" 
chain = SelfAskWithSearchChain( llm=OpenAI(temperature=0), - search_chain=GoogleSerperAPIWrapper(), + search_chain=SearchApiAPIWrapper(), input_key="q", output_key="a", ) answer = chain.run(question) final_answer = answer.split("\n")[-1] - assert final_answer == "El Palmar, Spain" + assert final_answer == "Belgrade, Serbia" diff --git a/libs/langchain/tests/unit_tests/test_dependencies.py b/libs/langchain/tests/unit_tests/test_dependencies.py index 2d326dcbbc082..a5297dd569f5a 100644 --- a/libs/langchain/tests/unit_tests/test_dependencies.py +++ b/libs/langchain/tests/unit_tests/test_dependencies.py @@ -104,5 +104,8 @@ def test_imports() -> None: from langchain.llms import OpenAI # noqa: F401 from langchain.retrievers import VespaRetriever # noqa: F401 from langchain.tools import DuckDuckGoSearchResults # noqa: F401 - from langchain.utilities import SerpAPIWrapper # noqa: F401 + from langchain.utilities import ( + SearchApiAPIWrapper, # noqa: F401 + SerpAPIWrapper, # noqa: F401 + ) from langchain.vectorstores import FAISS # noqa: F401 diff --git a/libs/langchain/tests/unit_tests/tools/test_imports.py b/libs/langchain/tests/unit_tests/tools/test_imports.py index dbd9bd48555ff..a0960a7d2f551 100644 --- a/libs/langchain/tests/unit_tests/tools/test_imports.py +++ b/libs/langchain/tests/unit_tests/tools/test_imports.py @@ -92,6 +92,8 @@ "RequestsPostTool", "RequestsPutTool", "SceneXplainTool", + "SearchAPIRun", + "SearchAPIResults", "SearxSearchResults", "SearxSearchRun", "ShellTool", diff --git a/libs/langchain/tests/unit_tests/tools/test_public_api.py b/libs/langchain/tests/unit_tests/tools/test_public_api.py index 4d7cf8f4d0a86..87ebceaae32f3 100644 --- a/libs/langchain/tests/unit_tests/tools/test_public_api.py +++ b/libs/langchain/tests/unit_tests/tools/test_public_api.py @@ -94,6 +94,8 @@ "RequestsPostTool", "RequestsPutTool", "SceneXplainTool", + "SearchAPIResults", + "SearchAPIRun", "SearxSearchResults", "SearxSearchRun", "ShellTool", From 
676a077c4e7b1cfa0a3ddeee40cc10ff86d57f57 Mon Sep 17 00:00:00 2001 From: Aliaksandr Kuzmik <98702584+alexkuzmik@users.noreply.github.com> Date: Tue, 5 Dec 2023 01:46:48 +0100 Subject: [PATCH 29/49] Add CometTracer (#13661) Hi! I'm Alex, Python SDK Team Lead from [Comet](https://www.comet.com/site/). This PR contains our new integration between langchain and Comet - `CometTracer` class which uses new `comet_llm` python package for submitting data to Comet. No additional dependencies for the langchain package are required directly, but if the user wants to use `CometTracer`, `comet-llm>=2.0.0` should be installed. Otherwise an exception will be raised from `CometTracer.__init__`. A test for the feature is included. There is also an already existing callback (and .ipynb file with example) which ideally should be deprecated in favor of a new tracer. I wasn't sure how exactly you'd prefer to do it. For example we could open a separate PR for that. I'm open to your ideas :) --- .../langchain/callbacks/tracers/comet.py | 138 ++++++++++++++++++ .../callbacks/tracers/test_comet.py | 97 ++++++++++++ 2 files changed, 235 insertions(+) create mode 100644 libs/langchain/langchain/callbacks/tracers/comet.py create mode 100644 libs/langchain/tests/unit_tests/callbacks/tracers/test_comet.py diff --git a/libs/langchain/langchain/callbacks/tracers/comet.py b/libs/langchain/langchain/callbacks/tracers/comet.py new file mode 100644 index 0000000000000..bfe7bb44342ce --- /dev/null +++ b/libs/langchain/langchain/callbacks/tracers/comet.py @@ -0,0 +1,138 @@ +from types import ModuleType, SimpleNamespace +from typing import TYPE_CHECKING, Any, Callable, Dict + +from langchain.callbacks.tracers.base import BaseTracer + +if TYPE_CHECKING: + from uuid import UUID + + from comet_llm import Span + from comet_llm.chains.chain import Chain + + from langchain.callbacks.tracers.schemas import Run + + +def _get_run_type(run: "Run") -> str: + if isinstance(run.run_type, str): + return run.run_type + 
elif hasattr(run.run_type, "value"): + return run.run_type.value + else: + return str(run.run_type) + + +def import_comet_llm_api() -> SimpleNamespace: + """Import comet_llm api and raise an error if it is not installed.""" + try: + from comet_llm import ( + experiment_info, # noqa: F401 + flush, # noqa: F401 + ) + from comet_llm.chains import api as chain_api # noqa: F401 + from comet_llm.chains import ( + chain, # noqa: F401 + span, # noqa: F401 + ) + + except ImportError: + raise ImportError( + "To use the CometTracer you need to have the " + "`comet_llm>=2.0.0` python package installed. Please install it with" + " `pip install -U comet_llm`" + ) + return SimpleNamespace( + chain=chain, + span=span, + chain_api=chain_api, + experiment_info=experiment_info, + flush=flush, + ) + + +class CometTracer(BaseTracer): + def __init__(self, **kwargs: Any) -> None: + super().__init__(**kwargs) + self._span_map: Dict["UUID", "Span"] = {} + self._chains_map: Dict["UUID", "Chain"] = {} + self._initialize_comet_modules() + + def _initialize_comet_modules(self) -> None: + comet_llm_api = import_comet_llm_api() + self._chain: ModuleType = comet_llm_api.chain + self._span: ModuleType = comet_llm_api.span + self._chain_api: ModuleType = comet_llm_api.chain_api + self._experiment_info: ModuleType = comet_llm_api.experiment_info + self._flush: Callable[[], None] = comet_llm_api.flush + + def _persist_run(self, run: "Run") -> None: + chain_ = self._chains_map[run.id] + chain_.set_outputs(outputs=run.outputs) + self._chain_api.log_chain(chain_) + + def _process_start_trace(self, run: "Run") -> None: + if not run.parent_run_id: + # This is the first run, which maps to a chain + chain_: "Chain" = self._chain.Chain( + inputs=run.inputs, + metadata=None, + experiment_info=self._experiment_info.get(), + ) + self._chains_map[run.id] = chain_ + else: + span: "Span" = self._span.Span( + inputs=run.inputs, + category=_get_run_type(run), + metadata=run.extra, + name=run.name, + ) + 
span.__api__start__(self._chains_map[run.parent_run_id]) + self._chains_map[run.id] = self._chains_map[run.parent_run_id] + self._span_map[run.id] = span + + def _process_end_trace(self, run: "Run") -> None: + if not run.parent_run_id: + pass + # Langchain will call _persist_run for us + else: + span = self._span_map[run.id] + span.set_outputs(outputs=run.outputs) + span.__api__end__() + + def flush(self) -> None: + self._flush() + + def _on_llm_start(self, run: "Run") -> None: + """Process the LLM Run upon start.""" + self._process_start_trace(run) + + def _on_llm_end(self, run: "Run") -> None: + """Process the LLM Run.""" + self._process_end_trace(run) + + def _on_llm_error(self, run: "Run") -> None: + """Process the LLM Run upon error.""" + self._process_end_trace(run) + + def _on_chain_start(self, run: "Run") -> None: + """Process the Chain Run upon start.""" + self._process_start_trace(run) + + def _on_chain_end(self, run: "Run") -> None: + """Process the Chain Run.""" + self._process_end_trace(run) + + def _on_chain_error(self, run: "Run") -> None: + """Process the Chain Run upon error.""" + self._process_end_trace(run) + + def _on_tool_start(self, run: "Run") -> None: + """Process the Tool Run upon start.""" + self._process_start_trace(run) + + def _on_tool_end(self, run: "Run") -> None: + """Process the Tool Run.""" + self._process_end_trace(run) + + def _on_tool_error(self, run: "Run") -> None: + """Process the Tool Run upon error.""" + self._process_end_trace(run) diff --git a/libs/langchain/tests/unit_tests/callbacks/tracers/test_comet.py b/libs/langchain/tests/unit_tests/callbacks/tracers/test_comet.py new file mode 100644 index 0000000000000..537bc64e4554d --- /dev/null +++ b/libs/langchain/tests/unit_tests/callbacks/tracers/test_comet.py @@ -0,0 +1,97 @@ +import uuid +from types import SimpleNamespace +from unittest import mock + +from langchain.callbacks.tracers import comet +from langchain.schema.output import LLMResult + + +def 
test_comet_tracer__trace_chain_with_single_span__happyflow() -> None: + # Setup mocks + chain_module_mock = mock.Mock() + chain_instance_mock = mock.Mock() + chain_module_mock.Chain.return_value = chain_instance_mock + + span_module_mock = mock.Mock() + span_instance_mock = mock.MagicMock() + span_instance_mock.__api__start__ = mock.Mock() + span_instance_mock.__api__end__ = mock.Mock() + + span_module_mock.Span.return_value = span_instance_mock + + experiment_info_module_mock = mock.Mock() + experiment_info_module_mock.get.return_value = "the-experiment-info" + + chain_api_module_mock = mock.Mock() + + comet_ml_api_mock = SimpleNamespace( + chain=chain_module_mock, + span=span_module_mock, + experiment_info=experiment_info_module_mock, + chain_api=chain_api_module_mock, + flush="not-used-in-this-test", + ) + + # Create tracer + with mock.patch.object( + comet, "import_comet_llm_api", return_value=comet_ml_api_mock + ): + tracer = comet.CometTracer() + + run_id_1 = uuid.UUID("9d878ab3-e5ca-4218-aef6-44cbdc90160a") + run_id_2 = uuid.UUID("4f31216e-7c26-4027-a5fd-0bbf9ace17dc") + + # Parent run + tracer.on_chain_start( + {"name": "chain-input"}, + ["chain-input-prompt"], + parent_run_id=None, + run_id=run_id_1, + ) + + # Check that chain was created + chain_module_mock.Chain.assert_called_once_with( + inputs={"input": ["chain-input-prompt"]}, + metadata=None, + experiment_info="the-experiment-info", + ) + + # Child run + tracer.on_llm_start( + {"name": "span-input"}, + ["span-input-prompt"], + parent_run_id=run_id_1, + run_id=run_id_2, + ) + + # Check that Span was created and attached to chain + span_module_mock.Span.assert_called_once_with( + inputs={"prompts": ["span-input-prompt"]}, + category=mock.ANY, + metadata=mock.ANY, + name=mock.ANY, + ) + span_instance_mock.__api__start__(chain_instance_mock) + + # Child run end + tracer.on_llm_end( + LLMResult(generations=[], llm_output={"span-output-key": "span-output-value"}), + run_id=run_id_2, + ) + # Check that Span 
outputs are set and span is ended + span_instance_mock.set_outputs.assert_called_once() + actual_span_outputs = span_instance_mock.set_outputs.call_args[1]["outputs"] + assert { + "llm_output": {"span-output-key": "span-output-value"}, + "generations": [], + }.items() <= actual_span_outputs.items() + span_instance_mock.__api__end__() + + # Parent run end + tracer.on_chain_end({"chain-output-key": "chain-output-value"}, run_id=run_id_1) + + # Check that chain outputs are set and chain is logged + chain_instance_mock.set_outputs.assert_called_once() + actual_chain_outputs = chain_instance_mock.set_outputs.call_args[1]["outputs"] + assert ("chain-output-key", "chain-output-value") in actual_chain_outputs.items() + chain_api_module_mock.log_chain.assert_called_once_with(chain_instance_mock) From ee9abb6722c3a3438cb5313624f2bd7ba54a56e7 Mon Sep 17 00:00:00 2001 From: deedy5 <65482418+deedy5@users.noreply.github.com> Date: Tue, 5 Dec 2023 00:48:20 +0000 Subject: [PATCH 30/49] Bugfix duckduckgo_search news search (#13670) - **Description:** Bugfix duckduckgo_search news search - **Issue:** https://github.com/langchain-ai/langchain/issues/13648 - **Dependencies:** None - **Tag maintainer:** @baskaryan --------- Co-authored-by: Harrison Chase --- docs/docs/integrations/tools/ddg.ipynb | 52 ++++---- .../langchain/tools/ddg_search/tool.py | 6 +- .../langchain/utilities/duckduckgo_search.py | 117 ++++++++++-------- .../utilities/test_duckduckdgo_search_api.py | 13 +- 4 files changed, 111 insertions(+), 77 deletions(-) diff --git a/docs/docs/integrations/tools/ddg.ipynb b/docs/docs/integrations/tools/ddg.ipynb index 2f83586ff94c0..2d8daefcfa416 100644 --- a/docs/docs/integrations/tools/ddg.ipynb +++ b/docs/docs/integrations/tools/ddg.ipynb @@ -17,12 +17,12 @@ "metadata": {}, "outputs": [], "source": [ - "# !pip install duckduckgo-search" + "# !pip install -U duckduckgo-search" ] }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 1, "id": "ac4910f8", 
"metadata": {}, "outputs": [], @@ -32,7 +32,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 2, "id": "84b8f773", "metadata": {}, "outputs": [], @@ -42,17 +42,17 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 3, "id": "068991a6", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "'August 4, 1961 (age 61) Honolulu Hawaii Title / Office: presidency of the United States of America (2009-2017), United States United States Senate (2005-2008), United States ... (Show more) Political Affiliation: Democratic Party Awards And Honors: Barack Hussein Obama II (/ b ə ˈ r ɑː k h uː ˈ s eɪ n oʊ ˈ b ɑː m ə / bə-RAHK hoo-SAYN oh-BAH-mə; born August 4, 1961) is an American politician who served as the 44th president of the United States from 2009 to 2017. A member of the Democratic Party, he was the first African-American president of the United States. Obama previously served as a U.S. senator representing Illinois ... Answer (1 of 12): I see others have answered President Obama\\'s name which is \"Barack Hussein Obama\". President Obama has received many comments about his name from the racists across US. It is worth noting that he never changed his name. Also, it is worth noting that a simple search would have re... What is Barack Obama\\'s full name? Updated: 11/11/2022 Wiki User ∙ 6y ago Study now See answer (1) Best Answer Copy His full, birth name is Barack Hussein Obama, II. He was named after his... Alex Oliveira July 24, 2023 4:57pm Updated 0 seconds of 43 secondsVolume 0% 00:00 00:43 The man who drowned while paddleboarding on a pond outside the Obamas\\' Martha\\'s Vineyard estate has been...'" + "\"Life After the Presidency How Tall is Obama? Books and Grammy Hobbies Movies About Obama Quotes 1961-present Who Is Barack Obama? Barack Obama was the 44 th president of the United States... facts you never knew about Barack Obama is that his immediate family spread out across three continents. 
Barack, who led America from 2009 to 2017, comes from a large family of seven living half-siblings. His father, Barack Obama Sr., met his mother, Ann Dunham, in 1960 and married her a year after. With a tear running from his eye, President Barack Obama recalls the 20 first-graders killed in 2012 at Sandy Hook Elementary School, while speaking in the East Room of the White House in ... Former first Lady Rosalynn Carter was laid to rest at her family's home in Plains, Ga. on Nov. 29 following three days of memorials across her home state. She passed away on Nov. 19, aged 96 ... Here are 28 of President Obama's biggest accomplishments as President of the United States. 1 - Rescued the country from the Great Recession, cutting the unemployment rate from 10% to 4.7% over ...\"" ] }, - "execution_count": 22, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -71,7 +71,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 4, "id": "95635444", "metadata": {}, "outputs": [], @@ -81,7 +81,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 5, "id": "0133d103", "metadata": {}, "outputs": [], @@ -91,17 +91,17 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 7, "id": "439efc06", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "\"[snippet: Barack Hussein Obama II (/ b ə ˈ r ɑː k h uː ˈ s eɪ n oʊ ˈ b ɑː m ə / bə-RAHK hoo-SAYN oh-BAH-mə; born August 4, 1961) is an American politician who served as the 44th president of the United States from 2009 to 2017. A member of the Democratic Party, he was the first African-American president of the United States. Obama previously served as a U.S. 
senator representing Illinois ..., title: Barack Obama - Wikipedia, link: https://en.wikipedia.org/wiki/Barack_Obama], [snippet: Barack Obama, in full Barack Hussein Obama II, (born August 4, 1961, Honolulu, Hawaii, U.S.), 44th president of the United States (2009-17) and the first African American to hold the office. Before winning the presidency, Obama represented Illinois in the U.S. Senate (2005-08). He was the third African American to be elected to that body ..., title: Barack Obama | Biography, Parents, Education, Presidency, Books ..., link: https://www.britannica.com/biography/Barack-Obama], [snippet: Barack Obama 's tenure as the 44th president of the United States began with his first inauguration on January 20, 2009, and ended on January 20, 2017. A Democrat from Illinois, Obama took office following a decisive victory over Republican nominee John McCain in the 2008 presidential election. Four years later, in the 2012 presidential ..., title: Presidency of Barack Obama - Wikipedia, link: https://en.wikipedia.org/wiki/Presidency_of_Barack_Obama], [snippet: First published on Mon 24 Jul 2023 20.03 EDT. Barack Obama's personal chef died while paddleboarding near the ex-president's home on Martha's Vineyard over the weekend, Massachusetts state ..., title: Obama's personal chef dies while paddleboarding off Martha's Vineyard ..., link: https://www.theguardian.com/us-news/2023/jul/24/tafari-campbell-barack-obama-chef-drowns-marthas-vineyard]\"" + "'[snippet: 1:12. Former President Barack Obama, in a CNN interview that aired Thursday night, said he does not believe President Joe Biden will face a serious primary challenge during his 2024 reelection ..., title: Five takeaways from Barack Obama\\'s CNN interview on Biden ... 
- Yahoo, link: https://www.usatoday.com/story/news/politics/2023/06/23/five-takeaways-from-barack-obama-cnn-interview/70349112007/], [snippet: Democratic institutions in the United States and around the world have grown \"creaky,\" former President Barack Obama warned in an exclusive CNN interview Thursday, and it remains incumbent on ..., title: Obama warns democratic institutions are \\'creaky\\' but Trump ... - CNN, link: https://www.cnn.com/2023/06/22/politics/barack-obama-interview-cnntv/index.html], [snippet: Barack Obama was the 44 th president of the United States and the first Black commander-in-chief. He served two terms, from 2009 until 2017. The son of parents from Kenya and Kansas, Obama was ..., title: Barack Obama: Biography, 44th U.S. President, Politician, link: https://www.biography.com/political-figures/barack-obama], [snippet: Aug. 2, 2023, 5:00 PM PDT. By Mike Memoli and Kristen Welker. WASHINGTON — During a trip to the White House in June, former President Barack Obama made it clear to his former running mate that ..., title: Obama privately told Biden he would do whatever it takes to help in 2024, link: https://www.nbcnews.com/politics/white-house/obama-privately-told-biden-whatever-takes-help-2024-rcna97865], [snippet: Natalie Bookey-Baker, a vice president at the Obama Foundation who worked for then-first lady Michelle Obama in the White House, said about 2,500 alumni are expected. 
They are veterans of Obama ..., title: Barack Obama team reunion this week in Chicago; 2,500 alumni expected ..., link: https://chicago.suntimes.com/politics/2023/10/29/23937504/barack-obama-michelle-obama-david-axelrod-pod-save-america-jon-batiste-jen-psaki-reunion-obamaworld]'" ] }, - "execution_count": 25, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -120,7 +120,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 8, "id": "21afe28d", "metadata": {}, "outputs": [], @@ -130,17 +130,17 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 9, "id": "2a4beeb9", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "\"[date: 2023-07-26T12:01:22, title: 'My heart is broken': Former Obama White House chef mourned following apparent drowning death in Edgartown, snippet: Tafari Campbell of Dumfries, Va., had been paddle boarding in Edgartown Great Pond when he appeared to briefly struggle, submerged, and did not return to the surface, authorities have said. Crews ultimately found the 45-year-old's body Monday morning., source: The Boston Globe on MSN.com, link: https://www.msn.com/en-us/news/us/my-heart-is-broken-former-obama-white-house-chef-mourned-following-apparent-drowning-death-in-edgartown/ar-AA1elNB8], [date: 2023-07-25T18:44:00, title: Obama's chef drowns paddleboarding near former president's Edgartown vacation home, snippet: Campbell was visiting Martha's Vineyard, where the Obamas own a vacation home. 
He was not wearing a lifejacket when he fell off his paddleboard., source: YAHOO!News, link: https://news.yahoo.com/obama-chef-drowns-paddleboarding-near-184437491.html], [date: 2023-07-26T00:30:00, title: Obama's personal chef dies while paddleboarding off Martha's Vineyard, snippet: Tafari Campbell, who worked at the White House during Obama's presidency, was visiting the island while the family was away, source: The Guardian, link: https://www.theguardian.com/us-news/2023/jul/24/tafari-campbell-barack-obama-chef-drowns-marthas-vineyard], [date: 2023-07-24T21:54:00, title: Obama's chef ID'd as paddleboarder who drowned near former president's Martha's Vineyard estate, snippet: Former President Barack Obama's personal chef, Tafari Campbell, has been identified as the paddle boarder who drowned near the Obamas' Martha's Vineyard estate., source: Fox News, link: https://www.foxnews.com/politics/obamas-chef-idd-paddleboarder-who-drowned-near-former-presidents-marthas-vineyard-estate]\"" + "'[snippet: 1:12. Former President Barack Obama, in a CNN interview that aired Thursday night, said he does not believe President Joe Biden will face a serious primary challenge during his 2024 reelection ..., title: Five takeaways from Barack Obama\\'s CNN interview on Biden ... - Yahoo, link: https://www.usatoday.com/story/news/politics/2023/06/23/five-takeaways-from-barack-obama-cnn-interview/70349112007/], [snippet: Democratic institutions in the United States and around the world have grown \"creaky,\" former President Barack Obama warned in an exclusive CNN interview Thursday, and it remains incumbent on ..., title: Obama warns democratic institutions are \\'creaky\\' but Trump ... - CNN, link: https://www.cnn.com/2023/06/22/politics/barack-obama-interview-cnntv/index.html], [snippet: Barack Obama was the 44 th president of the United States and the first Black commander-in-chief. He served two terms, from 2009 until 2017. 
The son of parents from Kenya and Kansas, Obama was ..., title: Barack Obama: Biography, 44th U.S. President, Politician, link: https://www.biography.com/political-figures/barack-obama], [snippet: Natalie Bookey-Baker, a vice president at the Obama Foundation who worked for then-first lady Michelle Obama in the White House, said about 2,500 alumni are expected. They are veterans of Obama ..., title: Barack Obama team reunion this week in Chicago; 2,500 alumni expected ..., link: https://chicago.suntimes.com/politics/2023/10/29/23937504/barack-obama-michelle-obama-david-axelrod-pod-save-america-jon-batiste-jen-psaki-reunion-obamaworld], [snippet: Aug. 2, 2023, 5:00 PM PDT. By Mike Memoli and Kristen Welker. WASHINGTON — During a trip to the White House in June, former President Barack Obama made it clear to his former running mate that ..., title: Obama privately told Biden he would do whatever it takes to help in 2024, link: https://www.nbcnews.com/politics/white-house/obama-privately-told-biden-whatever-takes-help-2024-rcna97865]'" ] }, - "execution_count": 27, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -159,7 +159,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 10, "id": "c7ab3b55", "metadata": {}, "outputs": [], @@ -171,27 +171,27 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 11, "id": "adce16e1", "metadata": {}, "outputs": [], "source": [ - "search = DuckDuckGoSearchResults(api_wrapper=wrapper, backend=\"news\")" + "search = DuckDuckGoSearchResults(api_wrapper=wrapper, source=\"news\")" ] }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 12, "id": "b7e77c54", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "'[date: 2023-07-25T12:15:00, title: Barack + Michelle Obama: Sie trauern um Angestellten, snippet: Barack und Michelle Obama trauern um ihren ehemaligen Küchenchef Tafari Campbell. 
Der Familienvater verunglückte am vergangenen Sonntag und wurde in einem Teich geborgen., source: Gala, link: https://www.gala.de/stars/news/barack---michelle-obama--sie-trauern-um-angestellten-23871228.html], [date: 2023-07-25T10:30:00, title: Barack Obama: Sein Koch (†45) ist tot - diese Details sind bekannt, snippet: Tafari Campbell war früher im Weißen Haus eingestellt, arbeitete anschließend weiter für Ex-Präsident Barack Obama. Nun ist er gestorben. Diese Details sind bekannt., source: T-Online, link: https://www.t-online.de/unterhaltung/stars/id_100213226/barack-obama-sein-koch-45-ist-tot-diese-details-sind-bekannt.html], [date: 2023-07-25T05:33:23, title: Barack Obama: Sein Privatkoch ist bei einem tragischen Unfall gestorben, snippet: Barack Obama (61) und Michelle Obama (59) sind in tiefer Trauer. Ihr Privatkoch Tafari Campbell ist am Montag (24. Juli) ums Leben gekommen, er wurde nur 45 Jahre alt. Laut US-Polizei starb er bei ein, source: BUNTE.de, link: https://www.msn.com/de-de/unterhaltung/other/barack-obama-sein-privatkoch-ist-bei-einem-tragischen-unfall-gestorben/ar-AA1ejrAd], [date: 2023-07-25T02:25:00, title: Barack Obama: Privatkoch tot in See gefunden, snippet: Tafari Campbell kochte für Barack Obama im Weißen Haus - und auch privat nach dessen Abschied aus dem Präsidentenamt. Nun machte die Polizei in einem Gewässer eine traurige Entdeckung., source: SPIEGEL, link: https://www.spiegel.de/panorama/justiz/barack-obama-leibkoch-tot-in-see-gefunden-a-3cdf6377-bee0-43f1-a200-a285742f9ffc]'" + "'[snippet: When Obama left office in January 2017, a CNN poll showed him with a 60% approval rating, landing him near the top of the list of presidential approval ratings upon leaving office., title: Opinion: The real reason Trump is attacking Obamacare | CNN, link: https://www.cnn.com/2023/12/04/opinions/trump-obamacare-obama-repeal-health-care-obeidallah/index.html], [snippet: Buchempfehlung von Barack Obama. 
Der gut zweistündige Netflix-Film basiert auf dem gleichnamigen Roman \"Leave the World Behind\" des hochgelobten US-Autors Rumaan Alam. 2020 landete er damit unter den Finalisten des \"National Book Awards\". In Deutschland ist das Buch, das auch Barack Obama auf seiner einflussreichen Lese-Empfehlungsliste hatte ..., title: Neu bei Netflix \"Leave The World Behind\": Kritik zum ... - Prisma, link: https://www.prisma.de/news/filme/Neu-bei-Netflix-Leave-The-World-Behind-Kritik-zum-ungewoehnlichen-Endzeit-Film-mit-Julia-Roberts,46563944]'" ] }, - "execution_count": 30, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -199,6 +199,14 @@ "source": [ "search.run(\"Obama\")" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b133e3c1", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { @@ -217,7 +225,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.9" + "version": "3.10.1" }, "vscode": { "interpreter": { diff --git a/libs/langchain/langchain/tools/ddg_search/tool.py b/libs/langchain/langchain/tools/ddg_search/tool.py index 93b5b8d08dc96..32ac8feae466a 100644 --- a/libs/langchain/langchain/tools/ddg_search/tool.py +++ b/libs/langchain/langchain/tools/ddg_search/tool.py @@ -46,11 +46,11 @@ class DuckDuckGoSearchResults(BaseTool): "Useful for when you need to answer questions about current events. " "Input should be a search query. 
Output is a JSON array of the query results" ) - num_results: int = 4 + max_results: int = Field(alias="num_results", default=4) api_wrapper: DuckDuckGoSearchAPIWrapper = Field( default_factory=DuckDuckGoSearchAPIWrapper ) - backend: str = "api" + backend: str = "text" args_schema: Type[BaseModel] = DDGInput def _run( @@ -59,7 +59,7 @@ def _run( run_manager: Optional[CallbackManagerForToolRun] = None, ) -> str: """Use the tool.""" - res = self.api_wrapper.results(query, self.num_results, backend=self.backend) + res = self.api_wrapper.results(query, self.max_results, source=self.backend) res_strs = [", ".join([f"{k}: {v}" for k, v in d.items()]) for d in res] return ", ".join([f"[{rs}]" for rs in res_strs]) diff --git a/libs/langchain/langchain/utilities/duckduckgo_search.py b/libs/langchain/langchain/utilities/duckduckgo_search.py index 67e89875ceca3..d258726896ab3 100644 --- a/libs/langchain/langchain/utilities/duckduckgo_search.py +++ b/libs/langchain/langchain/utilities/duckduckgo_search.py @@ -18,6 +18,8 @@ class DuckDuckGoSearchAPIWrapper(BaseModel): safesearch: str = "moderate" time: Optional[str] = "y" max_results: int = 5 + backend: str = "api" # which backend to use in DDGS.text() (api, html, lite) + source: str = "text" # which function to use in DDGS (DDGS.text() or DDGS.news()) class Config: """Configuration for this pydantic object.""" @@ -32,43 +34,69 @@ def validate_environment(cls, values: Dict) -> Dict: except ImportError: raise ImportError( "Could not import duckduckgo-search python package. " - "Please install it with `pip install duckduckgo-search`." + "Please install it with `pip install -U duckduckgo-search`." 
) return values - def get_snippets(self, query: str) -> List[str]: - """Run query through DuckDuckGo and return concatenated results.""" + def _ddgs_text( + self, query: str, max_results: Optional[int] = None + ) -> List[Dict[str, str]]: + """Run query through DuckDuckGo text search and return results.""" + from duckduckgo_search import DDGS + + with DDGS() as ddgs: + ddgs_gen = ddgs.text( + query, + region=self.region, + safesearch=self.safesearch, + timelimit=self.time, + max_results=max_results or self.max_results, + backend=self.backend, + ) + if ddgs_gen: + return [r for r in ddgs_gen] + return [] + + def _ddgs_news( + self, query: str, max_results: Optional[int] = None + ) -> List[Dict[str, str]]: + """Run query through DuckDuckGo news search and return results.""" from duckduckgo_search import DDGS with DDGS() as ddgs: - results = ddgs.text( + ddgs_gen = ddgs.news( query, region=self.region, safesearch=self.safesearch, timelimit=self.time, + max_results=max_results or self.max_results, ) - if results is None: - return ["No good DuckDuckGo Search Result was found"] - snippets = [] - for i, res in enumerate(results, 1): - if res is not None: - snippets.append(res["body"]) - if len(snippets) == self.max_results: - break - return snippets + if ddgs_gen: + return [r for r in ddgs_gen] + return [] def run(self, query: str) -> str: - snippets = self.get_snippets(query) - return " ".join(snippets) + """Run query through DuckDuckGo and return concatenated results.""" + if self.source == "text": + results = self._ddgs_text(query) + elif self.source == "news": + results = self._ddgs_news(query) + else: + results = [] + + if not results: + return "No good DuckDuckGo Search Result was found" + return " ".join(r["body"] for r in results) def results( - self, query: str, num_results: int, backend: str = "api" + self, query: str, max_results: int, source: Optional[str] = None ) -> List[Dict[str, str]]: """Run query through DuckDuckGo and return metadata. 
Args: query: The query to search for. - num_results: The number of results to return. + max_results: The number of results to return. + source: The source to look from. Returns: A list of dictionaries with the following keys: @@ -76,38 +104,27 @@ def results( title - The title of the result. link - The link to the result. """ - from duckduckgo_search import DDGS - - with DDGS() as ddgs: - results = ddgs.text( - query, - region=self.region, - safesearch=self.safesearch, - timelimit=self.time, - backend=backend, - ) - if results is None: - return [{"Result": "No good DuckDuckGo Search Result was found"}] - - def to_metadata(result: Dict) -> Dict[str, str]: - if backend == "news": - return { - "date": result["date"], - "title": result["title"], - "snippet": result["body"], - "source": result["source"], - "link": result["url"], - } - return { - "snippet": result["body"], - "title": result["title"], - "link": result["href"], + source = source or self.source + if source == "text": + results = [ + {"snippet": r["body"], "title": r["title"], "link": r["href"]} + for r in self._ddgs_text(query, max_results=max_results) + ] + elif source == "news": + results = [ + { + "snippet": r["body"], + "title": r["title"], + "link": r["url"], + "date": r["date"], + "source": r["source"], } + for r in self._ddgs_news(query, max_results=max_results) + ] + else: + results = [] + + if results is None: + results = [{"Result": "No good DuckDuckGo Search Result was found"}] - formatted_results = [] - for i, res in enumerate(results, 1): - if res is not None: - formatted_results.append(to_metadata(res)) - if len(formatted_results) == num_results: - break - return formatted_results + return results diff --git a/libs/langchain/tests/integration_tests/utilities/test_duckduckdgo_search_api.py b/libs/langchain/tests/integration_tests/utilities/test_duckduckdgo_search_api.py index 8d228e573d6b3..74f0f25fa370c 100644 --- 
a/libs/langchain/tests/integration_tests/utilities/test_duckduckdgo_search_api.py +++ b/libs/langchain/tests/integration_tests/utilities/test_duckduckdgo_search_api.py @@ -1,11 +1,11 @@ import pytest -from langchain.tools.ddg_search.tool import DuckDuckGoSearchRun +from langchain.tools.ddg_search.tool import DuckDuckGoSearchResults, DuckDuckGoSearchRun def ddg_installed() -> bool: try: - from duckduckgo_search import ddg # noqa: F401 + from duckduckgo_search import DDGS # noqa: F401 return True except Exception as e: @@ -20,3 +20,12 @@ def test_ddg_search_tool() -> None: result = tool(keywords) print(result) assert len(result.split()) > 20 + + +@pytest.mark.skipif(not ddg_installed(), reason="requires duckduckgo-search package") +def test_ddg_search_news_tool() -> None: + keywords = "Tesla" + tool = DuckDuckGoSearchResults(source="news") + result = tool(keywords) + print(result) + assert len(result.split()) > 20 From e26906c1dc8c867fdfd2cbb1a67536f0fd5f7d99 Mon Sep 17 00:00:00 2001 From: Michael Landis Date: Mon, 4 Dec 2023 16:50:23 -0800 Subject: [PATCH 31/49] feat: implement max marginal relevance for momento vector index (#13619) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit **Description** Implements `max_marginal_relevance_search` and `max_marginal_relevance_search_by_vector` for the Momento Vector Index vectorstore. Additionally bumps the `momento` dependency in the lock file and adds logging to the implementation. 
**Dependencies** ✅ updates `momento` dependency in lock file **Tag maintainer** @baskaryan **Twitter handle** Please tag @momentohq for Momento Vector Index and @mloml for the contribution 🙇 --- .../vectorstores/momento_vector_index.py | 94 ++++++++++++++++++- libs/langchain/poetry.lock | 15 ++- .../vectorstores/test_momento_vector_index.py | 24 ++++- 3 files changed, 120 insertions(+), 13 deletions(-) diff --git a/libs/langchain/langchain/vectorstores/momento_vector_index.py b/libs/langchain/langchain/vectorstores/momento_vector_index.py index 8368fd6fc1502..a969fa739f824 100644 --- a/libs/langchain/langchain/vectorstores/momento_vector_index.py +++ b/libs/langchain/langchain/vectorstores/momento_vector_index.py @@ -1,3 +1,4 @@ +import logging from typing import ( TYPE_CHECKING, Any, @@ -11,15 +12,17 @@ ) from uuid import uuid4 +import numpy as np from langchain_core.documents import Document from langchain_core.embeddings import Embeddings from langchain_core.vectorstores import VectorStore from langchain.utils import get_from_env -from langchain.vectorstores.utils import DistanceStrategy +from langchain.vectorstores.utils import DistanceStrategy, maximal_marginal_relevance VST = TypeVar("VST", bound="VectorStore") +logger = logging.getLogger(__name__) if TYPE_CHECKING: from momento import PreviewVectorIndexClient @@ -75,9 +78,8 @@ def __init__( index_name (str, optional): The name of the index to store the documents in. Defaults to "default". distance_strategy (DistanceStrategy, optional): The distance strategy to - use. Defaults to DistanceStrategy.COSINE. If you select - DistanceStrategy.EUCLIDEAN_DISTANCE, Momento uses the squared - Euclidean distance. + use. If you select DistanceStrategy.EUCLIDEAN_DISTANCE, Momento uses + the squared Euclidean distance. Defaults to DistanceStrategy.COSINE. text_field (str, optional): The name of the metadata field to store the original text in. Defaults to "text". 
ensure_index_exists (bool, optional): Whether to ensure that the index @@ -125,6 +127,7 @@ def _create_index_if_not_exists(self, num_dimensions: int) -> bool: elif self.distance_strategy == DistanceStrategy.EUCLIDEAN_DISTANCE: similarity_metric = SimilarityMetric.EUCLIDEAN_SIMILARITY else: + logger.error(f"Distance strategy {self.distance_strategy} not implemented.") raise ValueError( f"Distance strategy {self.distance_strategy} not implemented." ) @@ -137,8 +140,10 @@ def _create_index_if_not_exists(self, num_dimensions: int) -> bool: elif isinstance(response, CreateIndex.IndexAlreadyExists): return False elif isinstance(response, CreateIndex.Error): + logger.error(f"Error creating index: {response.inner_exception}") raise response.inner_exception else: + logger.error(f"Unexpected response: {response}") raise Exception(f"Unexpected response: {response}") def add_texts( @@ -331,6 +336,87 @@ def similarity_search_by_vector( ) return [doc for doc, _ in results] + def max_marginal_relevance_search_by_vector( + self, + embedding: List[float], + k: int = 4, + fetch_k: int = 20, + lambda_mult: float = 0.5, + **kwargs: Any, + ) -> List[Document]: + """Return docs selected using the maximal marginal relevance. + + Maximal marginal relevance optimizes for similarity to query AND diversity + among selected documents. + + Args: + embedding: Embedding to look up documents similar to. + k: Number of Documents to return. Defaults to 4. + fetch_k: Number of Documents to fetch to pass to MMR algorithm. + lambda_mult: Number between 0 and 1 that determines the degree + of diversity among the results with 0 corresponding + to maximum diversity and 1 to minimum diversity. + Defaults to 0.5. + Returns: + List of Documents selected by maximal marginal relevance. 
+ """ + from momento.requests.vector_index import ALL_METADATA + from momento.responses.vector_index import SearchAndFetchVectors + + response = self._client.search_and_fetch_vectors( + self.index_name, embedding, top_k=fetch_k, metadata_fields=ALL_METADATA + ) + + if isinstance(response, SearchAndFetchVectors.Success): + pass + elif isinstance(response, SearchAndFetchVectors.Error): + logger.error(f"Error searching and fetching vectors: {response}") + return [] + else: + logger.error(f"Unexpected response: {response}") + raise Exception(f"Unexpected response: {response}") + + mmr_selected = maximal_marginal_relevance( + query_embedding=np.array([embedding], dtype=np.float32), + embedding_list=[hit.vector for hit in response.hits], + lambda_mult=lambda_mult, + k=k, + ) + selected = [response.hits[i].metadata for i in mmr_selected] + return [ + Document(page_content=metadata.pop(self.text_field, ""), metadata=metadata) # type: ignore # noqa: E501 + for metadata in selected + ] + + def max_marginal_relevance_search( + self, + query: str, + k: int = 4, + fetch_k: int = 20, + lambda_mult: float = 0.5, + **kwargs: Any, + ) -> List[Document]: + """Return docs selected using the maximal marginal relevance. + + Maximal marginal relevance optimizes for similarity to query AND diversity + among selected documents. + + Args: + query: Text to look up documents similar to. + k: Number of Documents to return. Defaults to 4. + fetch_k: Number of Documents to fetch to pass to MMR algorithm. + lambda_mult: Number between 0 and 1 that determines the degree + of diversity among the results with 0 corresponding + to maximum diversity and 1 to minimum diversity. + Defaults to 0.5. + Returns: + List of Documents selected by maximal marginal relevance. 
+ """ + embedding = self._embedding.embed_query(query) + return self.max_marginal_relevance_search_by_vector( + embedding, k, fetch_k, lambda_mult, **kwargs + ) + @classmethod def from_texts( cls: Type[VST], diff --git a/libs/langchain/poetry.lock b/libs/langchain/poetry.lock index 4c983673e8c00..8622ddc080d85 100644 --- a/libs/langchain/poetry.lock +++ b/libs/langchain/poetry.lock @@ -3936,7 +3936,6 @@ optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, !=3.6.*" files = [ {file = "jsonpointer-2.4-py2.py3-none-any.whl", hash = "sha256:15d51bba20eea3165644553647711d150376234112651b4f1811022aecad7d7a"}, - {file = "jsonpointer-2.4.tar.gz", hash = "sha256:585cee82b70211fa9e6043b7bb89db6e1aa49524340dde8ad6b63206ea689d88"}, ] [[package]] @@ -4958,29 +4957,29 @@ files = [ [[package]] name = "momento" -version = "1.13.0" +version = "1.14.1" description = "SDK for Momento" optional = true python-versions = ">=3.7,<4.0" files = [ - {file = "momento-1.13.0-py3-none-any.whl", hash = "sha256:dd5ace5b8d679e882afcefaa16bc413973c270b0a7a1c6c45f3eb60b0b9526de"}, - {file = "momento-1.13.0.tar.gz", hash = "sha256:39419627542b8f5997a777ff91aa3aaf6406b7d76fb83cd84284a0f7d1f9e356"}, + {file = "momento-1.14.1-py3-none-any.whl", hash = "sha256:241e46669e39c19627396f2b2b027a912861f1b8097fc9f97b05b76b3d90d199"}, + {file = "momento-1.14.1.tar.gz", hash = "sha256:d200a5e7463f7746a8a611474af1c245183d7ddf9346d9592760b78b6e801560"}, ] [package.dependencies] grpcio = ">=1.46.0,<2.0.0" -momento-wire-types = ">=0.91.1,<0.92.0" +momento-wire-types = ">=0.96.0,<0.97.0" pyjwt = ">=2.4.0,<3.0.0" [[package]] name = "momento-wire-types" -version = "0.91.4" +version = "0.96.0" description = "Momento Client Proto Generated Files" optional = true python-versions = ">=3.7,<4.0" files = [ - {file = "momento_wire_types-0.91.4-py3-none-any.whl", hash = "sha256:f296249693de2f6c383a397e7616b84dd83dfd466743d34b035b90865000a2a8"}, - {file = 
"momento_wire_types-0.91.4.tar.gz", hash = "sha256:de8cd14a12835d95997eb9b753ea47e1a5d2916658ec9320e416da8bd835fdff"}, + {file = "momento_wire_types-0.96.0-py3-none-any.whl", hash = "sha256:93dc0e3c31bbe1f664ce33974f235bc20e63b5e35ea8e118f0c5e5ed3cda7709"}, + {file = "momento_wire_types-0.96.0.tar.gz", hash = "sha256:9c6c839c698741c54b9fc3a4fe0f82094ea5102418b02bb271ed6e64ea6d7d9e"}, ] [package.dependencies] diff --git a/libs/langchain/tests/integration_tests/vectorstores/test_momento_vector_index.py b/libs/langchain/tests/integration_tests/vectorstores/test_momento_vector_index.py index 7689088ac5196..c4f20cf2e117a 100644 --- a/libs/langchain/tests/integration_tests/vectorstores/test_momento_vector_index.py +++ b/libs/langchain/tests/integration_tests/vectorstores/test_momento_vector_index.py @@ -125,7 +125,7 @@ def test_from_texts_with_metadatas( def test_from_texts_with_scores(vector_store: MomentoVectorIndex) -> None: - # """Test end to end construction and search with scores and IDs.""" + """Test end to end construction and search with scores and IDs.""" texts = ["apple", "orange", "hammer"] metadatas = [{"page": f"{i}"} for i in range(len(texts))] @@ -162,3 +162,25 @@ def test_add_documents_with_ids(vector_store: MomentoVectorIndex) -> None: ) assert isinstance(response, Search.Success) assert [hit.id for hit in response.hits] == ids + + +def test_max_marginal_relevance_search(vector_store: MomentoVectorIndex) -> None: + """Test max marginal relevance search.""" + pepperoni_pizza = "pepperoni pizza" + cheese_pizza = "cheese pizza" + hot_dog = "hot dog" + + vector_store.add_texts([pepperoni_pizza, cheese_pizza, hot_dog]) + wait() + search_results = vector_store.similarity_search("pizza", k=2) + + assert search_results == [ + Document(page_content=pepperoni_pizza, metadata={}), + Document(page_content=cheese_pizza, metadata={}), + ] + + search_results = vector_store.max_marginal_relevance_search(query="pizza", k=2) + assert search_results == [ + 
Document(page_content=pepperoni_pizza, metadata={}), + Document(page_content=hot_dog, metadata={}), + ] From 4fb72ff76f46baf3c691eef38c572773e9342ac0 Mon Sep 17 00:00:00 2001 From: Harrison Chase Date: Mon, 4 Dec 2023 16:55:30 -0800 Subject: [PATCH 32/49] fake consistent embeddings cleanup (#14256) delete code that could never be reached --- .../tests/integration_tests/vectorstores/fake_embeddings.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/libs/langchain/tests/integration_tests/vectorstores/fake_embeddings.py b/libs/langchain/tests/integration_tests/vectorstores/fake_embeddings.py index 7b99c696444af..5de74832de05b 100644 --- a/libs/langchain/tests/integration_tests/vectorstores/fake_embeddings.py +++ b/libs/langchain/tests/integration_tests/vectorstores/fake_embeddings.py @@ -53,11 +53,6 @@ def embed_query(self, text: str) -> List[float]: """Return consistent embeddings for the text, if seen before, or a constant one if the text is unknown.""" return self.embed_documents([text])[0] - if text not in self.known_texts: - return [float(1.0)] * (self.dimensionality - 1) + [float(0.0)] - return [float(1.0)] * (self.dimensionality - 1) + [ - float(self.known_texts.index(text)) - ] class AngularTwoDimensionalEmbeddings(Embeddings): From 4351b99d2b102951576aa1addb9b133d0d082ee6 Mon Sep 17 00:00:00 2001 From: Erick Friis Date: Mon, 4 Dec 2023 16:58:26 -0800 Subject: [PATCH 33/49] docs[patch]: search experiment (#14254) - npm - search config - custom --- docs/.local_build.sh | 2 +- docs/docusaurus.config.js | 16 +- docs/package-lock.json | 1292 ++++++++++------------------------- docs/package.json | 9 +- docs/src/css/custom.css | 17 - docs/src/theme/SearchBar.js | 35 - 6 files changed, 396 insertions(+), 975 deletions(-) delete mode 100644 docs/src/theme/SearchBar.js diff --git a/docs/.local_build.sh b/docs/.local_build.sh index 0c67d35024a70..21d3fb3d49bc6 100755 --- a/docs/.local_build.sh +++ b/docs/.local_build.sh @@ -9,7 +9,7 @@ SCRIPT_DIR="$(cd "$(dirname 
"$0")"; pwd)" cd "${SCRIPT_DIR}" mkdir -p ../_dist -rsync -ruv . ../_dist +rsync -ruv --exclude node_modules . ../_dist cd ../_dist poetry run python scripts/model_feat_table.py cp ../cookbook/README.md src/pages/cookbook.mdx diff --git a/docs/docusaurus.config.js b/docs/docusaurus.config.js index d38a3591e77ba..f256de993b955 100644 --- a/docs/docusaurus.config.js +++ b/docs/docusaurus.config.js @@ -14,9 +14,6 @@ const config = { title: "🦜️🔗 Langchain", tagline: "LangChain Python Docs", favicon: "img/favicon.ico", - customFields: { - mendableAnonKey: process.env.MENDABLE_ANON_KEY, - }, // Set the production url of your site here url: "https://python.langchain.com", // Set the // pathname under which your site is served @@ -310,6 +307,18 @@ const config = { ], copyright: `Copyright © ${new Date().getFullYear()} LangChain, Inc.`, }, + algolia: { + // The application ID provided by Algolia + appId: "VAU016LAWS", + + // Public API key: it is safe to commit it + // this is linked to erick@langchain.dev currently + apiKey: "6c01842d6a88772ed2236b9c85806441", + + indexName: "python-langchain", + + contextualSearch: true, + }, }), scripts: [ @@ -319,6 +328,7 @@ const config = { async: true, }, ], + }; module.exports = config; diff --git a/docs/package-lock.json b/docs/package-lock.json index 315c5bc78c4ec..2d2efe785d068 100644 --- a/docs/package-lock.json +++ b/docs/package-lock.json @@ -8,12 +8,11 @@ "name": "docs", "version": "0.0.0", "dependencies": { - "@docusaurus/core": "2.4.0", - "@docusaurus/preset-classic": "2.4.0", - "@docusaurus/remark-plugin-npm2yarn": "^2.4.0", - "@docusaurus/theme-mermaid": "^2.4.3", + "@docusaurus/core": "2.4.3", + "@docusaurus/preset-classic": "2.4.3", + "@docusaurus/remark-plugin-npm2yarn": "^2.4.3", + "@docusaurus/theme-mermaid": "2.4.3", "@mdx-js/react": "^1.6.22", - "@mendable/search": "^0.0.160", "clsx": "^1.2.1", "json-loader": "^0.5.7", "process": "^0.11.10", @@ -92,74 +91,74 @@ } }, 
"node_modules/@algolia/cache-browser-local-storage": { - "version": "4.18.0", - "resolved": "https://registry.npmjs.org/@algolia/cache-browser-local-storage/-/cache-browser-local-storage-4.18.0.tgz", - "integrity": "sha512-rUAs49NLlO8LVLgGzM4cLkw8NJLKguQLgvFmBEe3DyzlinoqxzQMHfKZs6TSq4LZfw/z8qHvRo8NcTAAUJQLcw==", + "version": "4.20.0", + "resolved": "https://registry.npmjs.org/@algolia/cache-browser-local-storage/-/cache-browser-local-storage-4.20.0.tgz", + "integrity": "sha512-uujahcBt4DxduBTvYdwO3sBfHuJvJokiC3BP1+O70fglmE1ShkH8lpXqZBac1rrU3FnNYSUs4pL9lBdTKeRPOQ==", "dependencies": { - "@algolia/cache-common": "4.18.0" + "@algolia/cache-common": "4.20.0" } }, "node_modules/@algolia/cache-common": { - "version": "4.18.0", - "resolved": "https://registry.npmjs.org/@algolia/cache-common/-/cache-common-4.18.0.tgz", - "integrity": "sha512-BmxsicMR4doGbeEXQu8yqiGmiyvpNvejYJtQ7rvzttEAMxOPoWEHrWyzBQw4x7LrBY9pMrgv4ZlUaF8PGzewHg==" + "version": "4.20.0", + "resolved": "https://registry.npmjs.org/@algolia/cache-common/-/cache-common-4.20.0.tgz", + "integrity": "sha512-vCfxauaZutL3NImzB2G9LjLt36vKAckc6DhMp05An14kVo8F1Yofb6SIl6U3SaEz8pG2QOB9ptwM5c+zGevwIQ==" }, "node_modules/@algolia/cache-in-memory": { - "version": "4.18.0", - "resolved": "https://registry.npmjs.org/@algolia/cache-in-memory/-/cache-in-memory-4.18.0.tgz", - "integrity": "sha512-evD4dA1nd5HbFdufBxLqlJoob7E2ozlqJZuV3YlirNx5Na4q1LckIuzjNYZs2ddLzuTc/Xd5O3Ibf7OwPskHxw==", + "version": "4.20.0", + "resolved": "https://registry.npmjs.org/@algolia/cache-in-memory/-/cache-in-memory-4.20.0.tgz", + "integrity": "sha512-Wm9ak/IaacAZXS4mB3+qF/KCoVSBV6aLgIGFEtQtJwjv64g4ePMapORGmCyulCFwfePaRAtcaTbMcJF+voc/bg==", "dependencies": { - "@algolia/cache-common": "4.18.0" + "@algolia/cache-common": "4.20.0" } }, "node_modules/@algolia/client-account": { - "version": "4.18.0", - "resolved": "https://registry.npmjs.org/@algolia/client-account/-/client-account-4.18.0.tgz", - "integrity": 
"sha512-XsDnlROr3+Z1yjxBJjUMfMazi1V155kVdte6496atvBgOEtwCzTs3A+qdhfsAnGUvaYfBrBkL0ThnhMIBCGcew==", + "version": "4.20.0", + "resolved": "https://registry.npmjs.org/@algolia/client-account/-/client-account-4.20.0.tgz", + "integrity": "sha512-GGToLQvrwo7am4zVkZTnKa72pheQeez/16sURDWm7Seyz+HUxKi3BM6fthVVPUEBhtJ0reyVtuK9ArmnaKl10Q==", "dependencies": { - "@algolia/client-common": "4.18.0", - "@algolia/client-search": "4.18.0", - "@algolia/transporter": "4.18.0" + "@algolia/client-common": "4.20.0", + "@algolia/client-search": "4.20.0", + "@algolia/transporter": "4.20.0" } }, "node_modules/@algolia/client-analytics": { - "version": "4.18.0", - "resolved": "https://registry.npmjs.org/@algolia/client-analytics/-/client-analytics-4.18.0.tgz", - "integrity": "sha512-chEUSN4ReqU7uRQ1C8kDm0EiPE+eJeAXiWcBwLhEynfNuTfawN9P93rSZktj7gmExz0C8XmkbBU19IQ05wCNrQ==", + "version": "4.20.0", + "resolved": "https://registry.npmjs.org/@algolia/client-analytics/-/client-analytics-4.20.0.tgz", + "integrity": "sha512-EIr+PdFMOallRdBTHHdKI3CstslgLORQG7844Mq84ib5oVFRVASuuPmG4bXBgiDbcsMLUeOC6zRVJhv1KWI0ug==", "dependencies": { - "@algolia/client-common": "4.18.0", - "@algolia/client-search": "4.18.0", - "@algolia/requester-common": "4.18.0", - "@algolia/transporter": "4.18.0" + "@algolia/client-common": "4.20.0", + "@algolia/client-search": "4.20.0", + "@algolia/requester-common": "4.20.0", + "@algolia/transporter": "4.20.0" } }, "node_modules/@algolia/client-common": { - "version": "4.18.0", - "resolved": "https://registry.npmjs.org/@algolia/client-common/-/client-common-4.18.0.tgz", - "integrity": "sha512-7N+soJFP4wn8tjTr3MSUT/U+4xVXbz4jmeRfWfVAzdAbxLAQbHa0o/POSdTvQ8/02DjCLelloZ1bb4ZFVKg7Wg==", + "version": "4.20.0", + "resolved": "https://registry.npmjs.org/@algolia/client-common/-/client-common-4.20.0.tgz", + "integrity": "sha512-P3WgMdEss915p+knMMSd/fwiHRHKvDu4DYRrCRaBrsfFw7EQHon+EbRSm4QisS9NYdxbS04kcvNoavVGthyfqQ==", "dependencies": { - "@algolia/requester-common": "4.18.0", - 
"@algolia/transporter": "4.18.0" + "@algolia/requester-common": "4.20.0", + "@algolia/transporter": "4.20.0" } }, "node_modules/@algolia/client-personalization": { - "version": "4.18.0", - "resolved": "https://registry.npmjs.org/@algolia/client-personalization/-/client-personalization-4.18.0.tgz", - "integrity": "sha512-+PeCjODbxtamHcPl+couXMeHEefpUpr7IHftj4Y4Nia1hj8gGq4VlIcqhToAw8YjLeCTfOR7r7xtj3pJcYdP8A==", + "version": "4.20.0", + "resolved": "https://registry.npmjs.org/@algolia/client-personalization/-/client-personalization-4.20.0.tgz", + "integrity": "sha512-N9+zx0tWOQsLc3K4PVRDV8GUeOLAY0i445En79Pr3zWB+m67V+n/8w4Kw1C5LlbHDDJcyhMMIlqezh6BEk7xAQ==", "dependencies": { - "@algolia/client-common": "4.18.0", - "@algolia/requester-common": "4.18.0", - "@algolia/transporter": "4.18.0" + "@algolia/client-common": "4.20.0", + "@algolia/requester-common": "4.20.0", + "@algolia/transporter": "4.20.0" } }, "node_modules/@algolia/client-search": { - "version": "4.18.0", - "resolved": "https://registry.npmjs.org/@algolia/client-search/-/client-search-4.18.0.tgz", - "integrity": "sha512-F9xzQXTjm6UuZtnsLIew6KSraXQ0AzS/Ee+OD+mQbtcA/K1sg89tqb8TkwjtiYZ0oij13u3EapB3gPZwm+1Y6g==", + "version": "4.20.0", + "resolved": "https://registry.npmjs.org/@algolia/client-search/-/client-search-4.20.0.tgz", + "integrity": "sha512-zgwqnMvhWLdpzKTpd3sGmMlr4c+iS7eyyLGiaO51zDZWGMkpgoNVmltkzdBwxOVXz0RsFMznIxB9zuarUv4TZg==", "dependencies": { - "@algolia/client-common": "4.18.0", - "@algolia/requester-common": "4.18.0", - "@algolia/transporter": "4.18.0" + "@algolia/client-common": "4.20.0", + "@algolia/requester-common": "4.20.0", + "@algolia/transporter": "4.20.0" } }, "node_modules/@algolia/events": { @@ -168,47 +167,47 @@ "integrity": "sha512-FQzvOCgoFXAbf5Y6mYozw2aj5KCJoA3m4heImceldzPSMbdyS4atVjJzXKMsfX3wnZTFYwkkt8/z8UesLHlSBQ==" }, "node_modules/@algolia/logger-common": { - "version": "4.18.0", - "resolved": "https://registry.npmjs.org/@algolia/logger-common/-/logger-common-4.18.0.tgz", - 
"integrity": "sha512-46etYgSlkoKepkMSyaoriSn2JDgcrpc/nkOgou/lm0y17GuMl9oYZxwKKTSviLKI5Irk9nSKGwnBTQYwXOYdRg==" + "version": "4.20.0", + "resolved": "https://registry.npmjs.org/@algolia/logger-common/-/logger-common-4.20.0.tgz", + "integrity": "sha512-xouigCMB5WJYEwvoWW5XDv7Z9f0A8VoXJc3VKwlHJw/je+3p2RcDXfksLI4G4lIVncFUYMZx30tP/rsdlvvzHQ==" }, "node_modules/@algolia/logger-console": { - "version": "4.18.0", - "resolved": "https://registry.npmjs.org/@algolia/logger-console/-/logger-console-4.18.0.tgz", - "integrity": "sha512-3P3VUYMl9CyJbi/UU1uUNlf6Z8N2ltW3Oqhq/nR7vH0CjWv32YROq3iGWGxB2xt3aXobdUPXs6P0tHSKRmNA6g==", + "version": "4.20.0", + "resolved": "https://registry.npmjs.org/@algolia/logger-console/-/logger-console-4.20.0.tgz", + "integrity": "sha512-THlIGG1g/FS63z0StQqDhT6bprUczBI8wnLT3JWvfAQDZX5P6fCg7dG+pIrUBpDIHGszgkqYEqECaKKsdNKOUA==", "dependencies": { - "@algolia/logger-common": "4.18.0" + "@algolia/logger-common": "4.20.0" } }, "node_modules/@algolia/requester-browser-xhr": { - "version": "4.18.0", - "resolved": "https://registry.npmjs.org/@algolia/requester-browser-xhr/-/requester-browser-xhr-4.18.0.tgz", - "integrity": "sha512-/AcWHOBub2U4TE/bPi4Gz1XfuLK6/7dj4HJG+Z2SfQoS1RjNLshZclU3OoKIkFp8D2NC7+BNsPvr9cPLyW8nyQ==", + "version": "4.20.0", + "resolved": "https://registry.npmjs.org/@algolia/requester-browser-xhr/-/requester-browser-xhr-4.20.0.tgz", + "integrity": "sha512-HbzoSjcjuUmYOkcHECkVTwAelmvTlgs48N6Owt4FnTOQdwn0b8pdht9eMgishvk8+F8bal354nhx/xOoTfwiAw==", "dependencies": { - "@algolia/requester-common": "4.18.0" + "@algolia/requester-common": "4.20.0" } }, "node_modules/@algolia/requester-common": { - "version": "4.18.0", - "resolved": "https://registry.npmjs.org/@algolia/requester-common/-/requester-common-4.18.0.tgz", - "integrity": "sha512-xlT8R1qYNRBCi1IYLsx7uhftzdfsLPDGudeQs+xvYB4sQ3ya7+ppolB/8m/a4F2gCkEO6oxpp5AGemM7kD27jA==" + "version": "4.20.0", + "resolved": "https://registry.npmjs.org/@algolia/requester-common/-/requester-common-4.20.0.tgz", + 
"integrity": "sha512-9h6ye6RY/BkfmeJp7Z8gyyeMrmmWsMOCRBXQDs4mZKKsyVlfIVICpcSibbeYcuUdurLhIlrOUkH3rQEgZzonng==" }, "node_modules/@algolia/requester-node-http": { - "version": "4.18.0", - "resolved": "https://registry.npmjs.org/@algolia/requester-node-http/-/requester-node-http-4.18.0.tgz", - "integrity": "sha512-TGfwj9aeTVgOUhn5XrqBhwUhUUDnGIKlI0kCBMdR58XfXcfdwomka+CPIgThRbfYw04oQr31A6/95ZH2QVJ9UQ==", + "version": "4.20.0", + "resolved": "https://registry.npmjs.org/@algolia/requester-node-http/-/requester-node-http-4.20.0.tgz", + "integrity": "sha512-ocJ66L60ABSSTRFnCHIEZpNHv6qTxsBwJEPfYaSBsLQodm0F9ptvalFkHMpvj5DfE22oZrcrLbOYM2bdPJRHng==", "dependencies": { - "@algolia/requester-common": "4.18.0" + "@algolia/requester-common": "4.20.0" } }, "node_modules/@algolia/transporter": { - "version": "4.18.0", - "resolved": "https://registry.npmjs.org/@algolia/transporter/-/transporter-4.18.0.tgz", - "integrity": "sha512-xbw3YRUGtXQNG1geYFEDDuFLZt4Z8YNKbamHPkzr3rWc6qp4/BqEeXcI2u/P/oMq2yxtXgMxrCxOPA8lyIe5jw==", + "version": "4.20.0", + "resolved": "https://registry.npmjs.org/@algolia/transporter/-/transporter-4.20.0.tgz", + "integrity": "sha512-Lsii1pGWOAISbzeyuf+r/GPhvHMPHSPrTDWNcIzOE1SG1inlJHICaVe2ikuoRjcpgxZNU54Jl+if15SUCsaTUg==", "dependencies": { - "@algolia/cache-common": "4.18.0", - "@algolia/logger-common": "4.18.0", - "@algolia/requester-common": "4.18.0" + "@algolia/cache-common": "4.20.0", + "@algolia/logger-common": "4.20.0", + "@algolia/requester-common": "4.20.0" } }, "node_modules/@ampproject/remapping": { @@ -660,6 +659,7 @@ "version": "7.12.1", "resolved": "https://registry.npmjs.org/@babel/plugin-proposal-object-rest-spread/-/plugin-proposal-object-rest-spread-7.12.1.tgz", "integrity": "sha512-s6SowJIjzlhx8o7lsFx5zmY4At6CTtDvgNQDdPzkBQucle58A6b/TTeEBYtyDgmcXjUTM+vE8YOGHZzzbc/ioA==", + "deprecated": "This proposal has been merged to the ECMAScript standard and thus this plugin is no longer maintained. 
Please use @babel/plugin-transform-object-rest-spread instead.", "dependencies": { "@babel/helper-plugin-utils": "^7.10.4", "@babel/plugin-syntax-object-rest-spread": "^7.8.0", @@ -2017,24 +2017,25 @@ } }, "node_modules/@docsearch/css": { - "version": "3.5.1", - "resolved": "https://registry.npmjs.org/@docsearch/css/-/css-3.5.1.tgz", - "integrity": "sha512-2Pu9HDg/uP/IT10rbQ+4OrTQuxIWdKVUEdcw9/w7kZJv9NeHS6skJx1xuRiFyoGKwAzcHXnLp7csE99sj+O1YA==" + "version": "3.5.2", + "resolved": "https://registry.npmjs.org/@docsearch/css/-/css-3.5.2.tgz", + "integrity": "sha512-SPiDHaWKQZpwR2siD0KQUwlStvIAnEyK6tAE2h2Wuoq8ue9skzhlyVQ1ddzOxX6khULnAALDiR/isSF3bnuciA==" }, "node_modules/@docsearch/react": { - "version": "3.5.1", - "resolved": "https://registry.npmjs.org/@docsearch/react/-/react-3.5.1.tgz", - "integrity": "sha512-t5mEODdLzZq4PTFAm/dvqcvZFdPDMdfPE5rJS5SC8OUq9mPzxEy6b+9THIqNM9P0ocCb4UC5jqBrxKclnuIbzQ==", + "version": "3.5.2", + "resolved": "https://registry.npmjs.org/@docsearch/react/-/react-3.5.2.tgz", + "integrity": "sha512-9Ahcrs5z2jq/DcAvYtvlqEBHImbm4YJI8M9y0x6Tqg598P40HTEkX7hsMcIuThI+hTFxRGZ9hll0Wygm2yEjng==", "dependencies": { "@algolia/autocomplete-core": "1.9.3", "@algolia/autocomplete-preset-algolia": "1.9.3", - "@docsearch/css": "3.5.1", - "algoliasearch": "^4.0.0" + "@docsearch/css": "3.5.2", + "algoliasearch": "^4.19.1" }, "peerDependencies": { "@types/react": ">= 16.8.0 < 19.0.0", "react": ">= 16.8.0 < 19.0.0", - "react-dom": ">= 16.8.0 < 19.0.0" + "react-dom": ">= 16.8.0 < 19.0.0", + "search-insights": ">= 1 < 3" }, "peerDependenciesMeta": { "@types/react": { @@ -2045,13 +2046,16 @@ }, "react-dom": { "optional": true + }, + "search-insights": { + "optional": true } } }, "node_modules/@docusaurus/core": { - "version": "2.4.0", - "resolved": "https://registry.npmjs.org/@docusaurus/core/-/core-2.4.0.tgz", - "integrity": "sha512-J55/WEoIpRcLf3afO5POHPguVZosKmJEQWKBL+K7TAnfuE7i+Y0NPLlkKtnWCehagGsgTqClfQEexH/UT4kELA==", + "version": "2.4.3", + "resolved": 
"https://registry.npmjs.org/@docusaurus/core/-/core-2.4.3.tgz", + "integrity": "sha512-dWH5P7cgeNSIg9ufReX6gaCl/TmrGKD38Orbwuz05WPhAQtFXHd5B8Qym1TiXfvUNvwoYKkAJOJuGe8ou0Z7PA==", "dependencies": { "@babel/core": "^7.18.6", "@babel/generator": "^7.18.7", @@ -2063,13 +2067,13 @@ "@babel/runtime": "^7.18.6", "@babel/runtime-corejs3": "^7.18.6", "@babel/traverse": "^7.18.8", - "@docusaurus/cssnano-preset": "2.4.0", - "@docusaurus/logger": "2.4.0", - "@docusaurus/mdx-loader": "2.4.0", + "@docusaurus/cssnano-preset": "2.4.3", + "@docusaurus/logger": "2.4.3", + "@docusaurus/mdx-loader": "2.4.3", "@docusaurus/react-loadable": "5.5.2", - "@docusaurus/utils": "2.4.0", - "@docusaurus/utils-common": "2.4.0", - "@docusaurus/utils-validation": "2.4.0", + "@docusaurus/utils": "2.4.3", + "@docusaurus/utils-common": "2.4.3", + "@docusaurus/utils-validation": "2.4.3", "@slorber/static-site-generator-webpack-plugin": "^4.0.7", "@svgr/webpack": "^6.2.1", "autoprefixer": "^10.4.7", @@ -2231,9 +2235,9 @@ "integrity": "sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A==" }, "node_modules/@docusaurus/cssnano-preset": { - "version": "2.4.0", - "resolved": "https://registry.npmjs.org/@docusaurus/cssnano-preset/-/cssnano-preset-2.4.0.tgz", - "integrity": "sha512-RmdiA3IpsLgZGXRzqnmTbGv43W4OD44PCo+6Q/aYjEM2V57vKCVqNzuafE94jv0z/PjHoXUrjr69SaRymBKYYw==", + "version": "2.4.3", + "resolved": "https://registry.npmjs.org/@docusaurus/cssnano-preset/-/cssnano-preset-2.4.3.tgz", + "integrity": "sha512-ZvGSRCi7z9wLnZrXNPG6DmVPHdKGd8dIn9pYbEOFiYihfv4uDR3UtxogmKf+rT8ZlKFf5Lqne8E8nt08zNM8CA==", "dependencies": { "cssnano-preset-advanced": "^5.3.8", "postcss": "^8.4.14", @@ -2245,9 +2249,9 @@ } }, "node_modules/@docusaurus/logger": { - "version": "2.4.0", - "resolved": "https://registry.npmjs.org/@docusaurus/logger/-/logger-2.4.0.tgz", - "integrity": "sha512-T8+qR4APN+MjcC9yL2Es+xPJ2923S9hpzDmMtdsOcUGLqpCGBbU1vp3AAqDwXtVgFkq+NsEk7sHdVsfLWR/AXw==", + "version": 
"2.4.3", + "resolved": "https://registry.npmjs.org/@docusaurus/logger/-/logger-2.4.3.tgz", + "integrity": "sha512-Zxws7r3yLufk9xM1zq9ged0YHs65mlRmtsobnFkdZTxWXdTYlWWLWdKyNKAsVC+D7zg+pv2fGbyabdOnyZOM3w==", "dependencies": { "chalk": "^4.1.2", "tslib": "^2.4.0" @@ -2321,492 +2325,6 @@ } }, "node_modules/@docusaurus/mdx-loader": { - "version": "2.4.0", - "resolved": "https://registry.npmjs.org/@docusaurus/mdx-loader/-/mdx-loader-2.4.0.tgz", - "integrity": "sha512-GWoH4izZKOmFoC+gbI2/y8deH/xKLvzz/T5BsEexBye8EHQlwsA7FMrVa48N063bJBH4FUOiRRXxk5rq9cC36g==", - "dependencies": { - "@babel/parser": "^7.18.8", - "@babel/traverse": "^7.18.8", - "@docusaurus/logger": "2.4.0", - "@docusaurus/utils": "2.4.0", - "@mdx-js/mdx": "^1.6.22", - "escape-html": "^1.0.3", - "file-loader": "^6.2.0", - "fs-extra": "^10.1.0", - "image-size": "^1.0.1", - "mdast-util-to-string": "^2.0.0", - "remark-emoji": "^2.2.0", - "stringify-object": "^3.3.0", - "tslib": "^2.4.0", - "unified": "^9.2.2", - "unist-util-visit": "^2.0.3", - "url-loader": "^4.1.1", - "webpack": "^5.73.0" - }, - "engines": { - "node": ">=16.14" - }, - "peerDependencies": { - "react": "^16.8.4 || ^17.0.0", - "react-dom": "^16.8.4 || ^17.0.0" - } - }, - "node_modules/@docusaurus/module-type-aliases": { - "version": "2.4.0", - "resolved": "https://registry.npmjs.org/@docusaurus/module-type-aliases/-/module-type-aliases-2.4.0.tgz", - "integrity": "sha512-YEQO2D3UXs72qCn8Cr+RlycSQXVGN9iEUyuHwTuK4/uL/HFomB2FHSU0vSDM23oLd+X/KibQ3Ez6nGjQLqXcHg==", - "dependencies": { - "@docusaurus/react-loadable": "5.5.2", - "@docusaurus/types": "2.4.0", - "@types/history": "^4.7.11", - "@types/react": "*", - "@types/react-router-config": "*", - "@types/react-router-dom": "*", - "react-helmet-async": "*", - "react-loadable": "npm:@docusaurus/react-loadable@5.5.2" - }, - "peerDependencies": { - "react": "*", - "react-dom": "*" - } - }, - "node_modules/@docusaurus/plugin-content-blog": { - "version": "2.4.0", - "resolved": 
"https://registry.npmjs.org/@docusaurus/plugin-content-blog/-/plugin-content-blog-2.4.0.tgz", - "integrity": "sha512-YwkAkVUxtxoBAIj/MCb4ohN0SCtHBs4AS75jMhPpf67qf3j+U/4n33cELq7567hwyZ6fMz2GPJcVmctzlGGThQ==", - "dependencies": { - "@docusaurus/core": "2.4.0", - "@docusaurus/logger": "2.4.0", - "@docusaurus/mdx-loader": "2.4.0", - "@docusaurus/types": "2.4.0", - "@docusaurus/utils": "2.4.0", - "@docusaurus/utils-common": "2.4.0", - "@docusaurus/utils-validation": "2.4.0", - "cheerio": "^1.0.0-rc.12", - "feed": "^4.2.2", - "fs-extra": "^10.1.0", - "lodash": "^4.17.21", - "reading-time": "^1.5.0", - "tslib": "^2.4.0", - "unist-util-visit": "^2.0.3", - "utility-types": "^3.10.0", - "webpack": "^5.73.0" - }, - "engines": { - "node": ">=16.14" - }, - "peerDependencies": { - "react": "^16.8.4 || ^17.0.0", - "react-dom": "^16.8.4 || ^17.0.0" - } - }, - "node_modules/@docusaurus/plugin-content-docs": { - "version": "2.4.0", - "resolved": "https://registry.npmjs.org/@docusaurus/plugin-content-docs/-/plugin-content-docs-2.4.0.tgz", - "integrity": "sha512-ic/Z/ZN5Rk/RQo+Io6rUGpToOtNbtPloMR2JcGwC1xT2riMu6zzfSwmBi9tHJgdXH6CB5jG+0dOZZO8QS5tmDg==", - "dependencies": { - "@docusaurus/core": "2.4.0", - "@docusaurus/logger": "2.4.0", - "@docusaurus/mdx-loader": "2.4.0", - "@docusaurus/module-type-aliases": "2.4.0", - "@docusaurus/types": "2.4.0", - "@docusaurus/utils": "2.4.0", - "@docusaurus/utils-validation": "2.4.0", - "@types/react-router-config": "^5.0.6", - "combine-promises": "^1.1.0", - "fs-extra": "^10.1.0", - "import-fresh": "^3.3.0", - "js-yaml": "^4.1.0", - "lodash": "^4.17.21", - "tslib": "^2.4.0", - "utility-types": "^3.10.0", - "webpack": "^5.73.0" - }, - "engines": { - "node": ">=16.14" - }, - "peerDependencies": { - "react": "^16.8.4 || ^17.0.0", - "react-dom": "^16.8.4 || ^17.0.0" - } - }, - "node_modules/@docusaurus/plugin-content-pages": { - "version": "2.4.0", - "resolved": 
"https://registry.npmjs.org/@docusaurus/plugin-content-pages/-/plugin-content-pages-2.4.0.tgz", - "integrity": "sha512-Pk2pOeOxk8MeU3mrTU0XLIgP9NZixbdcJmJ7RUFrZp1Aj42nd0RhIT14BGvXXyqb8yTQlk4DmYGAzqOfBsFyGw==", - "dependencies": { - "@docusaurus/core": "2.4.0", - "@docusaurus/mdx-loader": "2.4.0", - "@docusaurus/types": "2.4.0", - "@docusaurus/utils": "2.4.0", - "@docusaurus/utils-validation": "2.4.0", - "fs-extra": "^10.1.0", - "tslib": "^2.4.0", - "webpack": "^5.73.0" - }, - "engines": { - "node": ">=16.14" - }, - "peerDependencies": { - "react": "^16.8.4 || ^17.0.0", - "react-dom": "^16.8.4 || ^17.0.0" - } - }, - "node_modules/@docusaurus/plugin-debug": { - "version": "2.4.0", - "resolved": "https://registry.npmjs.org/@docusaurus/plugin-debug/-/plugin-debug-2.4.0.tgz", - "integrity": "sha512-KC56DdYjYT7Txyux71vXHXGYZuP6yYtqwClvYpjKreWIHWus5Zt6VNi23rMZv3/QKhOCrN64zplUbdfQMvddBQ==", - "dependencies": { - "@docusaurus/core": "2.4.0", - "@docusaurus/types": "2.4.0", - "@docusaurus/utils": "2.4.0", - "fs-extra": "^10.1.0", - "react-json-view": "^1.21.3", - "tslib": "^2.4.0" - }, - "engines": { - "node": ">=16.14" - }, - "peerDependencies": { - "react": "^16.8.4 || ^17.0.0", - "react-dom": "^16.8.4 || ^17.0.0" - } - }, - "node_modules/@docusaurus/plugin-google-analytics": { - "version": "2.4.0", - "resolved": "https://registry.npmjs.org/@docusaurus/plugin-google-analytics/-/plugin-google-analytics-2.4.0.tgz", - "integrity": "sha512-uGUzX67DOAIglygdNrmMOvEp8qG03X20jMWadeqVQktS6nADvozpSLGx4J0xbkblhJkUzN21WiilsP9iVP+zkw==", - "dependencies": { - "@docusaurus/core": "2.4.0", - "@docusaurus/types": "2.4.0", - "@docusaurus/utils-validation": "2.4.0", - "tslib": "^2.4.0" - }, - "engines": { - "node": ">=16.14" - }, - "peerDependencies": { - "react": "^16.8.4 || ^17.0.0", - "react-dom": "^16.8.4 || ^17.0.0" - } - }, - "node_modules/@docusaurus/plugin-google-gtag": { - "version": "2.4.0", - "resolved": 
"https://registry.npmjs.org/@docusaurus/plugin-google-gtag/-/plugin-google-gtag-2.4.0.tgz", - "integrity": "sha512-adj/70DANaQs2+TF/nRdMezDXFAV/O/pjAbUgmKBlyOTq5qoMe0Tk4muvQIwWUmiUQxFJe+sKlZGM771ownyOg==", - "dependencies": { - "@docusaurus/core": "2.4.0", - "@docusaurus/types": "2.4.0", - "@docusaurus/utils-validation": "2.4.0", - "tslib": "^2.4.0" - }, - "engines": { - "node": ">=16.14" - }, - "peerDependencies": { - "react": "^16.8.4 || ^17.0.0", - "react-dom": "^16.8.4 || ^17.0.0" - } - }, - "node_modules/@docusaurus/plugin-google-tag-manager": { - "version": "2.4.0", - "resolved": "https://registry.npmjs.org/@docusaurus/plugin-google-tag-manager/-/plugin-google-tag-manager-2.4.0.tgz", - "integrity": "sha512-E66uGcYs4l7yitmp/8kMEVQftFPwV9iC62ORh47Veqzs6ExwnhzBkJmwDnwIysHBF1vlxnzET0Fl2LfL5fRR3A==", - "dependencies": { - "@docusaurus/core": "2.4.0", - "@docusaurus/types": "2.4.0", - "@docusaurus/utils-validation": "2.4.0", - "tslib": "^2.4.0" - }, - "engines": { - "node": ">=16.14" - }, - "peerDependencies": { - "react": "^16.8.4 || ^17.0.0", - "react-dom": "^16.8.4 || ^17.0.0" - } - }, - "node_modules/@docusaurus/plugin-sitemap": { - "version": "2.4.0", - "resolved": "https://registry.npmjs.org/@docusaurus/plugin-sitemap/-/plugin-sitemap-2.4.0.tgz", - "integrity": "sha512-pZxh+ygfnI657sN8a/FkYVIAmVv0CGk71QMKqJBOfMmDHNN1FeDeFkBjWP49ejBqpqAhjufkv5UWq3UOu2soCw==", - "dependencies": { - "@docusaurus/core": "2.4.0", - "@docusaurus/logger": "2.4.0", - "@docusaurus/types": "2.4.0", - "@docusaurus/utils": "2.4.0", - "@docusaurus/utils-common": "2.4.0", - "@docusaurus/utils-validation": "2.4.0", - "fs-extra": "^10.1.0", - "sitemap": "^7.1.1", - "tslib": "^2.4.0" - }, - "engines": { - "node": ">=16.14" - }, - "peerDependencies": { - "react": "^16.8.4 || ^17.0.0", - "react-dom": "^16.8.4 || ^17.0.0" - } - }, - "node_modules/@docusaurus/preset-classic": { - "version": "2.4.0", - "resolved": "https://registry.npmjs.org/@docusaurus/preset-classic/-/preset-classic-2.4.0.tgz", 
- "integrity": "sha512-/5z5o/9bc6+P5ool2y01PbJhoGddEGsC0ej1MF6mCoazk8A+kW4feoUd68l7Bnv01rCnG3xy7kHUQP97Y0grUA==", - "dependencies": { - "@docusaurus/core": "2.4.0", - "@docusaurus/plugin-content-blog": "2.4.0", - "@docusaurus/plugin-content-docs": "2.4.0", - "@docusaurus/plugin-content-pages": "2.4.0", - "@docusaurus/plugin-debug": "2.4.0", - "@docusaurus/plugin-google-analytics": "2.4.0", - "@docusaurus/plugin-google-gtag": "2.4.0", - "@docusaurus/plugin-google-tag-manager": "2.4.0", - "@docusaurus/plugin-sitemap": "2.4.0", - "@docusaurus/theme-classic": "2.4.0", - "@docusaurus/theme-common": "2.4.0", - "@docusaurus/theme-search-algolia": "2.4.0", - "@docusaurus/types": "2.4.0" - }, - "engines": { - "node": ">=16.14" - }, - "peerDependencies": { - "react": "^16.8.4 || ^17.0.0", - "react-dom": "^16.8.4 || ^17.0.0" - } - }, - "node_modules/@docusaurus/react-loadable": { - "version": "5.5.2", - "resolved": "https://registry.npmjs.org/@docusaurus/react-loadable/-/react-loadable-5.5.2.tgz", - "integrity": "sha512-A3dYjdBGuy0IGT+wyLIGIKLRE+sAk1iNk0f1HjNDysO7u8lhL4N3VEm+FAubmJbAztn94F7MxBTPmnixbiyFdQ==", - "dependencies": { - "@types/react": "*", - "prop-types": "^15.6.2" - }, - "peerDependencies": { - "react": "*" - } - }, - "node_modules/@docusaurus/remark-plugin-npm2yarn": { - "version": "2.4.1", - "resolved": "https://registry.npmjs.org/@docusaurus/remark-plugin-npm2yarn/-/remark-plugin-npm2yarn-2.4.1.tgz", - "integrity": "sha512-RTX4hGCrwibqjDVf6edWVNwdvWHjx+YmfKwxqXxfhNnYjypTCXWTAyKeIfCUW2DNdtqAI2ZM0zFhB1maua2JbQ==", - "dependencies": { - "npm-to-yarn": "^2.0.0", - "tslib": "^2.4.1", - "unist-util-visit": "^2.0.3" - }, - "engines": { - "node": ">=16.14" - } - }, - "node_modules/@docusaurus/theme-classic": { - "version": "2.4.0", - "resolved": "https://registry.npmjs.org/@docusaurus/theme-classic/-/theme-classic-2.4.0.tgz", - "integrity": "sha512-GMDX5WU6Z0OC65eQFgl3iNNEbI9IMJz9f6KnOyuMxNUR6q0qVLsKCNopFUDfFNJ55UU50o7P7o21yVhkwpfJ9w==", - "dependencies": { - 
"@docusaurus/core": "2.4.0", - "@docusaurus/mdx-loader": "2.4.0", - "@docusaurus/module-type-aliases": "2.4.0", - "@docusaurus/plugin-content-blog": "2.4.0", - "@docusaurus/plugin-content-docs": "2.4.0", - "@docusaurus/plugin-content-pages": "2.4.0", - "@docusaurus/theme-common": "2.4.0", - "@docusaurus/theme-translations": "2.4.0", - "@docusaurus/types": "2.4.0", - "@docusaurus/utils": "2.4.0", - "@docusaurus/utils-common": "2.4.0", - "@docusaurus/utils-validation": "2.4.0", - "@mdx-js/react": "^1.6.22", - "clsx": "^1.2.1", - "copy-text-to-clipboard": "^3.0.1", - "infima": "0.2.0-alpha.43", - "lodash": "^4.17.21", - "nprogress": "^0.2.0", - "postcss": "^8.4.14", - "prism-react-renderer": "^1.3.5", - "prismjs": "^1.28.0", - "react-router-dom": "^5.3.3", - "rtlcss": "^3.5.0", - "tslib": "^2.4.0", - "utility-types": "^3.10.0" - }, - "engines": { - "node": ">=16.14" - }, - "peerDependencies": { - "react": "^16.8.4 || ^17.0.0", - "react-dom": "^16.8.4 || ^17.0.0" - } - }, - "node_modules/@docusaurus/theme-common": { - "version": "2.4.0", - "resolved": "https://registry.npmjs.org/@docusaurus/theme-common/-/theme-common-2.4.0.tgz", - "integrity": "sha512-IkG/l5f/FLY6cBIxtPmFnxpuPzc5TupuqlOx+XDN+035MdQcAh8wHXXZJAkTeYDeZ3anIUSUIvWa7/nRKoQEfg==", - "dependencies": { - "@docusaurus/mdx-loader": "2.4.0", - "@docusaurus/module-type-aliases": "2.4.0", - "@docusaurus/plugin-content-blog": "2.4.0", - "@docusaurus/plugin-content-docs": "2.4.0", - "@docusaurus/plugin-content-pages": "2.4.0", - "@docusaurus/utils": "2.4.0", - "@docusaurus/utils-common": "2.4.0", - "@types/history": "^4.7.11", - "@types/react": "*", - "@types/react-router-config": "*", - "clsx": "^1.2.1", - "parse-numeric-range": "^1.3.0", - "prism-react-renderer": "^1.3.5", - "tslib": "^2.4.0", - "use-sync-external-store": "^1.2.0", - "utility-types": "^3.10.0" - }, - "engines": { - "node": ">=16.14" - }, - "peerDependencies": { - "react": "^16.8.4 || ^17.0.0", - "react-dom": "^16.8.4 || ^17.0.0" - } - }, - 
"node_modules/@docusaurus/theme-mermaid": { - "version": "2.4.3", - "resolved": "https://registry.npmjs.org/@docusaurus/theme-mermaid/-/theme-mermaid-2.4.3.tgz", - "integrity": "sha512-S1tZ3xpowtFiTrpTKmvVbRHUYGOlEG5CnPzWlO4huJT1sAwLR+pD6f9DYUlPv2+9NezF3EfUrUyW9xLH0UP58w==", - "dependencies": { - "@docusaurus/core": "2.4.3", - "@docusaurus/module-type-aliases": "2.4.3", - "@docusaurus/theme-common": "2.4.3", - "@docusaurus/types": "2.4.3", - "@docusaurus/utils-validation": "2.4.3", - "@mdx-js/react": "^1.6.22", - "mermaid": "^9.2.2", - "tslib": "^2.4.0" - }, - "engines": { - "node": ">=16.14" - }, - "peerDependencies": { - "react": "^16.8.4 || ^17.0.0", - "react-dom": "^16.8.4 || ^17.0.0" - } - }, - "node_modules/@docusaurus/theme-mermaid/node_modules/@docusaurus/core": { - "version": "2.4.3", - "resolved": "https://registry.npmjs.org/@docusaurus/core/-/core-2.4.3.tgz", - "integrity": "sha512-dWH5P7cgeNSIg9ufReX6gaCl/TmrGKD38Orbwuz05WPhAQtFXHd5B8Qym1TiXfvUNvwoYKkAJOJuGe8ou0Z7PA==", - "dependencies": { - "@babel/core": "^7.18.6", - "@babel/generator": "^7.18.7", - "@babel/plugin-syntax-dynamic-import": "^7.8.3", - "@babel/plugin-transform-runtime": "^7.18.6", - "@babel/preset-env": "^7.18.6", - "@babel/preset-react": "^7.18.6", - "@babel/preset-typescript": "^7.18.6", - "@babel/runtime": "^7.18.6", - "@babel/runtime-corejs3": "^7.18.6", - "@babel/traverse": "^7.18.8", - "@docusaurus/cssnano-preset": "2.4.3", - "@docusaurus/logger": "2.4.3", - "@docusaurus/mdx-loader": "2.4.3", - "@docusaurus/react-loadable": "5.5.2", - "@docusaurus/utils": "2.4.3", - "@docusaurus/utils-common": "2.4.3", - "@docusaurus/utils-validation": "2.4.3", - "@slorber/static-site-generator-webpack-plugin": "^4.0.7", - "@svgr/webpack": "^6.2.1", - "autoprefixer": "^10.4.7", - "babel-loader": "^8.2.5", - "babel-plugin-dynamic-import-node": "^2.3.3", - "boxen": "^6.2.1", - "chalk": "^4.1.2", - "chokidar": "^3.5.3", - "clean-css": "^5.3.0", - "cli-table3": "^0.6.2", - "combine-promises": "^1.1.0", 
- "commander": "^5.1.0", - "copy-webpack-plugin": "^11.0.0", - "core-js": "^3.23.3", - "css-loader": "^6.7.1", - "css-minimizer-webpack-plugin": "^4.0.0", - "cssnano": "^5.1.12", - "del": "^6.1.1", - "detect-port": "^1.3.0", - "escape-html": "^1.0.3", - "eta": "^2.0.0", - "file-loader": "^6.2.0", - "fs-extra": "^10.1.0", - "html-minifier-terser": "^6.1.0", - "html-tags": "^3.2.0", - "html-webpack-plugin": "^5.5.0", - "import-fresh": "^3.3.0", - "leven": "^3.1.0", - "lodash": "^4.17.21", - "mini-css-extract-plugin": "^2.6.1", - "postcss": "^8.4.14", - "postcss-loader": "^7.0.0", - "prompts": "^2.4.2", - "react-dev-utils": "^12.0.1", - "react-helmet-async": "^1.3.0", - "react-loadable": "npm:@docusaurus/react-loadable@5.5.2", - "react-loadable-ssr-addon-v5-slorber": "^1.0.1", - "react-router": "^5.3.3", - "react-router-config": "^5.1.1", - "react-router-dom": "^5.3.3", - "rtl-detect": "^1.0.4", - "semver": "^7.3.7", - "serve-handler": "^6.1.3", - "shelljs": "^0.8.5", - "terser-webpack-plugin": "^5.3.3", - "tslib": "^2.4.0", - "update-notifier": "^5.1.0", - "url-loader": "^4.1.1", - "wait-on": "^6.0.1", - "webpack": "^5.73.0", - "webpack-bundle-analyzer": "^4.5.0", - "webpack-dev-server": "^4.9.3", - "webpack-merge": "^5.8.0", - "webpackbar": "^5.0.2" - }, - "bin": { - "docusaurus": "bin/docusaurus.mjs" - }, - "engines": { - "node": ">=16.14" - }, - "peerDependencies": { - "react": "^16.8.4 || ^17.0.0", - "react-dom": "^16.8.4 || ^17.0.0" - } - }, - "node_modules/@docusaurus/theme-mermaid/node_modules/@docusaurus/cssnano-preset": { - "version": "2.4.3", - "resolved": "https://registry.npmjs.org/@docusaurus/cssnano-preset/-/cssnano-preset-2.4.3.tgz", - "integrity": "sha512-ZvGSRCi7z9wLnZrXNPG6DmVPHdKGd8dIn9pYbEOFiYihfv4uDR3UtxogmKf+rT8ZlKFf5Lqne8E8nt08zNM8CA==", - "dependencies": { - "cssnano-preset-advanced": "^5.3.8", - "postcss": "^8.4.14", - "postcss-sort-media-queries": "^4.2.1", - "tslib": "^2.4.0" - }, - "engines": { - "node": ">=16.14" - } - }, - 
"node_modules/@docusaurus/theme-mermaid/node_modules/@docusaurus/logger": { - "version": "2.4.3", - "resolved": "https://registry.npmjs.org/@docusaurus/logger/-/logger-2.4.3.tgz", - "integrity": "sha512-Zxws7r3yLufk9xM1zq9ged0YHs65mlRmtsobnFkdZTxWXdTYlWWLWdKyNKAsVC+D7zg+pv2fGbyabdOnyZOM3w==", - "dependencies": { - "chalk": "^4.1.2", - "tslib": "^2.4.0" - }, - "engines": { - "node": ">=16.14" - } - }, - "node_modules/@docusaurus/theme-mermaid/node_modules/@docusaurus/mdx-loader": { "version": "2.4.3", "resolved": "https://registry.npmjs.org/@docusaurus/mdx-loader/-/mdx-loader-2.4.3.tgz", "integrity": "sha512-b1+fDnWtl3GiqkL0BRjYtc94FZrcDDBV1j8446+4tptB9BAOlePwG2p/pK6vGvfL53lkOsszXMghr2g67M0vCw==", @@ -2837,7 +2355,7 @@ "react-dom": "^16.8.4 || ^17.0.0" } }, - "node_modules/@docusaurus/theme-mermaid/node_modules/@docusaurus/module-type-aliases": { + "node_modules/@docusaurus/module-type-aliases": { "version": "2.4.3", "resolved": "https://registry.npmjs.org/@docusaurus/module-type-aliases/-/module-type-aliases-2.4.3.tgz", "integrity": "sha512-cwkBkt1UCiduuvEAo7XZY01dJfRn7UR/75mBgOdb1hKknhrabJZ8YH+7savd/y9kLExPyrhe0QwdS9GuzsRRIA==", @@ -2856,7 +2374,7 @@ "react-dom": "*" } }, - "node_modules/@docusaurus/theme-mermaid/node_modules/@docusaurus/plugin-content-blog": { + "node_modules/@docusaurus/plugin-content-blog": { "version": "2.4.3", "resolved": "https://registry.npmjs.org/@docusaurus/plugin-content-blog/-/plugin-content-blog-2.4.3.tgz", "integrity": "sha512-PVhypqaA0t98zVDpOeTqWUTvRqCEjJubtfFUQ7zJNYdbYTbS/E/ytq6zbLVsN/dImvemtO/5JQgjLxsh8XLo8Q==", @@ -2886,7 +2404,7 @@ "react-dom": "^16.8.4 || ^17.0.0" } }, - "node_modules/@docusaurus/theme-mermaid/node_modules/@docusaurus/plugin-content-docs": { + "node_modules/@docusaurus/plugin-content-docs": { "version": "2.4.3", "resolved": "https://registry.npmjs.org/@docusaurus/plugin-content-docs/-/plugin-content-docs-2.4.3.tgz", "integrity": 
"sha512-N7Po2LSH6UejQhzTCsvuX5NOzlC+HiXOVvofnEPj0WhMu1etpLEXE6a4aTxrtg95lQ5kf0xUIdjX9sh3d3G76A==", @@ -2916,7 +2434,7 @@ "react-dom": "^16.8.4 || ^17.0.0" } }, - "node_modules/@docusaurus/theme-mermaid/node_modules/@docusaurus/plugin-content-pages": { + "node_modules/@docusaurus/plugin-content-pages": { "version": "2.4.3", "resolved": "https://registry.npmjs.org/@docusaurus/plugin-content-pages/-/plugin-content-pages-2.4.3.tgz", "integrity": "sha512-txtDVz7y3zGk67q0HjG0gRttVPodkHqE0bpJ+7dOaTH40CQFLSh7+aBeGnPOTl+oCPG+hxkim4SndqPqXjQ8Bg==", @@ -2938,27 +2456,17 @@ "react-dom": "^16.8.4 || ^17.0.0" } }, - "node_modules/@docusaurus/theme-mermaid/node_modules/@docusaurus/theme-common": { + "node_modules/@docusaurus/plugin-debug": { "version": "2.4.3", - "resolved": "https://registry.npmjs.org/@docusaurus/theme-common/-/theme-common-2.4.3.tgz", - "integrity": "sha512-7KaDJBXKBVGXw5WOVt84FtN8czGWhM0lbyWEZXGp8AFfL6sZQfRTluFp4QriR97qwzSyOfQb+nzcDZZU4tezUw==", + "resolved": "https://registry.npmjs.org/@docusaurus/plugin-debug/-/plugin-debug-2.4.3.tgz", + "integrity": "sha512-LkUbuq3zCmINlFb+gAd4ZvYr+bPAzMC0hwND4F7V9bZ852dCX8YoWyovVUBKq4er1XsOwSQaHmNGtObtn8Av8Q==", "dependencies": { - "@docusaurus/mdx-loader": "2.4.3", - "@docusaurus/module-type-aliases": "2.4.3", - "@docusaurus/plugin-content-blog": "2.4.3", - "@docusaurus/plugin-content-docs": "2.4.3", - "@docusaurus/plugin-content-pages": "2.4.3", + "@docusaurus/core": "2.4.3", + "@docusaurus/types": "2.4.3", "@docusaurus/utils": "2.4.3", - "@docusaurus/utils-common": "2.4.3", - "@types/history": "^4.7.11", - "@types/react": "*", - "@types/react-router-config": "*", - "clsx": "^1.2.1", - "parse-numeric-range": "^1.3.0", - "prism-react-renderer": "^1.3.5", - "tslib": "^2.4.0", - "use-sync-external-store": "^1.2.0", - "utility-types": "^3.10.0" + "fs-extra": "^10.1.0", + "react-json-view": "^1.21.3", + "tslib": "^2.4.0" }, "engines": { "node": ">=16.14" @@ -2968,200 +2476,239 @@ "react-dom": "^16.8.4 || ^17.0.0" } }, - 
"node_modules/@docusaurus/theme-mermaid/node_modules/@docusaurus/types": { + "node_modules/@docusaurus/plugin-google-analytics": { "version": "2.4.3", - "resolved": "https://registry.npmjs.org/@docusaurus/types/-/types-2.4.3.tgz", - "integrity": "sha512-W6zNLGQqfrp/EoPD0bhb9n7OobP+RHpmvVzpA+Z/IuU3Q63njJM24hmT0GYboovWcDtFmnIJC9wcyx4RVPQscw==", + "resolved": "https://registry.npmjs.org/@docusaurus/plugin-google-analytics/-/plugin-google-analytics-2.4.3.tgz", + "integrity": "sha512-KzBV3k8lDkWOhg/oYGxlK5o9bOwX7KpPc/FTWoB+SfKhlHfhq7qcQdMi1elAaVEIop8tgK6gD1E58Q+XC6otSQ==", "dependencies": { - "@types/history": "^4.7.11", - "@types/react": "*", - "commander": "^5.1.0", - "joi": "^17.6.0", - "react-helmet-async": "^1.3.0", - "utility-types": "^3.10.0", - "webpack": "^5.73.0", - "webpack-merge": "^5.8.0" + "@docusaurus/core": "2.4.3", + "@docusaurus/types": "2.4.3", + "@docusaurus/utils-validation": "2.4.3", + "tslib": "^2.4.0" + }, + "engines": { + "node": ">=16.14" }, "peerDependencies": { "react": "^16.8.4 || ^17.0.0", "react-dom": "^16.8.4 || ^17.0.0" } }, - "node_modules/@docusaurus/theme-mermaid/node_modules/@docusaurus/utils": { + "node_modules/@docusaurus/plugin-google-gtag": { "version": "2.4.3", - "resolved": "https://registry.npmjs.org/@docusaurus/utils/-/utils-2.4.3.tgz", - "integrity": "sha512-fKcXsjrD86Smxv8Pt0TBFqYieZZCPh4cbf9oszUq/AMhZn3ujwpKaVYZACPX8mmjtYx0JOgNx52CREBfiGQB4A==", + "resolved": "https://registry.npmjs.org/@docusaurus/plugin-google-gtag/-/plugin-google-gtag-2.4.3.tgz", + "integrity": "sha512-5FMg0rT7sDy4i9AGsvJC71MQrqQZwgLNdDetLEGDHLfSHLvJhQbTCUGbGXknUgWXQJckcV/AILYeJy+HhxeIFA==", "dependencies": { - "@docusaurus/logger": "2.4.3", - "@svgr/webpack": "^6.2.1", - "escape-string-regexp": "^4.0.0", - "file-loader": "^6.2.0", - "fs-extra": "^10.1.0", - "github-slugger": "^1.4.0", - "globby": "^11.1.0", - "gray-matter": "^4.0.3", - "js-yaml": "^4.1.0", - "lodash": "^4.17.21", - "micromatch": "^4.0.5", - "resolve-pathname": "^3.0.0", - "shelljs": 
"^0.8.5", - "tslib": "^2.4.0", - "url-loader": "^4.1.1", - "webpack": "^5.73.0" + "@docusaurus/core": "2.4.3", + "@docusaurus/types": "2.4.3", + "@docusaurus/utils-validation": "2.4.3", + "tslib": "^2.4.0" }, "engines": { "node": ">=16.14" }, "peerDependencies": { - "@docusaurus/types": "*" - }, - "peerDependenciesMeta": { - "@docusaurus/types": { - "optional": true - } + "react": "^16.8.4 || ^17.0.0", + "react-dom": "^16.8.4 || ^17.0.0" } }, - "node_modules/@docusaurus/theme-mermaid/node_modules/@docusaurus/utils-common": { + "node_modules/@docusaurus/plugin-google-tag-manager": { "version": "2.4.3", - "resolved": "https://registry.npmjs.org/@docusaurus/utils-common/-/utils-common-2.4.3.tgz", - "integrity": "sha512-/jascp4GbLQCPVmcGkPzEQjNaAk3ADVfMtudk49Ggb+131B1WDD6HqlSmDf8MxGdy7Dja2gc+StHf01kiWoTDQ==", + "resolved": "https://registry.npmjs.org/@docusaurus/plugin-google-tag-manager/-/plugin-google-tag-manager-2.4.3.tgz", + "integrity": "sha512-1jTzp71yDGuQiX9Bi0pVp3alArV0LSnHXempvQTxwCGAEzUWWaBg4d8pocAlTpbP9aULQQqhgzrs8hgTRPOM0A==", "dependencies": { + "@docusaurus/core": "2.4.3", + "@docusaurus/types": "2.4.3", + "@docusaurus/utils-validation": "2.4.3", "tslib": "^2.4.0" }, "engines": { "node": ">=16.14" }, "peerDependencies": { - "@docusaurus/types": "*" - }, - "peerDependenciesMeta": { - "@docusaurus/types": { - "optional": true - } + "react": "^16.8.4 || ^17.0.0", + "react-dom": "^16.8.4 || ^17.0.0" } }, - "node_modules/@docusaurus/theme-mermaid/node_modules/@docusaurus/utils-validation": { - "version": "2.4.3", - "resolved": "https://registry.npmjs.org/@docusaurus/utils-validation/-/utils-validation-2.4.3.tgz", - "integrity": "sha512-G2+Vt3WR5E/9drAobP+hhZQMaswRwDlp6qOMi7o7ZypB+VO7N//DZWhZEwhcRGepMDJGQEwtPv7UxtYwPL9PBw==", + "node_modules/@docusaurus/plugin-sitemap": { + "version": "2.4.3", + "resolved": "https://registry.npmjs.org/@docusaurus/plugin-sitemap/-/plugin-sitemap-2.4.3.tgz", + "integrity": 
"sha512-LRQYrK1oH1rNfr4YvWBmRzTL0LN9UAPxBbghgeFRBm5yloF6P+zv1tm2pe2hQTX/QP5bSKdnajCvfnScgKXMZQ==", "dependencies": { + "@docusaurus/core": "2.4.3", "@docusaurus/logger": "2.4.3", + "@docusaurus/types": "2.4.3", "@docusaurus/utils": "2.4.3", - "joi": "^17.6.0", - "js-yaml": "^4.1.0", + "@docusaurus/utils-common": "2.4.3", + "@docusaurus/utils-validation": "2.4.3", + "fs-extra": "^10.1.0", + "sitemap": "^7.1.1", "tslib": "^2.4.0" }, "engines": { "node": ">=16.14" + }, + "peerDependencies": { + "react": "^16.8.4 || ^17.0.0", + "react-dom": "^16.8.4 || ^17.0.0" } }, - "node_modules/@docusaurus/theme-mermaid/node_modules/ansi-styles": { - "version": "4.3.0", - "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", - "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==", + "node_modules/@docusaurus/preset-classic": { + "version": "2.4.3", + "resolved": "https://registry.npmjs.org/@docusaurus/preset-classic/-/preset-classic-2.4.3.tgz", + "integrity": "sha512-tRyMliepY11Ym6hB1rAFSNGwQDpmszvWYJvlK1E+md4SW8i6ylNHtpZjaYFff9Mdk3i/Pg8ItQq9P0daOJAvQw==", "dependencies": { - "color-convert": "^2.0.1" + "@docusaurus/core": "2.4.3", + "@docusaurus/plugin-content-blog": "2.4.3", + "@docusaurus/plugin-content-docs": "2.4.3", + "@docusaurus/plugin-content-pages": "2.4.3", + "@docusaurus/plugin-debug": "2.4.3", + "@docusaurus/plugin-google-analytics": "2.4.3", + "@docusaurus/plugin-google-gtag": "2.4.3", + "@docusaurus/plugin-google-tag-manager": "2.4.3", + "@docusaurus/plugin-sitemap": "2.4.3", + "@docusaurus/theme-classic": "2.4.3", + "@docusaurus/theme-common": "2.4.3", + "@docusaurus/theme-search-algolia": "2.4.3", + "@docusaurus/types": "2.4.3" }, "engines": { - "node": ">=8" + "node": ">=16.14" }, - "funding": { - "url": "https://github.com/chalk/ansi-styles?sponsor=1" + "peerDependencies": { + "react": "^16.8.4 || ^17.0.0", + "react-dom": "^16.8.4 || ^17.0.0" } }, - 
"node_modules/@docusaurus/theme-mermaid/node_modules/chalk": { - "version": "4.1.2", - "resolved": "https://registry.npmjs.org/chalk/-/chalk-4.1.2.tgz", - "integrity": "sha512-oKnbhFyRIXpUuez8iBMmyEa4nbj4IOQyuhc/wy9kY7/WVPcwIO9VA668Pu8RkO7+0G76SLROeyw9CpQ061i4mA==", + "node_modules/@docusaurus/react-loadable": { + "version": "5.5.2", + "resolved": "https://registry.npmjs.org/@docusaurus/react-loadable/-/react-loadable-5.5.2.tgz", + "integrity": "sha512-A3dYjdBGuy0IGT+wyLIGIKLRE+sAk1iNk0f1HjNDysO7u8lhL4N3VEm+FAubmJbAztn94F7MxBTPmnixbiyFdQ==", "dependencies": { - "ansi-styles": "^4.1.0", - "supports-color": "^7.1.0" - }, - "engines": { - "node": ">=10" + "@types/react": "*", + "prop-types": "^15.6.2" }, - "funding": { - "url": "https://github.com/chalk/chalk?sponsor=1" + "peerDependencies": { + "react": "*" } }, - "node_modules/@docusaurus/theme-mermaid/node_modules/color-convert": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", - "integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==", + "node_modules/@docusaurus/remark-plugin-npm2yarn": { + "version": "2.4.3", + "resolved": "https://registry.npmjs.org/@docusaurus/remark-plugin-npm2yarn/-/remark-plugin-npm2yarn-2.4.3.tgz", + "integrity": "sha512-gBSzRPls2Nc1M+X8hTXLUJQ8mk+pTet7CGeTtgV02Hsk1G22yHYoCBW3r1aIqR27mVUHW9O5X4AN/Dt8x8Ifkg==", "dependencies": { - "color-name": "~1.1.4" + "npm-to-yarn": "^2.0.0", + "tslib": "^2.4.1", + "unist-util-visit": "^2.0.3" }, "engines": { - "node": ">=7.0.0" - } - }, - "node_modules/@docusaurus/theme-mermaid/node_modules/color-name": { - "version": "1.1.4", - "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", - "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==" - }, - "node_modules/@docusaurus/theme-mermaid/node_modules/has-flag": { - "version": "4.0.0", - "resolved": 
"https://registry.npmjs.org/has-flag/-/has-flag-4.0.0.tgz", - "integrity": "sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==", - "engines": { - "node": ">=8" + "node": ">=16.14" } }, - "node_modules/@docusaurus/theme-mermaid/node_modules/lru-cache": { - "version": "6.0.0", - "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-6.0.0.tgz", - "integrity": "sha512-Jo6dJ04CmSjuznwJSS3pUeWmd/H0ffTlkXXgwZi+eq1UCmqQwCh+eLsYOYCwY991i2Fah4h1BEMCx4qThGbsiA==", + "node_modules/@docusaurus/theme-classic": { + "version": "2.4.3", + "resolved": "https://registry.npmjs.org/@docusaurus/theme-classic/-/theme-classic-2.4.3.tgz", + "integrity": "sha512-QKRAJPSGPfDY2yCiPMIVyr+MqwZCIV2lxNzqbyUW0YkrlmdzzP3WuQJPMGLCjWgQp/5c9kpWMvMxjhpZx1R32Q==", "dependencies": { - "yallist": "^4.0.0" + "@docusaurus/core": "2.4.3", + "@docusaurus/mdx-loader": "2.4.3", + "@docusaurus/module-type-aliases": "2.4.3", + "@docusaurus/plugin-content-blog": "2.4.3", + "@docusaurus/plugin-content-docs": "2.4.3", + "@docusaurus/plugin-content-pages": "2.4.3", + "@docusaurus/theme-common": "2.4.3", + "@docusaurus/theme-translations": "2.4.3", + "@docusaurus/types": "2.4.3", + "@docusaurus/utils": "2.4.3", + "@docusaurus/utils-common": "2.4.3", + "@docusaurus/utils-validation": "2.4.3", + "@mdx-js/react": "^1.6.22", + "clsx": "^1.2.1", + "copy-text-to-clipboard": "^3.0.1", + "infima": "0.2.0-alpha.43", + "lodash": "^4.17.21", + "nprogress": "^0.2.0", + "postcss": "^8.4.14", + "prism-react-renderer": "^1.3.5", + "prismjs": "^1.28.0", + "react-router-dom": "^5.3.3", + "rtlcss": "^3.5.0", + "tslib": "^2.4.0", + "utility-types": "^3.10.0" }, "engines": { - "node": ">=10" + "node": ">=16.14" + }, + "peerDependencies": { + "react": "^16.8.4 || ^17.0.0", + "react-dom": "^16.8.4 || ^17.0.0" } }, - "node_modules/@docusaurus/theme-mermaid/node_modules/semver": { - "version": "7.5.4", - "resolved": "https://registry.npmjs.org/semver/-/semver-7.5.4.tgz", - "integrity": 
"sha512-1bCSESV6Pv+i21Hvpxp3Dx+pSD8lIPt8uVjRrxAUt/nbswYc+tK6Y2btiULjd4+fnq15PX+nqQDC7Oft7WkwcA==", + "node_modules/@docusaurus/theme-common": { + "version": "2.4.3", + "resolved": "https://registry.npmjs.org/@docusaurus/theme-common/-/theme-common-2.4.3.tgz", + "integrity": "sha512-7KaDJBXKBVGXw5WOVt84FtN8czGWhM0lbyWEZXGp8AFfL6sZQfRTluFp4QriR97qwzSyOfQb+nzcDZZU4tezUw==", "dependencies": { - "lru-cache": "^6.0.0" - }, - "bin": { - "semver": "bin/semver.js" + "@docusaurus/mdx-loader": "2.4.3", + "@docusaurus/module-type-aliases": "2.4.3", + "@docusaurus/plugin-content-blog": "2.4.3", + "@docusaurus/plugin-content-docs": "2.4.3", + "@docusaurus/plugin-content-pages": "2.4.3", + "@docusaurus/utils": "2.4.3", + "@docusaurus/utils-common": "2.4.3", + "@types/history": "^4.7.11", + "@types/react": "*", + "@types/react-router-config": "*", + "clsx": "^1.2.1", + "parse-numeric-range": "^1.3.0", + "prism-react-renderer": "^1.3.5", + "tslib": "^2.4.0", + "use-sync-external-store": "^1.2.0", + "utility-types": "^3.10.0" }, "engines": { - "node": ">=10" + "node": ">=16.14" + }, + "peerDependencies": { + "react": "^16.8.4 || ^17.0.0", + "react-dom": "^16.8.4 || ^17.0.0" } }, - "node_modules/@docusaurus/theme-mermaid/node_modules/supports-color": { - "version": "7.2.0", - "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-7.2.0.tgz", - "integrity": "sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw==", + "node_modules/@docusaurus/theme-mermaid": { + "version": "2.4.3", + "resolved": "https://registry.npmjs.org/@docusaurus/theme-mermaid/-/theme-mermaid-2.4.3.tgz", + "integrity": "sha512-S1tZ3xpowtFiTrpTKmvVbRHUYGOlEG5CnPzWlO4huJT1sAwLR+pD6f9DYUlPv2+9NezF3EfUrUyW9xLH0UP58w==", "dependencies": { - "has-flag": "^4.0.0" + "@docusaurus/core": "2.4.3", + "@docusaurus/module-type-aliases": "2.4.3", + "@docusaurus/theme-common": "2.4.3", + "@docusaurus/types": "2.4.3", + "@docusaurus/utils-validation": "2.4.3", + 
"@mdx-js/react": "^1.6.22", + "mermaid": "^9.2.2", + "tslib": "^2.4.0" }, "engines": { - "node": ">=8" + "node": ">=16.14" + }, + "peerDependencies": { + "react": "^16.8.4 || ^17.0.0", + "react-dom": "^16.8.4 || ^17.0.0" } }, - "node_modules/@docusaurus/theme-mermaid/node_modules/yallist": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/yallist/-/yallist-4.0.0.tgz", - "integrity": "sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A==" - }, "node_modules/@docusaurus/theme-search-algolia": { - "version": "2.4.0", - "resolved": "https://registry.npmjs.org/@docusaurus/theme-search-algolia/-/theme-search-algolia-2.4.0.tgz", - "integrity": "sha512-pPCJSCL1Qt4pu/Z0uxBAuke0yEBbxh0s4fOvimna7TEcBLPq0x06/K78AaABXrTVQM6S0vdocFl9EoNgU17hqA==", + "version": "2.4.3", + "resolved": "https://registry.npmjs.org/@docusaurus/theme-search-algolia/-/theme-search-algolia-2.4.3.tgz", + "integrity": "sha512-jziq4f6YVUB5hZOB85ELATwnxBz/RmSLD3ksGQOLDPKVzat4pmI8tddNWtriPpxR04BNT+ZfpPUMFkNFetSW1Q==", "dependencies": { "@docsearch/react": "^3.1.1", - "@docusaurus/core": "2.4.0", - "@docusaurus/logger": "2.4.0", - "@docusaurus/plugin-content-docs": "2.4.0", - "@docusaurus/theme-common": "2.4.0", - "@docusaurus/theme-translations": "2.4.0", - "@docusaurus/utils": "2.4.0", - "@docusaurus/utils-validation": "2.4.0", + "@docusaurus/core": "2.4.3", + "@docusaurus/logger": "2.4.3", + "@docusaurus/plugin-content-docs": "2.4.3", + "@docusaurus/theme-common": "2.4.3", + "@docusaurus/theme-translations": "2.4.3", + "@docusaurus/utils": "2.4.3", + "@docusaurus/utils-validation": "2.4.3", "algoliasearch": "^4.13.1", "algoliasearch-helper": "^3.10.0", "clsx": "^1.2.1", @@ -3180,9 +2727,9 @@ } }, "node_modules/@docusaurus/theme-translations": { - "version": "2.4.0", - "resolved": "https://registry.npmjs.org/@docusaurus/theme-translations/-/theme-translations-2.4.0.tgz", - "integrity": 
"sha512-kEoITnPXzDPUMBHk3+fzEzbopxLD3fR5sDoayNH0vXkpUukA88/aDL1bqkhxWZHA3LOfJ3f0vJbOwmnXW5v85Q==", + "version": "2.4.3", + "resolved": "https://registry.npmjs.org/@docusaurus/theme-translations/-/theme-translations-2.4.3.tgz", + "integrity": "sha512-H4D+lbZbjbKNS/Zw1Lel64PioUAIT3cLYYJLUf3KkuO/oc9e0QCVhIYVtUI2SfBCF2NNdlyhBDQEEMygsCedIg==", "dependencies": { "fs-extra": "^10.1.0", "tslib": "^2.4.0" @@ -3192,9 +2739,9 @@ } }, "node_modules/@docusaurus/types": { - "version": "2.4.0", - "resolved": "https://registry.npmjs.org/@docusaurus/types/-/types-2.4.0.tgz", - "integrity": "sha512-xaBXr+KIPDkIaef06c+i2HeTqVNixB7yFut5fBXPGI2f1rrmEV2vLMznNGsFwvZ5XmA3Quuefd4OGRkdo97Dhw==", + "version": "2.4.3", + "resolved": "https://registry.npmjs.org/@docusaurus/types/-/types-2.4.3.tgz", + "integrity": "sha512-W6zNLGQqfrp/EoPD0bhb9n7OobP+RHpmvVzpA+Z/IuU3Q63njJM24hmT0GYboovWcDtFmnIJC9wcyx4RVPQscw==", "dependencies": { "@types/history": "^4.7.11", "@types/react": "*", @@ -3211,11 +2758,11 @@ } }, "node_modules/@docusaurus/utils": { - "version": "2.4.0", - "resolved": "https://registry.npmjs.org/@docusaurus/utils/-/utils-2.4.0.tgz", - "integrity": "sha512-89hLYkvtRX92j+C+ERYTuSUK6nF9bGM32QThcHPg2EDDHVw6FzYQXmX6/p+pU5SDyyx5nBlE4qXR92RxCAOqfg==", + "version": "2.4.3", + "resolved": "https://registry.npmjs.org/@docusaurus/utils/-/utils-2.4.3.tgz", + "integrity": "sha512-fKcXsjrD86Smxv8Pt0TBFqYieZZCPh4cbf9oszUq/AMhZn3ujwpKaVYZACPX8mmjtYx0JOgNx52CREBfiGQB4A==", "dependencies": { - "@docusaurus/logger": "2.4.0", + "@docusaurus/logger": "2.4.3", "@svgr/webpack": "^6.2.1", "escape-string-regexp": "^4.0.0", "file-loader": "^6.2.0", @@ -3245,9 +2792,9 @@ } }, "node_modules/@docusaurus/utils-common": { - "version": "2.4.0", - "resolved": "https://registry.npmjs.org/@docusaurus/utils-common/-/utils-common-2.4.0.tgz", - "integrity": "sha512-zIMf10xuKxddYfLg5cS19x44zud/E9I7lj3+0bv8UIs0aahpErfNrGhijEfJpAfikhQ8tL3m35nH3hJ3sOG82A==", + "version": "2.4.3", + "resolved": 
"https://registry.npmjs.org/@docusaurus/utils-common/-/utils-common-2.4.3.tgz", + "integrity": "sha512-/jascp4GbLQCPVmcGkPzEQjNaAk3ADVfMtudk49Ggb+131B1WDD6HqlSmDf8MxGdy7Dja2gc+StHf01kiWoTDQ==", "dependencies": { "tslib": "^2.4.0" }, @@ -3264,12 +2811,12 @@ } }, "node_modules/@docusaurus/utils-validation": { - "version": "2.4.0", - "resolved": "https://registry.npmjs.org/@docusaurus/utils-validation/-/utils-validation-2.4.0.tgz", - "integrity": "sha512-IrBsBbbAp6y7mZdJx4S4pIA7dUyWSA0GNosPk6ZJ0fX3uYIEQgcQSGIgTeSC+8xPEx3c16o03en1jSDpgQgz/w==", + "version": "2.4.3", + "resolved": "https://registry.npmjs.org/@docusaurus/utils-validation/-/utils-validation-2.4.3.tgz", + "integrity": "sha512-G2+Vt3WR5E/9drAobP+hhZQMaswRwDlp6qOMi7o7ZypB+VO7N//DZWhZEwhcRGepMDJGQEwtPv7UxtYwPL9PBw==", "dependencies": { - "@docusaurus/logger": "2.4.0", - "@docusaurus/utils": "2.4.0", + "@docusaurus/logger": "2.4.3", + "@docusaurus/utils": "2.4.3", "joi": "^17.6.0", "js-yaml": "^4.1.0", "tslib": "^2.4.0" @@ -3697,19 +3244,6 @@ "url": "https://opencollective.com/unified" } }, - "node_modules/@mendable/search": { - "version": "0.0.160", - "resolved": "https://registry.npmjs.org/@mendable/search/-/search-0.0.160.tgz", - "integrity": "sha512-Lq9Cy176iVeUlSS9PALyc0KPgMWv9MELgsDKXKLhyoPS85yQXs0uEpC2Zgf9i+R4jar5PibKZPh2Hj2xIm/Ajg==", - "dependencies": { - "html-react-parser": "^4.2.0", - "posthog-js": "^1.45.1" - }, - "peerDependencies": { - "react": "^17.x || ^18.x", - "react-dom": "^17.x || ^18.x" - } - }, "node_modules/@nicolo-ribaudo/eslint-scope-5-internals": { "version": "5.1.1-v1", "resolved": "https://registry.npmjs.org/@nicolo-ribaudo/eslint-scope-5-internals/-/eslint-scope-5-internals-5.1.1-v1.tgz", @@ -4150,9 +3684,9 @@ } }, "node_modules/@types/hast": { - "version": "2.3.5", - "resolved": "https://registry.npmjs.org/@types/hast/-/hast-2.3.5.tgz", - "integrity": "sha512-SvQi0L/lNpThgPoleH53cdjB3y9zpLlVjRbqB3rH8hx1jiRSBGAhyjV3H+URFjNVRqt2EdYNrbZE5IsGlNfpRg==", + "version": "2.3.8", + 
"resolved": "https://registry.npmjs.org/@types/hast/-/hast-2.3.8.tgz", + "integrity": "sha512-aMIqAlFd2wTIDZuvLbhUT+TGvMxrNC8ECUIVtH6xxy0sQLs3iu6NO8Kp/VT5je7i5ufnebXzdV1dNDMnvaH6IQ==", "dependencies": { "@types/unist": "^2" } @@ -4213,9 +3747,9 @@ "dev": true }, "node_modules/@types/mdast": { - "version": "3.0.12", - "resolved": "https://registry.npmjs.org/@types/mdast/-/mdast-3.0.12.tgz", - "integrity": "sha512-DT+iNIRNX884cx0/Q1ja7NyUPpZuv0KPyL5rGNxm1WC1OtHstl7n4Jb7nk+xacNShQMbczJjt8uFzznpp6kYBg==", + "version": "3.0.15", + "resolved": "https://registry.npmjs.org/@types/mdast/-/mdast-3.0.15.tgz", + "integrity": "sha512-LnwD+mUEfxWMa1QpDraczIn6k0Ee3SMicuYSSzS6ZYl2gKS09EClnJYGd8Du6rfc5r/GZEk5o1mRb8TaTj03sQ==", "dependencies": { "@types/unist": "^2" } @@ -4275,9 +3809,9 @@ } }, "node_modules/@types/react-router-config": { - "version": "5.0.7", - "resolved": "https://registry.npmjs.org/@types/react-router-config/-/react-router-config-5.0.7.tgz", - "integrity": "sha512-pFFVXUIydHlcJP6wJm7sDii5mD/bCmmAY0wQzq+M+uX7bqS95AQqHZWP1iNMKrWVQSuHIzj5qi9BvrtLX2/T4w==", + "version": "5.0.10", + "resolved": "https://registry.npmjs.org/@types/react-router-config/-/react-router-config-5.0.10.tgz", + "integrity": "sha512-Wn6c/tXdEgi9adCMtDwx8Q2vGty6TsPTc/wCQQ9kAlye8UqFxj0vGFWWuhywNfkwqth+SOgJxQTLTZukrqDQmQ==", "dependencies": { "@types/history": "^4.7.11", "@types/react": "*", @@ -4300,9 +3834,9 @@ "integrity": "sha512-wWKOClTTiizcZhXnPY4wikVAwmdYHp8q6DmC+EJUzAMsycb7HB32Kh9RN4+0gExjmPmZSAQjgURXIGATPegAvA==" }, "node_modules/@types/sax": { - "version": "1.2.4", - "resolved": "https://registry.npmjs.org/@types/sax/-/sax-1.2.4.tgz", - "integrity": "sha512-pSAff4IAxJjfAXUG6tFkO7dsSbTmf8CtUpfhhZ5VhkRpC4628tJhh3+V6H1E+/Gs9piSzYKT5yzHO5M4GG9jkw==", + "version": "1.2.7", + "resolved": "https://registry.npmjs.org/@types/sax/-/sax-1.2.7.tgz", + "integrity": "sha512-rO73L89PJxeYM3s3pPPjiPgVVcymqU490g0YO5n5By0k2Erzj6tay/4lr1CHAAU4JyOWd1rpQ8bCf6cZfHU96A==", "dependencies": { "@types/node": "*" } 
@@ -4661,30 +4195,30 @@ } }, "node_modules/algoliasearch": { - "version": "4.18.0", - "resolved": "https://registry.npmjs.org/algoliasearch/-/algoliasearch-4.18.0.tgz", - "integrity": "sha512-pCuVxC1SVcpc08ENH32T4sLKSyzoU7TkRIDBMwSLfIiW+fq4znOmWDkAygHZ6pRcO9I1UJdqlfgnV7TRj+MXrA==", - "dependencies": { - "@algolia/cache-browser-local-storage": "4.18.0", - "@algolia/cache-common": "4.18.0", - "@algolia/cache-in-memory": "4.18.0", - "@algolia/client-account": "4.18.0", - "@algolia/client-analytics": "4.18.0", - "@algolia/client-common": "4.18.0", - "@algolia/client-personalization": "4.18.0", - "@algolia/client-search": "4.18.0", - "@algolia/logger-common": "4.18.0", - "@algolia/logger-console": "4.18.0", - "@algolia/requester-browser-xhr": "4.18.0", - "@algolia/requester-common": "4.18.0", - "@algolia/requester-node-http": "4.18.0", - "@algolia/transporter": "4.18.0" + "version": "4.20.0", + "resolved": "https://registry.npmjs.org/algoliasearch/-/algoliasearch-4.20.0.tgz", + "integrity": "sha512-y+UHEjnOItoNy0bYO+WWmLWBlPwDjKHW6mNHrPi0NkuhpQOOEbrkwQH/wgKFDLh7qlKjzoKeiRtlpewDPDG23g==", + "dependencies": { + "@algolia/cache-browser-local-storage": "4.20.0", + "@algolia/cache-common": "4.20.0", + "@algolia/cache-in-memory": "4.20.0", + "@algolia/client-account": "4.20.0", + "@algolia/client-analytics": "4.20.0", + "@algolia/client-common": "4.20.0", + "@algolia/client-personalization": "4.20.0", + "@algolia/client-search": "4.20.0", + "@algolia/logger-common": "4.20.0", + "@algolia/logger-console": "4.20.0", + "@algolia/requester-browser-xhr": "4.20.0", + "@algolia/requester-common": "4.20.0", + "@algolia/requester-node-http": "4.20.0", + "@algolia/transporter": "4.20.0" } }, "node_modules/algoliasearch-helper": { - "version": "3.13.3", - "resolved": "https://registry.npmjs.org/algoliasearch-helper/-/algoliasearch-helper-3.13.3.tgz", - "integrity": "sha512-jhbbuYZ+fheXpaJlqdJdFa1jOsrTWKmRRTYDM3oVTto5VodZzM7tT+BHzslAotaJf/81CKrm6yLRQn8WIr/K4A==", + "version": "3.15.0", + 
"resolved": "https://registry.npmjs.org/algoliasearch-helper/-/algoliasearch-helper-3.15.0.tgz", + "integrity": "sha512-DGUnK3TGtDQsaUE4ayF/LjSN0DGsuYThB8WBgnnDY0Wq04K6lNVruO3LfqJOgSfDiezp+Iyt8Tj4YKHi+/ivSA==", "dependencies": { "@algolia/events": "^4.0.1" }, @@ -4919,9 +4453,9 @@ } }, "node_modules/autoprefixer": { - "version": "10.4.14", - "resolved": "https://registry.npmjs.org/autoprefixer/-/autoprefixer-10.4.14.tgz", - "integrity": "sha512-FQzyfOsTlwVzjHxKEqRIAdJx9niO6VCBCoEwax/VLSoQF29ggECcPuBqUMZ+u8jCZOPSy8b8/8KnuFbp0SaFZQ==", + "version": "10.4.16", + "resolved": "https://registry.npmjs.org/autoprefixer/-/autoprefixer-10.4.16.tgz", + "integrity": "sha512-7vd3UC6xKp0HLfua5IjZlcXvGAGy7cBAXTg2lyQ/8WpNhd6SiZ8Be+xm3FyBSYJx5GKcpRCzBh7RH4/0dnY+uQ==", "funding": [ { "type": "opencollective", @@ -4930,12 +4464,16 @@ { "type": "tidelift", "url": "https://tidelift.com/funding/github/npm/autoprefixer" + }, + { + "type": "github", + "url": "https://github.com/sponsors/ai" } ], "dependencies": { - "browserslist": "^4.21.5", - "caniuse-lite": "^1.0.30001464", - "fraction.js": "^4.2.0", + "browserslist": "^4.21.10", + "caniuse-lite": "^1.0.30001538", + "fraction.js": "^4.3.6", "normalize-range": "^0.1.2", "picocolors": "^1.0.0", "postcss-value-parser": "^4.2.0" @@ -5294,9 +4832,9 @@ } }, "node_modules/browserslist": { - "version": "4.21.9", - "resolved": "https://registry.npmjs.org/browserslist/-/browserslist-4.21.9.tgz", - "integrity": "sha512-M0MFoZzbUrRU4KNfCrDLnvyE7gub+peetoTid3TBIqtunaDJyXlwhakT+/VkvSXcfIzFfK/nkCs4nmyTmxdNSg==", + "version": "4.22.2", + "resolved": "https://registry.npmjs.org/browserslist/-/browserslist-4.22.2.tgz", + "integrity": "sha512-0UgcrvQmBDvZHFGdYUehrCNIazki7/lUP3kkoi/r3YB2amZbFM9J43ZRkJTXBUZK4gmx56+Sqk9+Vs9mwZx9+A==", "funding": [ { "type": "opencollective", @@ -5312,10 +4850,10 @@ } ], "dependencies": { - "caniuse-lite": "^1.0.30001503", - "electron-to-chromium": "^1.4.431", - "node-releases": "^2.0.12", - "update-browserslist-db": 
"^1.0.11" + "caniuse-lite": "^1.0.30001565", + "electron-to-chromium": "^1.4.601", + "node-releases": "^2.0.14", + "update-browserslist-db": "^1.0.13" }, "bin": { "browserslist": "cli.js" @@ -5444,9 +4982,9 @@ } }, "node_modules/caniuse-lite": { - "version": "1.0.30001516", - "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001516.tgz", - "integrity": "sha512-Wmec9pCBY8CWbmI4HsjBeQLqDTqV91nFVR83DnZpYyRnPI1wePDsTg0bGLPC5VU/3OIZV1fmxEea1b+tFKe86g==", + "version": "1.0.30001566", + "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001566.tgz", + "integrity": "sha512-ggIhCsTxmITBAMmK8yZjEhCO5/47jKXPu6Dha/wuCS4JePVL+3uiDEBuhu2aIoT+bqTOR8L76Ip1ARL9xYsEJA==", "funding": [ { "type": "opencollective", @@ -7236,9 +6774,9 @@ "integrity": "sha512-WMwm9LhRUo+WUaRN+vRuETqG89IgZphVSNkdFgeb6sS/E4OrDIN7t48CAewSHXc6C8lefD8KKfr5vY61brQlow==" }, "node_modules/electron-to-chromium": { - "version": "1.4.462", - "resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.4.462.tgz", - "integrity": "sha512-ux2LqN9JKRBDKXMT+78jtiBLPiXf+rLtYlsrOg5Qn7uv6Cbg7+9JyIalE3wcqkOdB2wPCUYNWAuL7suKRMHe9w==" + "version": "1.4.603", + "resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.4.603.tgz", + "integrity": "sha512-Dvo5OGjnl7AZTU632dFJtWj0uJK835eeOVQIuRcmBmsFsTNn3cL05FqOyHAfGQDIoHfLhyJ1Tya3PJ0ceMz54g==" }, "node_modules/elkjs": { "version": "0.8.2", @@ -8273,11 +7811,6 @@ "node": ">=0.4.0" } }, - "node_modules/fflate": { - "version": "0.4.8", - "resolved": "https://registry.npmjs.org/fflate/-/fflate-0.4.8.tgz", - "integrity": "sha512-FJqqoDBR00Mdj9ppamLa/Y7vxm+PRmNWA67N846RvsoYVMKB4q3y/de5PA7gUmRMYK/8CMz2GDZQmCRN1wBcWA==" - }, "node_modules/file-entry-cache": { "version": "6.0.1", "resolved": "https://registry.npmjs.org/file-entry-cache/-/file-entry-cache-6.0.1.tgz", @@ -8660,15 +8193,15 @@ } }, "node_modules/fraction.js": { - "version": "4.2.0", - "resolved": 
"https://registry.npmjs.org/fraction.js/-/fraction.js-4.2.0.tgz", - "integrity": "sha512-MhLuK+2gUcnZe8ZHlaaINnQLl0xRIGRfcGk2yl8xoQAfHrSsL3rYu6FCmBdkdbhc9EPlwyGHewaRsvwRMJtAlA==", + "version": "4.3.7", + "resolved": "https://registry.npmjs.org/fraction.js/-/fraction.js-4.3.7.tgz", + "integrity": "sha512-ZsDfxO51wGAXREY55a7la9LScWpwv9RxIrYABrlvOFBlH/ShPnrtsXeuUIfXKKOVicNxQ+o8JTbJvjS4M89yew==", "engines": { "node": "*" }, "funding": { "type": "patreon", - "url": "https://www.patreon.com/infusion" + "url": "https://github.com/sponsors/rawify" } }, "node_modules/fresh": { @@ -9300,33 +8833,6 @@ "safe-buffer": "~5.1.0" } }, - "node_modules/html-dom-parser": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/html-dom-parser/-/html-dom-parser-4.0.0.tgz", - "integrity": "sha512-TUa3wIwi80f5NF8CVWzkopBVqVAtlawUzJoLwVLHns0XSJGynss4jiY0mTWpiDOsuyw+afP+ujjMgRh9CoZcXw==", - "dependencies": { - "domhandler": "5.0.3", - "htmlparser2": "9.0.0" - } - }, - "node_modules/html-dom-parser/node_modules/htmlparser2": { - "version": "9.0.0", - "resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-9.0.0.tgz", - "integrity": "sha512-uxbSI98wmFT/G4P2zXx4OVx04qWUmyFPrD2/CNepa2Zo3GPNaCaaxElDgwUrwYWkK1nr9fft0Ya8dws8coDLLQ==", - "funding": [ - "https://github.com/fb55/htmlparser2?sponsor=1", - { - "type": "github", - "url": "https://github.com/sponsors/fb55" - } - ], - "dependencies": { - "domelementtype": "^2.3.0", - "domhandler": "^5.0.3", - "domutils": "^3.1.0", - "entities": "^4.5.0" - } - }, "node_modules/html-entities": { "version": "2.4.0", "resolved": "https://registry.npmjs.org/html-entities/-/html-entities-2.4.0.tgz", @@ -9370,20 +8876,6 @@ "node": ">= 12" } }, - "node_modules/html-react-parser": { - "version": "4.2.0", - "resolved": "https://registry.npmjs.org/html-react-parser/-/html-react-parser-4.2.0.tgz", - "integrity": "sha512-gzU55AS+FI6qD7XaKe5BLuLFM2Xw0/LodfMWZlxV9uOHe7LCD5Lukx/EgYuBI3c0kLu0XlgFXnSzO0qUUn3Vrg==", - "dependencies": { - "domhandler": 
"5.0.3", - "html-dom-parser": "4.0.0", - "react-property": "2.0.0", - "style-to-js": "1.1.3" - }, - "peerDependencies": { - "react": "0.14 || 15 || 16 || 17 || 18" - } - }, "node_modules/html-tags": { "version": "3.3.1", "resolved": "https://registry.npmjs.org/html-tags/-/html-tags-3.3.1.tgz", @@ -11056,9 +10548,9 @@ } }, "node_modules/node-fetch": { - "version": "2.6.12", - "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.6.12.tgz", - "integrity": "sha512-C/fGU2E8ToujUivIO0H+tpQ6HWo4eEmchoPIoXtxCrVghxdKq+QOHqEZW7tuP3KlV3bC8FRMO5nMCC7Zm1VP6g==", + "version": "2.7.0", + "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.7.0.tgz", + "integrity": "sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==", "dependencies": { "whatwg-url": "^5.0.0" }, @@ -11083,9 +10575,9 @@ } }, "node_modules/node-releases": { - "version": "2.0.13", - "resolved": "https://registry.npmjs.org/node-releases/-/node-releases-2.0.13.tgz", - "integrity": "sha512-uYr7J37ae/ORWdZeQ1xxMJe3NtdmqMC/JZK+geofDrkLUApKRHPd18/TxtBOJ4A0/+uUIliorNrfYV6s1b02eQ==" + "version": "2.0.14", + "resolved": "https://registry.npmjs.org/node-releases/-/node-releases-2.0.14.tgz", + "integrity": "sha512-y10wOWt8yZpqXmOgRo77WaHEmhYQYGNA6y421PKsKYWEK8aW+cqAphborZDhqfyKrbZEN92CN1X2KbafY2s7Yw==" }, "node_modules/non-layered-tidy-tree-layout": { "version": "2.0.2", @@ -12334,14 +11826,6 @@ "postcss": "^8.2.15" } }, - "node_modules/posthog-js": { - "version": "1.71.0", - "resolved": "https://registry.npmjs.org/posthog-js/-/posthog-js-1.71.0.tgz", - "integrity": "sha512-Qs7lIBivj993apV17x8krEy46SKzSYCwMq4hihkNJ01U6ZltrIx0piYfKCC2pmiBgCaJG7Hjn0iTLy5evntdEw==", - "dependencies": { - "fflate": "^0.4.1" - } - }, "node_modules/prelude-ls": { "version": "1.2.1", "resolved": "https://registry.npmjs.org/prelude-ls/-/prelude-ls-1.2.1.tgz", @@ -12834,11 +12318,6 @@ "webpack": ">=4.41.1 || 5.x" } }, - "node_modules/react-property": { - "version": "2.0.0", - 
"resolved": "https://registry.npmjs.org/react-property/-/react-property-2.0.0.tgz", - "integrity": "sha512-kzmNjIgU32mO4mmH5+iUyrqlpFQhF8K2k7eZ4fdLSOPFrD1XgEuSBv9LDEgxRXTMBqMd8ppT0x6TIzqE5pdGdw==" - }, "node_modules/react-router": { "version": "5.3.4", "resolved": "https://registry.npmjs.org/react-router/-/react-router-5.3.4.tgz", @@ -12888,9 +12367,9 @@ } }, "node_modules/react-textarea-autosize": { - "version": "8.5.2", - "resolved": "https://registry.npmjs.org/react-textarea-autosize/-/react-textarea-autosize-8.5.2.tgz", - "integrity": "sha512-uOkyjkEl0ByEK21eCJMHDGBAAd/BoFQBawYK5XItjAmCTeSbjxghd8qnt7nzsLYzidjnoObu6M26xts0YGKsGg==", + "version": "8.5.3", + "resolved": "https://registry.npmjs.org/react-textarea-autosize/-/react-textarea-autosize-8.5.3.tgz", + "integrity": "sha512-XT1024o2pqCuZSuBt9FwHlaDeNtVrtCXu0Rnz88t1jUGheCLa3PhjE1GH8Ctm2axEtvdCl5SUHYschyQ0L5QHQ==", "dependencies": { "@babel/runtime": "^7.20.13", "use-composed-ref": "^1.3.0", @@ -13533,9 +13012,9 @@ "integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==" }, "node_modules/sax": { - "version": "1.2.4", - "resolved": "https://registry.npmjs.org/sax/-/sax-1.2.4.tgz", - "integrity": "sha512-NqVDv9TpANUjFm0N8uM5GxL36UgKi9/atZw+x7YFnQ8ckwFGKrl4xX4yWtrey3UJm5nP1kUbnYgLopqWNSRhWw==" + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/sax/-/sax-1.3.0.tgz", + "integrity": "sha512-0s+oAmw9zLl1V1cS9BtZN7JAd0cW5e0QH4W3LWEK6a4LaLEA2OTpGYWDY+6XasBLtz6wkm3u1xRw95mRuJ59WA==" }, "node_modules/scheduler": { "version": "0.20.2", @@ -13564,13 +13043,10 @@ } }, "node_modules/search-insights": { - "version": "2.7.0", - "resolved": "https://registry.npmjs.org/search-insights/-/search-insights-2.7.0.tgz", - "integrity": "sha512-GLbVaGgzYEKMvuJbHRhLi1qoBFnjXZGZ6l4LxOYPCp4lI2jDRB3jPU9/XNhMwv6kvnA9slTreq6pvK+b3o3aqg==", - "peer": true, - "engines": { - "node": ">=8.16.0" - } + "version": "2.11.0", + "resolved": 
"https://registry.npmjs.org/search-insights/-/search-insights-2.11.0.tgz", + "integrity": "sha512-Uin2J8Bpm3xaZi9Y8QibSys6uJOFZ+REMrf42v20AA3FUDUrshKkMEP6liJbMAHCm71wO6ls4mwAf7a3gFVxLw==", + "peer": true }, "node_modules/section-matter": { "version": "1.0.0", @@ -14214,22 +13690,6 @@ "url": "https://github.com/sponsors/sindresorhus" } }, - "node_modules/style-to-js": { - "version": "1.1.3", - "resolved": "https://registry.npmjs.org/style-to-js/-/style-to-js-1.1.3.tgz", - "integrity": "sha512-zKI5gN/zb7LS/Vm0eUwjmjrXWw8IMtyA8aPBJZdYiQTXj4+wQ3IucOLIOnF7zCHxvW8UhIGh/uZh/t9zEHXNTQ==", - "dependencies": { - "style-to-object": "0.4.1" - } - }, - "node_modules/style-to-js/node_modules/style-to-object": { - "version": "0.4.1", - "resolved": "https://registry.npmjs.org/style-to-object/-/style-to-object-0.4.1.tgz", - "integrity": "sha512-HFpbb5gr2ypci7Qw+IOhnP2zOU7e77b+rzM+wTzXzfi1PrtBCX0E7Pk4wL4iTLnhzZ+JgEGAhX81ebTg/aYjQw==", - "dependencies": { - "inline-style-parser": "0.1.1" - } - }, "node_modules/style-to-object": { "version": "0.3.0", "resolved": "https://registry.npmjs.org/style-to-object/-/style-to-object-0.3.0.tgz", @@ -14833,9 +14293,9 @@ } }, "node_modules/ua-parser-js": { - "version": "1.0.35", - "resolved": "https://registry.npmjs.org/ua-parser-js/-/ua-parser-js-1.0.35.tgz", - "integrity": "sha512-fKnGuqmTBnIE+/KXSzCn4db8RTigUzw1AN0DmdU6hJovUTbYJKyqj+8Mt1c4VfRDnOVJnENmfYkIPZ946UrSAA==", + "version": "1.0.37", + "resolved": "https://registry.npmjs.org/ua-parser-js/-/ua-parser-js-1.0.37.tgz", + "integrity": "sha512-bhTyI94tZofjo+Dn8SN6Zv8nBDvyXTymAdM3LDI/0IboIUwTu1rEhW7v2TfiVsoYWgkQ4kOVqnI8APUFbIQIFQ==", "funding": [ { "type": "opencollective", @@ -14844,6 +14304,10 @@ { "type": "paypal", "url": "https://paypal.me/faisalman" + }, + { + "type": "github", + "url": "https://github.com/sponsors/faisalman" } ], "engines": { @@ -15058,9 +14522,9 @@ } }, "node_modules/update-browserslist-db": { - "version": "1.0.11", - "resolved": 
"https://registry.npmjs.org/update-browserslist-db/-/update-browserslist-db-1.0.11.tgz", - "integrity": "sha512-dCwEFf0/oT85M1fHBg4F0jtLwJrutGoHSQXCh7u4o2t1drG+c0a9Flnqww6XUKSfQMPpJBRjU8d4RXB09qtvaA==", + "version": "1.0.13", + "resolved": "https://registry.npmjs.org/update-browserslist-db/-/update-browserslist-db-1.0.13.tgz", + "integrity": "sha512-xebP81SNcPuNpPP3uzeW1NYXxI3rxyJzF3pD6sH4jE7o/IX+WtSpwnVU+qIsDPyk0d3hmFQ7mjqc6AtV604hbg==", "funding": [ { "type": "opencollective", diff --git a/docs/package.json b/docs/package.json index 6423218996d5b..0ae820aabcd78 100644 --- a/docs/package.json +++ b/docs/package.json @@ -19,12 +19,11 @@ "format:check": "prettier --check \"**/*.{js,jsx,ts,tsx,md,mdx}\"" }, "dependencies": { - "@docusaurus/core": "2.4.0", - "@docusaurus/preset-classic": "2.4.0", - "@docusaurus/remark-plugin-npm2yarn": "^2.4.0", - "@docusaurus/theme-mermaid": "2.4.0", + "@docusaurus/core": "2.4.3", + "@docusaurus/preset-classic": "2.4.3", + "@docusaurus/remark-plugin-npm2yarn": "^2.4.3", + "@docusaurus/theme-mermaid": "2.4.3", "@mdx-js/react": "^1.6.22", - "@mendable/search": "^0.0.160", "clsx": "^1.2.1", "json-loader": "^0.5.7", "process": "^0.11.10", diff --git a/docs/src/css/custom.css b/docs/src/css/custom.css index 84a7dc38f8920..b433f41aa407d 100644 --- a/docs/src/css/custom.css +++ b/docs/src/css/custom.css @@ -36,23 +36,6 @@ --ifm-color-primary-lightest: #4fddbf; } -.mendable-search { - width: 175px; -} - -/* Reduce width on mobile for Mendable Search */ -@media (max-width: 500px) { - .mendable-search { - width: 150px; - } -} - -@media (max-width: 380px) { - .mendable-search { - width: 140px; - } -} - .footer__links { margin-top: 1rem; margin-bottom: 3rem; diff --git a/docs/src/theme/SearchBar.js b/docs/src/theme/SearchBar.js deleted file mode 100644 index b5b15fe669385..0000000000000 --- a/docs/src/theme/SearchBar.js +++ /dev/null @@ -1,35 +0,0 @@ -/** - * Copyright (c) Meta Platforms, Inc. and affiliates. 
- * - * This source code is licensed under the MIT license found in the - * LICENSE file in the root directory of this source tree. - * - * @format - */ -import React from "react"; -import { MendableSearchBar } from "@mendable/search"; -import useDocusaurusContext from "@docusaurus/useDocusaurusContext"; - -export default function SearchBarWrapper() { - const { - siteConfig: { customFields }, - } = useDocusaurusContext(); - return ( -
    - -
    - ); -} From c51001f01ee47daf97e3004fc434c84dd0e2a694 Mon Sep 17 00:00:00 2001 From: Harrison Chase Date: Mon, 4 Dec 2023 19:03:19 -0800 Subject: [PATCH 34/49] fix comet tracer (#14259) --- .../tests/unit_tests/callbacks/tracers/test_comet.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libs/langchain/tests/unit_tests/callbacks/tracers/test_comet.py b/libs/langchain/tests/unit_tests/callbacks/tracers/test_comet.py index 537bc64e4554d..3dff6520d477e 100644 --- a/libs/langchain/tests/unit_tests/callbacks/tracers/test_comet.py +++ b/libs/langchain/tests/unit_tests/callbacks/tracers/test_comet.py @@ -44,14 +44,14 @@ def test_comet_tracer__trace_chain_with_single_span__happyflow() -> None: # Parent run tracer.on_chain_start( {"name": "chain-input"}, - ["chain-input-prompt"], + {"input": "chain-input-prompt"}, parent_run_id=None, run_id=run_id_1, ) # Check that chain was created chain_module_mock.Chain.assert_called_once_with( - inputs={"input": ["chain-input-prompt"]}, + inputs={"input": "chain-input-prompt"}, metadata=None, experiment_info="the-experiment-info", ) From 0d47d15a9f6ea9880404595fe86ac19300aec348 Mon Sep 17 00:00:00 2001 From: cxumol Date: Mon, 4 Dec 2023 19:25:05 -0800 Subject: [PATCH 35/49] add(feat): Text Embeddings by Cloudflare Workers AI (#14220) Add [Text Embeddings by Cloudflare Workers AI](https://developers.cloudflare.com/workers-ai/models/text-embeddings/). It's a new integration. Trying to align it with its langchain-js version counterpart [here](https://api.js.langchain.com/classes/embeddings_cloudflare_workersai.CloudflareWorkersAIEmbeddings.html). 
- Dependencies: N/A - Done `make format` `make lint` `make spell_check` `make integration_tests` and all my changes was passed --- .../text_embedding/cloudflare_workersai.ipynb | 125 ++++++++++++++++++ .../embeddings/cloudflare_workersai.py | 94 +++++++++++++ .../embeddings/test_cloudflare_workersai.py | 53 ++++++++ 3 files changed, 272 insertions(+) create mode 100644 docs/docs/integrations/text_embedding/cloudflare_workersai.ipynb create mode 100644 libs/langchain/langchain/embeddings/cloudflare_workersai.py create mode 100644 libs/langchain/tests/integration_tests/embeddings/test_cloudflare_workersai.py diff --git a/docs/docs/integrations/text_embedding/cloudflare_workersai.ipynb b/docs/docs/integrations/text_embedding/cloudflare_workersai.ipynb new file mode 100644 index 0000000000000..25c6ce9601092 --- /dev/null +++ b/docs/docs/integrations/text_embedding/cloudflare_workersai.ipynb @@ -0,0 +1,125 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "59428e05", + "metadata": {}, + "source": [ + "# Text Embeddings on Cloudflare Workers AI\n", + "\n", + "[Cloudflare AI document](https://developers.cloudflare.com/workers-ai/models/text-embeddings/) listed all text embeddings models available.\n", + "\n", + "Both Cloudflare account ID and API token are required. 
Find how to obtain them from [this document](https://developers.cloudflare.com/workers-ai/get-started/rest-api/).\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "92c5b61e", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.embeddings.cloudflare_workersai import CloudflareWorkersAIEmbeddings" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "f60023b8", + "metadata": {}, + "outputs": [], + "source": [ + "import getpass\n", + "\n", + "my_account_id = getpass.getpass(\"Enter your Cloudflare account ID:\\n\\n\")\n", + "my_api_token = getpass.getpass(\"Enter your Cloudflare API token:\\n\\n\")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "062547b9", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(384, [-0.033627357333898544, 0.03982774540781975, 0.03559349477291107])" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "embeddings = CloudflareWorkersAIEmbeddings(\n", + " account_id=my_account_id,\n", + " api_token=my_api_token,\n", + " model_name=\"@cf/baai/bge-small-en-v1.5\",\n", + ")\n", + "# single string embeddings\n", + "query_result = embeddings.embed_query(\"test\")\n", + "len(query_result), query_result[:3]" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "e1dcc4bd", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(3, 384)" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# string embeddings in batches\n", + "batch_query_result = embeddings.embed_documents([\"test1\", \"test2\", \"test3\"])\n", + "len(batch_query_result), len(batch_query_result[0])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "52de8b88", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": 
"python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.18" + }, + "vscode": { + "interpreter": { + "hash": "7377c2ccc78bc62c2683122d48c8cd1fb85a53850a1b1fc29736ed39852c9885" + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/libs/langchain/langchain/embeddings/cloudflare_workersai.py b/libs/langchain/langchain/embeddings/cloudflare_workersai.py new file mode 100644 index 0000000000000..8d7e410340122 --- /dev/null +++ b/libs/langchain/langchain/embeddings/cloudflare_workersai.py @@ -0,0 +1,94 @@ +from typing import Any, Dict, List + +import requests +from langchain_core.embeddings import Embeddings +from langchain_core.pydantic_v1 import BaseModel, Extra + +DEFAULT_MODEL_NAME = "@cf/baai/bge-base-en-v1.5" + + +class CloudflareWorkersAIEmbeddings(BaseModel, Embeddings): + """Cloudflare Workers AI embedding model. + + To use, you need to provide an API token and + account ID to access Cloudflare Workers AI. + + Example: + .. 
code-block:: python + + from langchain.embeddings import CloudflareWorkersAIEmbeddings + + account_id = "my_account_id" + api_token = "my_secret_api_token" + model_name = "@cf/baai/bge-small-en-v1.5" + + cf = CloudflareWorkersAIEmbeddings( + account_id=account_id, + api_token=api_token, + model_name=model_name + ) + """ + + api_base_url: str = "https://api.cloudflare.com/client/v4/accounts" + account_id: str + api_token: str + model_name: str = DEFAULT_MODEL_NAME + batch_size: int = 50 + strip_new_lines: bool = True + headers: Dict[str, str] = {"Authorization": "Bearer "} + + def __init__(self, **kwargs: Any): + """Initialize the Cloudflare Workers AI client.""" + super().__init__(**kwargs) + + self.headers = {"Authorization": f"Bearer {self.api_token}"} + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + + def embed_documents(self, texts: List[str]) -> List[List[float]]: + """Compute doc embeddings using Cloudflare Workers AI. + + Args: + texts: The list of texts to embed. + + Returns: + List of embeddings, one for each text. + """ + if self.strip_new_lines: + texts = [text.replace("\n", " ") for text in texts] + + batches = [ + texts[i : i + self.batch_size] + for i in range(0, len(texts), self.batch_size) + ] + embeddings = [] + + for batch in batches: + response = requests.post( + f"{self.api_base_url}/{self.account_id}/ai/run/{self.model_name}", + headers=self.headers, + json={"text": batch}, + ) + embeddings.extend(response.json()["result"]["data"]) + + return embeddings + + def embed_query(self, text: str) -> List[float]: + """Compute query embeddings using Cloudflare Workers AI. + + Args: + text: The text to embed. + + Returns: + Embeddings for the text. 
+ """ + text = text.replace("\n", " ") if self.strip_new_lines else text + response = requests.post( + f"{self.api_base_url}/{self.account_id}/ai/run/{self.model_name}", + headers=self.headers, + json={"text": [text]}, + ) + return response.json()["result"]["data"][0] diff --git a/libs/langchain/tests/integration_tests/embeddings/test_cloudflare_workersai.py b/libs/langchain/tests/integration_tests/embeddings/test_cloudflare_workersai.py new file mode 100644 index 0000000000000..24ac031371728 --- /dev/null +++ b/libs/langchain/tests/integration_tests/embeddings/test_cloudflare_workersai.py @@ -0,0 +1,53 @@ +"""Test Cloudflare Workers AI embeddings.""" + +import responses + +from langchain.embeddings.cloudflare_workersai import CloudflareWorkersAIEmbeddings + + +@responses.activate +def test_cloudflare_workers_ai_embedding_documents() -> None: + """Test Cloudflare Workers AI embeddings.""" + documents = ["foo bar", "foo bar", "foo bar"] + + responses.add( + responses.POST, + "https://api.cloudflare.com/client/v4/accounts/123/ai/run/@cf/baai/bge-base-en-v1.5", + json={ + "result": { + "shape": [3, 768], + "data": [[0.0] * 768, [0.0] * 768, [0.0] * 768], + }, + "success": "true", + "errors": [], + "messages": [], + }, + ) + + embeddings = CloudflareWorkersAIEmbeddings(account_id="123", api_token="abc") + output = embeddings.embed_documents(documents) + + assert len(output) == 3 + assert len(output[0]) == 768 + + +@responses.activate +def test_cloudflare_workers_ai_embedding_query() -> None: + """Test Cloudflare Workers AI embeddings.""" + + responses.add( + responses.POST, + "https://api.cloudflare.com/client/v4/accounts/123/ai/run/@cf/baai/bge-base-en-v1.5", + json={ + "result": {"shape": [1, 768], "data": [[0.0] * 768]}, + "success": "true", + "errors": [], + "messages": [], + }, + ) + + document = "foo bar" + embeddings = CloudflareWorkersAIEmbeddings(account_id="123", api_token="abc") + output = embeddings.embed_query(document) + + assert len(output) == 768 From 
2213fc9711f944401d8bfeb386b3b7520677c8e7 Mon Sep 17 00:00:00 2001 From: Harrison Chase Date: Mon, 4 Dec 2023 19:42:15 -0800 Subject: [PATCH 36/49] Harrison/bookend ai (#14258) Co-authored-by: stvhu-bookend <142813359+stvhu-bookend@users.noreply.github.com> --- .../integrations/text_embedding/bookend.ipynb | 89 ++++++++++++++++++ .../langchain/embeddings/__init__.py | 2 + .../langchain/langchain/embeddings/bookend.py | 91 +++++++++++++++++++ .../embeddings/test_bookend.py | 27 ++++++ .../unit_tests/embeddings/test_imports.py | 1 + 5 files changed, 210 insertions(+) create mode 100644 docs/docs/integrations/text_embedding/bookend.ipynb create mode 100644 libs/langchain/langchain/embeddings/bookend.py create mode 100644 libs/langchain/tests/integration_tests/embeddings/test_bookend.py diff --git a/docs/docs/integrations/text_embedding/bookend.ipynb b/docs/docs/integrations/text_embedding/bookend.ipynb new file mode 100644 index 0000000000000..3277d49a6c885 --- /dev/null +++ b/docs/docs/integrations/text_embedding/bookend.ipynb @@ -0,0 +1,89 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "2c591a6a42ac7f0", + "metadata": {}, + "source": [ + "# Bookend AI\n", + "\n", + "Let's load the Bookend AI Embeddings class." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d94c62b4", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.embeddings import BookendEmbeddings" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "523a09e3", + "metadata": {}, + "outputs": [], + "source": [ + "embeddings = BookendEmbeddings(\n", + " domain=\"your_domain\",\n", + " api_token=\"your_api_token\",\n", + " model_id=\"your_embeddings_model_id\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b212bd5a", + "metadata": {}, + "outputs": [], + "source": [ + "text = \"This is a test document.\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "57db66bd", + "metadata": {}, + "outputs": [], + "source": [ + "query_result = embeddings.embed_query(text)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b790fd09", + "metadata": {}, + "outputs": [], + "source": [ + "doc_result = embeddings.embed_documents([text])" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.1" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/libs/langchain/langchain/embeddings/__init__.py b/libs/langchain/langchain/embeddings/__init__.py index 8f2887942562e..3710a6e1969fa 100644 --- a/libs/langchain/langchain/embeddings/__init__.py +++ b/libs/langchain/langchain/embeddings/__init__.py @@ -22,6 +22,7 @@ from langchain.embeddings.azure_openai import AzureOpenAIEmbeddings from langchain.embeddings.baidu_qianfan_endpoint import QianfanEmbeddingsEndpoint from langchain.embeddings.bedrock import BedrockEmbeddings +from langchain.embeddings.bookend import 
BookendEmbeddings from langchain.embeddings.cache import CacheBackedEmbeddings from langchain.embeddings.clarifai import ClarifaiEmbeddings from langchain.embeddings.cohere import CohereEmbeddings @@ -127,6 +128,7 @@ "QianfanEmbeddingsEndpoint", "JohnSnowLabsEmbeddings", "VoyageEmbeddings", + "BookendEmbeddings", ] diff --git a/libs/langchain/langchain/embeddings/bookend.py b/libs/langchain/langchain/embeddings/bookend.py new file mode 100644 index 0000000000000..0b1116c4ba947 --- /dev/null +++ b/libs/langchain/langchain/embeddings/bookend.py @@ -0,0 +1,91 @@ +"""Wrapper around Bookend AI embedding models.""" + +import json +from typing import Any, List + +import requests + +from langchain.pydantic_v1 import BaseModel, Field +from langchain.schema.embeddings import Embeddings + +API_URL = "https://api.bookend.ai/" +DEFAULT_TASK = "embeddings" +PATH = "/models/predict" + + +class BookendEmbeddings(BaseModel, Embeddings): + """Bookend AI sentence_transformers embedding models. + + Example: + .. code-block:: python + + from langchain.embeddings import BookendEmbeddings + + bookend = BookendEmbeddings( + domain={domain} + api_token={api_token} + model_id={model_id} + ) + bookend.embed_documents([ + "Please put on these earmuffs because I can't you hear.", + "Baby wipes are made of chocolate stardust.", + ]) + bookend.embed_query( + "She only paints with bold colors; she does not like pastels." 
+ ) + """ + + domain: str + """Request for a domain at https://bookend.ai/ to use this embeddings module.""" + api_token: str + """Request for an API token at https://bookend.ai/ to use this embeddings module.""" + model_id: str + """Embeddings model ID to use.""" + auth_header: dict = Field(default_factory=dict) + + def __init__(self, **kwargs: Any): + super().__init__(**kwargs) + self.auth_header = {"Authorization": "Basic {}".format(self.api_token)} + + def embed_documents(self, texts: List[str]) -> List[List[float]]: + """Embed documents using a Bookend deployed embeddings model. + + Args: + texts: The list of texts to embed. + + Returns: + List of embeddings, one for each text. + """ + result = [] + headers = self.auth_header + headers["Content-Type"] = "application/json; charset=utf-8" + params = { + "model_id": self.model_id, + "task": DEFAULT_TASK, + } + + for text in texts: + data = json.dumps( + {"text": text, "question": None, "context": None, "instruction": None} + ) + r = requests.request( + "POST", + API_URL + self.domain + PATH, + headers=headers, + params=params, + data=data, + ) + result.append(r.json()[0]["data"]) + + return result + + def embed_query(self, text: str) -> List[float]: + """Embed a query using a Bookend deployed embeddings model. + + Args: + text: The text to embed. + + Returns: + Embeddings for the text. 
+ """ + return self.embed_documents([text])[0] diff --git a/libs/langchain/tests/integration_tests/embeddings/test_bookend.py b/libs/langchain/tests/integration_tests/embeddings/test_bookend.py new file mode 100644 index 0000000000000..940f67063802c --- /dev/null +++ b/libs/langchain/tests/integration_tests/embeddings/test_bookend.py @@ -0,0 +1,27 @@ +"""Test Bookend AI embeddings.""" +from langchain.embeddings.bookend import BookendEmbeddings + + +def test_bookend_embedding_documents() -> None: + """Test Bookend AI embeddings for documents.""" + documents = ["foo bar", "bar foo"] + embedding = BookendEmbeddings( + domain="", + api_token="", + model_id="", + ) + output = embedding.embed_documents(documents) + assert len(output) == 2 + assert len(output[0]) == 768 + + +def test_bookend_embedding_query() -> None: + """Test Bookend AI embeddings for query.""" + document = "foo bar" + embedding = BookendEmbeddings( + domain="", + api_token="", + model_id="", + ) + output = embedding.embed_query(document) + assert len(output) == 768 diff --git a/libs/langchain/tests/unit_tests/embeddings/test_imports.py b/libs/langchain/tests/unit_tests/embeddings/test_imports.py index 9de69602dc6a7..8fe5df0994a50 100644 --- a/libs/langchain/tests/unit_tests/embeddings/test_imports.py +++ b/libs/langchain/tests/unit_tests/embeddings/test_imports.py @@ -53,6 +53,7 @@ "QianfanEmbeddingsEndpoint", "JohnSnowLabsEmbeddings", "VoyageEmbeddings", + "BookendEmbeddings", ] From fcc8e5e83909a2474488e1932d1043591cb57af1 Mon Sep 17 00:00:00 2001 From: Hamza Ahmed <29238429+Hamzahmed@users.noreply.github.com> Date: Mon, 4 Dec 2023 22:44:30 -0500 Subject: [PATCH 37/49] Update geodataframe.py (#13573) here it is validating shapely.geometry.point.Point: if not isinstance(data_frame[page_content_column].iloc[0], gpd.GeoSeries): raise ValueError( f"Expected data_frame[{page_content_column}] to be a GeoSeries" you need it to validate the geoSeries and not the shapely.geometry.point.Point if not 
isinstance(data_frame[page_content_column], gpd.GeoSeries): raise ValueError( f"Expected data_frame[{page_content_column}] to be a GeoSeries" --- libs/langchain/langchain/document_loaders/geodataframe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/langchain/langchain/document_loaders/geodataframe.py b/libs/langchain/langchain/document_loaders/geodataframe.py index cd988b7c67788..afb03ea10443f 100644 --- a/libs/langchain/langchain/document_loaders/geodataframe.py +++ b/libs/langchain/langchain/document_loaders/geodataframe.py @@ -35,7 +35,7 @@ def __init__(self, data_frame: Any, page_content_column: str = "geometry"): f"Expected data_frame to have a column named {page_content_column}" ) - if not isinstance(data_frame[page_content_column].iloc[0], gpd.GeoSeries): + if not isinstance(data_frame[page_content_column], gpd.GeoSeries): raise ValueError( f"Expected data_frame[{page_content_column}] to be a GeoSeries" ) From e042e5df35288fb2809d9947c9e1ca8c3266cd12 Mon Sep 17 00:00:00 2001 From: Jon Watte Date: Mon, 4 Dec 2023 19:44:50 -0800 Subject: [PATCH 38/49] fix: call _on_llm_error() (#13581) Description: There's a copy-paste typo where on_llm_error() calls _on_chain_error() instead of _on_llm_error(). Issue: #13580 Dependencies: None Tag maintainer: @hwchase17 Twitter handle: @jwatte "Run `make format`, `make lint` and `make test` to check this locally." The test scripts don't work in a plain Ubuntu LTS 20.04 system. It looks like the dev container pulling is stuck. Or maybe the internet is just ornery today. 
--------- Co-authored-by: jwatte Co-authored-by: Harrison Chase --- libs/core/langchain_core/tracers/base.py | 2 +- .../callbacks/tracers/test_base_tracer.py | 36 +++++++++++++++++++ 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/libs/core/langchain_core/tracers/base.py b/libs/core/langchain_core/tracers/base.py index db0301b2a1a5f..ddf9e92e2a6f2 100644 --- a/libs/core/langchain_core/tracers/base.py +++ b/libs/core/langchain_core/tracers/base.py @@ -224,7 +224,7 @@ def on_llm_error( llm_run.end_time = datetime.utcnow() llm_run.events.append({"name": "error", "time": llm_run.end_time}) self._end_trace(llm_run) - self._on_chain_error(llm_run) + self._on_llm_error(llm_run) return llm_run def on_chain_start( diff --git a/libs/langchain/tests/unit_tests/callbacks/tracers/test_base_tracer.py b/libs/langchain/tests/unit_tests/callbacks/tracers/test_base_tracer.py index f658abe260854..94be5295c2f3f 100644 --- a/libs/langchain/tests/unit_tests/callbacks/tracers/test_base_tracer.py +++ b/libs/langchain/tests/unit_tests/callbacks/tracers/test_base_tracer.py @@ -332,6 +332,42 @@ def test_tracer_llm_run_on_error() -> None: assert tracer.runs == [compare_run] +@freeze_time("2023-01-01") +def test_tracer_llm_run_on_error_callback() -> None: + """Test tracer on an LLM run with an error and a callback.""" + exception = Exception("test") + uuid = uuid4() + + compare_run = Run( + id=str(uuid), + start_time=datetime.utcnow(), + end_time=datetime.utcnow(), + events=[ + {"name": "start", "time": datetime.utcnow()}, + {"name": "error", "time": datetime.utcnow()}, + ], + extra={}, + execution_order=1, + child_execution_order=1, + serialized=SERIALIZED, + inputs=dict(prompts=[]), + outputs=None, + error=repr(exception), + run_type="llm", + ) + + class FakeTracerWithLlmErrorCallback(FakeTracer): + error_run = None + + def _on_llm_error(self, run: Run) -> None: + self.error_run = run + + tracer = FakeTracerWithLlmErrorCallback() + tracer.on_llm_start(serialized=SERIALIZED, 
prompts=[], run_id=uuid) + tracer.on_llm_error(exception, run_id=uuid) + assert tracer.error_run == compare_run + + @freeze_time("2023-01-01") def test_tracer_chain_run_on_error() -> None: """Test tracer on a Chain run with an error.""" From b49104c2c9a2adfd49d442dff5b64c3bf3afc79f Mon Sep 17 00:00:00 2001 From: Nolan Date: Mon, 4 Dec 2023 19:53:27 -0800 Subject: [PATCH 39/49] Add missing doc key to metadata field in AzureSearch Vectorstore (#13328) - **Description:** Adds doc key to metadata field when adding document to Azure Search. - **Issue:** -, - **Dependencies:** -, - **Tag maintainer:** @eyurtsev, - **Twitter handle:** @finnless Right now the document key with the name FIELDS_ID is not included in the FIELDS_METADATA field, and therefore is not included in the Document returned from a query. This is really annoying if you want to be able to modify that item in the vectorstore. Other's thoughts on this are welcome. --- .../langchain/vectorstores/azuresearch.py | 43 +++++++++++++++---- 1 file changed, 35 insertions(+), 8 deletions(-) diff --git a/libs/langchain/langchain/vectorstores/azuresearch.py b/libs/langchain/langchain/vectorstores/azuresearch.py index 73c96f738ae7c..2cd1f9546de72 100644 --- a/libs/langchain/langchain/vectorstores/azuresearch.py +++ b/libs/langchain/langchain/vectorstores/azuresearch.py @@ -391,10 +391,21 @@ def vector_search_with_score( ( Document( page_content=result.pop(FIELDS_CONTENT), - metadata=json.loads(result[FIELDS_METADATA]) - if FIELDS_METADATA in result - else { - k: v for k, v in result.items() if k != FIELDS_CONTENT_VECTOR + metadata={ + **( + {FIELDS_ID: result.pop(FIELDS_ID)} + if FIELDS_ID in result + else {} + ), + **( + json.loads(result[FIELDS_METADATA]) + if FIELDS_METADATA in result + else { + k: v + for k, v in result.items() + if k != FIELDS_CONTENT_VECTOR + } + ), }, ), float(result["@search.score"]), @@ -452,10 +463,21 @@ def hybrid_search_with_score( ( Document( page_content=result.pop(FIELDS_CONTENT), - 
metadata=json.loads(result[FIELDS_METADATA]) - if FIELDS_METADATA in result - else { - k: v for k, v in result.items() if k != FIELDS_CONTENT_VECTOR + metadata={ + **( + {FIELDS_ID: result.pop(FIELDS_ID)} + if FIELDS_ID in result + else {} + ), + **( + json.loads(result[FIELDS_METADATA]) + if FIELDS_METADATA in result + else { + k: v + for k, v in result.items() + if k != FIELDS_CONTENT_VECTOR + } + ), }, ), float(result["@search.score"]), @@ -547,6 +569,11 @@ def semantic_hybrid_search_with_score_and_rerank( Document( page_content=result.pop(FIELDS_CONTENT), metadata={ + **( + {FIELDS_ID: result.pop(FIELDS_ID)} + if FIELDS_ID in result + else {} + ), **( json.loads(result[FIELDS_METADATA]) if FIELDS_METADATA in result From 956d55de2b434c62ff28175d4b234e5f698fbf82 Mon Sep 17 00:00:00 2001 From: Erick Friis Date: Mon, 4 Dec 2023 20:08:41 -0800 Subject: [PATCH 40/49] docs[patch]: chat model page names (#14264) --- docs/docs/integrations/chat/anthropic.ipynb | 12 +++- docs/docs/integrations/chat/anyscale.ipynb | 14 +++- .../integrations/chat/azure_chat_openai.ipynb | 12 +++- .../chat/azureml_chat_endpoint.ipynb | 11 ++- docs/docs/integrations/chat/baichuan.ipynb | 59 +++++++++------- .../chat/baidu_qianfan_endpoint.ipynb | 11 ++- docs/docs/integrations/chat/bedrock.ipynb | 12 +++- docs/docs/integrations/chat/cohere.ipynb | 12 +++- docs/docs/integrations/chat/ernie.ipynb | 11 ++- docs/docs/integrations/chat/everlyai.ipynb | 12 +++- docs/docs/integrations/chat/fireworks.ipynb | 12 +++- .../chat/google_vertex_ai_palm.ipynb | 11 ++- docs/docs/integrations/chat/hunyuan.ipynb | 69 +++++++++++-------- docs/docs/integrations/chat/konko.ipynb | 11 ++- docs/docs/integrations/chat/litellm.ipynb | 12 +++- docs/docs/integrations/chat/llama2_chat.ipynb | 14 +++- docs/docs/integrations/chat/llama_api.ipynb | 12 +++- docs/docs/integrations/chat/minimax.ipynb | 11 ++- docs/docs/integrations/chat/ollama.ipynb | 11 ++- .../integrations/chat/ollama_functions.ipynb | 11 ++- 
docs/docs/integrations/chat/openai.ipynb | 12 +++- .../chat/pai_eas_chat_endpoint.ipynb | 11 ++- .../chat/promptlayer_chatopenai.ipynb | 12 +++- docs/docs/integrations/chat/tongyi.ipynb | 13 +++- docs/docs/integrations/chat/vllm.ipynb | 10 +++ .../integrations/chat/volcengine_maas.ipynb | 20 +++++- docs/docs/integrations/chat/yandex.ipynb | 12 +++- 27 files changed, 350 insertions(+), 80 deletions(-) diff --git a/docs/docs/integrations/chat/anthropic.ipynb b/docs/docs/integrations/chat/anthropic.ipynb index 00ad314f45dc0..5c99085bb933b 100644 --- a/docs/docs/integrations/chat/anthropic.ipynb +++ b/docs/docs/integrations/chat/anthropic.ipynb @@ -1,11 +1,21 @@ { "cells": [ + { + "cell_type": "raw", + "id": "a016701c", + "metadata": {}, + "source": [ + "---\n", + "sidebar_label: Anthropic\n", + "---" + ] + }, { "cell_type": "markdown", "id": "bf733a38-db84-4363-89e2-de6735c37230", "metadata": {}, "source": [ - "# Anthropic\n", + "# ChatAnthropic\n", "\n", "This notebook covers how to get started with Anthropic chat models." 
] diff --git a/docs/docs/integrations/chat/anyscale.ipynb b/docs/docs/integrations/chat/anyscale.ipynb index 674549a6568c8..3d2e9e80cba4d 100644 --- a/docs/docs/integrations/chat/anyscale.ipynb +++ b/docs/docs/integrations/chat/anyscale.ipynb @@ -1,12 +1,22 @@ { "cells": [ + { + "cell_type": "raw", + "id": "31895fc4", + "metadata": {}, + "source": [ + "---\n", + "sidebar_label: Anyscale\n", + "---" + ] + }, { "attachments": {}, "cell_type": "markdown", "id": "642fd21c-600a-47a1-be96-6e1438b421a9", "metadata": {}, "source": [ - "# Anyscale\n", + "# ChatAnyscale\n", "\n", "This notebook demonstrates the use of `langchain.chat_models.ChatAnyscale` for [Anyscale Endpoints](https://endpoints.anyscale.com/).\n", "\n", @@ -33,7 +43,7 @@ "metadata": {}, "outputs": [ { - "name": "stdin", + "name": "stdout", "output_type": "stream", "text": [ " ········\n" diff --git a/docs/docs/integrations/chat/azure_chat_openai.ipynb b/docs/docs/integrations/chat/azure_chat_openai.ipynb index b4568ca2fd0aa..a6bee8f49cb39 100644 --- a/docs/docs/integrations/chat/azure_chat_openai.ipynb +++ b/docs/docs/integrations/chat/azure_chat_openai.ipynb @@ -1,11 +1,21 @@ { "cells": [ + { + "cell_type": "raw", + "id": "641f8cb0", + "metadata": {}, + "source": [ + "---\n", + "sidebar_label: Azure OpenAI\n", + "---" + ] + }, { "cell_type": "markdown", "id": "38f26d7a", "metadata": {}, "source": [ - "# Azure OpenAI\n", + "# AzureChatOpenAI\n", "\n", ">[Azure OpenAI Service](https://learn.microsoft.com/en-us/azure/ai-services/openai/overview) provides REST API access to OpenAI's powerful language models including the GPT-4, GPT-3.5-Turbo, and Embeddings model series. These models can be easily adapted to your specific task including but not limited to content generation, summarization, semantic search, and natural language to code translation. 
Users can access the service through REST APIs, Python SDK, or a web-based interface in the Azure OpenAI Studio.\n", "\n", diff --git a/docs/docs/integrations/chat/azureml_chat_endpoint.ipynb b/docs/docs/integrations/chat/azureml_chat_endpoint.ipynb index 4444f7fdf672e..2e0a09aff45a9 100644 --- a/docs/docs/integrations/chat/azureml_chat_endpoint.ipynb +++ b/docs/docs/integrations/chat/azureml_chat_endpoint.ipynb @@ -1,10 +1,19 @@ { "cells": [ + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "---\n", + "sidebar_label: Azure ML Endpoint\n", + "---" + ] + }, { "cell_type": "markdown", "metadata": {}, "source": [ - "# Azure ML Endpoint\n", + "# AzureMLChatOnlineEndpoint\n", "\n", ">[Azure Machine Learning](https://azure.microsoft.com/en-us/products/machine-learning/) is a platform used to build, train, and deploy machine learning models. Users can explore the types of models to deploy in the Model Catalog, which provides Azure Foundation Models and OpenAI Models. `Azure Foundation Models` include various open-source models and popular Hugging Face models. Users can also import models of their liking into AzureML.\n", ">\n", diff --git a/docs/docs/integrations/chat/baichuan.ipynb b/docs/docs/integrations/chat/baichuan.ipynb index 9f9376beeeaf8..462a6aa2bc0e8 100644 --- a/docs/docs/integrations/chat/baichuan.ipynb +++ b/docs/docs/integrations/chat/baichuan.ipynb @@ -1,10 +1,19 @@ { "cells": [ + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "---\n", + "sidebar_label: Baichuan Chat\n", + "---" + ] + }, { "cell_type": "markdown", "metadata": {}, "source": [ - "# Baichuan Chat\n", + "# ChatBaichuan\n", "\n", "Baichuan chat models API by Baichuan Intelligent Technology. 
For more information, see [https://platform.baichuan-ai.com/docs/api](https://platform.baichuan-ai.com/docs/api)" ] @@ -63,7 +72,9 @@ "outputs": [ { "data": { - "text/plain": "AIMessage(content='首先,我们需要确定闰年的二月有多少天。闰年的二月有29天。\\n\\n然后,我们可以计算你的月薪:\\n\\n日薪 = 月薪 / (当月天数)\\n\\n所以,你的月薪 = 日薪 * 当月天数\\n\\n将数值代入公式:\\n\\n月薪 = 8元/天 * 29天 = 232元\\n\\n因此,你在闰年的二月的月薪是232元。')" + "text/plain": [ + "AIMessage(content='首先,我们需要确定闰年的二月有多少天。闰年的二月有29天。\\n\\n然后,我们可以计算你的月薪:\\n\\n日薪 = 月薪 / (当月天数)\\n\\n所以,你的月薪 = 日薪 * 当月天数\\n\\n将数值代入公式:\\n\\n月薪 = 8元/天 * 29天 = 232元\\n\\n因此,你在闰年的二月的月薪是232元。')" + ] }, "execution_count": 3, "metadata": {}, @@ -76,16 +87,23 @@ }, { "cell_type": "markdown", - "source": [ - "## For ChatBaichuan with Streaming" - ], "metadata": { "collapsed": false - } + }, + "source": [ + "## For ChatBaichuan with Streaming" + ] }, { "cell_type": "code", "execution_count": 5, + "metadata": { + "ExecuteTime": { + "end_time": "2023-10-17T15:14:25.870044Z", + "start_time": "2023-10-17T15:14:25.863381Z" + }, + "collapsed": false + }, "outputs": [], "source": [ "chat = ChatBaichuan(\n", @@ -93,22 +111,24 @@ " baichuan_secret_key=\"YOUR_SECRET_KEY\",\n", " streaming=True,\n", ")" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-10-17T15:14:25.870044Z", - "start_time": "2023-10-17T15:14:25.863381Z" - } - } + ] }, { "cell_type": "code", "execution_count": 6, + "metadata": { + "ExecuteTime": { + "end_time": "2023-10-17T15:14:27.153546Z", + "start_time": "2023-10-17T15:14:25.868470Z" + }, + "collapsed": false + }, "outputs": [ { "data": { - "text/plain": "AIMessageChunk(content='首先,我们需要确定闰年的二月有多少天。闰年的二月有29天。\\n\\n然后,我们可以计算你的月薪:\\n\\n日薪 = 月薪 / (当月天数)\\n\\n所以,你的月薪 = 日薪 * 当月天数\\n\\n将数值代入公式:\\n\\n月薪 = 8元/天 * 29天 = 232元\\n\\n因此,你在闰年的二月的月薪是232元。')" + "text/plain": [ + "AIMessageChunk(content='首先,我们需要确定闰年的二月有多少天。闰年的二月有29天。\\n\\n然后,我们可以计算你的月薪:\\n\\n日薪 = 月薪 / (当月天数)\\n\\n所以,你的月薪 = 日薪 * 当月天数\\n\\n将数值代入公式:\\n\\n月薪 = 8元/天 * 29天 = 232元\\n\\n因此,你在闰年的二月的月薪是232元。')" + ] }, 
"execution_count": 6, "metadata": {}, @@ -117,14 +137,7 @@ ], "source": [ "chat([HumanMessage(content=\"我日薪8块钱,请问在闰年的二月,我月薪多少\")])" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-10-17T15:14:27.153546Z", - "start_time": "2023-10-17T15:14:25.868470Z" - } - } + ] } ], "metadata": { diff --git a/docs/docs/integrations/chat/baidu_qianfan_endpoint.ipynb b/docs/docs/integrations/chat/baidu_qianfan_endpoint.ipynb index 57749548a8b1a..65f7826815686 100644 --- a/docs/docs/integrations/chat/baidu_qianfan_endpoint.ipynb +++ b/docs/docs/integrations/chat/baidu_qianfan_endpoint.ipynb @@ -1,11 +1,20 @@ { "cells": [ + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "---\n", + "sidebar_label: Baidu Qianfan\n", + "---" + ] + }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ - "# Baidu Qianfan\n", + "# QianfanChatEndpoint\n", "\n", "Baidu AI Cloud Qianfan Platform is a one-stop large model development and service operation platform for enterprise developers. 
Qianfan not only provides including the model of Wenxin Yiyan (ERNIE-Bot) and the third-party open-source models, but also provides various AI development tools and the whole set of development environment, which facilitates customers to use and develop large model applications easily.\n", "\n", diff --git a/docs/docs/integrations/chat/bedrock.ipynb b/docs/docs/integrations/chat/bedrock.ipynb index 02dfb5b9fbde7..3957c9c1e4645 100644 --- a/docs/docs/integrations/chat/bedrock.ipynb +++ b/docs/docs/integrations/chat/bedrock.ipynb @@ -1,11 +1,21 @@ { "cells": [ + { + "cell_type": "raw", + "id": "fbc66410", + "metadata": {}, + "source": [ + "---\n", + "sidebar_label: Bedrock Chat\n", + "---" + ] + }, { "cell_type": "markdown", "id": "bf733a38-db84-4363-89e2-de6735c37230", "metadata": {}, "source": [ - "# Bedrock Chat\n", + "# BedrockChat\n", "\n", ">[Amazon Bedrock](https://aws.amazon.com/bedrock/) is a fully managed service that offers a choice of \n", "> high-performing foundation models (FMs) from leading AI companies like `AI21 Labs`, `Anthropic`, `Cohere`, \n", diff --git a/docs/docs/integrations/chat/cohere.ipynb b/docs/docs/integrations/chat/cohere.ipynb index e9b90af3d215c..8f05b1c66739c 100644 --- a/docs/docs/integrations/chat/cohere.ipynb +++ b/docs/docs/integrations/chat/cohere.ipynb @@ -1,11 +1,21 @@ { "cells": [ + { + "cell_type": "raw", + "id": "53fbf15f", + "metadata": {}, + "source": [ + "---\n", + "sidebar_label: Cohere\n", + "---" + ] + }, { "cell_type": "markdown", "id": "bf733a38-db84-4363-89e2-de6735c37230", "metadata": {}, "source": [ - "# Cohere\n", + "# ChatCohere\n", "\n", "This notebook covers how to get started with Cohere chat models." 
] diff --git a/docs/docs/integrations/chat/ernie.ipynb b/docs/docs/integrations/chat/ernie.ipynb index bcd28fd9cfb09..d98fcdb592f3f 100644 --- a/docs/docs/integrations/chat/ernie.ipynb +++ b/docs/docs/integrations/chat/ernie.ipynb @@ -1,10 +1,19 @@ { "cells": [ + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "---\n", + "sidebar_label: Ernie Bot Chat\n", + "---" + ] + }, { "cell_type": "markdown", "metadata": {}, "source": [ - "# ERNIE-Bot Chat\n", + "# ErnieBotChat\n", "\n", "[ERNIE-Bot](https://cloud.baidu.com/doc/WENXINWORKSHOP/s/jlil56u11) is a large language model developed by Baidu, covering a huge amount of Chinese data.\n", "This notebook covers how to get started with ErnieBot chat models.\n", diff --git a/docs/docs/integrations/chat/everlyai.ipynb b/docs/docs/integrations/chat/everlyai.ipynb index 3310f8f21397b..3f18b36f7e6e3 100644 --- a/docs/docs/integrations/chat/everlyai.ipynb +++ b/docs/docs/integrations/chat/everlyai.ipynb @@ -1,11 +1,21 @@ { "cells": [ + { + "cell_type": "raw", + "id": "5e45f35c", + "metadata": {}, + "source": [ + "---\n", + "sidebar_label: EverlyAI\n", + "---" + ] + }, { "cell_type": "markdown", "id": "642fd21c-600a-47a1-be96-6e1438b421a9", "metadata": {}, "source": [ - "# EverlyAI\n", + "# ChatEverlyAI\n", "\n", ">[EverlyAI](https://everlyai.xyz) allows you to run your ML models at scale in the cloud. 
It also provides API access to [several LLM models](https://everlyai.xyz).\n", "\n", diff --git a/docs/docs/integrations/chat/fireworks.ipynb b/docs/docs/integrations/chat/fireworks.ipynb index 6a5b0ad01d992..a0a3932bc5ef5 100644 --- a/docs/docs/integrations/chat/fireworks.ipynb +++ b/docs/docs/integrations/chat/fireworks.ipynb @@ -1,12 +1,22 @@ { "cells": [ + { + "cell_type": "raw", + "id": "529aeba9", + "metadata": {}, + "source": [ + "---\n", + "sidebar_label: Fireworks\n", + "---" + ] + }, { "attachments": {}, "cell_type": "markdown", "id": "642fd21c-600a-47a1-be96-6e1438b421a9", "metadata": {}, "source": [ - "# Fireworks\n", + "# ChatFireworks\n", "\n", ">[Fireworks](https://app.fireworks.ai/) accelerates product development on generative AI by creating an innovative AI experiment and production platform. \n", "\n", diff --git a/docs/docs/integrations/chat/google_vertex_ai_palm.ipynb b/docs/docs/integrations/chat/google_vertex_ai_palm.ipynb index af44d316e90a6..436e2fd14246e 100644 --- a/docs/docs/integrations/chat/google_vertex_ai_palm.ipynb +++ b/docs/docs/integrations/chat/google_vertex_ai_palm.ipynb @@ -1,11 +1,20 @@ { "cells": [ + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "---\n", + "sidebar_label: Google Cloud Vertex AI\n", + "---" + ] + }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ - "# Google Cloud Vertex AI \n", + "# ChatVertexAI\n", "\n", "Note: This is separate from the Google PaLM integration. Google has chosen to offer an enterprise version of PaLM through GCP, and this supports the models made available through there. 
\n", "\n", diff --git a/docs/docs/integrations/chat/hunyuan.ipynb b/docs/docs/integrations/chat/hunyuan.ipynb index 20779607dc56b..2cb334bfb937a 100644 --- a/docs/docs/integrations/chat/hunyuan.ipynb +++ b/docs/docs/integrations/chat/hunyuan.ipynb @@ -1,10 +1,19 @@ { "cells": [ + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "---\n", + "sidebar_label: Tencent Hunyuan\n", + "---" + ] + }, { "cell_type": "markdown", "metadata": {}, "source": [ - "# Tencent Hunyuan\n", + "# ChatHunyuan\n", "\n", "Hunyuan chat model API by Tencent. For more information, see [https://cloud.tencent.com/document/product/1729](https://cloud.tencent.com/document/product/1729)" ] @@ -54,7 +63,9 @@ "outputs": [ { "data": { - "text/plain": "AIMessage(content=\"J'aime programmer.\")" + "text/plain": [ + "AIMessage(content=\"J'aime programmer.\")" + ] }, "execution_count": 3, "metadata": {}, @@ -73,16 +84,23 @@ }, { "cell_type": "markdown", - "source": [ - "## For ChatHunyuan with Streaming" - ], "metadata": { "collapsed": false - } + }, + "source": [ + "## For ChatHunyuan with Streaming" + ] }, { "cell_type": "code", "execution_count": 2, + "metadata": { + "ExecuteTime": { + "end_time": "2023-10-19T10:20:41.507720Z", + "start_time": "2023-10-19T10:20:41.496456Z" + }, + "collapsed": false + }, "outputs": [], "source": [ "chat = ChatHunyuan(\n", @@ -91,22 +109,24 @@ " hunyuan_secret_key=\"YOUR_SECRET_KEY\",\n", " streaming=True,\n", ")" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-10-19T10:20:41.507720Z", - "start_time": "2023-10-19T10:20:41.496456Z" - } - } + ] }, { "cell_type": "code", "execution_count": 3, + "metadata": { + "ExecuteTime": { + "end_time": "2023-10-19T10:20:46.275673Z", + "start_time": "2023-10-19T10:20:44.241097Z" + }, + "collapsed": false + }, "outputs": [ { "data": { - "text/plain": "AIMessageChunk(content=\"J'aime programmer.\")" + "text/plain": [ + "AIMessageChunk(content=\"J'aime programmer.\")" + ] }, "execution_count": 3, 
"metadata": {}, @@ -121,26 +141,19 @@ " )\n", " ]\n", ")" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-10-19T10:20:46.275673Z", - "start_time": "2023-10-19T10:20:44.241097Z" - } - } + ] }, { "cell_type": "code", "execution_count": null, - "outputs": [], - "source": [], "metadata": { - "collapsed": false, "ExecuteTime": { "start_time": "2023-10-19T10:19:56.233477Z" - } - } + }, + "collapsed": false + }, + "outputs": [], + "source": [] } ], "metadata": { diff --git a/docs/docs/integrations/chat/konko.ipynb b/docs/docs/integrations/chat/konko.ipynb index 2250a242e55b3..6e4e19bf3dc80 100644 --- a/docs/docs/integrations/chat/konko.ipynb +++ b/docs/docs/integrations/chat/konko.ipynb @@ -1,10 +1,19 @@ { "cells": [ + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "---\n", + "sidebar_label: Konko\n", + "---" + ] + }, { "cell_type": "markdown", "metadata": {}, "source": [ - "# Konko\n", + "# ChatKonko\n", "\n", ">[Konko](https://www.konko.ai/) API is a fully managed Web API designed to help application developers:\n", "\n", diff --git a/docs/docs/integrations/chat/litellm.ipynb b/docs/docs/integrations/chat/litellm.ipynb index bd3c8ef2823c2..a93d595bfbdbe 100644 --- a/docs/docs/integrations/chat/litellm.ipynb +++ b/docs/docs/integrations/chat/litellm.ipynb @@ -1,12 +1,22 @@ { "cells": [ + { + "cell_type": "raw", + "id": "59148044", + "metadata": {}, + "source": [ + "---\n", + "sidebar_label: LiteLLM\n", + "---" + ] + }, { "attachments": {}, "cell_type": "markdown", "id": "bf733a38-db84-4363-89e2-de6735c37230", "metadata": {}, "source": [ - "# 🚅 LiteLLM\n", + "# ChatLiteLLM\n", "\n", "[LiteLLM](https://github.com/BerriAI/litellm) is a library that simplifies calling Anthropic, Azure, Huggingface, Replicate, etc. 
\n", "\n", diff --git a/docs/docs/integrations/chat/llama2_chat.ipynb b/docs/docs/integrations/chat/llama2_chat.ipynb index 48493973f685e..98cce09dfa0dd 100644 --- a/docs/docs/integrations/chat/llama2_chat.ipynb +++ b/docs/docs/integrations/chat/llama2_chat.ipynb @@ -1,11 +1,21 @@ { "cells": [ + { + "cell_type": "raw", + "id": "7320f16b", + "metadata": {}, + "source": [ + "---\n", + "sidebar_label: Llama 2 Chat\n", + "---" + ] + }, { "cell_type": "markdown", "id": "90a1faf2", "metadata": {}, "source": [ - "# Llama-2 Chat\n", + "# Llama2Chat\n", "\n", "This notebook shows how to augment Llama-2 `LLM`s with the `Llama2Chat` wrapper to support the [Llama-2 chat prompt format](https://huggingface.co/blog/llama2#how-to-prompt-llama-2). Several `LLM` implementations in LangChain can be used as interface to Llama-2 chat models. These include [HuggingFaceTextGenInference](https://python.langchain.com/docs/integrations/llms/huggingface_textgen_inference), [LlamaCpp](https://python.langchain.com/docs/use_cases/question_answering/how_to/local_retrieval_qa), [GPT4All](https://python.langchain.com/docs/integrations/llms/gpt4all), ..., to mention a few examples. 
\n", "\n", @@ -721,7 +731,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.18" + "version": "3.11.4" } }, "nbformat": 4, diff --git a/docs/docs/integrations/chat/llama_api.ipynb b/docs/docs/integrations/chat/llama_api.ipynb index 329904a6bc3e8..e75cd5b4b4292 100644 --- a/docs/docs/integrations/chat/llama_api.ipynb +++ b/docs/docs/integrations/chat/llama_api.ipynb @@ -1,11 +1,21 @@ { "cells": [ + { + "cell_type": "raw", + "id": "71b5cfca", + "metadata": {}, + "source": [ + "---\n", + "sidebar_label: Llama API\n", + "---" + ] + }, { "cell_type": "markdown", "id": "90a1faf2", "metadata": {}, "source": [ - "# Llama API\n", + "# ChatLlamaAPI\n", "\n", "This notebook shows how to use LangChain with [LlamaAPI](https://llama-api.com/) - a hosted version of Llama2 that adds in support for function calling." ] diff --git a/docs/docs/integrations/chat/minimax.ipynb b/docs/docs/integrations/chat/minimax.ipynb index 8b4d683d0fe84..e10eeb0d2a74d 100644 --- a/docs/docs/integrations/chat/minimax.ipynb +++ b/docs/docs/integrations/chat/minimax.ipynb @@ -1,11 +1,20 @@ { "cells": [ + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "---\n", + "sidebar_label: MiniMax\n", + "---" + ] + }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ - "# MiniMax\n", + "# MiniMaxChat\n", "\n", "[Minimax](https://api.minimax.chat) is a Chinese startup that provides LLM service for companies and individuals.\n", "\n", diff --git a/docs/docs/integrations/chat/ollama.ipynb b/docs/docs/integrations/chat/ollama.ipynb index 7fb4b2984cacf..7f069112e6ea1 100644 --- a/docs/docs/integrations/chat/ollama.ipynb +++ b/docs/docs/integrations/chat/ollama.ipynb @@ -1,10 +1,19 @@ { "cells": [ + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "---\n", + "sidebar_label: Ollama\n", + "---" + ] + }, { "cell_type": "markdown", "metadata": {}, "source": [ - "# Ollama\n", + "# ChatOllama\n", "\n", "[Ollama](https://ollama.ai/) 
allows you to run open-source large language models, such as LLaMA2, locally.\n", "\n", diff --git a/docs/docs/integrations/chat/ollama_functions.ipynb b/docs/docs/integrations/chat/ollama_functions.ipynb index a4f365bf3ae3f..707b8d74ccaff 100644 --- a/docs/docs/integrations/chat/ollama_functions.ipynb +++ b/docs/docs/integrations/chat/ollama_functions.ipynb @@ -1,10 +1,19 @@ { "cells": [ + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "---\n", + "sidebar_label: Ollama Functions\n", + "---" + ] + }, { "cell_type": "markdown", "metadata": {}, "source": [ - "# Ollama Functions\n", + "# OllamaFunctions\n", "\n", "This notebook shows how to use an experimental wrapper around Ollama that gives it the same API as OpenAI Functions.\n", "\n", diff --git a/docs/docs/integrations/chat/openai.ipynb b/docs/docs/integrations/chat/openai.ipynb index 4c66ab76570cc..5fa123d0c98dd 100644 --- a/docs/docs/integrations/chat/openai.ipynb +++ b/docs/docs/integrations/chat/openai.ipynb @@ -1,11 +1,21 @@ { "cells": [ + { + "cell_type": "raw", + "id": "afaf8039", + "metadata": {}, + "source": [ + "---\n", + "sidebar_label: OpenAI\n", + "---" + ] + }, { "cell_type": "markdown", "id": "e49f1e0d", "metadata": {}, "source": [ - "# OpenAI\n", + "# ChatOpenAI\n", "\n", "This notebook covers how to get started with OpenAI chat models." 
] diff --git a/docs/docs/integrations/chat/pai_eas_chat_endpoint.ipynb b/docs/docs/integrations/chat/pai_eas_chat_endpoint.ipynb index 55bde5e5311fe..395d64775f66e 100644 --- a/docs/docs/integrations/chat/pai_eas_chat_endpoint.ipynb +++ b/docs/docs/integrations/chat/pai_eas_chat_endpoint.ipynb @@ -1,10 +1,19 @@ { "cells": [ + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "---\n", + "sidebar_label: AliCloud PAI EAS\n", + "---" + ] + }, { "cell_type": "markdown", "metadata": {}, "source": [ - "# AliCloud PAI EAS\n", + "# PaiEasChatEndpoint\n", "Machine Learning Platform for AI of Alibaba Cloud is a machine learning or deep learning engineering platform intended for enterprises and developers. It provides easy-to-use, cost-effective, high-performance, and easy-to-scale plug-ins that can be applied to various industry scenarios. With over 140 built-in optimization algorithms, Machine Learning Platform for AI provides whole-process AI engineering capabilities including data labeling (PAI-iTAG), model building (PAI-Designer and PAI-DSW), model training (PAI-DLC), compilation optimization, and inference deployment (PAI-EAS). PAI-EAS supports different types of hardware resources, including CPUs and GPUs, and features high throughput and low latency. It allows you to deploy large-scale complex models with a few clicks and perform elastic scale-ins and scale-outs in real time. It also provides a comprehensive O&M and monitoring system." 
] }, diff --git a/docs/docs/integrations/chat/promptlayer_chatopenai.ipynb b/docs/docs/integrations/chat/promptlayer_chatopenai.ipynb index 4b20a5852da6a..623bfbe1ae296 100644 --- a/docs/docs/integrations/chat/promptlayer_chatopenai.ipynb +++ b/docs/docs/integrations/chat/promptlayer_chatopenai.ipynb @@ -1,12 +1,22 @@ { "cells": [ + { + "cell_type": "raw", + "id": "ce3672d3", + "metadata": {}, + "source": [ + "---\n", + "sidebar_label: PromptLayer ChatOpenAI\n", + "---" + ] + }, { "attachments": {}, "cell_type": "markdown", "id": "959300d4", "metadata": {}, "source": [ - "# PromptLayer ChatOpenAI\n", + "# PromptLayerChatOpenAI\n", "\n", "This example showcases how to connect to [PromptLayer](https://www.promptlayer.com) to start recording your ChatOpenAI requests." ] diff --git a/docs/docs/integrations/chat/tongyi.ipynb b/docs/docs/integrations/chat/tongyi.ipynb index f3c64f283065d..3de68b1e5ce13 100644 --- a/docs/docs/integrations/chat/tongyi.ipynb +++ b/docs/docs/integrations/chat/tongyi.ipynb @@ -1,5 +1,14 @@ { "cells": [ + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "---\n", + "sidebar_label: Tongyi Qwen\n", + "---" + ] + }, { "cell_type": "markdown", "metadata": { @@ -9,7 +18,7 @@ } }, "source": [ - "# Tongyi Qwen\n", + "# ChatTongyi\n", "Tongyi Qwen is a large language model developed by Alibaba's Damo Academy. It is capable of understanding user intent through natural language understanding and semantic analysis, based on user input in natural language. It provides services and assistance to users in different domains and tasks. 
By providing clear and detailed instructions, you can obtain results that better align with your expectations.\n", "In this notebook, we will introduce how to use langchain with [Tongyi](https://www.aliyun.com/product/dashscope) mainly in `Chat` corresponding\n", " to the package `langchain/chat_models` in langchain" @@ -41,7 +50,7 @@ }, "outputs": [ { - "name": "stdin", + "name": "stdout", "output_type": "stream", "text": [ " ········\n" diff --git a/docs/docs/integrations/chat/vllm.ipynb b/docs/docs/integrations/chat/vllm.ipynb index 5cc825d6d8df0..11023a201b037 100644 --- a/docs/docs/integrations/chat/vllm.ipynb +++ b/docs/docs/integrations/chat/vllm.ipynb @@ -1,5 +1,15 @@ { "cells": [ + { + "cell_type": "raw", + "id": "eb65deaa", + "metadata": {}, + "source": [ + "---\n", + "sidebar_label: vLLM Chat\n", + "---" + ] + }, { "cell_type": "markdown", "id": "eb7e5679-aa06-47e4-a1a3-b6b70e604017", diff --git a/docs/docs/integrations/chat/volcengine_maas.ipynb b/docs/docs/integrations/chat/volcengine_maas.ipynb index 32dd0c16d0964..e7c39c6b6f65d 100644 --- a/docs/docs/integrations/chat/volcengine_maas.ipynb +++ b/docs/docs/integrations/chat/volcengine_maas.ipynb @@ -1,5 +1,15 @@ { "cells": [ + { + "cell_type": "raw", + "id": "66107bdd", + "metadata": {}, + "source": [ + "---\n", + "sidebar_label: Volc Engine Maas\n", + "---" + ] + }, { "cell_type": "markdown", "id": "404758628c7b20f6", @@ -7,7 +17,7 @@ "collapsed": false }, "source": [ - "# Volc Engine Maas\n", + "# VolcEngineMaasChat\n", "\n", "This notebook provides you with a guide on how to get started with volc engine maas chat models." 
] @@ -86,7 +96,9 @@ "outputs": [ { "data": { - "text/plain": "AIMessage(content='好的,这是一个笑话:\\n\\n为什么鸟儿不会玩电脑游戏?\\n\\n因为它们没有翅膀!')" + "text/plain": [ + "AIMessage(content='好的,这是一个笑话:\\n\\n为什么鸟儿不会玩电脑游戏?\\n\\n因为它们没有翅膀!')" + ] }, "execution_count": 26, "metadata": {}, @@ -141,7 +153,9 @@ "outputs": [ { "data": { - "text/plain": "AIMessage(content='好的,这是一个笑话:\\n\\n三岁的女儿说她会造句了,妈妈让她用“年轻”造句,女儿说:“妈妈减肥,一年轻了好几斤”。')" + "text/plain": [ + "AIMessage(content='好的,这是一个笑话:\\n\\n三岁的女儿说她会造句了,妈妈让她用“年轻”造句,女儿说:“妈妈减肥,一年轻了好几斤”。')" + ] }, "execution_count": 28, "metadata": {}, diff --git a/docs/docs/integrations/chat/yandex.ipynb b/docs/docs/integrations/chat/yandex.ipynb index 598c9379562aa..0e1ced9b6397a 100644 --- a/docs/docs/integrations/chat/yandex.ipynb +++ b/docs/docs/integrations/chat/yandex.ipynb @@ -1,11 +1,21 @@ { "cells": [ + { + "cell_type": "raw", + "id": "b4154fbe", + "metadata": {}, + "source": [ + "---\n", + "sidebar_label: YandexGPT\n", + "---" + ] + }, { "cell_type": "markdown", "id": "af63c9db-e4bd-4d3b-a4d7-7927f5541734", "metadata": {}, "source": [ - "# YandexGPT\n", + "# ChatYandexGPT\n", "\n", "This notebook goes over how to use Langchain with [YandexGPT](https://cloud.yandex.com/en/services/yandexgpt) chat model.\n", "\n", From 8eab4d95c078ba4b2d2f1ac0d1cbd7283f7bf4ff Mon Sep 17 00:00:00 2001 From: Harrison Chase Date: Mon, 4 Dec 2023 20:18:15 -0800 Subject: [PATCH 41/49] Harrison/delegate from template (#14266) Co-authored-by: M.R. 
Sopacua <144725145+msopacua@users.noreply.github.com> --- libs/core/langchain_core/prompts/prompt.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/libs/core/langchain_core/prompts/prompt.py b/libs/core/langchain_core/prompts/prompt.py index 012b926953701..566cf02bd2115 100644 --- a/libs/core/langchain_core/prompts/prompt.py +++ b/libs/core/langchain_core/prompts/prompt.py @@ -1,6 +1,7 @@ """Prompt schema definition.""" from __future__ import annotations +import warnings from pathlib import Path from typing import Any, Dict, List, Literal, Optional, Union @@ -176,21 +177,30 @@ def from_examples( @classmethod def from_file( - cls, template_file: Union[str, Path], input_variables: List[str], **kwargs: Any + cls, + template_file: Union[str, Path], + input_variables: Optional[List[str]] = None, + **kwargs: Any, ) -> PromptTemplate: """Load a prompt from a file. Args: template_file: The path to the file containing the prompt template. - input_variables: A list of variable names the final prompt template - will expect. + input_variables: [DEPRECATED] A list of variable names the final prompt + template will expect. + + input_variables is ignored as from_file now delegates to from_template(). Returns: The prompt loaded from the file. 
""" with open(str(template_file), "r") as f: template = f.read() - return cls(input_variables=input_variables, template=template, **kwargs) + if input_variables: + warnings.warn( + "`input_variables' is deprecated and ignored.", DeprecationWarning + ) + return cls.from_template(template=template, **kwargs) @classmethod def from_template( From abbba6c7d831b4d7fc6ad7e091c579815c122dd9 Mon Sep 17 00:00:00 2001 From: Arnaud Gelas Date: Tue, 5 Dec 2023 05:27:22 +0100 Subject: [PATCH 42/49] openapi/planner.py: Deal with json in markdown output cases (#13576) - **Description:** In openapi/planner deal with json in markdown output cases - **Issue:** In some cases LLMs could return json in markdown which can't be loaded. - **Dependencies:** - **Tag maintainer:** @eyurtsev - **Twitter handle:** --------- Co-authored-by: Harrison Chase --- .../agents/agent_toolkits/openapi/planner.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/libs/langchain/langchain/agents/agent_toolkits/openapi/planner.py b/libs/langchain/langchain/agents/agent_toolkits/openapi/planner.py index 530418e28841a..754201f5bdc1e 100644 --- a/libs/langchain/langchain/agents/agent_toolkits/openapi/planner.py +++ b/libs/langchain/langchain/agents/agent_toolkits/openapi/planner.py @@ -36,6 +36,7 @@ from langchain.chains.llm import LLMChain from langchain.llms.openai import OpenAI from langchain.memory import ReadOnlySharedMemory +from langchain.output_parsers.json import parse_json_markdown from langchain.tools.base import BaseTool from langchain.tools.requests.tool import BaseRequestsTool from langchain.utilities.requests import RequestsWrapper @@ -80,7 +81,7 @@ class RequestsGetToolWithParsing(BaseRequestsTool, BaseTool): def _run(self, text: str) -> str: try: - data = json.loads(text) + data = parse_json_markdown(text) except json.JSONDecodeError as e: raise e data_params = data.get("params") @@ -110,7 +111,7 @@ class RequestsPostToolWithParsing(BaseRequestsTool, BaseTool): def 
_run(self, text: str) -> str: try: - data = json.loads(text) + data = parse_json_markdown(text) except json.JSONDecodeError as e: raise e response = self.requests_wrapper.post(data["url"], data["data"]) @@ -139,7 +140,7 @@ class RequestsPatchToolWithParsing(BaseRequestsTool, BaseTool): def _run(self, text: str) -> str: try: - data = json.loads(text) + data = parse_json_markdown(text) except json.JSONDecodeError as e: raise e response = self.requests_wrapper.patch(data["url"], data["data"]) @@ -168,7 +169,7 @@ class RequestsPutToolWithParsing(BaseRequestsTool, BaseTool): def _run(self, text: str) -> str: try: - data = json.loads(text) + data = parse_json_markdown(text) except json.JSONDecodeError as e: raise e response = self.requests_wrapper.put(data["url"], data["data"]) @@ -198,7 +199,7 @@ class RequestsDeleteToolWithParsing(BaseRequestsTool, BaseTool): def _run(self, text: str) -> str: try: - data = json.loads(text) + data = parse_json_markdown(text) except json.JSONDecodeError as e: raise e response = self.requests_wrapper.delete(data["url"]) From 74c7b799ef16a9987c5d13200b16f176b9d16ae1 Mon Sep 17 00:00:00 2001 From: Max Weng Date: Tue, 5 Dec 2023 12:27:54 +0800 Subject: [PATCH 43/49] migrate openai audio api (#13557) for issue https://github.com/langchain-ai/langchain/issues/13162 migrate openai audio api, as [openai v1.0.0 Migration Guide](https://github.com/openai/openai-python/discussions/742) --------- Co-authored-by: Double Max --- .../document_loaders/parsers/audio.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/libs/langchain/langchain/document_loaders/parsers/audio.py b/libs/langchain/langchain/document_loaders/parsers/audio.py index 073eb82ea0883..737cfd147a3bc 100644 --- a/libs/langchain/langchain/document_loaders/parsers/audio.py +++ b/libs/langchain/langchain/document_loaders/parsers/audio.py @@ -6,6 +6,7 @@ from langchain.document_loaders.base import BaseBlobParser from 
langchain.document_loaders.blob_loaders import Blob +from langchain.utils.openai import is_openai_v1 logger = logging.getLogger(__name__) @@ -36,9 +37,13 @@ def lazy_parse(self, blob: Blob) -> Iterator[Document]: "pydub package not found, please install it with " "`pip install pydub`" ) - # Set the API key if provided - if self.api_key: - openai.api_key = self.api_key + if is_openai_v1(): + # api_key optional, defaults to `os.environ['OPENAI_API_KEY']` + client = openai.OpenAI(api_key=self.api_key) + else: + # Set the API key if provided + if self.api_key: + openai.api_key = self.api_key # Audio file from disk audio = AudioSegment.from_file(blob.path) @@ -63,7 +68,12 @@ def lazy_parse(self, blob: Blob) -> Iterator[Document]: attempts = 0 while attempts < 3: try: - transcript = openai.Audio.transcribe("whisper-1", file_obj) + if is_openai_v1(): + transcript = client.audio.transcriptions.create( + model="whisper-1", file=file_obj + ) + else: + transcript = openai.Audio.transcribe("whisper-1", file_obj) break except Exception as e: attempts += 1 From 5cb3393e20d9b95f93e37289933146514e5c880a Mon Sep 17 00:00:00 2001 From: price-deshaw <137956190+price-deshaw@users.noreply.github.com> Date: Mon, 4 Dec 2023 23:28:13 -0500 Subject: [PATCH 44/49] update OpenAI function agents' llm validation (#13538) - **Description:** This PR modifies the LLM validation in OpenAI function agents to check whether the LLM supports OpenAI functions based on a property (`supports_oia_functions`) instead of whether the LLM passed to the agent `isinstance` of `ChatOpenAI`. This allows classes that extend `BaseChatModel` to be passed to these agents as long as they've been integrated with the OpenAI APIs and have this property set, even if they don't extend `ChatOpenAI`. 
- **Issue:** N/A - **Dependencies:** none --- .../conversational_retrieval/openai_functions.py | 3 --- .../langchain/agents/openai_functions_agent/base.py | 9 --------- .../agents/openai_functions_multi_agent/base.py | 7 ------- 3 files changed, 19 deletions(-) diff --git a/libs/langchain/langchain/agents/agent_toolkits/conversational_retrieval/openai_functions.py b/libs/langchain/langchain/agents/agent_toolkits/conversational_retrieval/openai_functions.py index 105a6047a14f9..f577124d2e2cd 100644 --- a/libs/langchain/langchain/agents/agent_toolkits/conversational_retrieval/openai_functions.py +++ b/libs/langchain/langchain/agents/agent_toolkits/conversational_retrieval/openai_functions.py @@ -10,7 +10,6 @@ AgentTokenBufferMemory, ) from langchain.agents.openai_functions_agent.base import OpenAIFunctionsAgent -from langchain.chat_models.openai import ChatOpenAI from langchain.memory.token_buffer import ConversationTokenBufferMemory from langchain.tools.base import BaseTool @@ -57,8 +56,6 @@ def create_conversational_retrieval_agent( An agent executor initialized appropriately """ - if not isinstance(llm, ChatOpenAI): - raise ValueError("Only supported with ChatOpenAI models.") if remember_intermediate_steps: memory: BaseMemory = AgentTokenBufferMemory( memory_key=memory_key, llm=llm, max_token_limit=max_token_limit diff --git a/libs/langchain/langchain/agents/openai_functions_agent/base.py b/libs/langchain/langchain/agents/openai_functions_agent/base.py index 1af9ff39ac0d6..1238c7d4912bf 100644 --- a/libs/langchain/langchain/agents/openai_functions_agent/base.py +++ b/libs/langchain/langchain/agents/openai_functions_agent/base.py @@ -25,7 +25,6 @@ ) from langchain.callbacks.base import BaseCallbackManager from langchain.callbacks.manager import Callbacks -from langchain.chat_models.openai import ChatOpenAI from langchain.tools.base import BaseTool from langchain.tools.render import format_tool_to_openai_function @@ -50,12 +49,6 @@ def get_allowed_tools(self) -> 
List[str]: """Get allowed tools.""" return [t.name for t in self.tools] - @root_validator - def validate_llm(cls, values: dict) -> dict: - if not isinstance(values["llm"], ChatOpenAI): - raise ValueError("Only supported with ChatOpenAI models.") - return values - @root_validator def validate_prompt(cls, values: dict) -> dict: prompt: BasePromptTemplate = values["prompt"] @@ -222,8 +215,6 @@ def from_llm_and_tools( **kwargs: Any, ) -> BaseSingleActionAgent: """Construct an agent from an LLM and tools.""" - if not isinstance(llm, ChatOpenAI): - raise ValueError("Only supported with ChatOpenAI models.") prompt = cls.create_prompt( extra_prompt_messages=extra_prompt_messages, system_message=system_message, diff --git a/libs/langchain/langchain/agents/openai_functions_multi_agent/base.py b/libs/langchain/langchain/agents/openai_functions_multi_agent/base.py index 8b8d1da9f095b..d25944863757d 100644 --- a/libs/langchain/langchain/agents/openai_functions_multi_agent/base.py +++ b/libs/langchain/langchain/agents/openai_functions_multi_agent/base.py @@ -26,7 +26,6 @@ ) from langchain.callbacks.base import BaseCallbackManager from langchain.callbacks.manager import Callbacks -from langchain.chat_models.openai import ChatOpenAI from langchain.tools import BaseTool # For backwards compatibility @@ -109,12 +108,6 @@ def get_allowed_tools(self) -> List[str]: """Get allowed tools.""" return [t.name for t in self.tools] - @root_validator - def validate_llm(cls, values: dict) -> dict: - if not isinstance(values["llm"], ChatOpenAI): - raise ValueError("Only supported with ChatOpenAI models.") - return values - @root_validator def validate_prompt(cls, values: dict) -> dict: prompt: BasePromptTemplate = values["prompt"] From ea0afd07ca1ef7715c0df606efe12ae9f9ad7e62 Mon Sep 17 00:00:00 2001 From: guillaumedelande <115003496+guillaumedelande@users.noreply.github.com> Date: Tue, 5 Dec 2023 05:29:20 +0100 Subject: [PATCH 45/49] Update azuresearch.py following recent change from 
azure-search-documents library (#13472) - **Description:** Reference library azure-search-documents has been adapted in version 11.4.0: 1. Notebook explaining Azure AI Search updated with most recent info 2. HnswVectorSearchAlgorithmConfiguration --> HnswAlgorithmConfiguration 3. PrioritizedFields(prioritized_content_fields) --> SemanticPrioritizedFields(content_fields) 4. SemanticSettings --> SemanticSearch 5. VectorSearch(algorithm_configurations) --> VectorSearch(configurations) --> Changes now reflected on Langchain: default vector search config from langchain is now compatible with officially released library from Azure. - **Issue:** Issue creating a new index (due to wrong class used for default vector search configuration) if using latest version of azure-search-documents with current langchain version - **Dependencies:** azure-search-documents>=11.4.0, - **Tag maintainer:** , --------- Co-authored-by: Erick Friis --- .../vectorstores/azuresearch.ipynb | 11 +- .../langchain/vectorstores/azuresearch.py | 120 +++++++++++++----- 2 files changed, 93 insertions(+), 38 deletions(-) diff --git a/docs/docs/integrations/vectorstores/azuresearch.ipynb b/docs/docs/integrations/vectorstores/azuresearch.ipynb index 1ffb30e5c7287..710e5b3fcb862 100644 --- a/docs/docs/integrations/vectorstores/azuresearch.ipynb +++ b/docs/docs/integrations/vectorstores/azuresearch.ipynb @@ -6,18 +6,17 @@ "collapsed": false }, "source": [ - "# Azure Cognitive Search\n", + "# Azure AI Search\n", "\n", - "[Azure Cognitive Search](https://learn.microsoft.com/azure/search/search-what-is-azure-search) (formerly known as `Azure Search`) is a cloud search service that gives developers infrastructure, APIs, and tools for building a rich search experience over private, heterogeneous content in web, mobile, and enterprise applications.\n", - "\n", - "Vector search is currently in public preview. It's available through the Azure portal, preview REST API and beta client libraries. 
[More info](https://learn.microsoft.com/en-us/azure/search/vector-search-overview) Beta client libraries are subject to potential breaking changes, please be sure to use the SDK package version identified below. azure-search-documents==11.4.0b8" + "[Azure AI Search](https://learn.microsoft.com/azure/search/search-what-is-azure-search) (formerly known as `Azure Search` and `Azure Cognitive Search`) is a cloud search service that gives developers infrastructure, APIs, and tools for building a rich search experience over private, heterogeneous content in web, mobile, and enterprise applications.\n", + "\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "# Install Azure Cognitive Search SDK" + "# Install Azure AI Search SDK" ] }, { @@ -26,7 +25,7 @@ "metadata": {}, "outputs": [], "source": [ - "!pip install azure-search-documents==11.4.0b8\n", + "!pip install azure-search-documents\n", "!pip install azure-identity" ] }, diff --git a/libs/langchain/langchain/vectorstores/azuresearch.py b/libs/langchain/langchain/vectorstores/azuresearch.py index 2cd1f9546de72..b818dc71fd59a 100644 --- a/libs/langchain/langchain/vectorstores/azuresearch.py +++ b/libs/langchain/langchain/vectorstores/azuresearch.py @@ -14,6 +14,7 @@ Optional, Tuple, Type, + Union, ) import numpy as np @@ -36,10 +37,13 @@ from azure.search.documents.indexes.models import ( ScoringProfile, SearchField, - SemanticSettings, VectorSearch, ) + try: + from azure.search.documents.indexes.models import SemanticSearch + except ImportError: + from azure.search.documents.indexes.models import SemanticSettings # <11.4.0 # Allow overriding field names for Azure Search FIELDS_ID = get_from_env( @@ -69,7 +73,7 @@ def _get_search_client( semantic_configuration_name: Optional[str] = None, fields: Optional[List[SearchField]] = None, vector_search: Optional[VectorSearch] = None, - semantic_settings: Optional[SemanticSettings] = None, + semantic_settings: Optional[Union[SemanticSearch, SemanticSettings]] = 
None, scoring_profiles: Optional[List[ScoringProfile]] = None, default_scoring_profile: Optional[str] = None, default_fields: Optional[List[SearchField]] = None, @@ -81,15 +85,30 @@ def _get_search_client( from azure.search.documents import SearchClient from azure.search.documents.indexes import SearchIndexClient from azure.search.documents.indexes.models import ( - HnswVectorSearchAlgorithmConfiguration, - PrioritizedFields, SearchIndex, SemanticConfiguration, SemanticField, - SemanticSettings, VectorSearch, ) + # class names changed for versions >= 11.4.0 + try: + from azure.search.documents.indexes.models import ( + HnswAlgorithmConfiguration, # HnswVectorSearchAlgorithmConfiguration is old + SemanticPrioritizedFields, # PrioritizedFields outdated + SemanticSearch, # SemanticSettings outdated + ) + + NEW_VERSION = True + except ImportError: + from azure.search.documents.indexes.models import ( + HnswVectorSearchAlgorithmConfiguration, + PrioritizedFields, + SemanticSettings, + ) + + NEW_VERSION = False + default_fields = default_fields or [] if key is None: credential = DefaultAzureCredential() @@ -135,34 +154,71 @@ def fmt_err(x: str) -> str: fields = default_fields # Vector search configuration if vector_search is None: - vector_search = VectorSearch( - algorithm_configurations=[ - HnswVectorSearchAlgorithmConfiguration( - name="default", - kind="hnsw", - parameters={ # type: ignore - "m": 4, - "efConstruction": 400, - "efSearch": 500, - "metric": "cosine", - }, - ) - ] - ) + if NEW_VERSION: + # >= 11.4.0: + # VectorSearch(algorithm_configuration) --> VectorSearch(algorithms) + # HnswVectorSearchAlgorithmConfiguration --> HnswAlgorithmConfiguration + vector_search = VectorSearch( + algorithms=[ + HnswAlgorithmConfiguration( + name="default", + kind="hnsw", + parameters={ # type: ignore + "m": 4, + "efConstruction": 400, + "efSearch": 500, + "metric": "cosine", + }, + ) + ] + ) + else: # < 11.4.0 + vector_search = VectorSearch( + algorithm_configurations=[ + 
HnswVectorSearchAlgorithmConfiguration( + name="default", + kind="hnsw", + parameters={ # type: ignore + "m": 4, + "efConstruction": 400, + "efSearch": 500, + "metric": "cosine", + }, + ) + ] + ) + # Create the semantic settings with the configuration if semantic_settings is None and semantic_configuration_name is not None: - semantic_settings = SemanticSettings( - configurations=[ - SemanticConfiguration( - name=semantic_configuration_name, - prioritized_fields=PrioritizedFields( - prioritized_content_fields=[ - SemanticField(field_name=FIELDS_CONTENT) - ], - ), - ) - ] - ) + if NEW_VERSION: + # <=11.4.0: SemanticSettings --> SemanticSearch + # PrioritizedFields(prioritized_content_fields) + # --> SemanticPrioritizedFields(content_fields) + semantic_settings = SemanticSearch( + configurations=[ + SemanticConfiguration( + name=semantic_configuration_name, + prioritized_fields=SemanticPrioritizedFields( + content_fields=[ + SemanticField(field_name=FIELDS_CONTENT) + ], + ), + ) + ] + ) + else: # < 11.4.0 + semantic_settings = SemanticSettings( + configurations=[ + SemanticConfiguration( + name=semantic_configuration_name, + prioritized_fields=PrioritizedFields( + prioritized_content_fields=[ + SemanticField(field_name=FIELDS_CONTENT) + ], + ), + ) + ] + ) # Create the search index with the semantic settings and vector search index = SearchIndex( name=index_name, @@ -196,7 +252,7 @@ def __init__( semantic_query_language: str = "en-us", fields: Optional[List[SearchField]] = None, vector_search: Optional[VectorSearch] = None, - semantic_settings: Optional[SemanticSettings] = None, + semantic_settings: Optional[Union[SemanticSearch, SemanticSettings]] = None, scoring_profiles: Optional[List[ScoringProfile]] = None, default_scoring_profile: Optional[str] = None, **kwargs: Any, From e0c03d6c44f42c3704a8ab28b45d4a424db9899b Mon Sep 17 00:00:00 2001 From: Philippe PRADOS Date: Tue, 5 Dec 2023 05:31:21 +0100 Subject: [PATCH 46/49] Pprados/lite google drive (#13175) - Fix bug 
in the document - Add clarification on the use of langchain-google drive. --- .../document_loaders/google_drive.ipynb | 25 ++++++++----------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/docs/docs/integrations/document_loaders/google_drive.ipynb b/docs/docs/integrations/document_loaders/google_drive.ipynb index 2e3067e584344..1bfab2c8f295d 100644 --- a/docs/docs/integrations/document_loaders/google_drive.ipynb +++ b/docs/docs/integrations/document_loaders/google_drive.ipynb @@ -217,7 +217,7 @@ "It's compatible with the ̀`langchain.document_loaders.GoogleDriveLoader` and can be used\n", "in its place.\n", "\n", - "To be compatible with containers, the authentication uses an environment variable ̀GOOGLE_ACCOUNT_FILE` to credential file (for user or service)." + "To be compatible with containers, the authentication uses an environment variable `̀GOOGLE_ACCOUNT_FILE` to credential file (for user or service)." ] }, { @@ -331,6 +331,7 @@ "Some pre-formated request are proposed (use `{query}`, `{folder_id}` and/or `{mime_type}`):\n", "\n", "You can customize the criteria to select the files. 
A set of predefined filter are proposed:\n", + "\n", "| template | description |\n", "| -------------------------------------- | --------------------------------------------------------------------- |\n", "| gdrive-all-in-folder | Return all compatible files from a `folder_id` |\n", @@ -401,6 +402,14 @@ "id": "375bb465-8f69-407b-94bd-ffa3718ef500", "metadata": {}, "source": [ + "The conversion can manage in Markdown format:\n", + "- bullet\n", + "- link\n", + "- table\n", + "- titles\n", + "\n", + "Set the attribut `return_link` to `True` to export links.\n", + "\n", "#### Modes for GSlide and GSheet\n", "The parameter mode accepts different values:\n", "\n", @@ -408,12 +417,6 @@ "- \"snippets\": return the description of each file (set in metadata of Google Drive files).\n", "\n", "\n", - "The conversion can manage in Markdown format:\n", - "- bullet\n", - "- link\n", - "- table\n", - "- titles\n", - "\n", "The parameter `gslide_mode` accepts different values:\n", "\n", "- \"single\" : one document with <PAGE BREAK>\n", @@ -503,14 +506,6 @@ " print(\"---\")\n", " print(doc.page_content.strip()[:60] + \"...\")" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "51efa73a-4e2d-4f9c-abaf-6c9bde2ff69d", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { From dcccf8fa6635fa04622a9d08437d023081422bfc Mon Sep 17 00:00:00 2001 From: Joan Fontanals Date: Tue, 5 Dec 2023 05:40:33 +0100 Subject: [PATCH 47/49] adapt Jina Embeddings to new Jina AI Embedding API (#13658) - **Description:** Adapt JinaEmbeddings to run with the new Jina AI Embedding platform - **Twitter handle:** https://twitter.com/JinaAI_ --------- Co-authored-by: Joan Fontanals Martinez Co-authored-by: Harrison Chase --- docs/docs/integrations/providers/jina.mdx | 69 ++----------- .../integrations/text_embedding/jina.ipynb | 24 +++-- libs/langchain/langchain/embeddings/jina.py | 98 +++++++------------ 3 files changed, 58 insertions(+), 133 deletions(-) diff --git 
a/docs/docs/integrations/providers/jina.mdx b/docs/docs/integrations/providers/jina.mdx index 181b57a4abf99..a3900c446df86 100644 --- a/docs/docs/integrations/providers/jina.mdx +++ b/docs/docs/integrations/providers/jina.mdx @@ -1,75 +1,20 @@ # Jina -This page covers how to use the Jina ecosystem within LangChain. +This page covers how to use the Jina Embeddings within LangChain. It is broken into two parts: installation and setup, and then references to specific Jina wrappers. ## Installation and Setup -- Install the Python SDK with `pip install jina` -- Get a Jina AI Cloud auth token from [here](https://cloud.jina.ai/settings/tokens) and set it as an environment variable (`JINA_AUTH_TOKEN`) - -## Wrappers - -### Embeddings +- Get a Jina AI API token from [here](https://jina.ai/embeddings/) and set it as an environment variable (`JINA_API_TOKEN`) There exists a Jina Embeddings wrapper, which you can access with -```python -from langchain.embeddings import JinaEmbeddings -``` -For a more detailed walkthrough of this, see [this notebook](/docs/integrations/text_embedding/jina) - -## Deployment - -[Langchain-serve](https://github.com/jina-ai/langchain-serve), powered by Jina, helps take LangChain apps to production with easy to use REST/WebSocket APIs and Slack bots. - -### Usage - -Install the package from PyPI. - -```bash -pip install langchain-serve -``` - -Wrap your LangChain app with the `@serving` decorator. ```python -# app.py -from lcserve import serving - -@serving -def ask(input: str) -> str: - from langchain.chains import LLMChain - from langchain.llms import OpenAI - from langchain.agents import AgentExecutor, ZeroShotAgent - - tools = [...] 
# list of tools - prompt = ZeroShotAgent.create_prompt( - tools, input_variables=["input", "agent_scratchpad"], - ) - llm_chain = LLMChain(llm=OpenAI(temperature=0), prompt=prompt) - agent = ZeroShotAgent( - llm_chain=llm_chain, allowed_tools=[tool.name for tool in tools] - ) - agent_executor = AgentExecutor.from_agent_and_tools( - agent=agent, - tools=tools, - verbose=True, - ) - return agent_executor.run(input) -``` - -Deploy on Jina AI Cloud with `lc-serve deploy jcloud app`. Once deployed, we can send a POST request to the API endpoint to get a response. +from langchain.embeddings import JinaEmbeddings -```bash -curl -X 'POST' 'https://.wolf.jina.ai/ask' \ - -d '{ - "input": "Your Question here?", - "envs": { - "OPENAI_API_KEY": "sk-***" - } -}' +# you can pas jina_api_key, if none is passed it will be taken from `JINA_API_TOKEN` environment variable +embeddings = JinaEmbeddings(jina_api_key='jina_**', model_name='jina-embeddings-v2-base-en') ``` -You can also self-host the app on your infrastructure with Docker-compose or Kubernetes. See [here](https://github.com/jina-ai/langchain-serve#-self-host-llm-apps-with-docker-compose-or-kubernetes) for more details. - +You can check the list of available models from [here](https://jina.ai/embeddings/) -Langchain-serve also allows to deploy the apps with WebSocket APIs and Slack Bots both on [Jina AI Cloud](https://cloud.jina.ai/) or self-hosted infrastructure. 
+For a more detailed walkthrough of this, see [this notebook](/docs/integrations/text_embedding/jina.ipynb) diff --git a/docs/docs/integrations/text_embedding/jina.ipynb b/docs/docs/integrations/text_embedding/jina.ipynb index cba95327425bf..7eb75678764f1 100644 --- a/docs/docs/integrations/text_embedding/jina.ipynb +++ b/docs/docs/integrations/text_embedding/jina.ipynb @@ -12,7 +12,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "d94c62b4", "metadata": {}, "outputs": [], @@ -28,7 +28,7 @@ "outputs": [], "source": [ "embeddings = JinaEmbeddings(\n", - " jina_auth_token=jina_auth_token, model_name=\"ViT-B-32::openai\"\n", + " jina_api_key=\"jina_*\", model_name=\"jina-embeddings-v2-base-en\"\n", ")" ] }, @@ -55,28 +55,32 @@ { "cell_type": "code", "execution_count": null, - "id": "b790fd09", + "id": "aea3ca33-1e6e-499c-8284-b7e26f38c514", "metadata": {}, "outputs": [], "source": [ - "doc_result = embeddings.embed_documents([text])" + "print(query_result)" ] }, { - "cell_type": "markdown", - "id": "6f3607a0", + "cell_type": "code", + "execution_count": null, + "id": "b790fd09", "metadata": {}, + "outputs": [], "source": [ - "In the above example, `ViT-B-32::openai`, OpenAI's pretrained `ViT-B-32` model is used. For a full list of models, see [here](https://cloud.jina.ai/user/inference/model/63dca9df5a0da83009d519cd)." 
+ "doc_result = embeddings.embed_documents([text])" ] }, { "cell_type": "code", "execution_count": null, - "id": "cd5f148e", + "id": "c2e6b743-768c-4d7e-a331-27d5f0e8e30e", "metadata": {}, "outputs": [], - "source": [] + "source": [ + "print(doc_result)" + ] } ], "metadata": { @@ -95,7 +99,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.1" + "version": "3.10.11" } }, "nbformat": 4, diff --git a/libs/langchain/langchain/embeddings/jina.py b/libs/langchain/langchain/embeddings/jina.py index a781f908f2d38..ebf941870fbd1 100644 --- a/libs/langchain/langchain/embeddings/jina.py +++ b/libs/langchain/langchain/embeddings/jina.py @@ -1,4 +1,3 @@ -import os from typing import Any, Dict, List, Optional import requests @@ -7,69 +6,54 @@ from langchain.utils import get_from_dict_or_env +JINA_API_URL: str = "https://api.jina.ai/v1/embeddings" + class JinaEmbeddings(BaseModel, Embeddings): """Jina embedding models.""" - client: Any #: :meta private: - - model_name: str = "ViT-B-32::openai" - """Model name to use.""" - - jina_auth_token: Optional[str] = None - jina_api_url: str = "https://api.clip.jina.ai/api/v1/models/" - request_headers: Optional[dict] = None + session: Any #: :meta private: + model_name: str = "jina-embeddings-v2-base-en" + jina_api_key: Optional[str] = None @root_validator() def validate_environment(cls, values: Dict) -> Dict: """Validate that auth token exists in environment.""" - # Set Auth - jina_auth_token = get_from_dict_or_env( - values, "jina_auth_token", "JINA_AUTH_TOKEN" - ) - values["jina_auth_token"] = jina_auth_token - values["request_headers"] = (("authorization", jina_auth_token),) - - # Test that package is installed try: - import jina - except ImportError: - raise ImportError( - "Could not import `jina` python package. " - "Please install it with `pip install jina`." 
- ) + jina_api_key = get_from_dict_or_env(values, "jina_api_key", "JINA_API_KEY") + except ValueError as original_exc: + try: + jina_api_key = get_from_dict_or_env( + values, "jina_auth_token", "JINA_AUTH_TOKEN" + ) + except ValueError: + raise original_exc + session = requests.Session() + session.headers.update( + { + "Authorization": f"Bearer {jina_api_key}", + "Accept-Encoding": "identity", + "Content-type": "application/json", + } + ) + values["session"] = session + return values - # Setup client - jina_api_url = os.environ.get("JINA_API_URL", values["jina_api_url"]) - model_name = values["model_name"] - try: - resp = requests.get( - jina_api_url + f"?model_name={model_name}", - headers={"Authorization": jina_auth_token}, - ) + def _embed(self, texts: List[str]) -> List[List[float]]: + # Call Jina AI Embedding API + resp = self.session.post( # type: ignore + JINA_API_URL, json={"input": texts, "model": self.model_name} + ).json() + if "data" not in resp: + raise RuntimeError(resp["detail"]) - if resp.status_code == 401: - raise ValueError( - "The given Jina auth token is invalid. " - "Please check your Jina auth token." - ) - elif resp.status_code == 404: - raise ValueError( - f"The given model name `{model_name}` is not valid. " - f"Please go to https://cloud.jina.ai/user/inference " - f"and create a model with the given model name." 
- ) - resp.raise_for_status() + embeddings = resp["data"] - endpoint = resp.json()["endpoints"]["grpc"] - values["client"] = jina.Client(host=endpoint) - except requests.exceptions.HTTPError as err: - raise ValueError(f"Error: {err!r}") - return values + # Sort resulting embeddings by index + sorted_embeddings = sorted(embeddings, key=lambda e: e["index"]) # type: ignore - def _post(self, docs: List[Any], **kwargs: Any) -> Any: - payload = dict(inputs=docs, metadata=self.request_headers, **kwargs) - return self.client.post(on="/encode", **payload) + # Return just the embeddings + return [result["embedding"] for result in sorted_embeddings] def embed_documents(self, texts: List[str]) -> List[List[float]]: """Call out to Jina's embedding endpoint. @@ -78,12 +62,7 @@ def embed_documents(self, texts: List[str]) -> List[List[float]]: Returns: List of embeddings, one for each text. """ - from docarray import Document, DocumentArray - - embeddings = self._post( - docs=DocumentArray([Document(text=t) for t in texts]) - ).embeddings - return [list(map(float, e)) for e in embeddings] + return self._embed(texts) def embed_query(self, text: str) -> List[float]: """Call out to Jina's embedding endpoint. @@ -92,7 +71,4 @@ def embed_query(self, text: str) -> List[float]: Returns: Embeddings for the text. """ - from docarray import Document, DocumentArray - - embedding = self._post(docs=DocumentArray([Document(text=text)])).embeddings[0] - return list(map(float, embedding)) + return self._embed([text])[0] From 77a15fa9888a3e81a014895a6ec3f1b34c016d06 Mon Sep 17 00:00:00 2001 From: Sean Bearden <72461227+seanbearden@users.noreply.github.com> Date: Mon, 4 Dec 2023 20:48:09 -0800 Subject: [PATCH 48/49] Added ability to pass arguments to the Playwright browser (#13146) - **Description:** Enhanced `create_sync_playwright_browser` and `create_async_playwright_browser` functions to accept a list of arguments. 
These arguments are now forwarded to `browser.chromium.launch()` for customizable browser instantiation. - **Issue:** #13143 - **Dependencies:** None - **Tag maintainer:** @eyurtsev, - **Twitter handle:** Dr_Bearden --------- Co-authored-by: Harrison Chase --- .../langchain/tools/playwright/utils.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/libs/langchain/langchain/tools/playwright/utils.py b/libs/langchain/langchain/tools/playwright/utils.py index eb874f2eb4d65..692288fdde318 100644 --- a/libs/langchain/langchain/tools/playwright/utils.py +++ b/libs/langchain/langchain/tools/playwright/utils.py @@ -2,7 +2,7 @@ from __future__ import annotations import asyncio -from typing import TYPE_CHECKING, Any, Coroutine, TypeVar +from typing import TYPE_CHECKING, Any, Coroutine, List, Optional, TypeVar if TYPE_CHECKING: from playwright.async_api import Browser as AsyncBrowser @@ -50,12 +50,15 @@ def get_current_page(browser: SyncBrowser) -> SyncPage: return context.pages[-1] -def create_async_playwright_browser(headless: bool = True) -> AsyncBrowser: +def create_async_playwright_browser( + headless: bool = True, args: Optional[List[str]] = None +) -> AsyncBrowser: """ Create an async playwright browser. Args: headless: Whether to run the browser in headless mode. Defaults to True. + args: arguments to pass to browser.chromium.launch Returns: AsyncBrowser: The playwright browser. @@ -63,15 +66,18 @@ def create_async_playwright_browser(headless: bool = True) -> AsyncBrowser: from playwright.async_api import async_playwright browser = run_async(async_playwright().start()) - return run_async(browser.chromium.launch(headless=headless)) + return run_async(browser.chromium.launch(headless=headless, args=args)) -def create_sync_playwright_browser(headless: bool = True) -> SyncBrowser: +def create_sync_playwright_browser( + headless: bool = True, args: Optional[List[str]] = None +) -> SyncBrowser: """ Create a playwright browser. 
Args: headless: Whether to run the browser in headless mode. Defaults to True. + args: arguments to pass to browser.chromium.launch Returns: SyncBrowser: The playwright browser. @@ -79,7 +85,7 @@ def create_sync_playwright_browser(headless: bool = True) -> SyncBrowser: from playwright.sync_api import sync_playwright browser = sync_playwright().start() - return browser.chromium.launch(headless=headless) + return browser.chromium.launch(headless=headless, args=args) T = TypeVar("T") From f758c8adc43ebbbdb3a13caa5a022a2d043229cc Mon Sep 17 00:00:00 2001 From: Eun Hye Kim Date: Tue, 5 Dec 2023 13:54:08 +0900 Subject: [PATCH 49/49] Fix #11737 issue (extra_tools option of create_pandas_dataframe_agent is not working) (#13203) - **Description:** Fix #11737 issue (extra_tools option of create_pandas_dataframe_agent is not working), - **Issue:** #11737 , - **Dependencies:** no, - **Tag maintainer:** @baskaryan, @eyurtsev, @hwchase17 I needed this method at work, so I modified it myself and used it. There is a similar issue(#11737) and PR(#13018) of @PyroGenesis, so I combined my code at the original PR. You may be busy, but it would be great help for me if you checked. Thank you. - **Twitter handle:** @lunara_x If you need an .ipynb example about this, please tag me. I will share what I am working on after removing any work-related content. 
--------- Co-authored-by: Harrison Chase --- .../agents/agent_toolkits/pandas/base.py | 33 ++++++++++++------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/libs/experimental/langchain_experimental/agents/agent_toolkits/pandas/base.py b/libs/experimental/langchain_experimental/agents/agent_toolkits/pandas/base.py index ef5e1eae8a566..cc5205a3dcc83 100644 --- a/libs/experimental/langchain_experimental/agents/agent_toolkits/pandas/base.py +++ b/libs/experimental/langchain_experimental/agents/agent_toolkits/pandas/base.py @@ -33,7 +33,8 @@ def _get_multi_prompt( input_variables: Optional[List[str]] = None, include_df_in_prompt: Optional[bool] = True, number_of_head_rows: int = 5, -) -> Tuple[BasePromptTemplate, List[PythonAstREPLTool]]: + extra_tools: Sequence[BaseTool] = (), +) -> Tuple[BasePromptTemplate, List[BaseTool]]: num_dfs = len(dfs) if suffix is not None: suffix_to_use = suffix @@ -55,12 +56,13 @@ def _get_multi_prompt( df_locals = {} for i, dataframe in enumerate(dfs): df_locals[f"df{i + 1}"] = dataframe - tools = [PythonAstREPLTool(locals=df_locals)] - + tools = [PythonAstREPLTool(locals=df_locals)] + list(extra_tools) prompt = ZeroShotAgent.create_prompt( - tools, prefix=prefix, suffix=suffix_to_use, input_variables=input_variables + tools, + prefix=prefix, + suffix=suffix_to_use, + input_variables=input_variables, ) - partial_prompt = prompt.partial() if "dfs_head" in input_variables: dfs_head = "\n\n".join([d.head(number_of_head_rows).to_markdown() for d in dfs]) @@ -77,7 +79,8 @@ def _get_single_prompt( input_variables: Optional[List[str]] = None, include_df_in_prompt: Optional[bool] = True, number_of_head_rows: int = 5, -) -> Tuple[BasePromptTemplate, List[PythonAstREPLTool]]: + extra_tools: Sequence[BaseTool] = (), +) -> Tuple[BasePromptTemplate, List[BaseTool]]: if suffix is not None: suffix_to_use = suffix include_df_head = True @@ -96,10 +99,13 @@ def _get_single_prompt( if prefix is None: prefix = PREFIX - tools = 
[PythonAstREPLTool(locals={"df": df})] + tools = [PythonAstREPLTool(locals={"df": df})] + list(extra_tools) prompt = ZeroShotAgent.create_prompt( - tools, prefix=prefix, suffix=suffix_to_use, input_variables=input_variables + tools, + prefix=prefix, + suffix=suffix_to_use, + input_variables=input_variables, ) partial_prompt = prompt.partial() @@ -117,7 +123,8 @@ def _get_prompt_and_tools( input_variables: Optional[List[str]] = None, include_df_in_prompt: Optional[bool] = True, number_of_head_rows: int = 5, -) -> Tuple[BasePromptTemplate, List[PythonAstREPLTool]]: + extra_tools: Sequence[BaseTool] = (), +) -> Tuple[BasePromptTemplate, List[BaseTool]]: try: import pandas as pd @@ -141,6 +148,7 @@ def _get_prompt_and_tools( input_variables=input_variables, include_df_in_prompt=include_df_in_prompt, number_of_head_rows=number_of_head_rows, + extra_tools=extra_tools, ) else: if not isinstance(df, pd.DataFrame): @@ -152,6 +160,7 @@ def _get_prompt_and_tools( input_variables=input_variables, include_df_in_prompt=include_df_in_prompt, number_of_head_rows=number_of_head_rows, + extra_tools=extra_tools, ) @@ -287,6 +296,7 @@ def create_pandas_dataframe_agent( ) -> AgentExecutor: """Construct a pandas agent from an LLM and dataframe.""" agent: BaseSingleActionAgent + base_tools: Sequence[BaseTool] if agent_type == AgentType.ZERO_SHOT_REACT_DESCRIPTION: prompt, base_tools = _get_prompt_and_tools( df, @@ -295,8 +305,9 @@ def create_pandas_dataframe_agent( input_variables=input_variables, include_df_in_prompt=include_df_in_prompt, number_of_head_rows=number_of_head_rows, + extra_tools=extra_tools, ) - tools = base_tools + list(extra_tools) + tools = base_tools llm_chain = LLMChain( llm=llm, prompt=prompt, @@ -318,7 +329,7 @@ def create_pandas_dataframe_agent( include_df_in_prompt=include_df_in_prompt, number_of_head_rows=number_of_head_rows, ) - tools = base_tools + list(extra_tools) + tools = list(base_tools) + list(extra_tools) agent = OpenAIFunctionsAgent( llm=llm, 
prompt=_prompt,