From 2213fc9711f944401d8bfeb386b3b7520677c8e7 Mon Sep 17 00:00:00 2001 From: Harrison Chase Date: Mon, 4 Dec 2023 19:42:15 -0800 Subject: [PATCH 01/63] Harrison/bookend ai (#14258) Co-authored-by: stvhu-bookend <142813359+stvhu-bookend@users.noreply.github.com> --- .../integrations/text_embedding/bookend.ipynb | 89 ++++++++++++++++++ .../langchain/embeddings/__init__.py | 2 + .../langchain/langchain/embeddings/bookend.py | 91 +++++++++++++++++++ .../embeddings/test_bookend.py | 27 ++++++ .../unit_tests/embeddings/test_imports.py | 1 + 5 files changed, 210 insertions(+) create mode 100644 docs/docs/integrations/text_embedding/bookend.ipynb create mode 100644 libs/langchain/langchain/embeddings/bookend.py create mode 100644 libs/langchain/tests/integration_tests/embeddings/test_bookend.py diff --git a/docs/docs/integrations/text_embedding/bookend.ipynb b/docs/docs/integrations/text_embedding/bookend.ipynb new file mode 100644 index 0000000000000..3277d49a6c885 --- /dev/null +++ b/docs/docs/integrations/text_embedding/bookend.ipynb @@ -0,0 +1,89 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "2c591a6a42ac7f0", + "metadata": {}, + "source": [ + "# Bookend AI\n", + "\n", + "Let's load the Bookend AI Embeddings class." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d94c62b4", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.embeddings import BookendEmbeddings" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "523a09e3", + "metadata": {}, + "outputs": [], + "source": [ + "embeddings = BookendEmbeddings(\n", + " domain=\"your_domain\",\n", + " api_token=\"your_api_token\",\n", + " model_id=\"your_embeddings_model_id\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b212bd5a", + "metadata": {}, + "outputs": [], + "source": [ + "text = \"This is a test document.\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "57db66bd", + "metadata": {}, + "outputs": [], + "source": [ + "query_result = embeddings.embed_query(text)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b790fd09", + "metadata": {}, + "outputs": [], + "source": [ + "doc_result = embeddings.embed_documents([text])" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.1" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/libs/langchain/langchain/embeddings/__init__.py b/libs/langchain/langchain/embeddings/__init__.py index 8f2887942562e..3710a6e1969fa 100644 --- a/libs/langchain/langchain/embeddings/__init__.py +++ b/libs/langchain/langchain/embeddings/__init__.py @@ -22,6 +22,7 @@ from langchain.embeddings.azure_openai import AzureOpenAIEmbeddings from langchain.embeddings.baidu_qianfan_endpoint import QianfanEmbeddingsEndpoint from langchain.embeddings.bedrock import BedrockEmbeddings +from langchain.embeddings.bookend import 
BookendEmbeddings from langchain.embeddings.cache import CacheBackedEmbeddings from langchain.embeddings.clarifai import ClarifaiEmbeddings from langchain.embeddings.cohere import CohereEmbeddings @@ -127,6 +128,7 @@ "QianfanEmbeddingsEndpoint", "JohnSnowLabsEmbeddings", "VoyageEmbeddings", + "BookendEmbeddings", ] diff --git a/libs/langchain/langchain/embeddings/bookend.py b/libs/langchain/langchain/embeddings/bookend.py new file mode 100644 index 0000000000000..0b1116c4ba947 --- /dev/null +++ b/libs/langchain/langchain/embeddings/bookend.py @@ -0,0 +1,91 @@ +"""Wrapper around Bookend AI embedding models.""" + +import json +from typing import Any, List + +import requests + +from langchain.pydantic_v1 import BaseModel, Field +from langchain.schema.embeddings import Embeddings + +API_URL = "https://api.bookend.ai/" +DEFAULT_TASK = "embeddings" +PATH = "/models/predict" + + +class BookendEmbeddings(BaseModel, Embeddings): + """Bookend AI sentence_transformers embedding models. + + Example: + .. code-block:: python + + from langchain.embeddings import BookendEmbeddings + + bookend = BookendEmbeddings( + domain={domain} + api_token={api_token} + model_id={model_id} + ) + bookend.embed_documents([ + "Please put on these earmuffs because I can't you hear.", + "Baby wipes are made of chocolate stardust.", + ]) + bookend.embed_query( + "She only paints with bold colors; she does not like pastels." 
+ ) + """ + + domain: str + """Request for a domain at https://bookend.ai/ to use this embeddings module.""" + api_token: str + """Request for an API token at https://bookend.ai/ to use this embeddings module.""" + model_id: str + """Embeddings model ID to use.""" + auth_header: dict = Field(default_factory=dict) + + def __init__(self, **kwargs: Any): + super().__init__(**kwargs) + self.auth_header = {"Authorization": "Basic {}".format(self.api_token)} + + def embed_documents(self, texts: List[str]) -> List[List[float]]: + """Embed documents using a Bookend deployed embeddings model. + + Args: + texts: The list of texts to embed. + + Returns: + List of embeddings, one for each text. + """ + result = [] + headers = self.auth_header + headers["Content-Type"] = "application/json; charset=utf-8" + params = { + "model_id": self.model_id, + "task": DEFAULT_TASK, + } + + for text in texts: + data = json.dumps( + {"text": text, "question": None, "context": None, "instruction": None} + ) + r = requests.request( + "POST", + API_URL + self.domain + PATH, + headers=headers, + params=params, + data=data, + ) + result.append(r.json()[0]["data"]) + + return result + + def embed_query(self, text: str) -> List[float]: + """Embed a query using a Bookend deployed embeddings model. + + Args: + text: The text to embed. + + Returns: + Embeddings for the text. 
+ """ + return self.embed_documents([text])[0] diff --git a/libs/langchain/tests/integration_tests/embeddings/test_bookend.py b/libs/langchain/tests/integration_tests/embeddings/test_bookend.py new file mode 100644 index 0000000000000..940f67063802c --- /dev/null +++ b/libs/langchain/tests/integration_tests/embeddings/test_bookend.py @@ -0,0 +1,27 @@ +"""Test Bookend AI embeddings.""" +from langchain.embeddings.bookend import BookendEmbeddings + + +def test_bookend_embedding_documents() -> None: + """Test Bookend AI embeddings for documents.""" + documents = ["foo bar", "bar foo"] + embedding = BookendEmbeddings( + domain="", + api_token="", + model_id="", + ) + output = embedding.embed_documents(documents) + assert len(output) == 2 + assert len(output[0]) == 768 + + +def test_bookend_embedding_query() -> None: + """Test Bookend AI embeddings for query.""" + document = "foo bar" + embedding = BookendEmbeddings( + domain="", + api_token="", + model_id="", + ) + output = embedding.embed_query(document) + assert len(output) == 768 diff --git a/libs/langchain/tests/unit_tests/embeddings/test_imports.py b/libs/langchain/tests/unit_tests/embeddings/test_imports.py index 9de69602dc6a7..8fe5df0994a50 100644 --- a/libs/langchain/tests/unit_tests/embeddings/test_imports.py +++ b/libs/langchain/tests/unit_tests/embeddings/test_imports.py @@ -53,6 +53,7 @@ "QianfanEmbeddingsEndpoint", "JohnSnowLabsEmbeddings", "VoyageEmbeddings", + "BookendEmbeddings", ] From fcc8e5e83909a2474488e1932d1043591cb57af1 Mon Sep 17 00:00:00 2001 From: Hamza Ahmed <29238429+Hamzahmed@users.noreply.github.com> Date: Mon, 4 Dec 2023 22:44:30 -0500 Subject: [PATCH 02/63] Update geodataframe.py (#13573) here it is validating shapely.geometry.point.Point: if not isinstance(data_frame[page_content_column].iloc[0], gpd.GeoSeries): raise ValueError( f"Expected data_frame[{page_content_column}] to be a GeoSeries" you need it to validate the geoSeries and not the shapely.geometry.point.Point if not 
isinstance(data_frame[page_content_column], gpd.GeoSeries): raise ValueError( f"Expected data_frame[{page_content_column}] to be a GeoSeries" --- libs/langchain/langchain/document_loaders/geodataframe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/langchain/langchain/document_loaders/geodataframe.py b/libs/langchain/langchain/document_loaders/geodataframe.py index cd988b7c67788..afb03ea10443f 100644 --- a/libs/langchain/langchain/document_loaders/geodataframe.py +++ b/libs/langchain/langchain/document_loaders/geodataframe.py @@ -35,7 +35,7 @@ def __init__(self, data_frame: Any, page_content_column: str = "geometry"): f"Expected data_frame to have a column named {page_content_column}" ) - if not isinstance(data_frame[page_content_column].iloc[0], gpd.GeoSeries): + if not isinstance(data_frame[page_content_column], gpd.GeoSeries): raise ValueError( f"Expected data_frame[{page_content_column}] to be a GeoSeries" ) From e042e5df35288fb2809d9947c9e1ca8c3266cd12 Mon Sep 17 00:00:00 2001 From: Jon Watte Date: Mon, 4 Dec 2023 19:44:50 -0800 Subject: [PATCH 03/63] fix: call _on_llm_error() (#13581) Description: There's a copy-paste typo where on_llm_error() calls _on_chain_error() instead of _on_llm_error(). Issue: #13580 Dependencies: None Tag maintainer: @hwchase17 Twitter handle: @jwatte "Run `make format`, `make lint` and `make test` to check this locally." The test scripts don't work in a plain Ubuntu LTS 20.04 system. It looks like the dev container pulling is stuck. Or maybe the internet is just ornery today. 
--------- Co-authored-by: jwatte Co-authored-by: Harrison Chase --- libs/core/langchain_core/tracers/base.py | 2 +- .../callbacks/tracers/test_base_tracer.py | 36 +++++++++++++++++++ 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/libs/core/langchain_core/tracers/base.py b/libs/core/langchain_core/tracers/base.py index db0301b2a1a5f..ddf9e92e2a6f2 100644 --- a/libs/core/langchain_core/tracers/base.py +++ b/libs/core/langchain_core/tracers/base.py @@ -224,7 +224,7 @@ def on_llm_error( llm_run.end_time = datetime.utcnow() llm_run.events.append({"name": "error", "time": llm_run.end_time}) self._end_trace(llm_run) - self._on_chain_error(llm_run) + self._on_llm_error(llm_run) return llm_run def on_chain_start( diff --git a/libs/langchain/tests/unit_tests/callbacks/tracers/test_base_tracer.py b/libs/langchain/tests/unit_tests/callbacks/tracers/test_base_tracer.py index f658abe260854..94be5295c2f3f 100644 --- a/libs/langchain/tests/unit_tests/callbacks/tracers/test_base_tracer.py +++ b/libs/langchain/tests/unit_tests/callbacks/tracers/test_base_tracer.py @@ -332,6 +332,42 @@ def test_tracer_llm_run_on_error() -> None: assert tracer.runs == [compare_run] +@freeze_time("2023-01-01") +def test_tracer_llm_run_on_error_callback() -> None: + """Test tracer on an LLM run with an error and a callback.""" + exception = Exception("test") + uuid = uuid4() + + compare_run = Run( + id=str(uuid), + start_time=datetime.utcnow(), + end_time=datetime.utcnow(), + events=[ + {"name": "start", "time": datetime.utcnow()}, + {"name": "error", "time": datetime.utcnow()}, + ], + extra={}, + execution_order=1, + child_execution_order=1, + serialized=SERIALIZED, + inputs=dict(prompts=[]), + outputs=None, + error=repr(exception), + run_type="llm", + ) + + class FakeTracerWithLlmErrorCallback(FakeTracer): + error_run = None + + def _on_llm_error(self, run: Run) -> None: + self.error_run = run + + tracer = FakeTracerWithLlmErrorCallback() + tracer.on_llm_start(serialized=SERIALIZED, 
prompts=[], run_id=uuid) + tracer.on_llm_error(exception, run_id=uuid) + assert tracer.error_run == compare_run + + @freeze_time("2023-01-01") def test_tracer_chain_run_on_error() -> None: """Test tracer on a Chain run with an error.""" From b49104c2c9a2adfd49d442dff5b64c3bf3afc79f Mon Sep 17 00:00:00 2001 From: Nolan Date: Mon, 4 Dec 2023 19:53:27 -0800 Subject: [PATCH 04/63] Add missing doc key to metadata field in AzureSearch Vectorstore (#13328) - **Description:** Adds doc key to metadata field when adding document to Azure Search. - **Issue:** -, - **Dependencies:** -, - **Tag maintainer:** @eyurtsev, - **Twitter handle:** @finnless Right now the document key with the name FIELDS_ID is not included in the FIELDS_METADATA field, and therefore is not included in the Document returned from a query. This is really annoying if you want to be able to modify that item in the vectorstore. Other's thoughts on this are welcome. --- .../langchain/vectorstores/azuresearch.py | 43 +++++++++++++++---- 1 file changed, 35 insertions(+), 8 deletions(-) diff --git a/libs/langchain/langchain/vectorstores/azuresearch.py b/libs/langchain/langchain/vectorstores/azuresearch.py index 73c96f738ae7c..2cd1f9546de72 100644 --- a/libs/langchain/langchain/vectorstores/azuresearch.py +++ b/libs/langchain/langchain/vectorstores/azuresearch.py @@ -391,10 +391,21 @@ def vector_search_with_score( ( Document( page_content=result.pop(FIELDS_CONTENT), - metadata=json.loads(result[FIELDS_METADATA]) - if FIELDS_METADATA in result - else { - k: v for k, v in result.items() if k != FIELDS_CONTENT_VECTOR + metadata={ + **( + {FIELDS_ID: result.pop(FIELDS_ID)} + if FIELDS_ID in result + else {} + ), + **( + json.loads(result[FIELDS_METADATA]) + if FIELDS_METADATA in result + else { + k: v + for k, v in result.items() + if k != FIELDS_CONTENT_VECTOR + } + ), }, ), float(result["@search.score"]), @@ -452,10 +463,21 @@ def hybrid_search_with_score( ( Document( page_content=result.pop(FIELDS_CONTENT), - 
metadata=json.loads(result[FIELDS_METADATA]) - if FIELDS_METADATA in result - else { - k: v for k, v in result.items() if k != FIELDS_CONTENT_VECTOR + metadata={ + **( + {FIELDS_ID: result.pop(FIELDS_ID)} + if FIELDS_ID in result + else {} + ), + **( + json.loads(result[FIELDS_METADATA]) + if FIELDS_METADATA in result + else { + k: v + for k, v in result.items() + if k != FIELDS_CONTENT_VECTOR + } + ), }, ), float(result["@search.score"]), @@ -547,6 +569,11 @@ def semantic_hybrid_search_with_score_and_rerank( Document( page_content=result.pop(FIELDS_CONTENT), metadata={ + **( + {FIELDS_ID: result.pop(FIELDS_ID)} + if FIELDS_ID in result + else {} + ), **( json.loads(result[FIELDS_METADATA]) if FIELDS_METADATA in result From 956d55de2b434c62ff28175d4b234e5f698fbf82 Mon Sep 17 00:00:00 2001 From: Erick Friis Date: Mon, 4 Dec 2023 20:08:41 -0800 Subject: [PATCH 05/63] docs[patch]: chat model page names (#14264) --- docs/docs/integrations/chat/anthropic.ipynb | 12 +++- docs/docs/integrations/chat/anyscale.ipynb | 14 +++- .../integrations/chat/azure_chat_openai.ipynb | 12 +++- .../chat/azureml_chat_endpoint.ipynb | 11 ++- docs/docs/integrations/chat/baichuan.ipynb | 59 +++++++++------- .../chat/baidu_qianfan_endpoint.ipynb | 11 ++- docs/docs/integrations/chat/bedrock.ipynb | 12 +++- docs/docs/integrations/chat/cohere.ipynb | 12 +++- docs/docs/integrations/chat/ernie.ipynb | 11 ++- docs/docs/integrations/chat/everlyai.ipynb | 12 +++- docs/docs/integrations/chat/fireworks.ipynb | 12 +++- .../chat/google_vertex_ai_palm.ipynb | 11 ++- docs/docs/integrations/chat/hunyuan.ipynb | 69 +++++++++++-------- docs/docs/integrations/chat/konko.ipynb | 11 ++- docs/docs/integrations/chat/litellm.ipynb | 12 +++- docs/docs/integrations/chat/llama2_chat.ipynb | 14 +++- docs/docs/integrations/chat/llama_api.ipynb | 12 +++- docs/docs/integrations/chat/minimax.ipynb | 11 ++- docs/docs/integrations/chat/ollama.ipynb | 11 ++- .../integrations/chat/ollama_functions.ipynb | 11 ++- 
docs/docs/integrations/chat/openai.ipynb | 12 +++- .../chat/pai_eas_chat_endpoint.ipynb | 11 ++- .../chat/promptlayer_chatopenai.ipynb | 12 +++- docs/docs/integrations/chat/tongyi.ipynb | 13 +++- docs/docs/integrations/chat/vllm.ipynb | 10 +++ .../integrations/chat/volcengine_maas.ipynb | 20 +++++- docs/docs/integrations/chat/yandex.ipynb | 12 +++- 27 files changed, 350 insertions(+), 80 deletions(-) diff --git a/docs/docs/integrations/chat/anthropic.ipynb b/docs/docs/integrations/chat/anthropic.ipynb index 00ad314f45dc0..5c99085bb933b 100644 --- a/docs/docs/integrations/chat/anthropic.ipynb +++ b/docs/docs/integrations/chat/anthropic.ipynb @@ -1,11 +1,21 @@ { "cells": [ + { + "cell_type": "raw", + "id": "a016701c", + "metadata": {}, + "source": [ + "---\n", + "sidebar_label: Anthropic\n", + "---" + ] + }, { "cell_type": "markdown", "id": "bf733a38-db84-4363-89e2-de6735c37230", "metadata": {}, "source": [ - "# Anthropic\n", + "# ChatAnthropic\n", "\n", "This notebook covers how to get started with Anthropic chat models." 
] diff --git a/docs/docs/integrations/chat/anyscale.ipynb b/docs/docs/integrations/chat/anyscale.ipynb index 674549a6568c8..3d2e9e80cba4d 100644 --- a/docs/docs/integrations/chat/anyscale.ipynb +++ b/docs/docs/integrations/chat/anyscale.ipynb @@ -1,12 +1,22 @@ { "cells": [ + { + "cell_type": "raw", + "id": "31895fc4", + "metadata": {}, + "source": [ + "---\n", + "sidebar_label: Anyscale\n", + "---" + ] + }, { "attachments": {}, "cell_type": "markdown", "id": "642fd21c-600a-47a1-be96-6e1438b421a9", "metadata": {}, "source": [ - "# Anyscale\n", + "# ChatAnyscale\n", "\n", "This notebook demonstrates the use of `langchain.chat_models.ChatAnyscale` for [Anyscale Endpoints](https://endpoints.anyscale.com/).\n", "\n", @@ -33,7 +43,7 @@ "metadata": {}, "outputs": [ { - "name": "stdin", + "name": "stdout", "output_type": "stream", "text": [ " ········\n" diff --git a/docs/docs/integrations/chat/azure_chat_openai.ipynb b/docs/docs/integrations/chat/azure_chat_openai.ipynb index b4568ca2fd0aa..a6bee8f49cb39 100644 --- a/docs/docs/integrations/chat/azure_chat_openai.ipynb +++ b/docs/docs/integrations/chat/azure_chat_openai.ipynb @@ -1,11 +1,21 @@ { "cells": [ + { + "cell_type": "raw", + "id": "641f8cb0", + "metadata": {}, + "source": [ + "---\n", + "sidebar_label: Azure OpenAI\n", + "---" + ] + }, { "cell_type": "markdown", "id": "38f26d7a", "metadata": {}, "source": [ - "# Azure OpenAI\n", + "# AzureChatOpenAI\n", "\n", ">[Azure OpenAI Service](https://learn.microsoft.com/en-us/azure/ai-services/openai/overview) provides REST API access to OpenAI's powerful language models including the GPT-4, GPT-3.5-Turbo, and Embeddings model series. These models can be easily adapted to your specific task including but not limited to content generation, summarization, semantic search, and natural language to code translation. 
Users can access the service through REST APIs, Python SDK, or a web-based interface in the Azure OpenAI Studio.\n", "\n", diff --git a/docs/docs/integrations/chat/azureml_chat_endpoint.ipynb b/docs/docs/integrations/chat/azureml_chat_endpoint.ipynb index 4444f7fdf672e..2e0a09aff45a9 100644 --- a/docs/docs/integrations/chat/azureml_chat_endpoint.ipynb +++ b/docs/docs/integrations/chat/azureml_chat_endpoint.ipynb @@ -1,10 +1,19 @@ { "cells": [ + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "---\n", + "sidebar_label: Azure ML Endpoint\n", + "---" + ] + }, { "cell_type": "markdown", "metadata": {}, "source": [ - "# Azure ML Endpoint\n", + "# AzureMLChatOnlineEndpoint\n", "\n", ">[Azure Machine Learning](https://azure.microsoft.com/en-us/products/machine-learning/) is a platform used to build, train, and deploy machine learning models. Users can explore the types of models to deploy in the Model Catalog, which provides Azure Foundation Models and OpenAI Models. `Azure Foundation Models` include various open-source models and popular Hugging Face models. Users can also import models of their liking into AzureML.\n", ">\n", diff --git a/docs/docs/integrations/chat/baichuan.ipynb b/docs/docs/integrations/chat/baichuan.ipynb index 9f9376beeeaf8..462a6aa2bc0e8 100644 --- a/docs/docs/integrations/chat/baichuan.ipynb +++ b/docs/docs/integrations/chat/baichuan.ipynb @@ -1,10 +1,19 @@ { "cells": [ + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "---\n", + "sidebar_label: Baichuan Chat\n", + "---" + ] + }, { "cell_type": "markdown", "metadata": {}, "source": [ - "# Baichuan Chat\n", + "# ChatBaichuan\n", "\n", "Baichuan chat models API by Baichuan Intelligent Technology. 
For more information, see [https://platform.baichuan-ai.com/docs/api](https://platform.baichuan-ai.com/docs/api)" ] @@ -63,7 +72,9 @@ "outputs": [ { "data": { - "text/plain": "AIMessage(content='首先,我们需要确定闰年的二月有多少天。闰年的二月有29天。\\n\\n然后,我们可以计算你的月薪:\\n\\n日薪 = 月薪 / (当月天数)\\n\\n所以,你的月薪 = 日薪 * 当月天数\\n\\n将数值代入公式:\\n\\n月薪 = 8元/天 * 29天 = 232元\\n\\n因此,你在闰年的二月的月薪是232元。')" + "text/plain": [ + "AIMessage(content='首先,我们需要确定闰年的二月有多少天。闰年的二月有29天。\\n\\n然后,我们可以计算你的月薪:\\n\\n日薪 = 月薪 / (当月天数)\\n\\n所以,你的月薪 = 日薪 * 当月天数\\n\\n将数值代入公式:\\n\\n月薪 = 8元/天 * 29天 = 232元\\n\\n因此,你在闰年的二月的月薪是232元。')" + ] }, "execution_count": 3, "metadata": {}, @@ -76,16 +87,23 @@ }, { "cell_type": "markdown", - "source": [ - "## For ChatBaichuan with Streaming" - ], "metadata": { "collapsed": false - } + }, + "source": [ + "## For ChatBaichuan with Streaming" + ] }, { "cell_type": "code", "execution_count": 5, + "metadata": { + "ExecuteTime": { + "end_time": "2023-10-17T15:14:25.870044Z", + "start_time": "2023-10-17T15:14:25.863381Z" + }, + "collapsed": false + }, "outputs": [], "source": [ "chat = ChatBaichuan(\n", @@ -93,22 +111,24 @@ " baichuan_secret_key=\"YOUR_SECRET_KEY\",\n", " streaming=True,\n", ")" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-10-17T15:14:25.870044Z", - "start_time": "2023-10-17T15:14:25.863381Z" - } - } + ] }, { "cell_type": "code", "execution_count": 6, + "metadata": { + "ExecuteTime": { + "end_time": "2023-10-17T15:14:27.153546Z", + "start_time": "2023-10-17T15:14:25.868470Z" + }, + "collapsed": false + }, "outputs": [ { "data": { - "text/plain": "AIMessageChunk(content='首先,我们需要确定闰年的二月有多少天。闰年的二月有29天。\\n\\n然后,我们可以计算你的月薪:\\n\\n日薪 = 月薪 / (当月天数)\\n\\n所以,你的月薪 = 日薪 * 当月天数\\n\\n将数值代入公式:\\n\\n月薪 = 8元/天 * 29天 = 232元\\n\\n因此,你在闰年的二月的月薪是232元。')" + "text/plain": [ + "AIMessageChunk(content='首先,我们需要确定闰年的二月有多少天。闰年的二月有29天。\\n\\n然后,我们可以计算你的月薪:\\n\\n日薪 = 月薪 / (当月天数)\\n\\n所以,你的月薪 = 日薪 * 当月天数\\n\\n将数值代入公式:\\n\\n月薪 = 8元/天 * 29天 = 232元\\n\\n因此,你在闰年的二月的月薪是232元。')" + ] }, 
"execution_count": 6, "metadata": {}, @@ -117,14 +137,7 @@ ], "source": [ "chat([HumanMessage(content=\"我日薪8块钱,请问在闰年的二月,我月薪多少\")])" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-10-17T15:14:27.153546Z", - "start_time": "2023-10-17T15:14:25.868470Z" - } - } + ] } ], "metadata": { diff --git a/docs/docs/integrations/chat/baidu_qianfan_endpoint.ipynb b/docs/docs/integrations/chat/baidu_qianfan_endpoint.ipynb index 57749548a8b1a..65f7826815686 100644 --- a/docs/docs/integrations/chat/baidu_qianfan_endpoint.ipynb +++ b/docs/docs/integrations/chat/baidu_qianfan_endpoint.ipynb @@ -1,11 +1,20 @@ { "cells": [ + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "---\n", + "sidebar_label: Baidu Qianfan\n", + "---" + ] + }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ - "# Baidu Qianfan\n", + "# QianfanChatEndpoint\n", "\n", "Baidu AI Cloud Qianfan Platform is a one-stop large model development and service operation platform for enterprise developers. 
Qianfan not only provides including the model of Wenxin Yiyan (ERNIE-Bot) and the third-party open-source models, but also provides various AI development tools and the whole set of development environment, which facilitates customers to use and develop large model applications easily.\n", "\n", diff --git a/docs/docs/integrations/chat/bedrock.ipynb b/docs/docs/integrations/chat/bedrock.ipynb index 02dfb5b9fbde7..3957c9c1e4645 100644 --- a/docs/docs/integrations/chat/bedrock.ipynb +++ b/docs/docs/integrations/chat/bedrock.ipynb @@ -1,11 +1,21 @@ { "cells": [ + { + "cell_type": "raw", + "id": "fbc66410", + "metadata": {}, + "source": [ + "---\n", + "sidebar_label: Bedrock Chat\n", + "---" + ] + }, { "cell_type": "markdown", "id": "bf733a38-db84-4363-89e2-de6735c37230", "metadata": {}, "source": [ - "# Bedrock Chat\n", + "# BedrockChat\n", "\n", ">[Amazon Bedrock](https://aws.amazon.com/bedrock/) is a fully managed service that offers a choice of \n", "> high-performing foundation models (FMs) from leading AI companies like `AI21 Labs`, `Anthropic`, `Cohere`, \n", diff --git a/docs/docs/integrations/chat/cohere.ipynb b/docs/docs/integrations/chat/cohere.ipynb index e9b90af3d215c..8f05b1c66739c 100644 --- a/docs/docs/integrations/chat/cohere.ipynb +++ b/docs/docs/integrations/chat/cohere.ipynb @@ -1,11 +1,21 @@ { "cells": [ + { + "cell_type": "raw", + "id": "53fbf15f", + "metadata": {}, + "source": [ + "---\n", + "sidebar_label: Cohere\n", + "---" + ] + }, { "cell_type": "markdown", "id": "bf733a38-db84-4363-89e2-de6735c37230", "metadata": {}, "source": [ - "# Cohere\n", + "# ChatCohere\n", "\n", "This notebook covers how to get started with Cohere chat models." 
] diff --git a/docs/docs/integrations/chat/ernie.ipynb b/docs/docs/integrations/chat/ernie.ipynb index bcd28fd9cfb09..d98fcdb592f3f 100644 --- a/docs/docs/integrations/chat/ernie.ipynb +++ b/docs/docs/integrations/chat/ernie.ipynb @@ -1,10 +1,19 @@ { "cells": [ + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "---\n", + "sidebar_label: Ernie Bot Chat\n", + "---" + ] + }, { "cell_type": "markdown", "metadata": {}, "source": [ - "# ERNIE-Bot Chat\n", + "# ErnieBotChat\n", "\n", "[ERNIE-Bot](https://cloud.baidu.com/doc/WENXINWORKSHOP/s/jlil56u11) is a large language model developed by Baidu, covering a huge amount of Chinese data.\n", "This notebook covers how to get started with ErnieBot chat models.\n", diff --git a/docs/docs/integrations/chat/everlyai.ipynb b/docs/docs/integrations/chat/everlyai.ipynb index 3310f8f21397b..3f18b36f7e6e3 100644 --- a/docs/docs/integrations/chat/everlyai.ipynb +++ b/docs/docs/integrations/chat/everlyai.ipynb @@ -1,11 +1,21 @@ { "cells": [ + { + "cell_type": "raw", + "id": "5e45f35c", + "metadata": {}, + "source": [ + "---\n", + "sidebar_label: EverlyAI\n", + "---" + ] + }, { "cell_type": "markdown", "id": "642fd21c-600a-47a1-be96-6e1438b421a9", "metadata": {}, "source": [ - "# EverlyAI\n", + "# ChatEverlyAI\n", "\n", ">[EverlyAI](https://everlyai.xyz) allows you to run your ML models at scale in the cloud. 
It also provides API access to [several LLM models](https://everlyai.xyz).\n", "\n", diff --git a/docs/docs/integrations/chat/fireworks.ipynb b/docs/docs/integrations/chat/fireworks.ipynb index 6a5b0ad01d992..a0a3932bc5ef5 100644 --- a/docs/docs/integrations/chat/fireworks.ipynb +++ b/docs/docs/integrations/chat/fireworks.ipynb @@ -1,12 +1,22 @@ { "cells": [ + { + "cell_type": "raw", + "id": "529aeba9", + "metadata": {}, + "source": [ + "---\n", + "sidebar_label: Fireworks\n", + "---" + ] + }, { "attachments": {}, "cell_type": "markdown", "id": "642fd21c-600a-47a1-be96-6e1438b421a9", "metadata": {}, "source": [ - "# Fireworks\n", + "# ChatFireworks\n", "\n", ">[Fireworks](https://app.fireworks.ai/) accelerates product development on generative AI by creating an innovative AI experiment and production platform. \n", "\n", diff --git a/docs/docs/integrations/chat/google_vertex_ai_palm.ipynb b/docs/docs/integrations/chat/google_vertex_ai_palm.ipynb index af44d316e90a6..436e2fd14246e 100644 --- a/docs/docs/integrations/chat/google_vertex_ai_palm.ipynb +++ b/docs/docs/integrations/chat/google_vertex_ai_palm.ipynb @@ -1,11 +1,20 @@ { "cells": [ + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "---\n", + "sidebar_label: Google Cloud Vertex AI\n", + "---" + ] + }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ - "# Google Cloud Vertex AI \n", + "# ChatVertexAI\n", "\n", "Note: This is separate from the Google PaLM integration. Google has chosen to offer an enterprise version of PaLM through GCP, and this supports the models made available through there. 
\n", "\n", diff --git a/docs/docs/integrations/chat/hunyuan.ipynb b/docs/docs/integrations/chat/hunyuan.ipynb index 20779607dc56b..2cb334bfb937a 100644 --- a/docs/docs/integrations/chat/hunyuan.ipynb +++ b/docs/docs/integrations/chat/hunyuan.ipynb @@ -1,10 +1,19 @@ { "cells": [ + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "---\n", + "sidebar_label: Tencent Hunyuan\n", + "---" + ] + }, { "cell_type": "markdown", "metadata": {}, "source": [ - "# Tencent Hunyuan\n", + "# ChatHunyuan\n", "\n", "Hunyuan chat model API by Tencent. For more information, see [https://cloud.tencent.com/document/product/1729](https://cloud.tencent.com/document/product/1729)" ] @@ -54,7 +63,9 @@ "outputs": [ { "data": { - "text/plain": "AIMessage(content=\"J'aime programmer.\")" + "text/plain": [ + "AIMessage(content=\"J'aime programmer.\")" + ] }, "execution_count": 3, "metadata": {}, @@ -73,16 +84,23 @@ }, { "cell_type": "markdown", - "source": [ - "## For ChatHunyuan with Streaming" - ], "metadata": { "collapsed": false - } + }, + "source": [ + "## For ChatHunyuan with Streaming" + ] }, { "cell_type": "code", "execution_count": 2, + "metadata": { + "ExecuteTime": { + "end_time": "2023-10-19T10:20:41.507720Z", + "start_time": "2023-10-19T10:20:41.496456Z" + }, + "collapsed": false + }, "outputs": [], "source": [ "chat = ChatHunyuan(\n", @@ -91,22 +109,24 @@ " hunyuan_secret_key=\"YOUR_SECRET_KEY\",\n", " streaming=True,\n", ")" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-10-19T10:20:41.507720Z", - "start_time": "2023-10-19T10:20:41.496456Z" - } - } + ] }, { "cell_type": "code", "execution_count": 3, + "metadata": { + "ExecuteTime": { + "end_time": "2023-10-19T10:20:46.275673Z", + "start_time": "2023-10-19T10:20:44.241097Z" + }, + "collapsed": false + }, "outputs": [ { "data": { - "text/plain": "AIMessageChunk(content=\"J'aime programmer.\")" + "text/plain": [ + "AIMessageChunk(content=\"J'aime programmer.\")" + ] }, "execution_count": 3, 
"metadata": {}, @@ -121,26 +141,19 @@ " )\n", " ]\n", ")" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-10-19T10:20:46.275673Z", - "start_time": "2023-10-19T10:20:44.241097Z" - } - } + ] }, { "cell_type": "code", "execution_count": null, - "outputs": [], - "source": [], "metadata": { - "collapsed": false, "ExecuteTime": { "start_time": "2023-10-19T10:19:56.233477Z" - } - } + }, + "collapsed": false + }, + "outputs": [], + "source": [] } ], "metadata": { diff --git a/docs/docs/integrations/chat/konko.ipynb b/docs/docs/integrations/chat/konko.ipynb index 2250a242e55b3..6e4e19bf3dc80 100644 --- a/docs/docs/integrations/chat/konko.ipynb +++ b/docs/docs/integrations/chat/konko.ipynb @@ -1,10 +1,19 @@ { "cells": [ + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "---\n", + "sidebar_label: Konko\n", + "---" + ] + }, { "cell_type": "markdown", "metadata": {}, "source": [ - "# Konko\n", + "# ChatKonko\n", "\n", ">[Konko](https://www.konko.ai/) API is a fully managed Web API designed to help application developers:\n", "\n", diff --git a/docs/docs/integrations/chat/litellm.ipynb b/docs/docs/integrations/chat/litellm.ipynb index bd3c8ef2823c2..a93d595bfbdbe 100644 --- a/docs/docs/integrations/chat/litellm.ipynb +++ b/docs/docs/integrations/chat/litellm.ipynb @@ -1,12 +1,22 @@ { "cells": [ + { + "cell_type": "raw", + "id": "59148044", + "metadata": {}, + "source": [ + "---\n", + "sidebar_label: LiteLLM\n", + "---" + ] + }, { "attachments": {}, "cell_type": "markdown", "id": "bf733a38-db84-4363-89e2-de6735c37230", "metadata": {}, "source": [ - "# 🚅 LiteLLM\n", + "# ChatLiteLLM\n", "\n", "[LiteLLM](https://github.com/BerriAI/litellm) is a library that simplifies calling Anthropic, Azure, Huggingface, Replicate, etc. 
\n", "\n", diff --git a/docs/docs/integrations/chat/llama2_chat.ipynb b/docs/docs/integrations/chat/llama2_chat.ipynb index 48493973f685e..98cce09dfa0dd 100644 --- a/docs/docs/integrations/chat/llama2_chat.ipynb +++ b/docs/docs/integrations/chat/llama2_chat.ipynb @@ -1,11 +1,21 @@ { "cells": [ + { + "cell_type": "raw", + "id": "7320f16b", + "metadata": {}, + "source": [ + "---\n", + "sidebar_label: Llama 2 Chat\n", + "---" + ] + }, { "cell_type": "markdown", "id": "90a1faf2", "metadata": {}, "source": [ - "# Llama-2 Chat\n", + "# Llama2Chat\n", "\n", "This notebook shows how to augment Llama-2 `LLM`s with the `Llama2Chat` wrapper to support the [Llama-2 chat prompt format](https://huggingface.co/blog/llama2#how-to-prompt-llama-2). Several `LLM` implementations in LangChain can be used as interface to Llama-2 chat models. These include [HuggingFaceTextGenInference](https://python.langchain.com/docs/integrations/llms/huggingface_textgen_inference), [LlamaCpp](https://python.langchain.com/docs/use_cases/question_answering/how_to/local_retrieval_qa), [GPT4All](https://python.langchain.com/docs/integrations/llms/gpt4all), ..., to mention a few examples. 
\n", "\n", @@ -721,7 +731,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.18" + "version": "3.11.4" } }, "nbformat": 4, diff --git a/docs/docs/integrations/chat/llama_api.ipynb b/docs/docs/integrations/chat/llama_api.ipynb index 329904a6bc3e8..e75cd5b4b4292 100644 --- a/docs/docs/integrations/chat/llama_api.ipynb +++ b/docs/docs/integrations/chat/llama_api.ipynb @@ -1,11 +1,21 @@ { "cells": [ + { + "cell_type": "raw", + "id": "71b5cfca", + "metadata": {}, + "source": [ + "---\n", + "sidebar_label: Llama API\n", + "---" + ] + }, { "cell_type": "markdown", "id": "90a1faf2", "metadata": {}, "source": [ - "# Llama API\n", + "# ChatLlamaAPI\n", "\n", "This notebook shows how to use LangChain with [LlamaAPI](https://llama-api.com/) - a hosted version of Llama2 that adds in support for function calling." ] diff --git a/docs/docs/integrations/chat/minimax.ipynb b/docs/docs/integrations/chat/minimax.ipynb index 8b4d683d0fe84..e10eeb0d2a74d 100644 --- a/docs/docs/integrations/chat/minimax.ipynb +++ b/docs/docs/integrations/chat/minimax.ipynb @@ -1,11 +1,20 @@ { "cells": [ + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "---\n", + "sidebar_label: MiniMax\n", + "---" + ] + }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ - "# MiniMax\n", + "# MiniMaxChat\n", "\n", "[Minimax](https://api.minimax.chat) is a Chinese startup that provides LLM service for companies and individuals.\n", "\n", diff --git a/docs/docs/integrations/chat/ollama.ipynb b/docs/docs/integrations/chat/ollama.ipynb index 7fb4b2984cacf..7f069112e6ea1 100644 --- a/docs/docs/integrations/chat/ollama.ipynb +++ b/docs/docs/integrations/chat/ollama.ipynb @@ -1,10 +1,19 @@ { "cells": [ + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "---\n", + "sidebar_label: Ollama\n", + "---" + ] + }, { "cell_type": "markdown", "metadata": {}, "source": [ - "# Ollama\n", + "# ChatOllama\n", "\n", "[Ollama](https://ollama.ai/) 
allows you to run open-source large language models, such as LLaMA2, locally.\n", "\n", diff --git a/docs/docs/integrations/chat/ollama_functions.ipynb b/docs/docs/integrations/chat/ollama_functions.ipynb index a4f365bf3ae3f..707b8d74ccaff 100644 --- a/docs/docs/integrations/chat/ollama_functions.ipynb +++ b/docs/docs/integrations/chat/ollama_functions.ipynb @@ -1,10 +1,19 @@ { "cells": [ + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "---\n", + "sidebar_label: Ollama Functions\n", + "---" + ] + }, { "cell_type": "markdown", "metadata": {}, "source": [ - "# Ollama Functions\n", + "# OllamaFunctions\n", "\n", "This notebook shows how to use an experimental wrapper around Ollama that gives it the same API as OpenAI Functions.\n", "\n", diff --git a/docs/docs/integrations/chat/openai.ipynb b/docs/docs/integrations/chat/openai.ipynb index 4c66ab76570cc..5fa123d0c98dd 100644 --- a/docs/docs/integrations/chat/openai.ipynb +++ b/docs/docs/integrations/chat/openai.ipynb @@ -1,11 +1,21 @@ { "cells": [ + { + "cell_type": "raw", + "id": "afaf8039", + "metadata": {}, + "source": [ + "---\n", + "sidebar_label: OpenAI\n", + "---" + ] + }, { "cell_type": "markdown", "id": "e49f1e0d", "metadata": {}, "source": [ - "# OpenAI\n", + "# ChatOpenAI\n", "\n", "This notebook covers how to get started with OpenAI chat models." 
] diff --git a/docs/docs/integrations/chat/pai_eas_chat_endpoint.ipynb b/docs/docs/integrations/chat/pai_eas_chat_endpoint.ipynb index 55bde5e5311fe..395d64775f66e 100644 --- a/docs/docs/integrations/chat/pai_eas_chat_endpoint.ipynb +++ b/docs/docs/integrations/chat/pai_eas_chat_endpoint.ipynb @@ -1,10 +1,19 @@ { "cells": [ + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "---\n", + "sidebar_label: AliCloud PAI EAS\n", + "---" + ] + }, { "cell_type": "markdown", "metadata": {}, "source": [ - "# AliCloud PAI EAS\n", + "# PaiEasChatEndpoint\n", "Machine Learning Platform for AI of Alibaba Cloud is a machine learning or deep learning engineering platform intended for enterprises and developers. It provides easy-to-use, cost-effective, high-performance, and easy-to-scale plug-ins that can be applied to various industry scenarios. With over 140 built-in optimization algorithms, Machine Learning Platform for AI provides whole-process AI engineering capabilities including data labeling (PAI-iTAG), model building (PAI-Designer and PAI-DSW), model training (PAI-DLC), compilation optimization, and inference deployment (PAI-EAS). PAI-EAS supports different types of hardware resources, including CPUs and GPUs, and features high throughput and low latency. It allows you to deploy large-scale complex models with a few clicks and perform elastic scale-ins and scale-outs in real time. It also provides a comprehensive O&M and monitoring system." 
] }, diff --git a/docs/docs/integrations/chat/promptlayer_chatopenai.ipynb b/docs/docs/integrations/chat/promptlayer_chatopenai.ipynb index 4b20a5852da6a..623bfbe1ae296 100644 --- a/docs/docs/integrations/chat/promptlayer_chatopenai.ipynb +++ b/docs/docs/integrations/chat/promptlayer_chatopenai.ipynb @@ -1,12 +1,22 @@ { "cells": [ + { + "cell_type": "raw", + "id": "ce3672d3", + "metadata": {}, + "source": [ + "---\n", + "sidebar_label: PromptLayer ChatOpenAI\n", + "---" + ] + }, { "attachments": {}, "cell_type": "markdown", "id": "959300d4", "metadata": {}, "source": [ - "# PromptLayer ChatOpenAI\n", + "# PromptLayerChatOpenAI\n", "\n", "This example showcases how to connect to [PromptLayer](https://www.promptlayer.com) to start recording your ChatOpenAI requests." ] diff --git a/docs/docs/integrations/chat/tongyi.ipynb b/docs/docs/integrations/chat/tongyi.ipynb index f3c64f283065d..3de68b1e5ce13 100644 --- a/docs/docs/integrations/chat/tongyi.ipynb +++ b/docs/docs/integrations/chat/tongyi.ipynb @@ -1,5 +1,14 @@ { "cells": [ + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "---\n", + "sidebar_label: Tongyi Qwen\n", + "---" + ] + }, { "cell_type": "markdown", "metadata": { @@ -9,7 +18,7 @@ } }, "source": [ - "# Tongyi Qwen\n", + "# ChatTongyi\n", "Tongyi Qwen is a large language model developed by Alibaba's Damo Academy. It is capable of understanding user intent through natural language understanding and semantic analysis, based on user input in natural language. It provides services and assistance to users in different domains and tasks. 
By providing clear and detailed instructions, you can obtain results that better align with your expectations.\n", "In this notebook, we will introduce how to use langchain with [Tongyi](https://www.aliyun.com/product/dashscope) mainly in `Chat` corresponding\n", " to the package `langchain/chat_models` in langchain" @@ -41,7 +50,7 @@ }, "outputs": [ { - "name": "stdin", + "name": "stdout", "output_type": "stream", "text": [ " ········\n" diff --git a/docs/docs/integrations/chat/vllm.ipynb b/docs/docs/integrations/chat/vllm.ipynb index 5cc825d6d8df0..11023a201b037 100644 --- a/docs/docs/integrations/chat/vllm.ipynb +++ b/docs/docs/integrations/chat/vllm.ipynb @@ -1,5 +1,15 @@ { "cells": [ + { + "cell_type": "raw", + "id": "eb65deaa", + "metadata": {}, + "source": [ + "---\n", + "sidebar_label: vLLM Chat\n", + "---" + ] + }, { "cell_type": "markdown", "id": "eb7e5679-aa06-47e4-a1a3-b6b70e604017", diff --git a/docs/docs/integrations/chat/volcengine_maas.ipynb b/docs/docs/integrations/chat/volcengine_maas.ipynb index 32dd0c16d0964..e7c39c6b6f65d 100644 --- a/docs/docs/integrations/chat/volcengine_maas.ipynb +++ b/docs/docs/integrations/chat/volcengine_maas.ipynb @@ -1,5 +1,15 @@ { "cells": [ + { + "cell_type": "raw", + "id": "66107bdd", + "metadata": {}, + "source": [ + "---\n", + "sidebar_label: Volc Engine Maas\n", + "---" + ] + }, { "cell_type": "markdown", "id": "404758628c7b20f6", @@ -7,7 +17,7 @@ "collapsed": false }, "source": [ - "# Volc Engine Maas\n", + "# VolcEngineMaasChat\n", "\n", "This notebook provides you with a guide on how to get started with volc engine maas chat models." 
] @@ -86,7 +96,9 @@ "outputs": [ { "data": { - "text/plain": "AIMessage(content='好的,这是一个笑话:\\n\\n为什么鸟儿不会玩电脑游戏?\\n\\n因为它们没有翅膀!')" + "text/plain": [ + "AIMessage(content='好的,这是一个笑话:\\n\\n为什么鸟儿不会玩电脑游戏?\\n\\n因为它们没有翅膀!')" + ] }, "execution_count": 26, "metadata": {}, @@ -141,7 +153,9 @@ "outputs": [ { "data": { - "text/plain": "AIMessage(content='好的,这是一个笑话:\\n\\n三岁的女儿说她会造句了,妈妈让她用“年轻”造句,女儿说:“妈妈减肥,一年轻了好几斤”。')" + "text/plain": [ + "AIMessage(content='好的,这是一个笑话:\\n\\n三岁的女儿说她会造句了,妈妈让她用“年轻”造句,女儿说:“妈妈减肥,一年轻了好几斤”。')" + ] }, "execution_count": 28, "metadata": {}, diff --git a/docs/docs/integrations/chat/yandex.ipynb b/docs/docs/integrations/chat/yandex.ipynb index 598c9379562aa..0e1ced9b6397a 100644 --- a/docs/docs/integrations/chat/yandex.ipynb +++ b/docs/docs/integrations/chat/yandex.ipynb @@ -1,11 +1,21 @@ { "cells": [ + { + "cell_type": "raw", + "id": "b4154fbe", + "metadata": {}, + "source": [ + "---\n", + "sidebar_label: YandexGPT\n", + "---" + ] + }, { "cell_type": "markdown", "id": "af63c9db-e4bd-4d3b-a4d7-7927f5541734", "metadata": {}, "source": [ - "# YandexGPT\n", + "# ChatYandexGPT\n", "\n", "This notebook goes over how to use Langchain with [YandexGPT](https://cloud.yandex.com/en/services/yandexgpt) chat model.\n", "\n", From 8eab4d95c078ba4b2d2f1ac0d1cbd7283f7bf4ff Mon Sep 17 00:00:00 2001 From: Harrison Chase Date: Mon, 4 Dec 2023 20:18:15 -0800 Subject: [PATCH 06/63] Harrison/delegate from template (#14266) Co-authored-by: M.R. 
Sopacua <144725145+msopacua@users.noreply.github.com> --- libs/core/langchain_core/prompts/prompt.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/libs/core/langchain_core/prompts/prompt.py b/libs/core/langchain_core/prompts/prompt.py index 012b926953701..566cf02bd2115 100644 --- a/libs/core/langchain_core/prompts/prompt.py +++ b/libs/core/langchain_core/prompts/prompt.py @@ -1,6 +1,7 @@ """Prompt schema definition.""" from __future__ import annotations +import warnings from pathlib import Path from typing import Any, Dict, List, Literal, Optional, Union @@ -176,21 +177,30 @@ def from_examples( @classmethod def from_file( - cls, template_file: Union[str, Path], input_variables: List[str], **kwargs: Any + cls, + template_file: Union[str, Path], + input_variables: Optional[List[str]] = None, + **kwargs: Any, ) -> PromptTemplate: """Load a prompt from a file. Args: template_file: The path to the file containing the prompt template. - input_variables: A list of variable names the final prompt template - will expect. + input_variables: [DEPRECATED] A list of variable names the final prompt + template will expect. + + input_variables is ignored as from_file now delegates to from_template(). Returns: The prompt loaded from the file. 
""" with open(str(template_file), "r") as f: template = f.read() - return cls(input_variables=input_variables, template=template, **kwargs) + if input_variables: + warnings.warn( + "`input_variables' is deprecated and ignored.", DeprecationWarning + ) + return cls.from_template(template=template, **kwargs) @classmethod def from_template( From abbba6c7d831b4d7fc6ad7e091c579815c122dd9 Mon Sep 17 00:00:00 2001 From: Arnaud Gelas Date: Tue, 5 Dec 2023 05:27:22 +0100 Subject: [PATCH 07/63] openapi/planner.py: Deal with json in markdown output cases (#13576) - **Description:** In openapi/planner deal with json in markdown output cases - **Issue:** In some cases LLMs could return json in markdown which can't be loaded. - **Dependencies:** - **Tag maintainer:** @eyurtsev - **Twitter handle:** --------- Co-authored-by: Harrison Chase --- .../agents/agent_toolkits/openapi/planner.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/libs/langchain/langchain/agents/agent_toolkits/openapi/planner.py b/libs/langchain/langchain/agents/agent_toolkits/openapi/planner.py index 530418e28841a..754201f5bdc1e 100644 --- a/libs/langchain/langchain/agents/agent_toolkits/openapi/planner.py +++ b/libs/langchain/langchain/agents/agent_toolkits/openapi/planner.py @@ -36,6 +36,7 @@ from langchain.chains.llm import LLMChain from langchain.llms.openai import OpenAI from langchain.memory import ReadOnlySharedMemory +from langchain.output_parsers.json import parse_json_markdown from langchain.tools.base import BaseTool from langchain.tools.requests.tool import BaseRequestsTool from langchain.utilities.requests import RequestsWrapper @@ -80,7 +81,7 @@ class RequestsGetToolWithParsing(BaseRequestsTool, BaseTool): def _run(self, text: str) -> str: try: - data = json.loads(text) + data = parse_json_markdown(text) except json.JSONDecodeError as e: raise e data_params = data.get("params") @@ -110,7 +111,7 @@ class RequestsPostToolWithParsing(BaseRequestsTool, BaseTool): def 
_run(self, text: str) -> str: try: - data = json.loads(text) + data = parse_json_markdown(text) except json.JSONDecodeError as e: raise e response = self.requests_wrapper.post(data["url"], data["data"]) @@ -139,7 +140,7 @@ class RequestsPatchToolWithParsing(BaseRequestsTool, BaseTool): def _run(self, text: str) -> str: try: - data = json.loads(text) + data = parse_json_markdown(text) except json.JSONDecodeError as e: raise e response = self.requests_wrapper.patch(data["url"], data["data"]) @@ -168,7 +169,7 @@ class RequestsPutToolWithParsing(BaseRequestsTool, BaseTool): def _run(self, text: str) -> str: try: - data = json.loads(text) + data = parse_json_markdown(text) except json.JSONDecodeError as e: raise e response = self.requests_wrapper.put(data["url"], data["data"]) @@ -198,7 +199,7 @@ class RequestsDeleteToolWithParsing(BaseRequestsTool, BaseTool): def _run(self, text: str) -> str: try: - data = json.loads(text) + data = parse_json_markdown(text) except json.JSONDecodeError as e: raise e response = self.requests_wrapper.delete(data["url"]) From 74c7b799ef16a9987c5d13200b16f176b9d16ae1 Mon Sep 17 00:00:00 2001 From: Max Weng Date: Tue, 5 Dec 2023 12:27:54 +0800 Subject: [PATCH 08/63] migrate openai audio api (#13557) for issue https://github.com/langchain-ai/langchain/issues/13162 migrate openai audio api, as [openai v1.0.0 Migration Guide](https://github.com/openai/openai-python/discussions/742) --------- Co-authored-by: Double Max --- .../document_loaders/parsers/audio.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/libs/langchain/langchain/document_loaders/parsers/audio.py b/libs/langchain/langchain/document_loaders/parsers/audio.py index 073eb82ea0883..737cfd147a3bc 100644 --- a/libs/langchain/langchain/document_loaders/parsers/audio.py +++ b/libs/langchain/langchain/document_loaders/parsers/audio.py @@ -6,6 +6,7 @@ from langchain.document_loaders.base import BaseBlobParser from 
langchain.document_loaders.blob_loaders import Blob +from langchain.utils.openai import is_openai_v1 logger = logging.getLogger(__name__) @@ -36,9 +37,13 @@ def lazy_parse(self, blob: Blob) -> Iterator[Document]: "pydub package not found, please install it with " "`pip install pydub`" ) - # Set the API key if provided - if self.api_key: - openai.api_key = self.api_key + if is_openai_v1(): + # api_key optional, defaults to `os.environ['OPENAI_API_KEY']` + client = openai.OpenAI(api_key=self.api_key) + else: + # Set the API key if provided + if self.api_key: + openai.api_key = self.api_key # Audio file from disk audio = AudioSegment.from_file(blob.path) @@ -63,7 +68,12 @@ def lazy_parse(self, blob: Blob) -> Iterator[Document]: attempts = 0 while attempts < 3: try: - transcript = openai.Audio.transcribe("whisper-1", file_obj) + if is_openai_v1(): + transcript = client.audio.transcriptions.create( + model="whisper-1", file=file_obj + ) + else: + transcript = openai.Audio.transcribe("whisper-1", file_obj) break except Exception as e: attempts += 1 From 5cb3393e20d9b95f93e37289933146514e5c880a Mon Sep 17 00:00:00 2001 From: price-deshaw <137956190+price-deshaw@users.noreply.github.com> Date: Mon, 4 Dec 2023 23:28:13 -0500 Subject: [PATCH 09/63] update OpenAI function agents' llm validation (#13538) - **Description:** This PR modifies the LLM validation in OpenAI function agents to check whether the LLM supports OpenAI functions based on a property (`supports_oia_functions`) instead of whether the LLM passed to the agent `isinstance` of `ChatOpenAI`. This allows classes that extend `BaseChatModel` to be passed to these agents as long as they've been integrated with the OpenAI APIs and have this property set, even if they don't extend `ChatOpenAI`. 
- **Issue:** N/A - **Dependencies:** none --- .../conversational_retrieval/openai_functions.py | 3 --- .../langchain/agents/openai_functions_agent/base.py | 9 --------- .../agents/openai_functions_multi_agent/base.py | 7 ------- 3 files changed, 19 deletions(-) diff --git a/libs/langchain/langchain/agents/agent_toolkits/conversational_retrieval/openai_functions.py b/libs/langchain/langchain/agents/agent_toolkits/conversational_retrieval/openai_functions.py index 105a6047a14f9..f577124d2e2cd 100644 --- a/libs/langchain/langchain/agents/agent_toolkits/conversational_retrieval/openai_functions.py +++ b/libs/langchain/langchain/agents/agent_toolkits/conversational_retrieval/openai_functions.py @@ -10,7 +10,6 @@ AgentTokenBufferMemory, ) from langchain.agents.openai_functions_agent.base import OpenAIFunctionsAgent -from langchain.chat_models.openai import ChatOpenAI from langchain.memory.token_buffer import ConversationTokenBufferMemory from langchain.tools.base import BaseTool @@ -57,8 +56,6 @@ def create_conversational_retrieval_agent( An agent executor initialized appropriately """ - if not isinstance(llm, ChatOpenAI): - raise ValueError("Only supported with ChatOpenAI models.") if remember_intermediate_steps: memory: BaseMemory = AgentTokenBufferMemory( memory_key=memory_key, llm=llm, max_token_limit=max_token_limit diff --git a/libs/langchain/langchain/agents/openai_functions_agent/base.py b/libs/langchain/langchain/agents/openai_functions_agent/base.py index 1af9ff39ac0d6..1238c7d4912bf 100644 --- a/libs/langchain/langchain/agents/openai_functions_agent/base.py +++ b/libs/langchain/langchain/agents/openai_functions_agent/base.py @@ -25,7 +25,6 @@ ) from langchain.callbacks.base import BaseCallbackManager from langchain.callbacks.manager import Callbacks -from langchain.chat_models.openai import ChatOpenAI from langchain.tools.base import BaseTool from langchain.tools.render import format_tool_to_openai_function @@ -50,12 +49,6 @@ def get_allowed_tools(self) -> 
List[str]: """Get allowed tools.""" return [t.name for t in self.tools] - @root_validator - def validate_llm(cls, values: dict) -> dict: - if not isinstance(values["llm"], ChatOpenAI): - raise ValueError("Only supported with ChatOpenAI models.") - return values - @root_validator def validate_prompt(cls, values: dict) -> dict: prompt: BasePromptTemplate = values["prompt"] @@ -222,8 +215,6 @@ def from_llm_and_tools( **kwargs: Any, ) -> BaseSingleActionAgent: """Construct an agent from an LLM and tools.""" - if not isinstance(llm, ChatOpenAI): - raise ValueError("Only supported with ChatOpenAI models.") prompt = cls.create_prompt( extra_prompt_messages=extra_prompt_messages, system_message=system_message, diff --git a/libs/langchain/langchain/agents/openai_functions_multi_agent/base.py b/libs/langchain/langchain/agents/openai_functions_multi_agent/base.py index 8b8d1da9f095b..d25944863757d 100644 --- a/libs/langchain/langchain/agents/openai_functions_multi_agent/base.py +++ b/libs/langchain/langchain/agents/openai_functions_multi_agent/base.py @@ -26,7 +26,6 @@ ) from langchain.callbacks.base import BaseCallbackManager from langchain.callbacks.manager import Callbacks -from langchain.chat_models.openai import ChatOpenAI from langchain.tools import BaseTool # For backwards compatibility @@ -109,12 +108,6 @@ def get_allowed_tools(self) -> List[str]: """Get allowed tools.""" return [t.name for t in self.tools] - @root_validator - def validate_llm(cls, values: dict) -> dict: - if not isinstance(values["llm"], ChatOpenAI): - raise ValueError("Only supported with ChatOpenAI models.") - return values - @root_validator def validate_prompt(cls, values: dict) -> dict: prompt: BasePromptTemplate = values["prompt"] From ea0afd07ca1ef7715c0df606efe12ae9f9ad7e62 Mon Sep 17 00:00:00 2001 From: guillaumedelande <115003496+guillaumedelande@users.noreply.github.com> Date: Tue, 5 Dec 2023 05:29:20 +0100 Subject: [PATCH 10/63] Update azuresearch.py following recent change from 
azure-search-documents library (#13472) - **Description:** Reference library azure-search-documents has been adapted in version 11.4.0: 1. Notebook explaining Azure AI Search updated with most recent info 2. HnswVectorSearchAlgorithmConfiguration --> HnswAlgorithmConfiguration 3. PrioritizedFields(prioritized_content_fields) --> SemanticPrioritizedFields(content_fields) 4. SemanticSettings --> SemanticSearch 5. VectorSearch(algorithm_configurations) --> VectorSearch(configurations) --> Changes now reflected on Langchain: default vector search config from langchain is now compatible with officially released library from Azure. - **Issue:** Issue creating a new index (due to wrong class used for default vector search configuration) if using latest version of azure-search-documents with current langchain version - **Dependencies:** azure-search-documents>=11.4.0, - **Tag maintainer:** , --------- Co-authored-by: Erick Friis --- .../vectorstores/azuresearch.ipynb | 11 +- .../langchain/vectorstores/azuresearch.py | 120 +++++++++++++----- 2 files changed, 93 insertions(+), 38 deletions(-) diff --git a/docs/docs/integrations/vectorstores/azuresearch.ipynb b/docs/docs/integrations/vectorstores/azuresearch.ipynb index 1ffb30e5c7287..710e5b3fcb862 100644 --- a/docs/docs/integrations/vectorstores/azuresearch.ipynb +++ b/docs/docs/integrations/vectorstores/azuresearch.ipynb @@ -6,18 +6,17 @@ "collapsed": false }, "source": [ - "# Azure Cognitive Search\n", + "# Azure AI Search\n", "\n", - "[Azure Cognitive Search](https://learn.microsoft.com/azure/search/search-what-is-azure-search) (formerly known as `Azure Search`) is a cloud search service that gives developers infrastructure, APIs, and tools for building a rich search experience over private, heterogeneous content in web, mobile, and enterprise applications.\n", - "\n", - "Vector search is currently in public preview. It's available through the Azure portal, preview REST API and beta client libraries. 
[More info](https://learn.microsoft.com/en-us/azure/search/vector-search-overview) Beta client libraries are subject to potential breaking changes, please be sure to use the SDK package version identified below. azure-search-documents==11.4.0b8" + "[Azure AI Search](https://learn.microsoft.com/azure/search/search-what-is-azure-search) (formerly known as `Azure Search` and `Azure Cognitive Search`) is a cloud search service that gives developers infrastructure, APIs, and tools for building a rich search experience over private, heterogeneous content in web, mobile, and enterprise applications.\n", + "\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "# Install Azure Cognitive Search SDK" + "# Install Azure AI Search SDK" ] }, { @@ -26,7 +25,7 @@ "metadata": {}, "outputs": [], "source": [ - "!pip install azure-search-documents==11.4.0b8\n", + "!pip install azure-search-documents\n", "!pip install azure-identity" ] }, diff --git a/libs/langchain/langchain/vectorstores/azuresearch.py b/libs/langchain/langchain/vectorstores/azuresearch.py index 2cd1f9546de72..b818dc71fd59a 100644 --- a/libs/langchain/langchain/vectorstores/azuresearch.py +++ b/libs/langchain/langchain/vectorstores/azuresearch.py @@ -14,6 +14,7 @@ Optional, Tuple, Type, + Union, ) import numpy as np @@ -36,10 +37,13 @@ from azure.search.documents.indexes.models import ( ScoringProfile, SearchField, - SemanticSettings, VectorSearch, ) + try: + from azure.search.documents.indexes.models import SemanticSearch + except ImportError: + from azure.search.documents.indexes.models import SemanticSettings # <11.4.0 # Allow overriding field names for Azure Search FIELDS_ID = get_from_env( @@ -69,7 +73,7 @@ def _get_search_client( semantic_configuration_name: Optional[str] = None, fields: Optional[List[SearchField]] = None, vector_search: Optional[VectorSearch] = None, - semantic_settings: Optional[SemanticSettings] = None, + semantic_settings: Optional[Union[SemanticSearch, SemanticSettings]] = 
None, scoring_profiles: Optional[List[ScoringProfile]] = None, default_scoring_profile: Optional[str] = None, default_fields: Optional[List[SearchField]] = None, @@ -81,15 +85,30 @@ def _get_search_client( from azure.search.documents import SearchClient from azure.search.documents.indexes import SearchIndexClient from azure.search.documents.indexes.models import ( - HnswVectorSearchAlgorithmConfiguration, - PrioritizedFields, SearchIndex, SemanticConfiguration, SemanticField, - SemanticSettings, VectorSearch, ) + # class names changed for versions >= 11.4.0 + try: + from azure.search.documents.indexes.models import ( + HnswAlgorithmConfiguration, # HnswVectorSearchAlgorithmConfiguration is old + SemanticPrioritizedFields, # PrioritizedFields outdated + SemanticSearch, # SemanticSettings outdated + ) + + NEW_VERSION = True + except ImportError: + from azure.search.documents.indexes.models import ( + HnswVectorSearchAlgorithmConfiguration, + PrioritizedFields, + SemanticSettings, + ) + + NEW_VERSION = False + default_fields = default_fields or [] if key is None: credential = DefaultAzureCredential() @@ -135,34 +154,71 @@ def fmt_err(x: str) -> str: fields = default_fields # Vector search configuration if vector_search is None: - vector_search = VectorSearch( - algorithm_configurations=[ - HnswVectorSearchAlgorithmConfiguration( - name="default", - kind="hnsw", - parameters={ # type: ignore - "m": 4, - "efConstruction": 400, - "efSearch": 500, - "metric": "cosine", - }, - ) - ] - ) + if NEW_VERSION: + # >= 11.4.0: + # VectorSearch(algorithm_configuration) --> VectorSearch(algorithms) + # HnswVectorSearchAlgorithmConfiguration --> HnswAlgorithmConfiguration + vector_search = VectorSearch( + algorithms=[ + HnswAlgorithmConfiguration( + name="default", + kind="hnsw", + parameters={ # type: ignore + "m": 4, + "efConstruction": 400, + "efSearch": 500, + "metric": "cosine", + }, + ) + ] + ) + else: # < 11.4.0 + vector_search = VectorSearch( + algorithm_configurations=[ + 
HnswVectorSearchAlgorithmConfiguration( + name="default", + kind="hnsw", + parameters={ # type: ignore + "m": 4, + "efConstruction": 400, + "efSearch": 500, + "metric": "cosine", + }, + ) + ] + ) + # Create the semantic settings with the configuration if semantic_settings is None and semantic_configuration_name is not None: - semantic_settings = SemanticSettings( - configurations=[ - SemanticConfiguration( - name=semantic_configuration_name, - prioritized_fields=PrioritizedFields( - prioritized_content_fields=[ - SemanticField(field_name=FIELDS_CONTENT) - ], - ), - ) - ] - ) + if NEW_VERSION: + # >= 11.4.0: SemanticSettings --> SemanticSearch + # PrioritizedFields(prioritized_content_fields) + # --> SemanticPrioritizedFields(content_fields) + semantic_settings = SemanticSearch( + configurations=[ + SemanticConfiguration( + name=semantic_configuration_name, + prioritized_fields=SemanticPrioritizedFields( + content_fields=[ + SemanticField(field_name=FIELDS_CONTENT) + ], + ), + ) + ] + ) + else: # < 11.4.0 + semantic_settings = SemanticSettings( + configurations=[ + SemanticConfiguration( + name=semantic_configuration_name, + prioritized_fields=PrioritizedFields( + prioritized_content_fields=[ + SemanticField(field_name=FIELDS_CONTENT) + ], + ), + ) + ] + ) # Create the search index with the semantic settings and vector search index = SearchIndex( name=index_name, @@ -196,7 +252,7 @@ def __init__( semantic_query_language: str = "en-us", fields: Optional[List[SearchField]] = None, vector_search: Optional[VectorSearch] = None, - semantic_settings: Optional[SemanticSettings] = None, + semantic_settings: Optional[Union[SemanticSearch, SemanticSettings]] = None, scoring_profiles: Optional[List[ScoringProfile]] = None, default_scoring_profile: Optional[str] = None, **kwargs: Any, From e0c03d6c44f42c3704a8ab28b45d4a424db9899b Mon Sep 17 00:00:00 2001 From: Philippe PRADOS Date: Tue, 5 Dec 2023 05:31:21 +0100 Subject: [PATCH 11/63] Pprados/lite google drive (#13175) - Fix bug 
in the document - Add clarification on the use of langchain-google drive. --- .../document_loaders/google_drive.ipynb | 25 ++++++++----------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/docs/docs/integrations/document_loaders/google_drive.ipynb b/docs/docs/integrations/document_loaders/google_drive.ipynb index 2e3067e584344..1bfab2c8f295d 100644 --- a/docs/docs/integrations/document_loaders/google_drive.ipynb +++ b/docs/docs/integrations/document_loaders/google_drive.ipynb @@ -217,7 +217,7 @@ "It's compatible with the ̀`langchain.document_loaders.GoogleDriveLoader` and can be used\n", "in its place.\n", "\n", - "To be compatible with containers, the authentication uses an environment variable ̀GOOGLE_ACCOUNT_FILE` to credential file (for user or service)." + "To be compatible with containers, the authentication uses an environment variable `̀GOOGLE_ACCOUNT_FILE` to credential file (for user or service)." ] }, { @@ -331,6 +331,7 @@ "Some pre-formated request are proposed (use `{query}`, `{folder_id}` and/or `{mime_type}`):\n", "\n", "You can customize the criteria to select the files. 
A set of predefined filter are proposed:\n", + "\n", "| template | description |\n", "| -------------------------------------- | --------------------------------------------------------------------- |\n", "| gdrive-all-in-folder | Return all compatible files from a `folder_id` |\n", @@ -401,6 +402,14 @@ "id": "375bb465-8f69-407b-94bd-ffa3718ef500", "metadata": {}, "source": [ + "The conversion can manage in Markdown format:\n", + "- bullet\n", + "- link\n", + "- table\n", + "- titles\n", + "\n", + "Set the attribute `return_link` to `True` to export links.\n", + "\n", "#### Modes for GSlide and GSheet\n", "The parameter mode accepts different values:\n", "\n", @@ -408,12 +417,6 @@ "- \"snippets\": return the description of each file (set in metadata of Google Drive files).\n", "\n", "\n", - "The conversion can manage in Markdown format:\n", - "- bullet\n", - "- link\n", - "- table\n", - "- titles\n", - "\n", "The parameter `gslide_mode` accepts different values:\n", "\n", "- \"single\" : one document with <PAGE BREAK>\n", @@ -503,14 +506,6 @@ " print(\"---\")\n", " print(doc.page_content.strip()[:60] + \"...\")" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "51efa73a-4e2d-4f9c-abaf-6c9bde2ff69d", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { From dcccf8fa6635fa04622a9d08437d023081422bfc Mon Sep 17 00:00:00 2001 From: Joan Fontanals Date: Tue, 5 Dec 2023 05:40:33 +0100 Subject: [PATCH 12/63] adapt Jina Embeddings to new Jina AI Embedding API (#13658) - **Description:** Adapt JinaEmbeddings to run with the new Jina AI Embedding platform - **Twitter handle:** https://twitter.com/JinaAI_ --------- Co-authored-by: Joan Fontanals Martinez Co-authored-by: Harrison Chase --- docs/docs/integrations/providers/jina.mdx | 69 ++----------- .../integrations/text_embedding/jina.ipynb | 24 +++-- libs/langchain/langchain/embeddings/jina.py | 98 +++++++------------ 3 files changed, 58 insertions(+), 133 deletions(-) diff --git 
a/docs/docs/integrations/providers/jina.mdx b/docs/docs/integrations/providers/jina.mdx index 181b57a4abf99..a3900c446df86 100644 --- a/docs/docs/integrations/providers/jina.mdx +++ b/docs/docs/integrations/providers/jina.mdx @@ -1,75 +1,20 @@ # Jina -This page covers how to use the Jina ecosystem within LangChain. +This page covers how to use the Jina Embeddings within LangChain. It is broken into two parts: installation and setup, and then references to specific Jina wrappers. ## Installation and Setup -- Install the Python SDK with `pip install jina` -- Get a Jina AI Cloud auth token from [here](https://cloud.jina.ai/settings/tokens) and set it as an environment variable (`JINA_AUTH_TOKEN`) - -## Wrappers - -### Embeddings +- Get a Jina AI API token from [here](https://jina.ai/embeddings/) and set it as an environment variable (`JINA_API_TOKEN`) There exists a Jina Embeddings wrapper, which you can access with -```python -from langchain.embeddings import JinaEmbeddings -``` -For a more detailed walkthrough of this, see [this notebook](/docs/integrations/text_embedding/jina) - -## Deployment - -[Langchain-serve](https://github.com/jina-ai/langchain-serve), powered by Jina, helps take LangChain apps to production with easy to use REST/WebSocket APIs and Slack bots. - -### Usage - -Install the package from PyPI. - -```bash -pip install langchain-serve -``` - -Wrap your LangChain app with the `@serving` decorator. ```python -# app.py -from lcserve import serving - -@serving -def ask(input: str) -> str: - from langchain.chains import LLMChain - from langchain.llms import OpenAI - from langchain.agents import AgentExecutor, ZeroShotAgent - - tools = [...] 
# list of tools - prompt = ZeroShotAgent.create_prompt( - tools, input_variables=["input", "agent_scratchpad"], - ) - llm_chain = LLMChain(llm=OpenAI(temperature=0), prompt=prompt) - agent = ZeroShotAgent( - llm_chain=llm_chain, allowed_tools=[tool.name for tool in tools] - ) - agent_executor = AgentExecutor.from_agent_and_tools( - agent=agent, - tools=tools, - verbose=True, - ) - return agent_executor.run(input) -``` - -Deploy on Jina AI Cloud with `lc-serve deploy jcloud app`. Once deployed, we can send a POST request to the API endpoint to get a response. +from langchain.embeddings import JinaEmbeddings -```bash -curl -X 'POST' 'https://.wolf.jina.ai/ask' \ - -d '{ - "input": "Your Question here?", - "envs": { - "OPENAI_API_KEY": "sk-***" - } -}' +# you can pass jina_api_key; if none is passed, it will be taken from the `JINA_API_KEY` environment variable +embeddings = JinaEmbeddings(jina_api_key='jina_**', model_name='jina-embeddings-v2-base-en') ``` -You can also self-host the app on your infrastructure with Docker-compose or Kubernetes. See [here](https://github.com/jina-ai/langchain-serve#-self-host-llm-apps-with-docker-compose-or-kubernetes) for more details. - +You can check the list of available models from [here](https://jina.ai/embeddings/) -Langchain-serve also allows to deploy the apps with WebSocket APIs and Slack Bots both on [Jina AI Cloud](https://cloud.jina.ai/) or self-hosted infrastructure. 
+For a more detailed walkthrough of this, see [this notebook](/docs/integrations/text_embedding/jina.ipynb) diff --git a/docs/docs/integrations/text_embedding/jina.ipynb b/docs/docs/integrations/text_embedding/jina.ipynb index cba95327425bf..7eb75678764f1 100644 --- a/docs/docs/integrations/text_embedding/jina.ipynb +++ b/docs/docs/integrations/text_embedding/jina.ipynb @@ -12,7 +12,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "d94c62b4", "metadata": {}, "outputs": [], @@ -28,7 +28,7 @@ "outputs": [], "source": [ "embeddings = JinaEmbeddings(\n", - " jina_auth_token=jina_auth_token, model_name=\"ViT-B-32::openai\"\n", + " jina_api_key=\"jina_*\", model_name=\"jina-embeddings-v2-base-en\"\n", ")" ] }, @@ -55,28 +55,32 @@ { "cell_type": "code", "execution_count": null, - "id": "b790fd09", + "id": "aea3ca33-1e6e-499c-8284-b7e26f38c514", "metadata": {}, "outputs": [], "source": [ - "doc_result = embeddings.embed_documents([text])" + "print(query_result)" ] }, { - "cell_type": "markdown", - "id": "6f3607a0", + "cell_type": "code", + "execution_count": null, + "id": "b790fd09", "metadata": {}, + "outputs": [], "source": [ - "In the above example, `ViT-B-32::openai`, OpenAI's pretrained `ViT-B-32` model is used. For a full list of models, see [here](https://cloud.jina.ai/user/inference/model/63dca9df5a0da83009d519cd)." 
+ "doc_result = embeddings.embed_documents([text])" ] }, { "cell_type": "code", "execution_count": null, - "id": "cd5f148e", + "id": "c2e6b743-768c-4d7e-a331-27d5f0e8e30e", "metadata": {}, "outputs": [], - "source": [] + "source": [ + "print(doc_result)" + ] } ], "metadata": { @@ -95,7 +99,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.1" + "version": "3.10.11" } }, "nbformat": 4, diff --git a/libs/langchain/langchain/embeddings/jina.py b/libs/langchain/langchain/embeddings/jina.py index a781f908f2d38..ebf941870fbd1 100644 --- a/libs/langchain/langchain/embeddings/jina.py +++ b/libs/langchain/langchain/embeddings/jina.py @@ -1,4 +1,3 @@ -import os from typing import Any, Dict, List, Optional import requests @@ -7,69 +6,54 @@ from langchain.utils import get_from_dict_or_env +JINA_API_URL: str = "https://api.jina.ai/v1/embeddings" + class JinaEmbeddings(BaseModel, Embeddings): """Jina embedding models.""" - client: Any #: :meta private: - - model_name: str = "ViT-B-32::openai" - """Model name to use.""" - - jina_auth_token: Optional[str] = None - jina_api_url: str = "https://api.clip.jina.ai/api/v1/models/" - request_headers: Optional[dict] = None + session: Any #: :meta private: + model_name: str = "jina-embeddings-v2-base-en" + jina_api_key: Optional[str] = None @root_validator() def validate_environment(cls, values: Dict) -> Dict: """Validate that auth token exists in environment.""" - # Set Auth - jina_auth_token = get_from_dict_or_env( - values, "jina_auth_token", "JINA_AUTH_TOKEN" - ) - values["jina_auth_token"] = jina_auth_token - values["request_headers"] = (("authorization", jina_auth_token),) - - # Test that package is installed try: - import jina - except ImportError: - raise ImportError( - "Could not import `jina` python package. " - "Please install it with `pip install jina`." 
- ) + jina_api_key = get_from_dict_or_env(values, "jina_api_key", "JINA_API_KEY") + except ValueError as original_exc: + try: + jina_api_key = get_from_dict_or_env( + values, "jina_auth_token", "JINA_AUTH_TOKEN" + ) + except ValueError: + raise original_exc + session = requests.Session() + session.headers.update( + { + "Authorization": f"Bearer {jina_api_key}", + "Accept-Encoding": "identity", + "Content-type": "application/json", + } + ) + values["session"] = session + return values - # Setup client - jina_api_url = os.environ.get("JINA_API_URL", values["jina_api_url"]) - model_name = values["model_name"] - try: - resp = requests.get( - jina_api_url + f"?model_name={model_name}", - headers={"Authorization": jina_auth_token}, - ) + def _embed(self, texts: List[str]) -> List[List[float]]: + # Call Jina AI Embedding API + resp = self.session.post( # type: ignore + JINA_API_URL, json={"input": texts, "model": self.model_name} + ).json() + if "data" not in resp: + raise RuntimeError(resp["detail"]) - if resp.status_code == 401: - raise ValueError( - "The given Jina auth token is invalid. " - "Please check your Jina auth token." - ) - elif resp.status_code == 404: - raise ValueError( - f"The given model name `{model_name}` is not valid. " - f"Please go to https://cloud.jina.ai/user/inference " - f"and create a model with the given model name." 
- ) - resp.raise_for_status() + embeddings = resp["data"] - endpoint = resp.json()["endpoints"]["grpc"] - values["client"] = jina.Client(host=endpoint) - except requests.exceptions.HTTPError as err: - raise ValueError(f"Error: {err!r}") - return values + # Sort resulting embeddings by index + sorted_embeddings = sorted(embeddings, key=lambda e: e["index"]) # type: ignore - def _post(self, docs: List[Any], **kwargs: Any) -> Any: - payload = dict(inputs=docs, metadata=self.request_headers, **kwargs) - return self.client.post(on="/encode", **payload) + # Return just the embeddings + return [result["embedding"] for result in sorted_embeddings] def embed_documents(self, texts: List[str]) -> List[List[float]]: """Call out to Jina's embedding endpoint. @@ -78,12 +62,7 @@ def embed_documents(self, texts: List[str]) -> List[List[float]]: Returns: List of embeddings, one for each text. """ - from docarray import Document, DocumentArray - - embeddings = self._post( - docs=DocumentArray([Document(text=t) for t in texts]) - ).embeddings - return [list(map(float, e)) for e in embeddings] + return self._embed(texts) def embed_query(self, text: str) -> List[float]: """Call out to Jina's embedding endpoint. @@ -92,7 +71,4 @@ def embed_query(self, text: str) -> List[float]: Returns: Embeddings for the text. """ - from docarray import Document, DocumentArray - - embedding = self._post(docs=DocumentArray([Document(text=text)])).embeddings[0] - return list(map(float, embedding)) + return self._embed([text])[0] From 77a15fa9888a3e81a014895a6ec3f1b34c016d06 Mon Sep 17 00:00:00 2001 From: Sean Bearden <72461227+seanbearden@users.noreply.github.com> Date: Mon, 4 Dec 2023 20:48:09 -0800 Subject: [PATCH 13/63] Added ability to pass arguments to the Playwright browser (#13146) - **Description:** Enhanced `create_sync_playwright_browser` and `create_async_playwright_browser` functions to accept a list of arguments. 
These arguments are now forwarded to `browser.chromium.launch()` for customizable browser instantiation. - **Issue:** #13143 - **Dependencies:** None - **Tag maintainer:** @eyurtsev, - **Twitter handle:** Dr_Bearden --------- Co-authored-by: Harrison Chase --- .../langchain/tools/playwright/utils.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/libs/langchain/langchain/tools/playwright/utils.py b/libs/langchain/langchain/tools/playwright/utils.py index eb874f2eb4d65..692288fdde318 100644 --- a/libs/langchain/langchain/tools/playwright/utils.py +++ b/libs/langchain/langchain/tools/playwright/utils.py @@ -2,7 +2,7 @@ from __future__ import annotations import asyncio -from typing import TYPE_CHECKING, Any, Coroutine, TypeVar +from typing import TYPE_CHECKING, Any, Coroutine, List, Optional, TypeVar if TYPE_CHECKING: from playwright.async_api import Browser as AsyncBrowser @@ -50,12 +50,15 @@ def get_current_page(browser: SyncBrowser) -> SyncPage: return context.pages[-1] -def create_async_playwright_browser(headless: bool = True) -> AsyncBrowser: +def create_async_playwright_browser( + headless: bool = True, args: Optional[List[str]] = None +) -> AsyncBrowser: """ Create an async playwright browser. Args: headless: Whether to run the browser in headless mode. Defaults to True. + args: arguments to pass to browser.chromium.launch Returns: AsyncBrowser: The playwright browser. @@ -63,15 +66,18 @@ def create_async_playwright_browser(headless: bool = True) -> AsyncBrowser: from playwright.async_api import async_playwright browser = run_async(async_playwright().start()) - return run_async(browser.chromium.launch(headless=headless)) + return run_async(browser.chromium.launch(headless=headless, args=args)) -def create_sync_playwright_browser(headless: bool = True) -> SyncBrowser: +def create_sync_playwright_browser( + headless: bool = True, args: Optional[List[str]] = None +) -> SyncBrowser: """ Create a playwright browser. 
Args: headless: Whether to run the browser in headless mode. Defaults to True. + args: arguments to pass to browser.chromium.launch Returns: SyncBrowser: The playwright browser. @@ -79,7 +85,7 @@ def create_sync_playwright_browser(headless: bool = True) -> SyncBrowser: from playwright.sync_api import sync_playwright browser = sync_playwright().start() - return browser.chromium.launch(headless=headless) + return browser.chromium.launch(headless=headless, args=args) T = TypeVar("T") From f758c8adc43ebbbdb3a13caa5a022a2d043229cc Mon Sep 17 00:00:00 2001 From: Eun Hye Kim Date: Tue, 5 Dec 2023 13:54:08 +0900 Subject: [PATCH 14/63] Fix #11737 issue (extra_tools option of create_pandas_dataframe_agent is not working) (#13203) - **Description:** Fix #11737 issue (extra_tools option of create_pandas_dataframe_agent is not working), - **Issue:** #11737 , - **Dependencies:** no, - **Tag maintainer:** @baskaryan, @eyurtsev, @hwchase17 I needed this method at work, so I modified it myself and used it. There is a similar issue(#11737) and PR(#13018) of @PyroGenesis, so I combined my code at the original PR. You may be busy, but it would be great help for me if you checked. Thank you. - **Twitter handle:** @lunara_x If you need an .ipynb example about this, please tag me. I will share what I am working on after removing any work-related content. 
--------- Co-authored-by: Harrison Chase --- .../agents/agent_toolkits/pandas/base.py | 33 ++++++++++++------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/libs/experimental/langchain_experimental/agents/agent_toolkits/pandas/base.py b/libs/experimental/langchain_experimental/agents/agent_toolkits/pandas/base.py index ef5e1eae8a566..cc5205a3dcc83 100644 --- a/libs/experimental/langchain_experimental/agents/agent_toolkits/pandas/base.py +++ b/libs/experimental/langchain_experimental/agents/agent_toolkits/pandas/base.py @@ -33,7 +33,8 @@ def _get_multi_prompt( input_variables: Optional[List[str]] = None, include_df_in_prompt: Optional[bool] = True, number_of_head_rows: int = 5, -) -> Tuple[BasePromptTemplate, List[PythonAstREPLTool]]: + extra_tools: Sequence[BaseTool] = (), +) -> Tuple[BasePromptTemplate, List[BaseTool]]: num_dfs = len(dfs) if suffix is not None: suffix_to_use = suffix @@ -55,12 +56,13 @@ def _get_multi_prompt( df_locals = {} for i, dataframe in enumerate(dfs): df_locals[f"df{i + 1}"] = dataframe - tools = [PythonAstREPLTool(locals=df_locals)] - + tools = [PythonAstREPLTool(locals=df_locals)] + list(extra_tools) prompt = ZeroShotAgent.create_prompt( - tools, prefix=prefix, suffix=suffix_to_use, input_variables=input_variables + tools, + prefix=prefix, + suffix=suffix_to_use, + input_variables=input_variables, ) - partial_prompt = prompt.partial() if "dfs_head" in input_variables: dfs_head = "\n\n".join([d.head(number_of_head_rows).to_markdown() for d in dfs]) @@ -77,7 +79,8 @@ def _get_single_prompt( input_variables: Optional[List[str]] = None, include_df_in_prompt: Optional[bool] = True, number_of_head_rows: int = 5, -) -> Tuple[BasePromptTemplate, List[PythonAstREPLTool]]: + extra_tools: Sequence[BaseTool] = (), +) -> Tuple[BasePromptTemplate, List[BaseTool]]: if suffix is not None: suffix_to_use = suffix include_df_head = True @@ -96,10 +99,13 @@ def _get_single_prompt( if prefix is None: prefix = PREFIX - tools = 
[PythonAstREPLTool(locals={"df": df})] + tools = [PythonAstREPLTool(locals={"df": df})] + list(extra_tools) prompt = ZeroShotAgent.create_prompt( - tools, prefix=prefix, suffix=suffix_to_use, input_variables=input_variables + tools, + prefix=prefix, + suffix=suffix_to_use, + input_variables=input_variables, ) partial_prompt = prompt.partial() @@ -117,7 +123,8 @@ def _get_prompt_and_tools( input_variables: Optional[List[str]] = None, include_df_in_prompt: Optional[bool] = True, number_of_head_rows: int = 5, -) -> Tuple[BasePromptTemplate, List[PythonAstREPLTool]]: + extra_tools: Sequence[BaseTool] = (), +) -> Tuple[BasePromptTemplate, List[BaseTool]]: try: import pandas as pd @@ -141,6 +148,7 @@ def _get_prompt_and_tools( input_variables=input_variables, include_df_in_prompt=include_df_in_prompt, number_of_head_rows=number_of_head_rows, + extra_tools=extra_tools, ) else: if not isinstance(df, pd.DataFrame): @@ -152,6 +160,7 @@ def _get_prompt_and_tools( input_variables=input_variables, include_df_in_prompt=include_df_in_prompt, number_of_head_rows=number_of_head_rows, + extra_tools=extra_tools, ) @@ -287,6 +296,7 @@ def create_pandas_dataframe_agent( ) -> AgentExecutor: """Construct a pandas agent from an LLM and dataframe.""" agent: BaseSingleActionAgent + base_tools: Sequence[BaseTool] if agent_type == AgentType.ZERO_SHOT_REACT_DESCRIPTION: prompt, base_tools = _get_prompt_and_tools( df, @@ -295,8 +305,9 @@ def create_pandas_dataframe_agent( input_variables=input_variables, include_df_in_prompt=include_df_in_prompt, number_of_head_rows=number_of_head_rows, + extra_tools=extra_tools, ) - tools = base_tools + list(extra_tools) + tools = base_tools llm_chain = LLMChain( llm=llm, prompt=prompt, @@ -318,7 +329,7 @@ def create_pandas_dataframe_agent( include_df_in_prompt=include_df_in_prompt, number_of_head_rows=number_of_head_rows, ) - tools = base_tools + list(extra_tools) + tools = list(base_tools) + list(extra_tools) agent = OpenAIFunctionsAgent( llm=llm, 
prompt=_prompt, From 7ad75edf8bdfafeeeb27695f3c5aae7053428c5b Mon Sep 17 00:00:00 2001 From: Eugene Yurtsev Date: Tue, 5 Dec 2023 12:38:04 -0500 Subject: [PATCH 15/63] Fix rag google cloud vertex ai template (#14300) Fix template by exposing chain correctly --- .../rag_google_cloud_vertexai_search/__init__.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/templates/rag-google-cloud-vertexai-search/rag_google_cloud_vertexai_search/__init__.py b/templates/rag-google-cloud-vertexai-search/rag_google_cloud_vertexai_search/__init__.py index e69de29bb2d1d..7b31128b4336e 100644 --- a/templates/rag-google-cloud-vertexai-search/rag_google_cloud_vertexai_search/__init__.py +++ b/templates/rag-google-cloud-vertexai-search/rag_google_cloud_vertexai_search/__init__.py @@ -0,0 +1,3 @@ +from rag_google_cloud_vertexai_search.chain import chain + +__all__ = ["chain"] From 4a5a13aab3953ece0b4ec7748f88c07d6236a445 Mon Sep 17 00:00:00 2001 From: Bagatur <22008038+baskaryan@users.noreply.github.com> Date: Tue, 5 Dec 2023 10:20:57 -0800 Subject: [PATCH 16/63] core[patch]: Release 0.0.10 (#14303) --- libs/core/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/core/pyproject.toml b/libs/core/pyproject.toml index 344c36a22b874..ff42a1f572b95 100644 --- a/libs/core/pyproject.toml +++ b/libs/core/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "langchain-core" -version = "0.0.9" +version = "0.0.10" description = "Building applications with LLMs through composability" authors = [] license = "MIT" From b2e756c0a83396acbdc26c96007401843c698d40 Mon Sep 17 00:00:00 2001 From: Bagatur <22008038+baskaryan@users.noreply.github.com> Date: Tue, 5 Dec 2023 11:38:52 -0800 Subject: [PATCH 17/63] langchain[patch]: Release 0.0.346 (#14307) --- libs/langchain/poetry.lock | 5 +++-- libs/langchain/pyproject.toml | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/libs/langchain/poetry.lock b/libs/langchain/poetry.lock index 
8622ddc080d85..2cea6b5ddd801 100644 --- a/libs/langchain/poetry.lock +++ b/libs/langchain/poetry.lock @@ -3936,6 +3936,7 @@ optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, !=3.6.*" files = [ {file = "jsonpointer-2.4-py2.py3-none-any.whl", hash = "sha256:15d51bba20eea3165644553647711d150376234112651b4f1811022aecad7d7a"}, + {file = "jsonpointer-2.4.tar.gz", hash = "sha256:585cee82b70211fa9e6043b7bb89db6e1aa49524340dde8ad6b63206ea689d88"}, ] [[package]] @@ -4285,7 +4286,7 @@ tests = ["pandas (>=1.4)", "pytest", "pytest-asyncio", "pytest-mock"] [[package]] name = "langchain-core" -version = "0.0.9" +version = "0.0.10" description = "Building applications with LLMs through composability" optional = false python-versions = ">=3.8.1,<4.0" @@ -11518,4 +11519,4 @@ text-helpers = ["chardet"] [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<4.0" -content-hash = "f4791327aca4bf3db1b46731d987347b537e638a1be85b2a6a771e52f95d3f29" +content-hash = "ffccc36a82a8a31fb7b1e3a4d9a024093dfaf25b6115a7c8a1fcbce9d1bb726b" diff --git a/libs/langchain/pyproject.toml b/libs/langchain/pyproject.toml index 7377756009ab5..fd27a8f4e8c1b 100644 --- a/libs/langchain/pyproject.toml +++ b/libs/langchain/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "langchain" -version = "0.0.345" +version = "0.0.346" description = "Building applications with LLMs through composability" authors = [] license = "MIT" @@ -12,7 +12,7 @@ langchain-server = "langchain.server:main" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" -langchain-core = ">=0.0.9,<0.1" +langchain-core = ">=0.0.10,<0.1" pydantic = ">=1,<3" SQLAlchemy = ">=1.4,<3" requests = "^2" From 80637727ea875e465046eb9535c95b63def20acc Mon Sep 17 00:00:00 2001 From: Eugene Yurtsev Date: Tue, 5 Dec 2023 14:49:55 -0500 Subject: [PATCH 18/63] hide api key: arcee (#14304) Hide API key for Arcee --------- Co-authored-by: raphael --- libs/langchain/langchain/llms/arcee.py | 12 ++-- 
libs/langchain/langchain/retrievers/arcee.py | 2 +- libs/langchain/langchain/utilities/arcee.py | 21 ++++-- .../integration_tests/llms/test_arcee.py | 72 ++++++++++++++----- 4 files changed, 77 insertions(+), 30 deletions(-) diff --git a/libs/langchain/langchain/llms/arcee.py b/libs/langchain/langchain/llms/arcee.py index 04b278f676363..7e83219cdd6b7 100644 --- a/libs/langchain/langchain/llms/arcee.py +++ b/libs/langchain/langchain/llms/arcee.py @@ -1,4 +1,4 @@ -from typing import Any, Dict, List, Optional, cast +from typing import Any, Dict, List, Optional, Union, cast from langchain_core.pydantic_v1 import Extra, SecretStr, root_validator @@ -30,7 +30,7 @@ class Arcee(LLM): _client: Optional[ArceeWrapper] = None #: :meta private: """Arcee _client.""" - arcee_api_key: Optional[SecretStr] = None + arcee_api_key: Union[SecretStr, str, None] = None """Arcee API Key""" model: str @@ -66,15 +66,16 @@ def __init__(self, **data: Any) -> None: """Initializes private fields.""" super().__init__(**data) + api_key = cast(SecretStr, self.arcee_api_key) self._client = ArceeWrapper( - arcee_api_key=cast(SecretStr, self.arcee_api_key), + arcee_api_key=api_key, arcee_api_url=self.arcee_api_url, arcee_api_version=self.arcee_api_version, model_kwargs=self.model_kwargs, model_name=self.model, ) - @root_validator() + @root_validator(pre=False) def validate_environments(cls, values: Dict) -> Dict: """Validate Arcee environment variables.""" @@ -106,7 +107,7 @@ def validate_environments(cls, values: Dict) -> Dict: ) # validate model kwargs - if values["model_kwargs"]: + if values.get("model_kwargs"): kw = values["model_kwargs"] # validate size @@ -120,7 +121,6 @@ def validate_environments(cls, values: Dict) -> Dict: raise ValueError("`filters` must be a list") for f in kw.get("filters"): DALMFilter(**f) - return values def _call( diff --git a/libs/langchain/langchain/retrievers/arcee.py b/libs/langchain/langchain/retrievers/arcee.py index 7d3e7b822f5b2..e360f62a03f3b 100644 --- 
a/libs/langchain/langchain/retrievers/arcee.py +++ b/libs/langchain/langchain/retrievers/arcee.py @@ -61,7 +61,7 @@ def __init__(self, **data: Any) -> None: super().__init__(**data) self._client = ArceeWrapper( - arcee_api_key=self.arcee_api_key, + arcee_api_key=self.arcee_api_key.get_secret_value(), arcee_api_url=self.arcee_api_url, arcee_api_version=self.arcee_api_version, model_kwargs=self.model_kwargs, diff --git a/libs/langchain/langchain/utilities/arcee.py b/libs/langchain/langchain/utilities/arcee.py index 743930b93e8d4..7217034858310 100644 --- a/libs/langchain/langchain/utilities/arcee.py +++ b/libs/langchain/langchain/utilities/arcee.py @@ -96,11 +96,14 @@ def adapt(cls, arcee_document: ArceeDocument) -> Document: class ArceeWrapper: - """Wrapper for Arcee API.""" + """Wrapper for Arcee API. + + For more details, see: https://www.arcee.ai/ + """ def __init__( self, - arcee_api_key: SecretStr, + arcee_api_key: Union[str, SecretStr], arcee_api_url: str, arcee_api_version: str, model_kwargs: Optional[Dict[str, Any]], @@ -114,9 +117,12 @@ def __init__( arcee_api_version: Version of Arcee API. model_kwargs: Keyword arguments for Arcee API. model_name: Name of an Arcee model. - """ - self.arcee_api_key = arcee_api_key + if isinstance(arcee_api_key, str): + arcee_api_key_ = SecretStr(arcee_api_key) + else: + arcee_api_key_ = arcee_api_key + self.arcee_api_key: SecretStr = arcee_api_key_ self.model_kwargs = model_kwargs self.arcee_api_url = arcee_api_url self.arcee_api_version = arcee_api_version @@ -166,8 +172,13 @@ def _make_request( def _make_request_headers(self, headers: Optional[Dict] = None) -> Dict: headers = headers or {} + if not isinstance(self.arcee_api_key, SecretStr): + raise TypeError( + f"arcee_api_key must be a SecretStr. 
Got {type(self.arcee_api_key)}" + ) + api_key = self.arcee_api_key.get_secret_value() internal_headers = { - "X-Token": self.arcee_api_key.get_secret_value(), + "X-Token": api_key, "Content-Type": "application/json", } headers.update(internal_headers) diff --git a/libs/langchain/tests/integration_tests/llms/test_arcee.py b/libs/langchain/tests/integration_tests/llms/test_arcee.py index a795c1909ca3d..40daec3682fb9 100644 --- a/libs/langchain/tests/integration_tests/llms/test_arcee.py +++ b/libs/langchain/tests/integration_tests/llms/test_arcee.py @@ -1,34 +1,70 @@ -"""Test Arcee llm""" +from unittest.mock import MagicMock, patch + from langchain_core.pydantic_v1 import SecretStr from pytest import CaptureFixture, MonkeyPatch from langchain.llms.arcee import Arcee -def test_api_key_is_secret_string() -> None: - llm = Arcee(model="DALM-PubMed", arcee_api_key="test-arcee-api-key") - assert isinstance(llm.arcee_api_key, SecretStr) +@patch("langchain.utilities.arcee.requests.get") +def test_arcee_api_key_is_secret_string(mock_get: MagicMock) -> None: + mock_response = mock_get.return_value + mock_response.status_code = 200 + mock_response.json.return_value = { + "model_id": "", + "status": "training_complete", + } + arcee_without_env_var = Arcee( + model="DALM-PubMed", + arcee_api_key="secret_api_key", + arcee_api_url="https://localhost", + arcee_api_version="version", + ) + assert isinstance(arcee_without_env_var.arcee_api_key, SecretStr) -def test_api_key_masked_when_passed_from_env( - monkeypatch: MonkeyPatch, capsys: CaptureFixture -) -> None: - """Test initialization with an API key provided via an env variable""" - monkeypatch.setenv("ARCEE_API_KEY", "test-arcee-api-key") - llm = Arcee(model="DALM-PubMed") +@patch("langchain.utilities.arcee.requests.get") +def test_api_key_masked_when_passed_via_constructor( + mock_get: MagicMock, capsys: CaptureFixture +) -> None: + mock_response = mock_get.return_value + mock_response.status_code = 200 + 
mock_response.json.return_value = { + "model_id": "", + "status": "training_complete", + } - print(llm.arcee_api_key, end="") + arcee_without_env_var = Arcee( + model="DALM-PubMed", + arcee_api_key="secret_api_key", + arcee_api_url="https://localhost", + arcee_api_version="version", + ) + print(arcee_without_env_var.arcee_api_key, end="") captured = capsys.readouterr() - assert captured.out == "**********" + assert "**********" == captured.out -def test_api_key_masked_when_passed_via_constructor( - capsys: CaptureFixture, + +@patch("langchain.utilities.arcee.requests.get") +def test_api_key_masked_when_passed_from_env( + mock_get: MagicMock, capsys: CaptureFixture, monkeypatch: MonkeyPatch ) -> None: - """Test initialization with an API key provided via the initializer""" - llm = Arcee(model="DALM-PubMed", arcee_api_key="test-arcee-api-key") + mock_response = mock_get.return_value + mock_response.status_code = 200 + mock_response.json.return_value = { + "model_id": "", + "status": "training_complete", + } - print(llm.arcee_api_key, end="") + monkeypatch.setenv("ARCEE_API_KEY", "secret_api_key") + arcee_with_env_var = Arcee( + model="DALM-PubMed", + arcee_api_url="https://localhost", + arcee_api_version="version", + ) + print(arcee_with_env_var.arcee_api_key, end="") captured = capsys.readouterr() - assert captured.out == "**********" + + assert "**********" == captured.out From 6607cc6eab42a7eacd5e7812f6821c3b598c1ca8 Mon Sep 17 00:00:00 2001 From: Bagatur <22008038+baskaryan@users.noreply.github.com> Date: Tue, 5 Dec 2023 12:11:42 -0800 Subject: [PATCH 19/63] experimental[patch]: Release 0.0.44 (#14310) --- libs/experimental/poetry.lock | 12 ++++++------ libs/experimental/pyproject.toml | 6 +++--- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/libs/experimental/poetry.lock b/libs/experimental/poetry.lock index d4e6fa1fe964d..f9b73b96f15d5 100644 --- a/libs/experimental/poetry.lock +++ b/libs/experimental/poetry.lock @@ -1642,7 +1642,7 @@ files = [ 
[[package]] name = "langchain" -version = "0.0.343" +version = "0.0.346" description = "Building applications with LLMs through composability" optional = false python-versions = ">=3.8.1,<4.0" @@ -1655,7 +1655,7 @@ anyio = "<4.0" async-timeout = {version = "^4.0.0", markers = "python_version < \"3.11\""} dataclasses-json = ">= 0.5.7, < 0.7" jsonpatch = "^1.33" -langchain-core = ">=0.0.7,<0.1" +langchain-core = ">=0.0.10,<0.1" langsmith = "~0.0.63" numpy = "^1" pydantic = ">=1,<3" @@ -1665,14 +1665,14 @@ SQLAlchemy = ">=1.4,<3" tenacity = "^8.1.0" [package.extras] -all = ["O365 (>=2.0.26,<3.0.0)", "aleph-alpha-client (>=2.15.0,<3.0.0)", "amadeus (>=8.1.0)", "arxiv (>=1.4,<2.0)", "atlassian-python-api (>=3.36.0,<4.0.0)", "awadb (>=0.3.9,<0.4.0)", "azure-ai-formrecognizer (>=3.2.1,<4.0.0)", "azure-ai-textanalytics (>=5.3.0,<6.0.0)", "azure-ai-vision (>=0.11.1b1,<0.12.0)", "azure-cognitiveservices-speech (>=1.28.0,<2.0.0)", "azure-cosmos (>=4.4.0b1,<5.0.0)", "azure-identity (>=1.12.0,<2.0.0)", "beautifulsoup4 (>=4,<5)", "clarifai (>=9.1.0)", "clickhouse-connect (>=0.5.14,<0.6.0)", "cohere (>=4,<5)", "deeplake (>=3.8.3,<4.0.0)", "dgml-utils (>=0.3.0,<0.4.0)", "docarray[hnswlib] (>=0.32.0,<0.33.0)", "duckduckgo-search (>=3.8.3,<4.0.0)", "elasticsearch (>=8,<9)", "esprima (>=4.0.1,<5.0.0)", "faiss-cpu (>=1,<2)", "google-api-python-client (==2.70.0)", "google-auth (>=2.18.1,<3.0.0)", "google-search-results (>=2,<3)", "gptcache (>=0.1.7)", "html2text (>=2020.1.16,<2021.0.0)", "huggingface_hub (>=0,<1)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "lancedb (>=0.1,<0.2)", "langkit (>=0.0.6,<0.1.0)", "lark (>=1.1.5,<2.0.0)", "librosa (>=0.10.0.post2,<0.11.0)", "lxml (>=4.9.2,<5.0.0)", "manifest-ml (>=0.0.1,<0.0.2)", "marqo (>=1.2.4,<2.0.0)", "momento (>=1.13.0,<2.0.0)", "nebula3-python (>=3.4.0,<4.0.0)", "neo4j (>=5.8.1,<6.0.0)", "networkx (>=2.6.3,<4)", "nlpcloud (>=1,<2)", "nltk (>=3,<4)", "nomic (>=1.0.43,<2.0.0)", "openai (<2)", "openlm (>=0.0.5,<0.0.6)", "opensearch-py 
(>=2.0.0,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pexpect (>=4.8.0,<5.0.0)", "pgvector (>=0.1.6,<0.2.0)", "pinecone-client (>=2,<3)", "pinecone-text (>=0.4.2,<0.5.0)", "psycopg2-binary (>=2.9.5,<3.0.0)", "pymongo (>=4.3.3,<5.0.0)", "pyowm (>=3.3.0,<4.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pytesseract (>=0.3.10,<0.4.0)", "python-arango (>=7.5.9,<8.0.0)", "pyvespa (>=0.33.0,<0.34.0)", "qdrant-client (>=1.3.1,<2.0.0)", "rdflib (>=6.3.2,<7.0.0)", "redis (>=4,<5)", "requests-toolbelt (>=1.0.0,<2.0.0)", "sentence-transformers (>=2,<3)", "singlestoredb (>=0.7.1,<0.8.0)", "tensorflow-text (>=2.11.0,<3.0.0)", "tigrisdb (>=1.0.0b6,<2.0.0)", "tiktoken (>=0.3.2,<0.6.0)", "torch (>=1,<3)", "transformers (>=4,<5)", "weaviate-client (>=3,<4)", "wikipedia (>=1,<2)", "wolframalpha (==5.0.0)"] +all = ["O365 (>=2.0.26,<3.0.0)", "aleph-alpha-client (>=2.15.0,<3.0.0)", "amadeus (>=8.1.0)", "arxiv (>=1.4,<2.0)", "atlassian-python-api (>=3.36.0,<4.0.0)", "awadb (>=0.3.9,<0.4.0)", "azure-ai-formrecognizer (>=3.2.1,<4.0.0)", "azure-ai-textanalytics (>=5.3.0,<6.0.0)", "azure-ai-vision (>=0.11.1b1,<0.12.0)", "azure-cognitiveservices-speech (>=1.28.0,<2.0.0)", "azure-cosmos (>=4.4.0b1,<5.0.0)", "azure-identity (>=1.12.0,<2.0.0)", "beautifulsoup4 (>=4,<5)", "clarifai (>=9.1.0)", "clickhouse-connect (>=0.5.14,<0.6.0)", "cohere (>=4,<5)", "deeplake (>=3.8.3,<4.0.0)", "dgml-utils (>=0.3.0,<0.4.0)", "docarray[hnswlib] (>=0.32.0,<0.33.0)", "duckduckgo-search (>=3.8.3,<4.0.0)", "elasticsearch (>=8,<9)", "esprima (>=4.0.1,<5.0.0)", "faiss-cpu (>=1,<2)", "google-api-python-client (==2.70.0)", "google-auth (>=2.18.1,<3.0.0)", "google-search-results (>=2,<3)", "gptcache (>=0.1.7)", "hologres-vector (>=0.0.6,<0.0.7)", "html2text (>=2020.1.16,<2021.0.0)", "huggingface_hub (>=0,<1)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "lancedb (>=0.1,<0.2)", "langkit (>=0.0.6,<0.1.0)", "lark (>=1.1.5,<2.0.0)", "librosa (>=0.10.0.post2,<0.11.0)", "lxml (>=4.9.2,<5.0.0)", "manifest-ml (>=0.0.1,<0.0.2)", "marqo 
(>=1.2.4,<2.0.0)", "momento (>=1.13.0,<2.0.0)", "nebula3-python (>=3.4.0,<4.0.0)", "neo4j (>=5.8.1,<6.0.0)", "networkx (>=2.6.3,<4)", "nlpcloud (>=1,<2)", "nltk (>=3,<4)", "nomic (>=1.0.43,<2.0.0)", "openai (<2)", "openlm (>=0.0.5,<0.0.6)", "opensearch-py (>=2.0.0,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pexpect (>=4.8.0,<5.0.0)", "pgvector (>=0.1.6,<0.2.0)", "pinecone-client (>=2,<3)", "pinecone-text (>=0.4.2,<0.5.0)", "psycopg2-binary (>=2.9.5,<3.0.0)", "pymongo (>=4.3.3,<5.0.0)", "pyowm (>=3.3.0,<4.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pytesseract (>=0.3.10,<0.4.0)", "python-arango (>=7.5.9,<8.0.0)", "pyvespa (>=0.33.0,<0.34.0)", "qdrant-client (>=1.3.1,<2.0.0)", "rdflib (>=6.3.2,<7.0.0)", "redis (>=4,<5)", "requests-toolbelt (>=1.0.0,<2.0.0)", "sentence-transformers (>=2,<3)", "singlestoredb (>=0.7.1,<0.8.0)", "tensorflow-text (>=2.11.0,<3.0.0)", "tigrisdb (>=1.0.0b6,<2.0.0)", "tiktoken (>=0.3.2,<0.6.0)", "torch (>=1,<3)", "transformers (>=4,<5)", "weaviate-client (>=3,<4)", "wikipedia (>=1,<2)", "wolframalpha (==5.0.0)"] azure = ["azure-ai-formrecognizer (>=3.2.1,<4.0.0)", "azure-ai-textanalytics (>=5.3.0,<6.0.0)", "azure-ai-vision (>=0.11.1b1,<0.12.0)", "azure-cognitiveservices-speech (>=1.28.0,<2.0.0)", "azure-core (>=1.26.4,<2.0.0)", "azure-cosmos (>=4.4.0b1,<5.0.0)", "azure-identity (>=1.12.0,<2.0.0)", "azure-search-documents (==11.4.0b8)", "openai (<2)"] clarifai = ["clarifai (>=9.1.0)"] cli = ["typer (>=0.9.0,<0.10.0)"] cohere = ["cohere (>=4,<5)"] docarray = ["docarray[hnswlib] (>=0.32.0,<0.33.0)"] embeddings = ["sentence-transformers (>=2,<3)"] -extended-testing = ["aiosqlite (>=0.19.0,<0.20.0)", "aleph-alpha-client (>=2.15.0,<3.0.0)", "anthropic (>=0.3.11,<0.4.0)", "arxiv (>=1.4,<2.0)", "assemblyai (>=0.17.0,<0.18.0)", "atlassian-python-api (>=3.36.0,<4.0.0)", "beautifulsoup4 (>=4,<5)", "bibtexparser (>=1.4.0,<2.0.0)", "cassio (>=0.1.0,<0.2.0)", "chardet (>=5.1.0,<6.0.0)", "dashvector (>=1.0.1,<2.0.0)", "databricks-vectorsearch (>=0.21,<0.22)", 
"dgml-utils (>=0.3.0,<0.4.0)", "esprima (>=4.0.1,<5.0.0)", "faiss-cpu (>=1,<2)", "feedparser (>=6.0.10,<7.0.0)", "fireworks-ai (>=0.6.0,<0.7.0)", "geopandas (>=0.13.1,<0.14.0)", "gitpython (>=3.1.32,<4.0.0)", "google-cloud-documentai (>=2.20.1,<3.0.0)", "gql (>=3.4.1,<4.0.0)", "html2text (>=2020.1.16,<2021.0.0)", "javelin-sdk (>=0.1.8,<0.2.0)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "jsonschema (>1)", "lxml (>=4.9.2,<5.0.0)", "markdownify (>=0.11.6,<0.12.0)", "motor (>=3.3.1,<4.0.0)", "msal (>=1.25.0,<2.0.0)", "mwparserfromhell (>=0.6.4,<0.7.0)", "mwxml (>=0.3.3,<0.4.0)", "newspaper3k (>=0.2.8,<0.3.0)", "numexpr (>=2.8.6,<3.0.0)", "openai (<2)", "openai (<2)", "openapi-pydantic (>=0.3.2,<0.4.0)", "pandas (>=2.0.1,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pgvector (>=0.1.6,<0.2.0)", "psychicapi (>=0.8.0,<0.9.0)", "py-trello (>=0.19.0,<0.20.0)", "pymupdf (>=1.22.3,<2.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pypdfium2 (>=4.10.0,<5.0.0)", "pyspark (>=3.4.0,<4.0.0)", "rank-bm25 (>=0.2.2,<0.3.0)", "rapidfuzz (>=3.1.1,<4.0.0)", "rapidocr-onnxruntime (>=1.3.2,<2.0.0)", "requests-toolbelt (>=1.0.0,<2.0.0)", "rspace_client (>=2.5.0,<3.0.0)", "scikit-learn (>=1.2.2,<2.0.0)", "sqlite-vss (>=0.1.2,<0.2.0)", "streamlit (>=1.18.0,<2.0.0)", "sympy (>=1.12,<2.0)", "telethon (>=1.28.5,<2.0.0)", "timescale-vector (>=0.0.1,<0.0.2)", "tqdm (>=4.48.0)", "upstash-redis (>=0.15.0,<0.16.0)", "xata (>=1.0.0a7,<2.0.0)", "xmltodict (>=0.13.0,<0.14.0)"] +extended-testing = ["aiosqlite (>=0.19.0,<0.20.0)", "aleph-alpha-client (>=2.15.0,<3.0.0)", "anthropic (>=0.3.11,<0.4.0)", "arxiv (>=1.4,<2.0)", "assemblyai (>=0.17.0,<0.18.0)", "atlassian-python-api (>=3.36.0,<4.0.0)", "beautifulsoup4 (>=4,<5)", "bibtexparser (>=1.4.0,<2.0.0)", "cassio (>=0.1.0,<0.2.0)", "chardet (>=5.1.0,<6.0.0)", "cohere (>=4,<5)", "couchbase (>=4.1.9,<5.0.0)", "dashvector (>=1.0.1,<2.0.0)", "databricks-vectorsearch (>=0.21,<0.22)", "datasets (>=2.15.0,<3.0.0)", "dgml-utils (>=0.3.0,<0.4.0)", "esprima (>=4.0.1,<5.0.0)", 
"faiss-cpu (>=1,<2)", "feedparser (>=6.0.10,<7.0.0)", "fireworks-ai (>=0.6.0,<0.7.0)", "geopandas (>=0.13.1,<0.14.0)", "gitpython (>=3.1.32,<4.0.0)", "google-cloud-documentai (>=2.20.1,<3.0.0)", "gql (>=3.4.1,<4.0.0)", "hologres-vector (>=0.0.6,<0.0.7)", "html2text (>=2020.1.16,<2021.0.0)", "javelin-sdk (>=0.1.8,<0.2.0)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "jsonschema (>1)", "lxml (>=4.9.2,<5.0.0)", "markdownify (>=0.11.6,<0.12.0)", "motor (>=3.3.1,<4.0.0)", "msal (>=1.25.0,<2.0.0)", "mwparserfromhell (>=0.6.4,<0.7.0)", "mwxml (>=0.3.3,<0.4.0)", "newspaper3k (>=0.2.8,<0.3.0)", "numexpr (>=2.8.6,<3.0.0)", "openai (<2)", "openai (<2)", "openapi-pydantic (>=0.3.2,<0.4.0)", "pandas (>=2.0.1,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pgvector (>=0.1.6,<0.2.0)", "praw (>=7.7.1,<8.0.0)", "psychicapi (>=0.8.0,<0.9.0)", "py-trello (>=0.19.0,<0.20.0)", "pymupdf (>=1.22.3,<2.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pypdfium2 (>=4.10.0,<5.0.0)", "pyspark (>=3.4.0,<4.0.0)", "rank-bm25 (>=0.2.2,<0.3.0)", "rapidfuzz (>=3.1.1,<4.0.0)", "rapidocr-onnxruntime (>=1.3.2,<2.0.0)", "requests-toolbelt (>=1.0.0,<2.0.0)", "rspace_client (>=2.5.0,<3.0.0)", "scikit-learn (>=1.2.2,<2.0.0)", "sqlite-vss (>=0.1.2,<0.2.0)", "streamlit (>=1.18.0,<2.0.0)", "sympy (>=1.12,<2.0)", "telethon (>=1.28.5,<2.0.0)", "timescale-vector (>=0.0.1,<0.0.2)", "tqdm (>=4.48.0)", "upstash-redis (>=0.15.0,<0.16.0)", "xata (>=1.0.0a7,<2.0.0)", "xmltodict (>=0.13.0,<0.14.0)"] javascript = ["esprima (>=4.0.1,<5.0.0)"] llms = ["clarifai (>=9.1.0)", "cohere (>=4,<5)", "huggingface_hub (>=0,<1)", "manifest-ml (>=0.0.1,<0.0.2)", "nlpcloud (>=1,<2)", "openai (<2)", "openlm (>=0.0.5,<0.0.6)", "torch (>=1,<3)", "transformers (>=4,<5)"] openai = ["openai (<2)", "tiktoken (>=0.3.2,<0.6.0)"] @@ -1685,7 +1685,7 @@ url = "../langchain" [[package]] name = "langchain-core" -version = "0.0.7" +version = "0.0.10" description = "Building applications with LLMs through composability" optional = false python-versions = 
">=3.8.1,<4.0" @@ -4931,4 +4931,4 @@ extended-testing = ["faker", "presidio-analyzer", "presidio-anonymizer", "senten [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<4.0" -content-hash = "5ea902253757caa8e1708b13096e13bdd16931a20c4ea0e402af5dfe1c8a30ac" +content-hash = "82bebfc5475be48f180bcb5013850eb88f451ffdc1f126a12112e10ed56f6529" diff --git a/libs/experimental/pyproject.toml b/libs/experimental/pyproject.toml index 6c601eb5d4017..88da63a64c036 100644 --- a/libs/experimental/pyproject.toml +++ b/libs/experimental/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "langchain-experimental" -version = "0.0.43" +version = "0.0.44" description = "Building applications with LLMs through composability" authors = [] license = "MIT" @@ -10,8 +10,8 @@ repository = "https://github.com/langchain-ai/langchain" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" -langchain-core = ">=0.0.7,<0.1" -langchain = ">=0.0.342,<0.1" +langchain-core = ">=0.0.10,<0.1" +langchain = ">=0.0.346,<0.1" presidio-anonymizer = {version = "^2.2.33", optional = true} presidio-analyzer = {version = "^2.2.33", optional = true} faker = {version = "^19.3.1", optional = true} From 0f02e94565c4924baccb376fe79725143ba2ecd3 Mon Sep 17 00:00:00 2001 From: Leonid Ganeline Date: Tue, 5 Dec 2023 13:05:29 -0800 Subject: [PATCH 20/63] docs: `integrations/providers/` update (#14315) - added missed provider files (from `integrations/Callbacks` - updated notebooks: added links; updated into consistent formats --- .../docs/integrations/callbacks/argilla.ipynb | 4 +--- .../docs/integrations/callbacks/context.ipynb | 21 ++++++---------- docs/docs/integrations/callbacks/infino.ipynb | 12 ++++++---- .../integrations/callbacks/labelstudio.ipynb | 22 ++++++++--------- docs/docs/integrations/callbacks/llmonitor.md | 2 +- .../integrations/callbacks/promptlayer.ipynb | 19 +++++++-------- .../callbacks/sagemaker_tracking.ipynb | 15 ++++++------ .../integrations/callbacks/trubrics.ipynb | 15 ++++++------ 
docs/docs/integrations/providers/context.mdx | 20 ++++++++++++++++ .../integrations/providers/labelstudio.mdx | 23 ++++++++++++++++++ .../docs/integrations/providers/llmonitor.mdx | 22 +++++++++++++++++ .../docs/integrations/providers/streamlit.mdx | 22 +++++++++++++++++ docs/docs/integrations/providers/trubrics.mdx | 24 +++++++++++++++++++ 13 files changed, 163 insertions(+), 58 deletions(-) create mode 100644 docs/docs/integrations/providers/context.mdx create mode 100644 docs/docs/integrations/providers/labelstudio.mdx create mode 100644 docs/docs/integrations/providers/llmonitor.mdx create mode 100644 docs/docs/integrations/providers/streamlit.mdx create mode 100644 docs/docs/integrations/providers/trubrics.mdx diff --git a/docs/docs/integrations/callbacks/argilla.ipynb b/docs/docs/integrations/callbacks/argilla.ipynb index 015f29e790101..9e89cb5da9249 100644 --- a/docs/docs/integrations/callbacks/argilla.ipynb +++ b/docs/docs/integrations/callbacks/argilla.ipynb @@ -7,8 +7,6 @@ "source": [ "# Argilla\n", "\n", - "![Argilla - Open-source data platform for LLMs](https://argilla.io/og.png)\n", - "\n", ">[Argilla](https://argilla.io/) is an open-source data curation platform for LLMs.\n", "> Using Argilla, everyone can build robust language models through faster data curation \n", "> using both human and machine feedback. 
We provide support for each step in the MLOps cycle, \n", @@ -410,7 +408,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.3" + "version": "3.10.12" }, "vscode": { "interpreter": { diff --git a/docs/docs/integrations/callbacks/context.ipynb b/docs/docs/integrations/callbacks/context.ipynb index d9edf77e2600e..b250f439b6164 100644 --- a/docs/docs/integrations/callbacks/context.ipynb +++ b/docs/docs/integrations/callbacks/context.ipynb @@ -7,12 +7,9 @@ "source": [ "# Context\n", "\n", - "![Context - User Analytics for LLM Powered Products](https://with.context.ai/langchain.png)\n", + ">[Context](https://context.ai/) provides user analytics for LLM-powered products and features.\n", "\n", - "[Context](https://context.ai/) provides user analytics for LLM powered products and features.\n", - "\n", - "With Context, you can start understanding your users and improving their experiences in less than 30 minutes.\n", - "\n" + "With `Context`, you can start understanding your users and improving their experiences in less than 30 minutes.\n" ] }, { @@ -89,11 +86,9 @@ "metadata": {}, "source": [ "## Usage\n", - "### Using the Context callback within a chat model\n", - "\n", - "The Context callback handler can be used to directly record transcripts between users and AI assistants.\n", + "### Context callback within a chat model\n", "\n", - "#### Example" + "The Context callback handler can be used to directly record transcripts between users and AI assistants." ] }, { @@ -132,7 +127,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Using the Context callback within Chains\n", + "### Context callback within Chains\n", "\n", "The Context callback handler can also be used to record the inputs and outputs of chains. 
Note that intermediate steps of the chain are not recorded - only the starting inputs and final outputs.\n", "\n", @@ -149,9 +144,7 @@ ">handler = ContextCallbackHandler(token)\n", ">chat = ChatOpenAI(temperature=0.9, callbacks=[callback])\n", ">chain = LLMChain(llm=chat, prompt=chat_prompt_template, callbacks=[callback])\n", - ">```\n", - "\n", - "#### Example" + ">```\n" ] }, { @@ -203,7 +196,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.1" + "version": "3.10.12" }, "vscode": { "interpreter": { diff --git a/docs/docs/integrations/callbacks/infino.ipynb b/docs/docs/integrations/callbacks/infino.ipynb index bfcac65c7d512..367f3a2f2d88a 100644 --- a/docs/docs/integrations/callbacks/infino.ipynb +++ b/docs/docs/integrations/callbacks/infino.ipynb @@ -7,12 +7,14 @@ "source": [ "# Infino\n", "\n", + ">[Infino](https://github.com/infinohq/infino) is a scalable telemetry store designed for logs, metrics, and traces. Infino can function as a standalone observability solution or as the storage layer in your observability stack.\n", + "\n", "This example shows how one can track the following while calling OpenAI and ChatOpenAI models via `LangChain` and [Infino](https://github.com/infinohq/infino):\n", "\n", - "* prompt input,\n", - "* response from `ChatGPT` or any other `LangChain` model,\n", - "* latency,\n", - "* errors,\n", + "* prompt input\n", + "* response from `ChatGPT` or any other `LangChain` model\n", + "* latency\n", + "* errors\n", "* number of tokens consumed" ] }, @@ -454,7 +456,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.1" + "version": "3.10.12" } }, "nbformat": 4, diff --git a/docs/docs/integrations/callbacks/labelstudio.ipynb b/docs/docs/integrations/callbacks/labelstudio.ipynb index de88fc1cbe796..bb733f0dc15e2 100644 --- a/docs/docs/integrations/callbacks/labelstudio.ipynb +++ b/docs/docs/integrations/callbacks/labelstudio.ipynb @@ -4,6 
+4,9 @@ "cell_type": "markdown", "metadata": { "collapsed": true, + "jupyter": { + "outputs_hidden": true + }, "pycharm": { "name": "#%% md\n" } @@ -11,17 +14,14 @@ "source": [ "# Label Studio\n", "\n", - "
\n", - "\n", - "
\n", "\n", - "Label Studio is an open-source data labeling platform that provides LangChain with flexibility when it comes to labeling data for fine-tuning large language models (LLMs). It also enables the preparation of custom training data and the collection and evaluation of responses through human feedback.\n", + ">[Label Studio](https://labelstud.io/guide/get_started) is an open-source data labeling platform that provides LangChain with flexibility when it comes to labeling data for fine-tuning large language models (LLMs). It also enables the preparation of custom training data and the collection and evaluation of responses through human feedback.\n", "\n", - "In this guide, you will learn how to connect a LangChain pipeline to Label Studio to:\n", + "In this guide, you will learn how to connect a LangChain pipeline to `Label Studio` to:\n", "\n", - "- Aggregate all input prompts, conversations, and responses in a single LabelStudio project. This consolidates all the data in one place for easier labeling and analysis.\n", + "- Aggregate all input prompts, conversations, and responses in a single `Label Studio` project. This consolidates all the data in one place for easier labeling and analysis.\n", "- Refine prompts and responses to create a dataset for supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) scenarios. The labeled data can be used to further train the LLM to improve its performance.\n", - "- Evaluate model responses through human feedback. LabelStudio provides an interface for humans to review and provide feedback on model responses, allowing evaluation and iteration." + "- Evaluate model responses through human feedback. `Label Studio` provides an interface for humans to review and provide feedback on model responses, allowing evaluation and iteration." 
] }, { @@ -362,9 +362,9 @@ ], "metadata": { "kernelspec": { - "display_name": "labelops", + "display_name": "Python 3 (ipykernel)", "language": "python", - "name": "labelops" + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -376,9 +376,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.16" + "version": "3.10.12" } }, "nbformat": 4, - "nbformat_minor": 1 + "nbformat_minor": 4 } diff --git a/docs/docs/integrations/callbacks/llmonitor.md b/docs/docs/integrations/callbacks/llmonitor.md index 9cbf1e36756a4..4ee85429f6213 100644 --- a/docs/docs/integrations/callbacks/llmonitor.md +++ b/docs/docs/integrations/callbacks/llmonitor.md @@ -1,6 +1,6 @@ # LLMonitor -[LLMonitor](https://llmonitor.com?utm_source=langchain&utm_medium=py&utm_campaign=docs) is an open-source observability platform that provides cost and usage analytics, user tracking, tracing and evaluation tools. +>[LLMonitor](https://llmonitor.com?utm_source=langchain&utm_medium=py&utm_campaign=docs) is an open-source observability platform that provides cost and usage analytics, user tracking, tracing and evaluation tools.