From 43dc5d34163829720ba46d2268db61909e7e5c6c Mon Sep 17 00:00:00 2001 From: Mohammad Mohtashim <45242107+keenborder786@users.noreply.github.com> Date: Mon, 19 Feb 2024 23:09:11 +0500 Subject: [PATCH 01/12] community[patch]: OpenLLM Client Fixes + Added Timeout Parameter (#17478) - OpenLLM was using an outdated method to get the final text output from the openllm client invocation, which raised an error; this is now corrected. - OpenLLM `_identifying_params` was reading the openllm client configuration through outdated attributes, which also raised an error; this is fixed as well. - Updated the docstring for OpenLLM. - Added a timeout parameter that is passed through to the underlying openllm client. --- .../langchain_community/llms/openllm.py | 22 ++++++++++++------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/libs/community/langchain_community/llms/openllm.py b/libs/community/langchain_community/llms/openllm.py index afb5a18f9ba45..fa3b03e1f98d5 100644 --- a/libs/community/langchain_community/llms/openllm.py +++ b/libs/community/langchain_community/llms/openllm.py @@ -72,7 +72,7 @@ class OpenLLM(LLM): from langchain_community.llms import OpenLLM llm = OpenLLM(server_url='http://localhost:3000') - llm("What is the difference between a duck and a goose?") + llm.invoke("What is the difference between a duck and a goose?") """ model_name: Optional[str] = None @@ -82,6 +82,8 @@ class OpenLLM(LLM): See 'openllm models' for all available model variants.""" server_url: Optional[str] = None """Optional server URL that currently runs a LLMServer with 'openllm start'.""" + timeout: int = 30 + """Timeout for the openllm client.""" server_type: ServerType = "http" """Optional server type. Either 'http' or 'grpc'.""" embedded: bool = True @@ -125,6 +127,7 @@ def __init__( *, model_id: Optional[str] = None, server_url: Optional[str] = None, + timeout: int = 30, server_type: Literal["grpc", "http"] = "http", embedded: bool = True, **llm_kwargs: Any, @@ -149,11 +152,12 @@ def __init__( if server_type == "http" else openllm.client.GrpcClient ) - client = client_cls(server_url) + client = client_cls(server_url, timeout) super().__init__( **{ "server_url": server_url, + "timeout": timeout, "server_type": server_type, "llm_kwargs": llm_kwargs, } @@ -217,9 +221,9 @@ def chat(input_text: str): def _identifying_params(self) -> IdentifyingParams: """Get the identifying parameters.""" if self._client is not None: - self.llm_kwargs.update(self._client._config()) - model_name = self._client._metadata()["model_name"] - model_id = self._client._metadata()["model_id"] + self.llm_kwargs.update(self._client._config) + model_name = self._client._metadata.model_dump()["model_name"] + model_id = self._client._metadata.model_dump()["model_id"] else: if self._runner is None: raise ValueError("Runner must be initialized.") @@ -265,9 +269,11 @@ def _call( self._identifying_params["model_name"], **copied ) if self._client: - res = self._client.generate( - prompt, **config.model_dump(flatten=True) - ).responses[0] + res = ( + self._client.generate(prompt, **config.model_dump(flatten=True)) + .outputs[0] + .text + ) else: assert self._runner is not None res = self._runner(prompt, **config.model_dump(flatten=True))
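As an illustration of the patch above (an editor's sketch, not part of the patch itself), this is how the new `timeout` parameter is meant to be used. It assumes an OpenLLM server is already running at the placeholder URL from the docstring:

```python
from langchain_community.llms import OpenLLM

# `timeout` (in seconds, default 30) is forwarded to the underlying
# openllm HTTPClient/GrpcClient when `server_url` is given.
llm = OpenLLM(server_url="http://localhost:3000", timeout=60)
print(llm.invoke("What is the difference between a duck and a goose?"))
```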
From e92e96193fc3fb263a3379b685ab365123e23ebc Mon Sep 17 00:00:00 2001 From: Christophe Bornet Date: Mon, 19 Feb 2024 19:11:49 +0100 Subject: [PATCH 02/12] community[minor]: Add async methods to the AstraDB BaseStore (#16872) --------- Co-authored-by: Bagatur <22008038+baskaryan@users.noreply.github.com> --- .../langchain_community/storage/astradb.py | 59 ++++++++++-- .../integration_tests/storage/test_astradb.py | 95 ++++++++++++++++--- 2 files changed, 136 insertions(+), 18 deletions(-) diff --git a/libs/community/langchain_community/storage/astradb.py b/libs/community/langchain_community/storage/astradb.py index 0cb2ea310aad2..959ef374124c7 100644 --- a/libs/community/langchain_community/storage/astradb.py +++ b/libs/community/langchain_community/storage/astradb.py @@ -5,6 +5,7 @@ from typing import ( TYPE_CHECKING, Any, + AsyncIterator, Generic, Iterator, List, @@ -16,10 +17,13 @@ from langchain_core.stores import BaseStore, ByteStore -from langchain_community.utilities.astradb import _AstraDBEnvironment +from langchain_community.utilities.astradb import ( + SetupMode, + _AstraDBCollectionEnvironment, +) if TYPE_CHECKING: - from astrapy.db import AstraDB + from astrapy.db import AstraDB, AsyncAstraDB V = TypeVar("V") @@ -34,17 +38,23 @@ def __init__( api_endpoint: Optional[str] = None, astra_db_client: Optional[AstraDB] = None, namespace: Optional[str] = None, + *, + async_astra_db_client: Optional[AsyncAstraDB] = None, + pre_delete_collection: bool = False, + setup_mode: SetupMode = SetupMode.SYNC, ) -> None: - astra_env = _AstraDBEnvironment( + self.astra_env = _AstraDBCollectionEnvironment( + collection_name=collection_name, token=token, api_endpoint=api_endpoint, astra_db_client=astra_db_client, + async_astra_db_client=async_astra_db_client, namespace=namespace, + setup_mode=setup_mode, + pre_delete_collection=pre_delete_collection, ) - self.astra_db = astra_env.astra_db - self.collection = self.astra_db.create_collection( - collection_name=collection_name, - ) + self.collection = self.astra_env.collection + self.async_collection = self.astra_env.async_collection @abstractmethod def decode_value(self, value: Any) -> Optional[V]: @@ -56,28 +66,63 @@ def encode_value(self, value: Optional[V]) -> Any: def mget(self, keys: Sequence[str]) -> List[Optional[V]]: """Get the values associated with the given keys.""" + self.astra_env.ensure_db_setup() docs_dict = {} for doc in self.collection.paginated_find(filter={"_id": {"$in": list(keys)}}): docs_dict[doc["_id"]] = doc.get("value") return [self.decode_value(docs_dict.get(key)) for key in keys] + async def amget(self, keys: Sequence[str]) -> List[Optional[V]]: + """Get the values associated with the given keys.""" + await self.astra_env.aensure_db_setup() + docs_dict = {} + async for doc in self.async_collection.paginated_find( + filter={"_id": {"$in": list(keys)}} + ): + docs_dict[doc["_id"]] = doc.get("value") + return [self.decode_value(docs_dict.get(key)) for key in keys] + def mset(self, key_value_pairs: Sequence[Tuple[str, V]]) -> None: """Set the given key-value pairs.""" + self.astra_env.ensure_db_setup() for k, v in key_value_pairs: self.collection.upsert({"_id": k, "value": self.encode_value(v)}) + async def amset(self, key_value_pairs: Sequence[Tuple[str, V]]) -> None: + """Set the given key-value pairs.""" + await self.astra_env.aensure_db_setup() + for k, v in key_value_pairs: + await self.async_collection.upsert( + {"_id": k, "value": self.encode_value(v)} + ) + def mdelete(self, keys: Sequence[str]) -> None: """Delete the given keys.""" + self.astra_env.ensure_db_setup() self.collection.delete_many(filter={"_id": {"$in": list(keys)}}) + async def amdelete(self, keys: Sequence[str]) -> None: + """Delete the given keys.""" + await self.astra_env.aensure_db_setup() + await self.async_collection.delete_many(filter={"_id": {"$in": 
list(keys)}}) + def yield_keys(self, *, prefix: Optional[str] = None) -> Iterator[str]: """Yield keys in the store.""" + self.astra_env.ensure_db_setup() docs = self.collection.paginated_find() for doc in docs: key = doc["_id"] if not prefix or key.startswith(prefix): yield key + async def ayield_keys(self, *, prefix: Optional[str] = None) -> AsyncIterator[str]: + """Yield keys in the store.""" + await self.astra_env.aensure_db_setup() + async for doc in self.async_collection.paginated_find(): + key = doc["_id"] + if not prefix or key.startswith(prefix): + yield key + class AstraDBStore(AstraDBBaseStore[Any]): """BaseStore implementation using DataStax AstraDB as the underlying store. diff --git a/libs/community/tests/integration_tests/storage/test_astradb.py b/libs/community/tests/integration_tests/storage/test_astradb.py index 643b4e93a3185..63108ef0c84a7 100644 --- a/libs/community/tests/integration_tests/storage/test_astradb.py +++ b/libs/community/tests/integration_tests/storage/test_astradb.py @@ -1,9 +1,16 @@ """Implement integration tests for AstraDB storage.""" +from __future__ import annotations + import os +from typing import TYPE_CHECKING import pytest from langchain_community.storage.astradb import AstraDBByteStore, AstraDBStore +from langchain_community.utilities.astradb import SetupMode + +if TYPE_CHECKING: + from astrapy.db import AstraDB, AsyncAstraDB def _has_env_vars() -> bool: @@ -16,7 +23,7 @@ def _has_env_vars() -> bool: @pytest.fixture -def astra_db(): # type: ignore[no-untyped-def] +def astra_db() -> AstraDB: from astrapy.db import AstraDB return AstraDB( @@ -26,24 +33,45 @@ def astra_db(): # type: ignore[no-untyped-def] ) -def init_store(astra_db, collection_name: str): # type: ignore[no-untyped-def, no-untyped-def] - astra_db.create_collection(collection_name) +@pytest.fixture +def async_astra_db() -> AsyncAstraDB: + from astrapy.db import AsyncAstraDB + + return AsyncAstraDB( + token=os.environ["ASTRA_DB_APPLICATION_TOKEN"], + api_endpoint=os.environ["ASTRA_DB_API_ENDPOINT"], + namespace=os.environ.get("ASTRA_DB_KEYSPACE"), + ) + + +def init_store(astra_db: AstraDB, collection_name: str) -> AstraDBStore: store = AstraDBStore(collection_name=collection_name, astra_db_client=astra_db) store.mset([("key1", [0.1, 0.2]), ("key2", "value2")]) return store -def init_bytestore(astra_db, collection_name: str): # type: ignore[no-untyped-def, no-untyped-def] - astra_db.create_collection(collection_name) +def init_bytestore(astra_db: AstraDB, collection_name: str) -> AstraDBByteStore: store = AstraDBByteStore(collection_name=collection_name, astra_db_client=astra_db) store.mset([("key1", b"value1"), ("key2", b"value2")]) return store +async def init_async_store( + async_astra_db: AsyncAstraDB, collection_name: str +) -> AstraDBStore: + store = AstraDBStore( + collection_name=collection_name, + async_astra_db_client=async_astra_db, + setup_mode=SetupMode.ASYNC, + ) + await store.amset([("key1", [0.1, 0.2]), ("key2", "value2")]) + return store + + @pytest.mark.requires("astrapy") @pytest.mark.skipif(not _has_env_vars(), reason="Missing Astra DB env. 
vars") class TestAstraDBStore: - def test_mget(self, astra_db) -> None: # type: ignore[no-untyped-def] + def test_mget(self, astra_db: AstraDB) -> None: """Test AstraDBStore mget method.""" collection_name = "lc_test_store_mget" try: @@ -52,7 +80,16 @@ def test_mget(self, astra_db) -> None: # type: ignore[no-untyped-def] finally: astra_db.delete_collection(collection_name) - def test_mset(self, astra_db) -> None: # type: ignore[no-untyped-def] + async def test_amget(self, async_astra_db: AsyncAstraDB) -> None: + """Test AstraDBStore amget method.""" + collection_name = "lc_test_store_mget" + try: + store = await init_async_store(async_astra_db, collection_name) + assert await store.amget(["key1", "key2"]) == [[0.1, 0.2], "value2"] + finally: + await async_astra_db.delete_collection(collection_name) + + def test_mset(self, astra_db: AstraDB) -> None: """Test that multiple keys can be set with AstraDBStore.""" collection_name = "lc_test_store_mset" try: @@ -64,7 +101,19 @@ def test_mset(self, astra_db) -> None: # type: ignore[no-untyped-def] finally: astra_db.delete_collection(collection_name) - def test_mdelete(self, astra_db) -> None: # type: ignore[no-untyped-def] + async def test_amset(self, async_astra_db: AsyncAstraDB) -> None: + """Test that multiple keys can be set with AstraDBStore.""" + collection_name = "lc_test_store_mset" + try: + store = await init_async_store(async_astra_db, collection_name) + result = await store.async_collection.find_one({"_id": "key1"}) + assert result["data"]["document"]["value"] == [0.1, 0.2] + result = await store.async_collection.find_one({"_id": "key2"}) + assert result["data"]["document"]["value"] == "value2" + finally: + await async_astra_db.delete_collection(collection_name) + + def test_mdelete(self, astra_db: AstraDB) -> None: """Test that deletion works as expected.""" collection_name = "lc_test_store_mdelete" try: @@ -75,7 +124,18 @@ def test_mdelete(self, astra_db) -> None: # type: ignore[no-untyped-def] finally: astra_db.delete_collection(collection_name) - def test_yield_keys(self, astra_db) -> None: # type: ignore[no-untyped-def] + async def test_amdelete(self, async_astra_db: AsyncAstraDB) -> None: + """Test that deletion works as expected.""" + collection_name = "lc_test_store_mdelete" + try: + store = await init_async_store(async_astra_db, collection_name) + await store.amdelete(["key1", "key2"]) + result = await store.amget(["key1", "key2"]) + assert result == [None, None] + finally: + await async_astra_db.delete_collection(collection_name) + + def test_yield_keys(self, astra_db: AstraDB) -> None: collection_name = "lc_test_store_yield_keys" try: store = init_store(astra_db, collection_name) @@ -85,7 +145,20 @@ def test_yield_keys(self, astra_db) -> None: # type: ignore[no-untyped-def] finally: astra_db.delete_collection(collection_name) - def test_bytestore_mget(self, astra_db) -> None: # type: ignore[no-untyped-def] + async def test_ayield_keys(self, async_astra_db: AsyncAstraDB) -> None: + collection_name = "lc_test_store_yield_keys" + try: + store = await init_async_store(async_astra_db, collection_name) + assert {key async for key in store.ayield_keys()} == {"key1", "key2"} + assert {key async for key in store.ayield_keys(prefix="key")} == { + "key1", + "key2", + } + assert {key async for key in store.ayield_keys(prefix="lang")} == set() + finally: + await async_astra_db.delete_collection(collection_name) + + def test_bytestore_mget(self, astra_db: AstraDB) -> None: """Test AstraDBByteStore mget method.""" collection_name = 
"lc_test_bytestore_mget" try: @@ -94,7 +167,7 @@ def test_bytestore_mget(self, astra_db) -> None: # type: ignore[no-untyped-def] finally: astra_db.delete_collection(collection_name) - def test_bytestore_mset(self, astra_db) -> None: # type: ignore[no-untyped-def] + def test_bytestore_mset(self, astra_db: AstraDB) -> None: """Test that multiple keys can be set with AstraDBByteStore.""" collection_name = "lc_test_bytestore_mset" try: From 6c18f73ca56bb72cb964aaa668c3f8ac14237619 Mon Sep 17 00:00:00 2001 From: Raghav Dixit <34462078+raghavdixit99@users.noreply.github.com> Date: Mon, 19 Feb 2024 13:22:02 -0500 Subject: [PATCH 03/12] community[patch]: LanceDB integration improvements/fixes (#16173) Hi, I'm from the LanceDB team. Improves LanceDB integration by making it easier to use - now you aren't required to create tables manually and pass them in the constructor, although that is still backward compatible. Bug fix - pandas was being used even though it's not a dependency for LanceDB or langchain PS - this issue was raised a few months ago but lost traction. It is a feature improvement for our users kindly review this , Thanks ! --- .../integrations/vectorstores/lancedb.ipynb | 178 ++++++++++++++---- .../data_connection/vectorstores/index.mdx | 2 +- .../vectorstores/lancedb.py | 84 +++++++-- .../vectorstores/test_lancedb.py | 34 ++-- 4 files changed, 225 insertions(+), 73 deletions(-) diff --git a/docs/docs/integrations/vectorstores/lancedb.ipynb b/docs/docs/integrations/vectorstores/lancedb.ipynb index ab5c56eb8f3cd..18eb519eecd3b 100644 --- a/docs/docs/integrations/vectorstores/lancedb.ipynb +++ b/docs/docs/integrations/vectorstores/lancedb.ipynb @@ -14,14 +14,50 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "bfcf346a", "metadata": { "tags": [] }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: lancedb in /Users/raghavdixit/Desktop/langchain_lance/.dev_env/lib/python3.11/site-packages (0.4.4)\n", + "Requirement already satisfied: deprecation in /Users/raghavdixit/Desktop/langchain_lance/.dev_env/lib/python3.11/site-packages (from lancedb) (2.1.0)\n", + "Requirement already satisfied: pylance==0.9.6 in /Users/raghavdixit/Desktop/langchain_lance/.dev_env/lib/python3.11/site-packages (from lancedb) (0.9.6)\n", + "Requirement already satisfied: ratelimiter~=1.0 in /Users/raghavdixit/Desktop/langchain_lance/.dev_env/lib/python3.11/site-packages (from lancedb) (1.2.0.post0)\n", + "Requirement already satisfied: retry>=0.9.2 in /Users/raghavdixit/Desktop/langchain_lance/.dev_env/lib/python3.11/site-packages (from lancedb) (0.9.2)\n", + "Requirement already satisfied: tqdm>=4.27.0 in /Users/raghavdixit/Desktop/langchain_lance/.dev_env/lib/python3.11/site-packages (from lancedb) (4.66.1)\n", + "Requirement already satisfied: pydantic>=1.10 in /Users/raghavdixit/Desktop/langchain_lance/.dev_env/lib/python3.11/site-packages (from lancedb) (2.4.2)\n", + "Requirement already satisfied: attrs>=21.3.0 in /Users/raghavdixit/Desktop/langchain_lance/.dev_env/lib/python3.11/site-packages (from lancedb) (23.1.0)\n", + "Requirement already satisfied: semver>=3.0 in /Users/raghavdixit/Desktop/langchain_lance/.dev_env/lib/python3.11/site-packages (from lancedb) (3.0.2)\n", + "Requirement already satisfied: cachetools in /Users/raghavdixit/Desktop/langchain_lance/.dev_env/lib/python3.11/site-packages (from lancedb) (5.3.2)\n", + "Requirement already satisfied: pyyaml>=6.0 in 
/Users/raghavdixit/Desktop/langchain_lance/.dev_env/lib/python3.11/site-packages (from lancedb) (6.0.1)\n", + "Requirement already satisfied: click>=8.1.7 in /Users/raghavdixit/Desktop/langchain_lance/.dev_env/lib/python3.11/site-packages (from lancedb) (8.1.7)\n", + "Requirement already satisfied: requests>=2.31.0 in /Users/raghavdixit/Desktop/langchain_lance/.dev_env/lib/python3.11/site-packages (from lancedb) (2.31.0)\n", + "Requirement already satisfied: overrides>=0.7 in /Users/raghavdixit/Desktop/langchain_lance/.dev_env/lib/python3.11/site-packages (from lancedb) (7.4.0)\n", + "Requirement already satisfied: pyarrow>=12 in /Users/raghavdixit/Desktop/langchain_lance/.dev_env/lib/python3.11/site-packages (from pylance==0.9.6->lancedb) (14.0.2)\n", + "Requirement already satisfied: numpy>=1.22 in /Users/raghavdixit/Desktop/langchain_lance/.dev_env/lib/python3.11/site-packages (from pylance==0.9.6->lancedb) (1.24.4)\n", + "Requirement already satisfied: annotated-types>=0.4.0 in /Users/raghavdixit/Desktop/langchain_lance/.dev_env/lib/python3.11/site-packages (from pydantic>=1.10->lancedb) (0.5.0)\n", + "Requirement already satisfied: pydantic-core==2.10.1 in /Users/raghavdixit/Desktop/langchain_lance/.dev_env/lib/python3.11/site-packages (from pydantic>=1.10->lancedb) (2.10.1)\n", + "Requirement already satisfied: typing-extensions>=4.6.1 in /Users/raghavdixit/Desktop/langchain_lance/.dev_env/lib/python3.11/site-packages (from pydantic>=1.10->lancedb) (4.8.0)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/raghavdixit/Desktop/langchain_lance/.dev_env/lib/python3.11/site-packages (from requests>=2.31.0->lancedb) (3.3.0)\n", + "Requirement already satisfied: idna<4,>=2.5 in /Users/raghavdixit/Desktop/langchain_lance/.dev_env/lib/python3.11/site-packages (from requests>=2.31.0->lancedb) (3.4)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/raghavdixit/Desktop/langchain_lance/.dev_env/lib/python3.11/site-packages (from requests>=2.31.0->lancedb) (2.0.6)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /Users/raghavdixit/Desktop/langchain_lance/.dev_env/lib/python3.11/site-packages (from requests>=2.31.0->lancedb) (2023.7.22)\n", + "Requirement already satisfied: decorator>=3.4.2 in /Users/raghavdixit/Desktop/langchain_lance/.dev_env/lib/python3.11/site-packages (from retry>=0.9.2->lancedb) (5.1.1)\n", + "Requirement already satisfied: py<2.0.0,>=1.4.26 in /Users/raghavdixit/Desktop/langchain_lance/.dev_env/lib/python3.11/site-packages (from retry>=0.9.2->lancedb) (1.11.0)\n", + "Requirement already satisfied: packaging in /Users/raghavdixit/Desktop/langchain_lance/.dev_env/lib/python3.11/site-packages (from deprecation->lancedb) (23.2)\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.2.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.3.2\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n" + ] + } + ], "source": [ - "%pip install --upgrade --quiet lancedb" + "! 
pip install lancedb" ] }, { @@ -34,20 +70,12 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 4, "id": "a0361f5c-e6f4-45f4-b829-11680cf03cec", "metadata": { "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "OpenAI API Key: ········\n" - ] - } - ], + "outputs": [], "source": [ "import getpass\n", "import os\n", @@ -57,15 +85,15 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "id": "aac9563e", "metadata": { "tags": [] }, "outputs": [], "source": [ - "from langchain_community.vectorstores import LanceDB\n", - "from langchain_openai import OpenAIEmbeddings" + "from langchain.embeddings import OpenAIEmbeddings\n", + "from langchain.vectorstores import LanceDB" ] }, { @@ -75,14 +103,13 @@ "metadata": {}, "outputs": [], "source": [ + "from langchain.document_loaders import TextLoader\n", "from langchain.text_splitter import CharacterTextSplitter\n", - "from langchain_community.document_loaders import TextLoader\n", "\n", "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n", "documents = loader.load()\n", "\n", "documents = CharacterTextSplitter().split_documents(documents)\n", - "\n", "embeddings = OpenAIEmbeddings()" ] }, @@ -93,22 +120,7 @@ "metadata": {}, "outputs": [], "source": [ - "import lancedb\n", - "\n", - "db = lancedb.connect(\"/tmp/lancedb\")\n", - "table = db.create_table(\n", - " \"my_table\",\n", - " data=[\n", - " {\n", - " \"vector\": embeddings.embed_query(\"Hello World\"),\n", - " \"text\": \"Hello World\",\n", - " \"id\": \"1\",\n", - " }\n", - " ],\n", - " mode=\"overwrite\",\n", - ")\n", - "\n", - "docsearch = LanceDB.from_documents(documents, embeddings, connection=table)\n", + "docsearch = LanceDB.from_documents(documents, embeddings)\n", "\n", "query = \"What did the president say about Ketanji Brown Jackson\"\n", "docs = docsearch.similarity_search(query)" @@ -116,7 +128,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 13, "id": "9c608226", "metadata": {}, "outputs": [ @@ -136,7 +148,7 @@ "\n", "I’ve worked on these issues a long time. \n", "\n", - "I know what works: Investing in crime preventionand community police officers who’ll walk the beat, who’ll know the neighborhood, and who can restore trust and safety. \n", + "I know what works: Investing in crime prevention and community police officers who’ll walk the beat, who’ll know the neighborhood, and who can restore trust and safety. \n", "\n", "So let’s not abandon our streets. Or choose between safety and equal justice. \n", "\n", @@ -192,11 +204,97 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "id": "a359ed74", "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "They were responding to a 9-1-1 call when a man shot and killed them with a stolen gun. \n", + "\n", + "Officer Mora was 27 years old. \n", + "\n", + "Officer Rivera was 22. \n", + "\n", + "Both Dominican Americans who’d grown up on the same streets they later chose to patrol as police officers. \n", + "\n", + "I spoke with their families and told them that we are forever in debt for their sacrifice, and we will carry on their mission to restore the trust and safety every community deserves. \n", + "\n", + "I’ve worked on these issues a long time. 
\n", + "\n", + "I know what works: Investing in crime prevention and community police officers who’ll walk the beat, who’ll know the neighborhood, and who can restore trust and safety. \n", + "\n", + "So let’s not abandon our streets. Or choose between safety and equal justice. \n", + "\n", + "Let’s come together to protect our communities, restore trust, and hold law enforcement accountable. \n", + "\n", + "That’s why the Justice Department required body cameras, banned chokeholds, and restricted no-knock warrants for its officers. \n", + "\n", + "That’s why the American Rescue Plan provided $350 Billion that cities, states, and counties can use to hire more police and invest in proven strategies like community violence interruption—trusted messengers breaking the cycle of violence and trauma and giving young people hope. \n", + "\n", + "We should all agree: The answer is not to Defund the police. The answer is to FUND the police with the resources and training they need to protect our communities. \n", + "\n", + "I ask Democrats and Republicans alike: Pass my budget and keep our neighborhoods safe. \n", + "\n", + "And I will keep doing everything in my power to crack down on gun trafficking and ghost guns you can buy online and make at home—they have no serial numbers and can’t be traced. \n", + "\n", + "And I ask Congress to pass proven measures to reduce gun violence. Pass universal background checks. Why should anyone on a terrorist list be able to purchase a weapon? \n", + "\n", + "Ban assault weapons and high-capacity magazines. \n", + "\n", + "Repeal the liability shield that makes gun manufacturers the only industry in America that can’t be sued. \n", + "\n", + "These laws don’t infringe on the Second Amendment. They save lives. \n", + "\n", + "The most fundamental right in America is the right to vote – and to have it counted. And it’s under assault. \n", + "\n", + "In state after state, new laws have been passed, not only to suppress the vote, but to subvert entire elections. \n", + "\n", + "We cannot let this happen. \n", + "\n", + "Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \n", + "\n", + "Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n", + "\n", + "One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n", + "\n", + "And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence. \n", + "\n", + "A former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder. Since she’s been nominated, she’s received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans. \n", + "\n", + "And if we are to advance liberty and justice, we need to secure the Border and fix the immigration system. \n", + "\n", + "We can do both. At our border, we’ve installed new technology like cutting-edge scanners to better detect drug smuggling. 
\n", + "\n", + "We’ve set up joint patrols with Mexico and Guatemala to catch more human traffickers. \n", + "\n", + "We’re putting in place dedicated immigration judges so families fleeing persecution and violence can have their cases heard faster.\n" + ] + } + ], + "source": [ + "print(docs[0].page_content)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "12ca9ea8-3d09-49fb-922e-47c64ba90f28", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'vector': [-0.005863776430487633, -0.0019847142975777388, -0.004525014664977789, -0.002664136001840234, -0.0007940530776977539, 0.01969318464398384, 0.01712276227772236, 0.008474362082779408, -0.01931833289563656, -0.016988886520266533, 0.01086405199021101, 0.010763644240796566, -0.0004455566522665322, -0.007537228986620903, -0.003405475290492177, -0.0009003172744996846, 0.03338871896266937, -0.009672553278505802, 0.007657717447727919, -0.03087184764444828, -0.014016835950314999, 0.003234783187508583, 0.014552340842783451, 0.0068009099923074245, 0.0008007469004951417, 0.010261609219014645, 0.03170187771320343, -0.010013937950134277, 0.011004622094333172, -0.018608788028359413, -0.01729680225253105, 0.0061917733401060104, -0.036789171397686005, -0.018448136746883392, -0.02779269404709339, -0.0061415694653987885, 0.0002734002482611686, -0.011084947735071182, 0.018943479284644127, -0.014217650517821312, 0.036173343658447266, -0.02574438974261284, 0.002319404622539878, -0.01838119886815548, -0.019104130566120148, 0.017952794209122658, -0.00919059943407774, -0.020764194428920746, -0.026052303612232208, 0.025610512122511864, 0.044580765068531036, 0.0020282240584492683, -0.029211781919002533, -0.024994682520627975, 0.011586982756853104, -0.013735695742070675, -0.013327373191714287, 0.009378026239573956, -0.01097115222364664, -0.011607064865529537, 0.013882959261536598, 0.0014149037888273597, -0.02219666913151741, 0.01697549782693386, -0.009411495178937912, -0.01838119886815548, 0.0012860479764640331, 0.02172810398042202, -0.003882409306243062, 0.015797387808561325, 0.054246626794338226, 0.0028314811643213034, 0.026186181232333183, -0.0068678478710353374, 0.031621553003787994, -0.019719960168004036, -0.005365087650716305, -0.004725828766822815, -0.0011948448373004794, -0.017725205048918724, 0.022451035678386688, -0.01289896946400404, -0.02246442250907421, 0.015917876735329628, 0.013206885196268559, -0.014579115435481071, -0.002242425922304392, -0.0010567849967628717, 0.002655768534168601, 0.0006116467993706465, 0.013006070628762245, 0.024378851056098938, -0.003266578773036599, 0.006626870948821306, -0.009639084339141846, 0.015261884778738022, -0.02694927528500557, 0.02162100188434124, 0.008112896233797073, -0.026386994868516922, 0.016881786286830902, -0.02089807018637657, -0.026453932747244835, -0.011473188176751137, -0.028970805928111076, -0.02961341105401516, -0.006188426166772842, 0.002182181691750884, 0.004344281740486622, 0.011011315509676933, -0.006827685050666332, 0.009029948152601719, 0.0015763919800519943, 0.0075706979259848595, -0.011533432640135288, -0.02203601785004139, -0.018314260989427567, -0.025583738461136818, 0.022330546751618385, -0.03890441730618477, 0.019037192687392235, 0.014445239678025246, 0.0022390789818018675, -0.027953345328569412, 0.01969318464398384, -0.019974324852228165, -0.014164099469780922, 0.008199915289878845, 0.0008442566613666713, 0.003725104732438922, -0.011553513817489147, -0.011473188176751137, 0.023334616795182228, -0.008400729857385159, 
0.011406250298023224, 0.007885306142270565, -0.02093823440372944, 0.01755116693675518, -0.01376247126609087, -0.01838119886815548, 0.01917106844484806, -0.01279856264591217, -0.02579793892800808, -0.01538237277418375, 0.01271823700517416, 0.021272923797369003, 0.0005706471856683493, 0.005903939250856638, 0.014552340842783451, 0.015810776501893997, 0.014766542240977287, -0.01603836566209793, -0.0003526800428517163, -0.007845143787562847, 0.004970152862370014, -0.002126957755535841, -0.024539504200220108, 0.0015303720720112324, 0.008969703689217567, 0.0027461349964141846, 0.006509729195386171, -0.01994754932820797, -0.009331169538199902, 0.03649464622139931, 0.02314719185233116, 0.016426606103777885, -0.014498789794743061, 0.02684217318892479, -0.0007497065817005932, 0.02554357424378395, 0.01915767975151539, 0.017899245023727417, -0.015288659371435642, 0.02773914486169815, 0.00015939632430672646, 0.007778205908834934, 0.018407974392175674, -0.008748807944357395, -0.02694927528500557, 0.01713615097105503, 0.01801973208785057, 0.0008266853983514011, 0.012222895398736, 0.04380428418517113, -0.023120416328310966, -0.009337862953543663, 0.017939407378435135, 0.0074836784042418, -0.023334616795182228, -0.007443515583872795, -0.0010659890249371529, 0.020871296525001526, 0.011138497851788998, -0.012832031585276127, -0.6456044912338257, -0.014552340842783451, 0.017484229058027267, -0.012115794233977795, -0.0034573522862046957, 0.010121039114892483, -0.0011714164866134524, 0.01785908080637455, -0.016426606103777885, 0.01538237277418375, -0.013534881174564362, 0.012805256061255932, 0.0006769114406779408, -0.022852662950754166, -0.026092467829585075, -0.027926571667194366, -0.013039539568126202, -0.00830701645463705, 0.031139599159359932, -0.006164997816085815, -0.02611924149096012, 0.004387791734188795, -0.006108100526034832, 0.0072493948973715305, 0.008353873156011105, 0.015676898881793022, 0.020509829744696617, -0.016105303540825844, -0.015650125220417976, 0.010515973903238773, -0.030175691470503807, 0.03204995393753052, -0.0017805531388148665, 0.0056227995082736015, 0.040136076509952545, -0.0022223445121198893, 0.0030105405021458864, 0.022866051644086838, 0.013668757863342762, 0.021808428689837456, -0.012336689978837967, 0.024378851056098938, 0.03954702243208885, -0.0028113997541368008, 0.025664063170552254, -0.00548222940415144, 0.021768266335129738, -0.010094263590872288, 0.0003871950029861182, 0.0241780374199152, -0.005867123603820801, 0.019559308886528015, -0.000377781834686175, 0.001261782948859036, -0.015730449929833412, -0.002237405627965927, -0.007162375375628471, -0.02146035060286522, 0.0009747859439812601, 0.0026674827095121145, -0.0057165129110217094, 0.008655094541609287, -0.022544747218489647, -0.011131804436445236, -0.01958608441054821, 0.02856917679309845, 0.012336689978837967, 0.011801185086369514, 0.018916703760623932, -0.0066201770678162575, 0.014659442007541656, 0.004689013119786978, -0.01013442687690258, -0.03515588492155075, 0.010054100304841995, -0.004340935032814741, 0.026025528088212013, -0.013019458390772343, -0.005673002917319536, 0.011312536895275116, 0.0013747409684583545, -0.00547218881547451, 7.080794603098184e-05, -0.0010944376699626446, 0.01607852801680565, 0.008929540403187275, -0.02172810398042202, 0.00571985961869359, 0.003490821458399296, 0.012283138930797577, 0.025463249534368515, 0.0025536881294101477, 0.011185354553163052, -0.017992958426475525, 0.010930989868938923, 0.02230377122759819, -0.023321229964494705, 0.0025202189572155476, 0.012390240095555782, 
-0.03170187771320343, -0.003520943457260728, -0.011841347441077232, 0.02370947040617466, 0.007282864302396774, 0.01650693267583847, 0.013193497434258461, -0.013949898071587086, -0.010355322621762753, 0.036066241562366486, -0.03818148374557495, -0.015275271609425545, 0.005187701899558306, -0.018889928236603737, -0.017618104815483093, 0.006600095424801111, -0.01665419526398182, 0.00783175602555275, 0.018622176721692085, -0.015061070211231709, -0.019974324852228165, 0.005164273548871279, -2.9782220735796727e-05, 0.013012764044106007, -0.03906506672501564, 0.015502861700952053, 0.005204436369240284, 0.006499688606709242, -0.003090866142883897, -0.0010735195828601718, -0.01049589179456234, 0.0033569452352821827, -0.0045149740763008595, 0.020978396758437157, 0.009210680611431599, 0.014846867881715298, -0.005047131795436144, 0.013802633620798588, -0.010904214344918728, 0.016774684190750122, -0.011325924657285213, -0.0029034395702183247, -0.001386455143801868, -0.006041162647306919, -0.003771961433812976, -0.02480725571513176, -0.02579793892800808, -0.0007149824523366988, -0.002642381004989147, -0.030041813850402832, -0.027498167008161545, 0.009779654443264008, -0.0185418501496315, -0.021607615053653717, -0.005960837006568909, 0.0074836784042418, -0.0010919275227934122, -0.01571706309914589, 0.01543592382222414, -0.004866398870944977, -0.02208956889808178, 0.01602497696876526, 0.0035744940396398306, -0.02779269404709339, -0.01475315447896719, 0.009833205491304398, -0.010268302634358406, 0.04099288582801819, 0.013461249880492687, 0.006600095424801111, -0.027873020619153976, 0.0012266404228284955, -0.013949898071587086, -0.0015337190125137568, -0.0008810725994408131, 0.03740500286221504, 0.017015662044286728, -0.02878337912261486, 0.01376247126609087, 0.016627419739961624, 0.011607064865529537, -0.007389965001493692, -0.013166721910238266, -0.02532937191426754, -0.007021805737167597, 0.018394585698843002, 0.016105303540825844, 0.004120039287954569, 0.014994132332503796, -0.02423158846795559, 0.020871296525001526, -0.0006279629305936396, -0.007784899789839983, -0.01801973208785057, 0.009813123382627964, 0.012597748078405857, 0.030148915946483612, 0.0088559091091156, 0.00596753042191267, 0.0033619655296206474, 0.02862272784113884, 0.011265680193901062, 0.011138497851788998, 0.02214311994612217, -0.010455729439854622, -0.01828748546540737, -0.03842246159911156, 0.009752878919243813, -0.031621553003787994, 0.0212996993213892, 0.0025720959529280663, -0.005709819030016661, -0.027350902557373047, -0.02632005698978901, -0.03234448283910751, 0.009712716564536095, 0.018394585698843002, -0.009732797741889954, 0.030363118276000023, -0.010054100304841995, -0.016828235238790512, 0.011613758280873299, 0.016855010762810707, 0.017216475680470467, -0.008273547515273094, 0.004893174394965172, 0.0032967007718980312, -0.0019311638316139579, 0.011680696159601212, 0.010857357643544674, -0.0015220048371702433, 8.377720223506913e-05, 0.01875605247914791, 0.015368985012173653, 0.031353797763586044, -0.01013442687690258, -0.02167455293238163, 0.0024649950210005045, -0.0015939632430672646, 0.04184969142079353, 0.004638809245079756, 0.02615940570831299, 0.020228689536452293, 0.016373055055737495, -0.001106151845306158, 0.02574438974261284, -0.031675104051828384, 0.0442059151828289, 0.00973949208855629, 0.030416667461395264, 0.013695533387362957, 0.00031586410477757454, 0.002749481936916709, -0.0013362516183406115, 0.008153058588504791, 0.01760471612215042, -0.03510233387351036, -0.0022072833962738514, 0.02083113230764866, 
0.014659442007541656, 0.02575777657330036, 0.033549368381500244, 0.03060409426689148, 0.01654709503054619, -0.017511002719402313, -0.007543922867625952, 0.0015379026299342513, -0.010462422855198383, 0.007677799090743065, -0.0044681173749268055, -0.01812683418393135, 0.0018374505452811718, -0.017926020547747612, 0.009993856772780418, 0.00771796191111207, 0.031675104051828384, 0.022892825305461884, -0.004879786632955074, 0.015181559138000011, 0.0022223445121198893, 0.003467393107712269, -0.00917051825672388, -0.03413842245936394, 0.02721702679991722, 0.0240307729691267, -0.014900418929755688, -0.003497515106573701, -0.010462422855198383, -0.021594226360321045, -0.021085496991872787, 0.019452208653092384, -0.01739051565527916, -0.007624248508363962, -0.008688563480973244, 0.029800837859511375, -0.004983540624380112, -0.016051752492785454, 0.030684420838952065, -0.01376247126609087, 0.017899245023727417, -0.0014584135496988893, 0.005458801053464413, -0.001113682403229177, -0.022999927401542664, -0.0038388995453715324, 0.008782276883721352, -0.0030590705573558807, 0.012624523602426052, -0.011807878501713276, 0.023200741037726402, -0.017939407378435135, 0.01827409863471985, -0.009839898906648159, -0.013461249880492687, 0.010382097214460373, 0.002767889993265271, -0.003795389784500003, -0.02741784043610096, -0.014378301799297333, 0.004387791734188795, -0.012082325294613838, -0.002431526081636548, -0.024419015273451805, -0.04466109350323677, -0.016573870554566383, 0.13719630241394043, 0.02590504102408886, -0.00403301976621151, 0.007021805737167597, -0.006486300844699144, 0.0037083702627569437, -0.003395434468984604, -0.004461423493921757, 0.011332618072628975, -0.018461523577570915, 0.002367934910580516, 0.009324475191533566, -0.0032833132427185774, -0.003731798380613327, 0.012517422437667847, 0.003226415952667594, 0.018822990357875824, -0.025677450001239777, -0.010060794651508331, -0.013990060426294804, -0.01472637988626957, -0.005027050152420998, 0.021821817383170128, 0.032826438546180725, -0.02428513765335083, -0.01634628139436245, 0.031246699392795563, 0.026306668296456337, 0.012691461481153965, 0.003889102954417467, -0.002913480391725898, 0.014980744570493698, 0.02241087146103382, -0.0004777706053573638, -0.02302670292556286, -0.002781277522444725, 0.017162924632430077, -0.033817119896411896, 0.023227516561746597, 0.016413219273090363, 0.013153334148228168, 9.360873082187027e-05, 0.004320853389799595, -0.01154012605547905, 0.029907938092947006, -0.01634628139436245, 0.009157130494713783, 0.0020901416428387165, 0.01021475251764059, 0.0007053600857034326, 0.016948724165558815, -0.006663686595857143, -0.0106498496606946, -0.012939132750034332, 0.0024951172526925802, 0.012544197961688042, -0.0002017555816564709, -0.005006968975067139, -0.019238006323575974, 0.02329445444047451, -0.026909111067652702, -0.03411164879798889, 0.002063366584479809, -0.01650693267583847, 0.005686390679329634, -0.019666410982608795, -0.0056930845603346825, 0.003350251354277134, -0.0167612973600626, -0.013220272958278656, -0.006221895571798086, -0.008420811034739017, -0.03834213688969612, 0.02459305338561535, 0.009444964118301868, 0.011004622094333172, 0.03293353691697121, 0.0016491871792823076, 0.005070560146123171, -0.0012902315938845277, 0.006767440587282181, -0.042278096079826355, -0.0022859356831759214, 0.004946724511682987, -0.0013019457692280412, 0.00628213956952095, -0.01822054758667946, -0.00854129996150732, -0.02433868870139122, 0.037726305425167084, -0.00562949338927865, 0.0016885133227333426, 
0.014619278721511364, -0.02183520421385765, -0.002321078209206462, 0.01796618290245533, 0.024218199774622917, 0.018033120781183243, -0.002704298822209239, -0.006185079459100962, 0.015904489904642105, -0.030041813850402832, -0.016908559948205948, -0.0203224029392004, -0.005957489833235741, -0.016373055055737495, 0.0074769845232367516, 0.02590504102408886, -0.01289896946400404, -0.011098334565758705, -0.005438719876110554, -0.011607064865529537, 0.0039058374240994453, 0.017484229058027267, -0.004863052163273096, 0.0024716889020055532, 0.01947898417711258, 0.007222619839012623, 0.001441679080016911, -0.02365592122077942, 0.0056897373870015144, -0.018367810174822807, 0.035798490047454834, 0.02194230444729328, -0.0063256495632231236, -0.008661787956953049, 0.006837725639343262, -0.021487126126885414, 0.018207158893346786, 0.0043978323228657246, 0.002235732041299343, 0.020603543147444725, -0.012269752100110054, -0.022009244188666344, -0.011238904669880867, -0.01645338162779808, -0.014445239678025246, 0.021540677174925804, 0.009913531132042408, 0.008159752935171127, -0.014485402964055538, -0.011707471683621407, -0.00022989050194155425, -0.04701731353998184, 0.014405076391994953, -0.014699604362249374, 0.006265405099838972, 0.000786940916441381, -0.01755116693675518, 0.0030791519675403833, -0.030577318742871284, -0.007256088778376579, -0.024834031239151955, -0.0010777032002806664, -0.0423048697412014, -0.021179210394620895, -0.0007501249783672392, -0.026547646149992943, 0.03692304715514183, 0.02684217318892479, 0.019345106557011604, 0.0041702426970005035, -0.012055549770593643, 0.0120890187099576, 0.01522172149270773, 0.01645338162779808, -0.007008417975157499, 0.023588981479406357, -0.009953693486750126, 0.04289392754435539, 0.031996406614780426, 0.018247323110699654, -0.028488850221037865, 0.008869296871125698, 0.008581462316215038, 0.02084452100098133, -0.028194323182106018, -0.004401179030537605, -0.011198742315173149, -0.022076182067394257, -0.023856734856963158, -0.008835827000439167, -0.002734420821070671, -0.0035811876878142357, -0.014284588396549225, 7.746252776996698e-06, 0.04931998252868652, -0.012450484558939934, 0.029185006394982338, -0.011894898489117622, 0.02167455293238163, -0.015047682449221611, -0.004223793279379606, -0.008849214762449265, -0.014927193522453308, -0.02057676762342453, -0.04626760631799698, 0.0051709674298763275, 0.03373679518699646, -0.013320679776370525, 0.009023253805935383, -0.0013772511156275868, -0.010382097214460373, -0.015168171375989914, 0.013521494343876839, 0.010669930838048458, -0.018608788028359413, -0.018501687794923782, 0.016828235238790512, -0.019974324852228165, -0.00033385370625182986, -0.00965916644781828, -0.027190251275897026, -0.029907938092947006, 0.0012400280684232712, 0.0006639421335421503, 0.01015450805425644, 0.010837276466190815, -0.007597472984343767, -0.015128008089959621, -0.027297353371977806, -0.014364914037287235, 0.008782276883721352, -0.005820266902446747, 0.011272373609244823, 0.007543922867625952, 0.00016619471716694534, -0.013789246790111065, 0.02172810398042202, 0.033549368381500244, 0.004357669502496719, 0.005398556590080261, 0.02700282447040081, -0.013775859028100967, -0.0007513800519518554, 0.00041815388249233365, 0.006379199679940939, -0.016774684190750122, -0.03071119636297226, 0.024271750822663307, 0.018836377188563347, -0.012992682866752148, -0.017002273350954056, -0.0008354710298590362, -0.018140221014618874, -0.010254914872348309, -0.01480670552700758, 0.02518210932612419, -0.001659227884374559, 
-0.010984539985656738, -0.020282240584492683, -0.004571871366351843, -0.006262058392167091, 0.005890551954507828, 0.02255813591182232, -0.01587771438062191, 0.011098334565758705, -0.0019261435372754931, 0.00572990020737052, 0.00644948473200202, -0.01433813851326704, 0.03164832666516304, -0.01827409863471985, 0.0040397136472165585, 0.0010484177619218826, 0.020697256550192833, -0.031086048111319542, 0.0005011989269405603, 0.024820642545819283, 0.024298526346683502, 0.0009639085037633777, 0.004568524658679962, -0.012343383394181728, -0.0011270700488239527, -0.01728341355919838, -0.007938857190310955, -0.026239730417728424, -0.020483054220676422, 0.00014914642088115215, 0.0016567177372053266, 0.007851837202906609, -0.0022240178659558296, -0.034754253923892975, -0.0017253292025998235, -0.003218048717826605, -0.019438819959759712, -0.016279341652989388, -0.018582012504339218, 0.025396311655640602, -0.0009371332707814872, -0.017484229058027267, -0.02178165316581726, -0.0014542299322783947, 0.027444615960121155, -0.004106651525944471, 0.009578839875757694, 0.021072110161185265, 0.003062417497858405, -0.027042988687753677, 0.01522172149270773, -0.038877639919519424, 0.007851837202906609, -0.03547718748450279, -0.005974224302917719, -0.03279966115951538, -0.013909734785556793, 0.00917051825672388, -0.002953643212094903, -0.025918427854776382, -0.020857907831668854, -0.007577391806989908, 0.0018910010112449527, 0.0018290833104401827, -0.017403902485966682, -0.006459525786340237, -0.003008867148309946, -0.00241646496579051, -0.013963285833597183, -0.01980028674006462, 0.05140845105051994, -0.016640808433294296, -0.005783450789749622, 0.0005053825443610549, -0.02532937191426754, -0.009799735620617867, 0.00089613365707919, 0.010763644240796566, 0.012537503615021706, -0.01013442687690258, -0.02266523614525795, -0.010623074136674404, 0.022705400362610817, -0.036949824541807175, -0.03055054321885109, -0.0149673568084836, 0.004394485615193844, -0.02037595398724079, 0.004702400416135788, 0.008547993376851082, -0.012932438403367996, 0.020014489069581032, 0.01303284615278244, 0.01488703116774559, -0.012517422437667847, -0.010040713474154472, -0.01602497696876526, 0.004357669502496719, -0.015342210419476032, -0.013073008507490158, -0.03306741639971733, -0.017939407378435135, 0.027096537873148918, -8.273129060398787e-05, -0.014458627440035343, -0.009726104326546192, -0.020242078229784966, -0.023776408284902573, -0.00950520858168602, -0.03175542876124382, 0.002734420821070671, 0.031166374683380127, 0.02356220781803131, 0.004628768656402826, 0.024164650589227676, -0.011714165098965168, 0.023120416328310966, -0.00443799514323473, -0.0036749010905623436, 0.01927816867828369, -0.037056926637887955, 0.036066241562366486, 0.0077514308504760265, -0.0211524348706007, -0.0005325761740095913, 0.009304394014179707, -0.0036347382701933384, 0.029238557443022728, 0.01613207906484604, -0.0362536683678627, 0.0003723431145772338, 0.0048965211026370525, 0.0051709674298763275, 0.011680696159601212, 0.006784175522625446, 0.0164935439825058, -0.0384492389857769, -0.023388167843222618, -0.0013287210604175925, -0.0023545471485704184, -0.008574768900871277, -0.01755116693675518, 0.01281864382326603, 0.0014215976698324084, 5.653130938299e-05, -0.015757225453853607, -0.001877613365650177, 0.03665529564023018, -0.01921123079955578, 0.028087222948670387, 0.015636736527085304, -0.009257537312805653, 0.018582012504339218, 0.02725718915462494, -0.016640808433294296, -0.005117416847497225, -0.005201089195907116, 0.015061070211231709, 
0.012537503615021706, -0.0033569452352821827, 0.00042484767618589103, 0.036173343658447266, -0.02093823440372944, -0.005298149771988392, -0.012477260082960129, 0.02277233824133873, -0.01008087582886219, -0.005455454345792532, -0.002896745689213276, 0.00771796191111207, 0.0073230271227657795, -0.016587257385253906, -0.008688563480973244, 0.013467943295836449, -0.02575777657330036, 0.0033318432979285717, -0.019653022289276123, -0.014953969046473503, -0.010261609219014645, -0.010870745405554771, -0.0026055651251226664, -0.006968255154788494, -0.02282588742673397, -0.0021236108150333166, -0.012631217017769814, -0.007637635804712772, 0.021955693140625954, 0.23198063671588898, 0.003340210532769561, 0.005271374247968197, 0.016252567991614342, -0.013260435312986374, 0.030577318742871284, 0.010141120292246342, 0.011801185086369514, -0.003544371807947755, 0.021018559113144875, -0.01392312254756689, -0.010917602106928825, -0.021594226360321045, 0.004434648435562849, 0.0007823389023542404, -0.008869296871125698, -0.035798490047454834, -0.02345510572195053, -0.007938857190310955, 0.002749481936916709, -0.01917106844484806, 0.00942488294094801, -0.0058938986621797085, -0.014538953080773354, 0.015810776501893997, 0.016051752492785454, 0.0073698838241398335, 0.014980744570493698, 0.00692139845341444, -0.002874990925192833, -0.022892825305461884, -0.006335690151900053, 0.012390240095555782, -0.000747614772990346, -0.0023311187978833914, -0.011787797324359417, -0.024941131472587585, -0.012336689978837967, -0.0055993711575865746, 0.015556411817669868, -0.020616931840777397, 0.03245158493518829, 0.0018876540707424283, 0.007242701482027769, -0.004287384450435638, 0.041448064148426056, -0.00667372765019536, -0.013039539568126202, 0.0083806486800313, 0.006014387123286724, -0.03175542876124382, 0.011707471683621407, 0.01791263185441494, 0.02565067633986473, 0.0006677074125036597, -0.015569799579679966, 0.0005300659686326981, 0.003358618589118123, -0.018394585698843002, -0.013675451278686523, -0.015757225453853607, 0.00861493218690157, -0.013635288923978806, 0.039921876043081284, -0.013882959261536598, 0.04053770750761032, 0.020871296525001526, 0.009250843897461891, 0.007952244952321053, -0.013019458390772343, -0.030068589374423027, 0.011841347441077232, -0.01151335146278143, -0.004846317693591118, -0.017564553767442703, -0.01733696460723877, 0.012537503615021706, 0.01135939359664917, 0.014016835950314999, -0.0024348730221390724, 0.003607962979003787, -0.01692194864153862, 0.010562830604612827, 0.004247221630066633, -0.00266246241517365, -0.035075556486845016, 0.022384095937013626, -0.0034874745178967714, -0.007490372285246849, 0.004682319238781929, 0.0035477187484502792, -0.015810776501893997, -0.014873643405735493, -0.00848774891346693, -0.0013136599445715547, -0.00976626668125391, 0.010362016037106514, 0.035022005438804626, -0.020094813778996468, 0.01859540119767189, -0.031407348811626434, 0.02172810398042202, 0.033442266285419464, -0.011064865626394749, -0.004893174394965172, -0.0010484177619218826, -0.001434985315427184, 0.039975427091121674, 0.020710645243525505, -0.026360219344496727, -0.0004292404919397086, -0.021607615053653717, -0.004451382905244827, -0.006914704572409391, -0.0019964284729212523, 0.018193772062659264, 0.02282588742673397, -0.021433575078845024, 0.02569083869457245, 0.0027327474672347307, -0.004769338760524988, -0.035691387951374054, -0.031166374683380127, -0.002039938233792782, 0.0015805755974724889, -0.020175140351057053, -0.0075706979259848595, -0.005197742488235235, 
-0.004056448116898537, -0.024927744641900063, 0.0060445093549788, -0.011018008925020695, 0.03357614576816559, -0.003554412629455328, -0.001986387651413679, -0.0008844194817356765, 0.02188875526189804, 9.198757470585406e-05, -0.01157359592616558, 0.0019211231265217066, -0.00507725402712822, 0.0004426281084306538, 0.0055960239842534065, -0.013481331057846546, 0.00846097432076931, -0.014980744570493698, 0.02507500723004341, -0.025516798719763756, -0.0013119864743202925, -0.0033251496497541666, -0.03858311474323273, 0.02627989463508129, 0.008608237840235233, -0.018983641639351845, 0.016841622069478035, -0.029265332967042923, -0.02381657250225544, -0.03545041009783745, -0.01681484654545784, 0.015529637224972248, -0.03852956369519234, 0.024686766788363457, 0.023281067609786987, 0.004605340305715799, -0.019023803994059563, -0.009150436148047447, -0.17104020714759827, 0.03346904367208481, 0.004354322329163551, -0.006837725639343262, 9.397479880135506e-05, -0.007309639360755682, 0.00911027379333973, -0.014712992124259472, -0.0008484402787871659, -0.00233781267888844, 0.01791263185441494, 0.005883858073502779, -0.017216475680470467, -0.011225517839193344, 0.0003819654812105, -0.018863152712583542, -0.022692011669278145, 0.010522667318582535, 0.022437646985054016, 0.010221445932984352, 0.047392167150974274, -0.027083151042461395, 0.011319230310618877, -0.04361685737967491, -0.001145477988757193, -0.0149673568084836, -0.009277618490159512, 0.02005465142428875, -0.012376852333545685, -0.019934162497520447, -0.02036256715655327, -0.009853286668658257, 0.006974949035793543, 0.023334616795182228, 0.005950795952230692, 0.00274278805591166, 0.0021102232858538628, -0.0019964284729212523, -0.0013805980561301112, 0.015623349696397781, 0.0439649373292923, 0.020764194428920746, -0.012517422437667847, -0.006496341433376074, -0.015449310652911663, 0.01279856264591217, 0.005766716320067644, -0.004755950998514891, -0.006814297288656235, -0.003343557473272085, -0.01598481461405754, -0.043429430574178696, -0.011145191267132759, 0.01953253336250782, 0.0174440648406744, -0.004819542169570923, -0.03657497093081474, -0.006228588987141848, -0.014231037348508835, -0.009719409979879856, -0.0068477666936814785, 0.013695533387362957, 0.00506721343845129, 0.002038264647126198, -0.015837552025914192, -0.007905388250946999, -0.023669308051466942, -0.007356496062129736, -0.03368324413895607, 0.010274996049702168, -0.03279966115951538, 0.006007693242281675, -0.007450209464877844, -0.02950630895793438, 0.005003622267395258, 0.01884976588189602, -0.0044413418509066105, 0.002751155523583293, 0.008025876246392727, 0.006315608508884907, -0.0177118182182312, 0.023200741037726402, -0.01733696460723877, 0.007584085687994957, 0.005355047062039375, 0.011038091033697128, 0.010589605197310448, 0.0029569901525974274, -0.008440893143415451, -0.029104681685566902, 0.008829133585095406, -0.03676239773631096, 0.018247323110699654, -0.012102406471967697, -0.008447586558759212, 0.013481331057846546, 0.023588981479406357, -0.014445239678025246, 0.0023562207352370024, -0.019519146531820297, 0.0013780879089608788, -0.02204940654337406, 0.0029168270993977785, 0.017899245023727417, 0.0054654949344694614, 0.01660064607858658, 0.027350902557373047, 0.04324200376868248, 0.013856184668838978, -0.0054420665837824345, -0.015114620327949524, 0.01102470327168703, 0.009257537312805653, 0.003929265774786472, 0.009244149550795555, -0.007356496062129736, -0.010348628275096416, -0.0007384108030237257, 0.021487126126885414, -0.028381749987602234, 0.06345730274915695, 
0.005137498024851084, -0.023629145696759224, 0.005478882696479559, 0.004732522647827864, -0.012296526692807674, -0.1011032909154892, 0.004304118920117617, 0.006305567920207977, 0.01467282883822918, -0.009880061261355877, 0.03143412619829178, 0.0030657644383609295, 0.04152838885784149, -0.013099784031510353, 0.03290676325559616, -0.01480670552700758, -0.030282791703939438, -0.007617554627358913, 0.013595125637948513, 0.018421361222863197, 0.00241479161195457, 0.0012592728016898036, -0.004458076786249876, -0.005428678821772337, 0.026146017014980316, -0.0044212606735527515, 0.002905112924054265, 0.009157130494713783, -0.013963285833597183, -0.012999377213418484, -0.014846867881715298, -0.0211524348706007, 0.016252567991614342, -0.009083498269319534, 0.013816021382808685, -0.005308190360665321, 0.014953969046473503, 0.01706921122968197, 0.00627879286184907, -0.020871296525001526, 0.003490821458399296, -0.0332280658185482, -0.02203601785004139, 0.027029599994421005, -0.015328822657465935, 0.004776032641530037, -0.020496442914009094, -0.0027160129975527525, -0.028381749987602234, -0.007363189943134785, -0.0024599747266620398, -0.006031121592968702, 0.005281415302306414, 0.022009244188666344, -0.01656048186123371, -0.02428513765335083, 0.010020631365478039, -0.0014249446103349328, -0.030898621305823326, 0.00443799514323473, 0.005187701899558306, -0.001059295260347426, -0.014699604362249374, -0.005227864719927311, 0.002454954432323575, 0.00030477746622636914, 0.01071009412407875, -0.010442341677844524, 0.015944652259349823, -0.0012893949169665575, -0.024767093360424042, -0.047606367617845535, 0.0022775684483349323, 0.007778205908834934, -0.012825338169932365, -0.0022240178659558296, 0.013554963283240795, -0.022892825305461884, 0.008869296871125698, -0.0288369283080101, 0.007918776012957096, -0.037940509617328644, -0.0014174140524119139, 0.020536605268716812, -0.02768559381365776, -0.00047484206152148545, -0.0174440648406744, 0.016828235238790512, -0.007597472984343767, 0.0252758227288723, 0.009826511144638062, -0.0054420665837824345, 0.01185473520308733, 0.0018960214219987392, -0.012524116784334183, 0.00861493218690157, 0.0318625271320343, -0.002891725394874811, -0.009177211672067642, 0.004334241151809692, 0.032505135983228683, 0.008400729857385159, 0.0021369983442127705, 0.008547993376851082, 0.007885306142270565, -0.0063256495632231236, 0.0018910010112449527, -0.06361795961856842, 0.022183282300829887, 0.0005267190863378346, 0.0012040488654747605, -0.005783450789749622, 0.014833481051027775, -0.0060445093549788, 0.0002813491446431726, 0.02037595398724079, 0.013789246790111065, -0.006914704572409391, 0.02042950503528118, 0.02219666913151741, -0.012316607870161533, -0.03703014925122261, 0.021554064005613327, 0.014405076391994953, 0.005408597644418478, 0.03743177652359009, 0.0060445093549788, 0.005361740943044424, 0.029238557443022728, 0.014940581284463406, 0.009471739642322063, -0.0006367485621012747, -0.004354322329163551, -0.01724325120449066, 0.006051203235983849, 0.011158579029142857, -0.008039264008402824, -0.0016140446532517672, -0.013635288923978806, -0.01143971923738718, 0.01823393441736698, -0.007135600317269564, -0.027444615960121155, 0.009793042205274105, -0.003842246253043413, 0.005010315682739019, 0.002568749012425542, -0.031407348811626434, -0.024298526346683502, -0.01681484654545784, -0.017457453534007072, -0.004156854934990406, -0.0058738174848258495, -0.005709819030016661, -0.013749083504080772, 0.0015412494540214539, -0.0039694285951554775, -0.011379474774003029, 
0.0008229201193898916, -0.03154122456908226, 0.003915878012776375, -0.01062976848334074, -0.01447201520204544, 0.003929265774786472, 0.014083773829042912, 0.0031527839601039886, -0.027605267241597176, 0.034031324088573456, 0.010335240513086319, 0.0022574870381504297, -0.010034019127488136, 0.02862272784113884, -0.015489473938941956, -0.027712369337677956, 0.007082049734890461, 0.026333443820476532, -0.02532937191426754, -0.035022005438804626, -0.011894898489117622, -0.0019261435372754931, 0.02105872333049774, -0.008581462316215038, -0.007644329685717821, 0.012671380303800106, 0.0033100885339081287, 0.011346005834639072, 0.02162100188434124, 0.022062793374061584, -0.004136773757636547, -0.012035468593239784, 0.03622689098119736, -0.006215201690793037, 0.015114620327949524, -0.004889827221632004, 0.020081426948308945, 0.011131804436445236, 0.0020683868788182735, -0.02579793892800808, -0.0028498892206698656, -0.007008417975157499, 0.0009229088900610805, -0.010930989868938923, 0.005920673720538616, -0.004856358282268047, 0.00017759510956238955, 0.026467319577932358, -0.00037213394534774125, -0.005351700354367495, -0.018059896305203438, -0.0018742665415629745, 0.009752878919243813, -0.0029636838007718325, 0.025838103145360947, -0.028167547658085823, 0.0019378577126190066, -0.02486080676317215, 0.023696083575487137, 0.02136663720011711, 0.023374781012535095, 0.00905672274529934, 0.028033671900629997, -0.00395604083314538, 0.02203601785004139, 0.005388516001403332, -0.02095162123441696, -0.006375852972269058, 0.04559822753071785, 0.026708297431468964, -0.011325924657285213, 0.0066201770678162575, -0.010676625184714794, 0.02611924149096012, 0.008481055498123169, -0.001496066222898662, -0.0014458626974374056, 0.006208507809787989, 0.004314159508794546, 0.04075190797448158, -0.019452208653092384, -0.04393815994262695, 0.011807878501713276, -0.010690012946724892, 0.008467667736113071, 0.011158579029142857, 0.02516872063279152, 0.0006961561157368124, 0.04795444756746292, 0.01780553162097931, 0.0019361842423677444, -0.0063959346152842045, -0.010683318600058556, 0.01942543312907219, -0.008969703689217567, 0.005017009563744068, 0.00013032008428126574, -0.013160028494894505, 0.03419197350740433, -0.020027875900268555, 0.0036983294412493706, -0.0006095549324527383, -0.027377678081393242, 0.01303284615278244, -0.004163548815995455, 0.016721133142709732, -0.002142018871381879, 0.01175432838499546, 0.0027545022312551737, 0.0029971529729664326, 0.020349178463220596, 0.018394585698843002, -0.007664411328732967, -0.004089917056262493, 0.01287888828665018, -0.020871296525001526, 0.0028080528136342764, -0.015087845735251904, 0.01289896946400404, 0.008494443260133266, -0.02266523614525795, -0.024740317836403847, 0.030148915946483612, -0.01875605247914791, 0.02255813591182232, 0.01729680225253105, 0.018314260989427567, 0.00771796191111207, 0.0032297628931701183, -0.004853011108934879, -0.020228689536452293, -0.03713725134730339, 0.026507483795285225, 0.013816021382808685, -0.008755501359701157, -0.021754879504442215, 0.004391138441860676], 'id': '0c906ab3-3786-477f-b13a-5a98367ceee6', '_distance': 0.4137815535068512}\n" + ] + } + ], + "source": [ + "print(docs[0].metadata)" + ] } ], "metadata": { @@ -215,7 +313,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.6" + "version": "3.11.5" } }, "nbformat": 4, diff --git a/docs/docs/modules/data_connection/vectorstores/index.mdx b/docs/docs/modules/data_connection/vectorstores/index.mdx index 
b1242021a27ca..3b6d12699b8c5 100644 --- a/docs/docs/modules/data_connection/vectorstores/index.mdx +++ b/docs/docs/modules/data_connection/vectorstores/index.mdx @@ -131,7 +131,7 @@ table = db.create_table( raw_documents = TextLoader('../../../state_of_the_union.txt').load() text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0) documents = text_splitter.split_documents(raw_documents) -db = LanceDB.from_documents(documents, OpenAIEmbeddings(), connection=table) +db = LanceDB.from_documents(documents, OpenAIEmbeddings()) ``` diff --git a/libs/community/langchain_community/vectorstores/lancedb.py b/libs/community/langchain_community/vectorstores/lancedb.py index 4ca68c92ca66b..414517793ee44 100644 --- a/libs/community/langchain_community/vectorstores/lancedb.py +++ b/libs/community/langchain_community/vectorstores/lancedb.py @@ -12,6 +12,18 @@ class LanceDB(VectorStore): """`LanceDB` vector store. To use, you should have ``lancedb`` python package installed. + You can install it with ``pip install lancedb``. + + Args: + connection: LanceDB connection to use. If not provided, a new connection + will be created. + embedding: Embedding to use for the vectorstore. + vector_key: Key to use for the vector in the database. Defaults to ``vector``. + id_key: Key to use for the id in the database. Defaults to ``id``. + text_key: Key to use for the text in the database. Defaults to ``text``. + table_name: Name of the table to use. Defaults to ``vectorstore``. + + Example: .. code-block:: python @@ -25,13 +37,14 @@ class LanceDB(VectorStore): def __init__( self, - connection: Any, - embedding: Embeddings, + connection: Optional[Any] = None, + embedding: Optional[Embeddings] = None, vector_key: Optional[str] = "vector", id_key: Optional[str] = "id", text_key: Optional[str] = "text", + table_name: Optional[str] = "vectorstore", ): - """Initialize with Lance DB connection""" + """Initialize with Lance DB vectorstore""" try: import lancedb except ImportError: @@ -39,19 +52,28 @@ def __init__( "Could not import lancedb python package. " "Please install it with `pip install lancedb`." ) - if not isinstance(connection, lancedb.db.LanceTable): - raise ValueError( - "connection should be an instance of lancedb.db.LanceTable, ", - f"got {type(connection)}", - ) - self._connection = connection + self.lancedb = lancedb self._embedding = embedding self._vector_key = vector_key self._id_key = id_key self._text_key = text_key + self._table_name = table_name + + if self._embedding is None: + raise ValueError("embedding should be provided") + + if connection is not None: + if not isinstance(connection, lancedb.db.LanceTable): + raise ValueError( + "connection should be an instance of lancedb.db.LanceTable, ", + f"got {type(connection)}", + ) + self._connection = connection + else: + self._connection = self._init_table() @property - def embeddings(self) -> Embeddings: + def embeddings(self) -> Optional[Embeddings]: return self._embedding def add_texts( @@ -74,7 +96,7 @@ def add_texts( # Embed texts and create documents docs = [] ids = ids or [str(uuid.uuid4()) for _ in texts] - embeddings = self._embedding.embed_documents(list(texts)) + embeddings = self._embedding.embed_documents(list(texts)) # type: ignore for idx, text in enumerate(texts): embedding = embeddings[idx] metadata = metadatas[idx] if metadatas else {} @@ -86,7 +108,6 @@ def add_texts( **metadata, } ) - self._connection.add(docs) return ids @@ -102,14 +123,23 @@ def similarity_search( Returns: List of documents most similar to the query. 
""" - embedding = self._embedding.embed_query(query) - docs = self._connection.search(embedding).limit(k).to_df() + embedding = self._embedding.embed_query(query) # type: ignore + docs = ( + self._connection.search(embedding, vector_column_name=self._vector_key) + .limit(k) + .to_arrow() + ) + columns = docs.schema.names return [ Document( - page_content=row[self._text_key], - metadata=row[docs.columns != self._text_key], + page_content=docs[self._text_key][idx].as_py(), + metadata={ + col: docs[col][idx].as_py() + for col in columns + if col != self._text_key + }, ) - for _, row in docs.iterrows() + for idx in range(len(docs)) ] @classmethod @@ -134,3 +164,23 @@ def from_texts( instance.add_texts(texts, metadatas=metadatas, **kwargs) return instance + + def _init_table(self) -> Any: + import pyarrow as pa + + schema = pa.schema( + [ + pa.field( + self._vector_key, + pa.list_( + pa.float32(), + len(self.embeddings.embed_query("test")), # type: ignore + ), + ), + pa.field(self._id_key, pa.string()), + pa.field(self._text_key, pa.string()), + ] + ) + db = self.lancedb.connect("/tmp/lancedb") + tbl = db.create_table(self._table_name, schema=schema, mode="overwrite") + return tbl diff --git a/libs/community/tests/integration_tests/vectorstores/test_lancedb.py b/libs/community/tests/integration_tests/vectorstores/test_lancedb.py index 37098e221141d..bde46e800e116 100644 --- a/libs/community/tests/integration_tests/vectorstores/test_lancedb.py +++ b/libs/community/tests/integration_tests/vectorstores/test_lancedb.py @@ -1,8 +1,11 @@ +import pytest + from langchain_community.vectorstores import LanceDB from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings -def test_lancedb() -> None: +@pytest.mark.requires("lancedb") +def test_lancedb_with_connection() -> None: import lancedb embeddings = FakeEmbeddings() @@ -23,22 +26,23 @@ def test_lancedb() -> None: assert "text 1" in result_texts -def test_lancedb_add_texts() -> None: - import lancedb +@pytest.mark.requires("lancedb") +def test_lancedb_without_connection() -> None: + embeddings = FakeEmbeddings() + texts = ["text 1", "text 2", "item 3"] + + store = LanceDB(embedding=embeddings) + store.add_texts(texts) + result = store.similarity_search("text 1") + result_texts = [doc.page_content for doc in result] + assert "text 1" in result_texts + +@pytest.mark.requires("lancedb") +def test_lancedb_add_texts() -> None: embeddings = FakeEmbeddings() - db = lancedb.connect("/tmp/lancedb") - texts = ["text 1"] - vectors = embeddings.embed_documents(texts) - table = db.create_table( - "my_table", - data=[ - {"vector": vectors[idx], "id": text, "text": text} - for idx, text in enumerate(texts) - ], - mode="overwrite", - ) - store = LanceDB(table, embeddings) + + store = LanceDB(embedding=embeddings) store.add_texts(["text 2"]) result = store.similarity_search("text 2") result_texts = [doc.page_content for doc in result] From 8009be862ee2f3bd6728ce359ae78f9e58709d09 Mon Sep 17 00:00:00 2001 From: Bagatur <22008038+baskaryan@users.noreply.github.com> Date: Mon, 19 Feb 2024 11:27:26 -0700 Subject: [PATCH 04/12] core[patch]: Release 0.1.24 (#17744) --- libs/core/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/core/pyproject.toml b/libs/core/pyproject.toml index 7aeb592eca11a..b87a95dd7dd64 100644 --- a/libs/core/pyproject.toml +++ b/libs/core/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "langchain-core" -version = "0.1.23" +version = "0.1.24" description = "Building applications with LLMs 
through composability" authors = [] license = "MIT" From 0d294760e742e0707a71afc7aad22e4d00b54ae5 Mon Sep 17 00:00:00 2001 From: Aymeric Roucher <69208727+aymeric-roucher@users.noreply.github.com> Date: Mon, 19 Feb 2024 19:33:15 +0100 Subject: [PATCH 05/12] Community: Fuse HuggingFace Endpoint-related classes into one (#17254) ## Description Fuse HuggingFace Endpoint-related classes into one: - [HuggingFaceHub](https://github.com/langchain-ai/langchain/blob/5ceaf784f324064b868a3cfed3fab7554173e7b3/libs/community/langchain_community/llms/huggingface_hub.py) - [HuggingFaceTextGenInference](https://github.com/langchain-ai/langchain/blob/5ceaf784f324064b868a3cfed3fab7554173e7b3/libs/community/langchain_community/llms/huggingface_text_gen_inference.py) - and [HuggingFaceEndpoint](https://github.com/langchain-ai/langchain/blob/5ceaf784f324064b868a3cfed3fab7554173e7b3/libs/community/langchain_community/llms/huggingface_endpoint.py) Are fused into - HuggingFaceEndpoint ## Issue The duplication of classes was creating a lack of clarity, and the additional effort of developing duplicate classes leads to issues like [this hack](https://github.com/langchain-ai/langchain/blob/5ceaf784f324064b868a3cfed3fab7554173e7b3/libs/community/langchain_community/llms/huggingface_endpoint.py#L159). ## Dependencies None, this removes dependencies. ## Twitter handle If you want to post about this: @AymericRoucher --------- Co-authored-by: Bagatur --- .../llms/huggingface_endpoint.ipynb | 238 +++++++++ .../integrations/llms/huggingface_hub.ipynb | 466 ------------------ .../llms/huggingface_textgen_inference.ipynb | 108 ---- docs/vercel.json | 8 + .../chat_models/huggingface.py | 27 +- .../llms/huggingface_endpoint.py | 389 +++++++++++---- .../llms/huggingface_hub.py | 3 + .../llms/huggingface_pipeline.py | 30 +- .../llms/huggingface_text_gen_inference.py | 3 + .../llms/test_huggingface_endpoint.py | 69 ++- 10 files changed, 614 insertions(+), 727 deletions(-) create mode 100644 docs/docs/integrations/llms/huggingface_endpoint.ipynb delete mode 100644 docs/docs/integrations/llms/huggingface_hub.ipynb delete mode 100644 docs/docs/integrations/llms/huggingface_textgen_inference.ipynb diff --git a/docs/docs/integrations/llms/huggingface_endpoint.ipynb b/docs/docs/integrations/llms/huggingface_endpoint.ipynb new file mode 100644 index 0000000000000..a71a987bac101 --- /dev/null +++ b/docs/docs/integrations/llms/huggingface_endpoint.ipynb @@ -0,0 +1,238 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Huggingface Endpoints\n", + "\n", + ">The [Hugging Face Hub](https://huggingface.co/docs/hub/index) is a platform with over 120k models, 20k datasets, and 50k demo apps (Spaces), all open source and publicly available, in an online platform where people can easily collaborate and build ML together.\n", + "\n", + "The `Hugging Face Hub` also offers various endpoints to build ML applications.\n", + "This example showcases how to connect to the different Endpoint types.\n", + "\n", + "In particular, text generation inference is powered by [Text Generation Inference](https://github.com/huggingface/text-generation-inference): a custom-built Rust, Python and gRPC server for blazing-fast text generation inference."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_community.llms import HuggingFaceEndpoint" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Installation and Setup" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To use, you should have the ``huggingface_hub`` python [package installed](https://huggingface.co/docs/huggingface_hub/installation)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%pip install --upgrade --quiet huggingface_hub" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# get a token: https://huggingface.co/docs/api-inference/quicktour#get-your-api-token\n", + "\n", + "from getpass import getpass\n", + "\n", + "HUGGINGFACEHUB_API_TOKEN = getpass()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "os.environ[\"HUGGINGFACEHUB_API_TOKEN\"] = HUGGINGFACEHUB_API_TOKEN" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Prepare Examples" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_community.llms import HuggingFaceEndpoint" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.chains import LLMChain\n", + "from langchain.prompts import PromptTemplate" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "question = \"Who won the FIFA World Cup in the year 1994? \"\n", + "\n", + "template = \"\"\"Question: {question}\n", + "\n", + "Answer: Let's think step by step.\"\"\"\n", + "\n", + "prompt = PromptTemplate.from_template(template)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Examples\n", + "\n", + "Here is an example of how you can access the `HuggingFaceEndpoint` integration of the free [Serverless Endpoints](https://huggingface.co/inference-endpoints/serverless) API." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "repo_id = \"mistralai/Mistral-7B-Instruct-v0.2\"\n", + "\n", + "llm = HuggingFaceEndpoint(\n", + " repo_id=repo_id, max_length=128, temperature=0.5, token=HUGGINGFACEHUB_API_TOKEN\n", + ")\n", + "llm_chain = LLMChain(prompt=prompt, llm=llm)\n", + "print(llm_chain.run(question))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Dedicated Endpoint\n", + "\n", + "\n", + "The free serverless API lets you implement solutions and iterate in no time, but it may be rate limited for heavy use cases, since the loads are shared with other requests.\n", + "\n", + "For enterprise workloads, the best option is to use [Inference Endpoints - Dedicated](https://huggingface.co/inference-endpoints/dedicated).\n", + "This gives access to a fully managed infrastructure that offers more flexibility and speed. 
These resources come with continuous support and uptime guarantees, as well as options like AutoScaling.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Set the url to your Inference Endpoint below\n", + "your_endpoint_url = \"https://fayjubiy2xqn36z0.us-east-1.aws.endpoints.huggingface.cloud\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "llm = HuggingFaceEndpoint(\n", + " endpoint_url=f\"{your_endpoint_url}\",\n", + " max_new_tokens=512,\n", + " top_k=10,\n", + " top_p=0.95,\n", + " typical_p=0.95,\n", + " temperature=0.01,\n", + " repetition_penalty=1.03,\n", + ")\n", + "llm(\"What did foo say about bar?\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Streaming" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler\n", + "from langchain_community.llms import HuggingFaceEndpoint\n", + "\n", + "llm = HuggingFaceEndpoint(\n", + " endpoint_url=f\"{your_endpoint_url}\",\n", + " max_new_tokens=512,\n", + " top_k=10,\n", + " top_p=0.95,\n", + " typical_p=0.95,\n", + " temperature=0.01,\n", + " repetition_penalty=1.03,\n", + " streaming=True,\n", + ")\n", + "llm(\"What did foo say about bar?\", callbacks=[StreamingStdOutCallbackHandler()])" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "agents", + "language": "python", + "name": "agents" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.9" + }, + "vscode": { + "interpreter": { + "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6" + } + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/docs/docs/integrations/llms/huggingface_hub.ipynb b/docs/docs/integrations/llms/huggingface_hub.ipynb deleted file mode 100644 index 67dbe3c41f385..0000000000000 --- a/docs/docs/integrations/llms/huggingface_hub.ipynb +++ /dev/null @@ -1,466 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "959300d4", - "metadata": {}, - "source": [ - "# Hugging Face Hub\n", - "\n", - ">The [Hugging Face Hub](https://huggingface.co/docs/hub/index) is a platform with over 120k models, 20k datasets, and 50k demo apps (Spaces), all open source and publicly available, in an online platform where people can easily collaborate and build ML together.\n", - "\n", - "This example showcases how to connect to the `Hugging Face Hub` and use different models." - ] - }, - { - "cell_type": "markdown", - "id": "1ddafc6d-7d7c-48fa-838f-0e7f50895ce3", - "metadata": {}, - "source": [ - "## Installation and Setup" - ] - }, - { - "cell_type": "markdown", - "id": "4c1b8450-5eaf-4d34-8341-2d785448a1ff", - "metadata": { - "tags": [] - }, - "source": [ - "To use, you should have the ``huggingface_hub`` python [package installed](https://huggingface.co/docs/huggingface_hub/installation)."
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d772b637-de00-4663-bd77-9bc96d798db2", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "%pip install --upgrade --quiet huggingface_hub" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "d597a792-354c-4ca5-b483-5965eec5d63d", - "metadata": {}, - "outputs": [ - { - "name": "stdin", - "output_type": "stream", - "text": [ - " ········\n" - ] - } - ], - "source": [ - "# get a token: https://huggingface.co/docs/api-inference/quicktour#get-your-api-token\n", - "\n", - "from getpass import getpass\n", - "\n", - "HUGGINGFACEHUB_API_TOKEN = getpass()" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "b8c5b88c-e4b8-4d0d-9a35-6e8f106452c2", - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "\n", - "os.environ[\"HUGGINGFACEHUB_API_TOKEN\"] = HUGGINGFACEHUB_API_TOKEN" - ] - }, - { - "cell_type": "markdown", - "id": "84dd44c1-c428-41f3-a911-520281386c94", - "metadata": {}, - "source": [ - "## Prepare Examples" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3fe7d1d1-241d-426a-acff-e208f1088871", - "metadata": {}, - "outputs": [], - "source": [ - "from langchain_community.llms import HuggingFaceHub" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "6620f39b-3d32-4840-8931-ff7d2c3e47e8", - "metadata": {}, - "outputs": [], - "source": [ - "from langchain.chains import LLMChain\n", - "from langchain.prompts import PromptTemplate" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "44adc1a0-9c0a-4f1e-af5a-fe04222e78d7", - "metadata": {}, - "outputs": [], - "source": [ - "question = \"Who won the FIFA World Cup in the year 1994? \"\n", - "\n", - "template = \"\"\"Question: {question}\n", - "\n", - "Answer: Let's think step by step.\"\"\"\n", - "\n", - "prompt = PromptTemplate.from_template(template)" - ] - }, - { - "cell_type": "markdown", - "id": "ddaa06cf-95ec-48ce-b0ab-d892a7909693", - "metadata": {}, - "source": [ - "## Examples\n", - "\n", - "Below are some examples of models you can access through the `Hugging Face Hub` integration." - ] - }, - { - "cell_type": "markdown", - "id": "4c16fded-70d1-42af-8bfa-6ddda9f0bc63", - "metadata": {}, - "source": [ - "### `Flan`, by `Google`" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "39c7eeac-01c4-486b-9480-e828a9e73e78", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "repo_id = \"google/flan-t5-xxl\" # See https://huggingface.co/models?pipeline_tag=text-generation&sort=downloads for some other options" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "3acf0069", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The FIFA World Cup was held in the year 1994. West Germany won the FIFA World Cup in 1994\n" - ] - } - ], - "source": [ - "llm = HuggingFaceHub(\n", - " repo_id=repo_id, model_kwargs={\"temperature\": 0.5, \"max_length\": 64}\n", - ")\n", - "llm_chain = LLMChain(prompt=prompt, llm=llm)\n", - "\n", - "print(llm_chain.run(question))" - ] - }, - { - "cell_type": "markdown", - "id": "1a5c97af-89bc-4e59-95c1-223742a9160b", - "metadata": {}, - "source": [ - "### `Dolly`, by `Databricks`\n", - "\n", - "See [Databricks](https://huggingface.co/databricks) organization page for a list of available models." 
- ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "521fcd2b-8e38-4920-b407-5c7d330411c9", - "metadata": {}, - "outputs": [], - "source": [ - "repo_id = \"databricks/dolly-v2-3b\"" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "9907ec3a-fe0c-4543-81c4-d42f9453f16c", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " First of all, the world cup was won by the Germany. Then the Argentina won the world cup in 2022. So, the Argentina won the world cup in 1994.\n", - "\n", - "\n", - "Question: Who\n" - ] - } - ], - "source": [ - "llm = HuggingFaceHub(\n", - " repo_id=repo_id, model_kwargs={\"temperature\": 0.5, \"max_length\": 64}\n", - ")\n", - "llm_chain = LLMChain(prompt=prompt, llm=llm)\n", - "print(llm_chain.run(question))" - ] - }, - { - "cell_type": "markdown", - "id": "03f6ae52-b5f9-4de6-832c-551cb3fa11ae", - "metadata": {}, - "source": [ - "### `Camel`, by `Writer`\n", - "\n", - "See [Writer's](https://huggingface.co/Writer) organization page for a list of available models." - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "257a091d-750b-4910-ac08-fe1c7b3fd98b", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "repo_id = \"Writer/camel-5b-hf\" # See https://huggingface.co/Writer for other options" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b06f6838-a11a-4d6a-88e3-91fa1747a2b3", - "metadata": {}, - "outputs": [], - "source": [ - "llm = HuggingFaceHub(\n", - " repo_id=repo_id, model_kwargs={\"temperature\": 0.5, \"max_length\": 64}\n", - ")\n", - "llm_chain = LLMChain(prompt=prompt, llm=llm)\n", - "print(llm_chain.run(question))" - ] - }, - { - "cell_type": "markdown", - "id": "2bf838eb-1083-402f-b099-b07c452418c8", - "metadata": {}, - "source": [ - "### `XGen`, by `Salesforce`\n", - "\n", - "See [more information](https://github.com/salesforce/xgen)." - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "18c78880-65d7-41d0-9722-18090efb60e9", - "metadata": {}, - "outputs": [], - "source": [ - "repo_id = \"Salesforce/xgen-7b-8k-base\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1b1150b4-ec30-4674-849e-6a41b085aa2b", - "metadata": {}, - "outputs": [], - "source": [ - "llm = HuggingFaceHub(\n", - " repo_id=repo_id, model_kwargs={\"temperature\": 0.5, \"max_length\": 64}\n", - ")\n", - "llm_chain = LLMChain(prompt=prompt, llm=llm)\n", - "print(llm_chain.run(question))" - ] - }, - { - "cell_type": "markdown", - "id": "0aca9f9e-f333-449c-97b2-10d1dbf17e75", - "metadata": {}, - "source": [ - "### `Falcon`, by `Technology Innovation Institute (TII)`\n", - "\n", - "See [more information](https://huggingface.co/tiiuae/falcon-40b)." 
- ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "496b35ac-5ee2-4b68-a6ce-232608f56c03", - "metadata": {}, - "outputs": [], - "source": [ - "repo_id = \"tiiuae/falcon-40b\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ff2541ad-e394-4179-93c2-7ae9c4ca2a25", - "metadata": {}, - "outputs": [], - "source": [ - "llm = HuggingFaceHub(\n", - " repo_id=repo_id, model_kwargs={\"temperature\": 0.5, \"max_length\": 64}\n", - ")\n", - "llm_chain = LLMChain(prompt=prompt, llm=llm)\n", - "print(llm_chain.run(question))" - ] - }, - { - "cell_type": "markdown", - "id": "7e15849b-5561-4bb9-86ec-6412ca10196a", - "metadata": {}, - "source": [ - "### `InternLM-Chat`, by `Shanghai AI Laboratory`\n", - "\n", - "See [more information](https://huggingface.co/internlm/internlm-7b)." - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "id": "3b533461-59f8-406e-907b-000841fa60a7", - "metadata": {}, - "outputs": [], - "source": [ - "repo_id = \"internlm/internlm-chat-7b\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c71210b9-5895-41a2-889a-f430d22fa1aa", - "metadata": {}, - "outputs": [], - "source": [ - "llm = HuggingFaceHub(\n", - " repo_id=repo_id, model_kwargs={\"max_length\": 128, \"temperature\": 0.8}\n", - ")\n", - "llm_chain = LLMChain(prompt=prompt, llm=llm)\n", - "print(llm_chain.run(question))" - ] - }, - { - "cell_type": "markdown", - "id": "4f2e5132-1713-42d7-919a-8c313744ce95", - "metadata": {}, - "source": [ - "### `Qwen`, by `Alibaba Cloud`\n", - "\n", - ">`Tongyi Qianwen-7B` (`Qwen-7B`) is a model with a scale of 7 billion parameters in the `Tongyi Qianwen` large model series developed by `Alibaba Cloud`. `Qwen-7B` is a large language model based on Transformer, which is trained on ultra-large-scale pre-training data.\n", - "\n", - "See [more information on HuggingFace](https://huggingface.co/Qwen/Qwen-7B) of on [GitHub](https://github.com/QwenLM/Qwen-7B).\n", - "\n", - "See here a [big example for LangChain integration and Qwen](https://github.com/QwenLM/Qwen-7B/blob/main/examples/langchain_tooluse.ipynb)." - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "f598b1ca-77c7-40f1-a83f-c21ea9910c88", - "metadata": {}, - "outputs": [], - "source": [ - "repo_id = \"Qwen/Qwen-7B\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2c97f4e2-d401-44fb-9da7-b60b2e2cc663", - "metadata": {}, - "outputs": [], - "source": [ - "llm = HuggingFaceHub(\n", - " repo_id=repo_id, model_kwargs={\"max_length\": 128, \"temperature\": 0.5}\n", - ")\n", - "llm_chain = LLMChain(prompt=prompt, llm=llm)\n", - "print(llm_chain.run(question))" - ] - }, - { - "cell_type": "markdown", - "id": "e3871376-ed0e-49a8-8d9b-7e60dbbd2b35", - "metadata": {}, - "source": [ - "### `Yi` series models, by `01.ai`\n", - "\n", - ">The `Yi` series models are large language models trained from scratch by developers at [01.ai](https://01.ai/). The first public release contains two bilingual(English/Chinese) base models with the parameter sizes of 6B(`Yi-6B`) and 34B(`Yi-34B`). Both of them are trained with 4K sequence length and can be extended to 32K during inference time. The `Yi-6B-200K` and `Yi-34B-200K` are base model with 200K context length.\n", - "\n", - "Here we test the [Yi-34B](https://huggingface.co/01-ai/Yi-34B) model." 
- ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "1c9d3125-3f50-48b8-93b6-b50847207afa", - "metadata": {}, - "outputs": [], - "source": [ - "repo_id = \"01-ai/Yi-34B\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8b661069-8229-4850-9f13-c4ca28c0c96b", - "metadata": {}, - "outputs": [], - "source": [ - "llm = HuggingFaceHub(\n", - " repo_id=repo_id, model_kwargs={\"max_length\": 128, \"temperature\": 0.5}\n", - ")\n", - "llm_chain = LLMChain(prompt=prompt, llm=llm)\n", - "print(llm_chain.run(question))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "dd6f3edc-9f97-47a6-ab2c-116756babbe6", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.12" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/docs/docs/integrations/llms/huggingface_textgen_inference.ipynb b/docs/docs/integrations/llms/huggingface_textgen_inference.ipynb deleted file mode 100644 index e9b5e31c38600..0000000000000 --- a/docs/docs/integrations/llms/huggingface_textgen_inference.ipynb +++ /dev/null @@ -1,108 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Huggingface TextGen Inference\n", - "\n", - "[Text Generation Inference](https://github.com/huggingface/text-generation-inference) is a Rust, Python and gRPC server for text generation inference. Used in production at [HuggingFace](https://huggingface.co/) to power LLMs api-inference widgets.\n", - "\n", - "This notebooks goes over how to use a self hosted LLM using `Text Generation Inference`." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To use, you should have the `text_generation` python package installed." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# !pip3 install text_generation" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from langchain_community.llms import HuggingFaceTextGenInference\n", - "\n", - "llm = HuggingFaceTextGenInference(\n", - " inference_server_url=\"http://localhost:8010/\",\n", - " max_new_tokens=512,\n", - " top_k=10,\n", - " top_p=0.95,\n", - " typical_p=0.95,\n", - " temperature=0.01,\n", - " repetition_penalty=1.03,\n", - ")\n", - "llm(\"What did foo say about bar?\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Streaming" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler\n", - "from langchain_community.llms import HuggingFaceTextGenInference\n", - "\n", - "llm = HuggingFaceTextGenInference(\n", - " inference_server_url=\"http://localhost:8010/\",\n", - " max_new_tokens=512,\n", - " top_k=10,\n", - " top_p=0.95,\n", - " typical_p=0.95,\n", - " temperature=0.01,\n", - " repetition_penalty=1.03,\n", - " streaming=True,\n", - ")\n", - "llm(\"What did foo say about bar?\", callbacks=[StreamingStdOutCallbackHandler()])" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.3" - }, - "vscode": { - "interpreter": { - "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6" - } - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/docs/vercel.json b/docs/vercel.json index 65a9861a4d531..9a1ab9d8abdc4 100644 --- a/docs/vercel.json +++ b/docs/vercel.json @@ -1,5 +1,13 @@ { "redirects": [ + { + "source": "/docs/integrations/llms/huggingface_textgen_inference", + "destination": "/docs/integrations/llms/huggingface_endpoint" + }, + { + "source": "/docs/integrations/llms/huggingface_hub", + "destination": "/docs/integrations/llms/huggingface_endpoint" + }, { "source": "/docs/integrations/llms/watsonxllm", "destination": "/docs/integrations/llms/ibm_watsonx" diff --git a/libs/community/langchain_community/chat_models/huggingface.py b/libs/community/langchain_community/chat_models/huggingface.py index 0af34a8cf04b2..143aff07172d1 100644 --- a/libs/community/langchain_community/chat_models/huggingface.py +++ b/libs/community/langchain_community/chat_models/huggingface.py @@ -1,4 +1,5 @@ """Hugging Face Chat Wrapper.""" + from typing import Any, List, Optional, Union from langchain_core.callbacks.manager import ( @@ -52,6 +53,7 @@ def __init__(self, **kwargs: Any): from transformers import AutoTokenizer self._resolve_model_id() + self.tokenizer = ( AutoTokenizer.from_pretrained(self.model_id) if self.tokenizer is None @@ -90,10 +92,10 @@ def _to_chat_prompt( ) -> str: """Convert a list of messages into a prompt format expected by wrapped LLM.""" if not messages: - raise ValueError("at least one HumanMessage must be provided") + raise ValueError("At least one HumanMessage must be provided!") if not isinstance(messages[-1], HumanMessage): - raise ValueError("last message must be a HumanMessage") + raise 
ValueError("Last message must be a HumanMessage!") messages_dicts = [self._to_chatml_format(m) for m in messages] @@ -135,20 +137,15 @@ def _resolve_model_id(self) -> None: from huggingface_hub import list_inference_endpoints available_endpoints = list_inference_endpoints("*") - - if isinstance(self.llm, HuggingFaceTextGenInference): - endpoint_url = self.llm.inference_server_url - - elif isinstance(self.llm, HuggingFaceEndpoint): - endpoint_url = self.llm.endpoint_url - - elif isinstance(self.llm, HuggingFaceHub): - # no need to look up model_id for HuggingFaceHub LLM + if isinstance(self.llm, HuggingFaceHub) or ( + hasattr(self.llm, "repo_id") and self.llm.repo_id + ): self.model_id = self.llm.repo_id return - + elif isinstance(self.llm, HuggingFaceTextGenInference): + endpoint_url: Optional[str] = self.llm.inference_server_url else: - raise ValueError(f"Unknown LLM type: {type(self.llm)}") + endpoint_url = self.llm.endpoint_url for endpoint in available_endpoints: if endpoint.url == endpoint_url: @@ -156,8 +153,8 @@ def _resolve_model_id(self) -> None: if not self.model_id: raise ValueError( - "Failed to resolve model_id" - f"Could not find model id for inference server provided: {endpoint_url}" + "Failed to resolve model_id:" + f"Could not find model id for inference server: {endpoint_url}" "Make sure that your Hugging Face token has access to the endpoint." ) diff --git a/libs/community/langchain_community/llms/huggingface_endpoint.py b/libs/community/langchain_community/llms/huggingface_endpoint.py index c14b2e24a8050..df25bf367e8b4 100644 --- a/libs/community/langchain_community/llms/huggingface_endpoint.py +++ b/libs/community/langchain_community/llms/huggingface_endpoint.py @@ -1,12 +1,17 @@ -from typing import Any, Dict, List, Mapping, Optional +import json +import logging +from typing import Any, AsyncIterator, Dict, Iterator, List, Mapping, Optional -import requests -from langchain_core.callbacks import CallbackManagerForLLMRun +from langchain_core.callbacks import ( + AsyncCallbackManagerForLLMRun, + CallbackManagerForLLMRun, +) from langchain_core.language_models.llms import LLM -from langchain_core.pydantic_v1 import Extra, root_validator -from langchain_core.utils import get_from_dict_or_env +from langchain_core.outputs import GenerationChunk +from langchain_core.pydantic_v1 import Extra, Field, root_validator +from langchain_core.utils import get_from_dict_or_env, get_pydantic_field_names -from langchain_community.llms.utils import enforce_stop_tokens +logger = logging.getLogger(__name__) VALID_TASKS = ( "text2text-generation", @@ -17,70 +22,198 @@ class HuggingFaceEndpoint(LLM): - """HuggingFace Endpoint models. - - To use, you should have the ``huggingface_hub`` python package installed, and the - environment variable ``HUGGINGFACEHUB_API_TOKEN`` set with your API token, or pass - it as a named parameter to the constructor. + """ + HuggingFace Endpoint. - Only supports `text-generation` and `text2text-generation` for now. + To use this class, you should have installed the ``huggingface_hub`` package, and + the environment variable ``HUGGINGFACEHUB_API_TOKEN`` set with your API token, + or given as a named parameter to the constructor. Example: .. 
code-block:: python - from langchain_community.llms import HuggingFaceEndpoint - endpoint_url = ( - "https://abcdefghijklmnop.us-east-1.aws.endpoints.huggingface.cloud" + # Basic Example (no streaming) + llm = HuggingFaceEndpoint( + endpoint_url="http://localhost:8010/", + max_new_tokens=512, + top_k=10, + top_p=0.95, + typical_p=0.95, + temperature=0.01, + repetition_penalty=1.03, + huggingfacehub_api_token="my-api-key" ) - hf = HuggingFaceEndpoint( - endpoint_url=endpoint_url, + print(llm("What is Deep Learning?")) + + # Streaming response example + from langchain_community.callbacks import streaming_stdout + + callbacks = [streaming_stdout.StreamingStdOutCallbackHandler()] + llm = HuggingFaceEndpoint( + endpoint_url="http://localhost:8010/", + max_new_tokens=512, + top_k=10, + top_p=0.95, + typical_p=0.95, + temperature=0.01, + repetition_penalty=1.03, + callbacks=callbacks, + streaming=True, huggingfacehub_api_token="my-api-key" ) + print(llm("What is Deep Learning?")) + """ - endpoint_url: str = "" + endpoint_url: Optional[str] = None """Endpoint URL to use.""" + repo_id: Optional[str] = None + """Repo to use.""" + huggingfacehub_api_token: Optional[str] = None + max_new_tokens: int = 512 + """Maximum number of generated tokens""" + top_k: Optional[int] = None + """The number of highest probability vocabulary tokens to keep for + top-k-filtering.""" + top_p: Optional[float] = 0.95 + """If set to < 1, only the smallest set of most probable tokens with probabilities + that add up to `top_p` or higher are kept for generation.""" + typical_p: Optional[float] = 0.95 + """Typical Decoding mass. See [Typical Decoding for Natural Language + Generation](https://arxiv.org/abs/2202.00666) for more information.""" + temperature: Optional[float] = 0.8 + """The value used to module the logits distribution.""" + repetition_penalty: Optional[float] = None + """The parameter for repetition penalty. 1.0 means no penalty. + See [this paper](https://arxiv.org/pdf/1909.05858.pdf) for more details.""" + return_full_text: bool = False + """Whether to prepend the prompt to the generated text""" + truncate: Optional[int] = None + """Truncate inputs tokens to the given size""" + stop_sequences: List[str] = Field(default_factory=list) + """Stop generating tokens if a member of `stop_sequences` is generated""" + seed: Optional[int] = None + """Random sampling seed""" + inference_server_url: str = "" + """text-generation-inference instance base url""" + timeout: int = 120 + """Timeout in seconds""" + streaming: bool = False + """Whether to generate a stream of tokens asynchronously""" + do_sample: bool = False + """Activate logits sampling""" + watermark: bool = False + """Watermarking with [A Watermark for Large Language Models] + (https://arxiv.org/abs/2301.10226)""" + server_kwargs: Dict[str, Any] = Field(default_factory=dict) + """Holds any text-generation-inference server parameters not explicitly specified""" + model_kwargs: Dict[str, Any] = Field(default_factory=dict) + """Holds any model parameters valid for `call` not explicitly specified""" + model: str + client: Any + async_client: Any task: Optional[str] = None """Task to call the model with. 
Should be a task that returns `generated_text` or `summary_text`.""" - model_kwargs: Optional[dict] = None - """Keyword arguments to pass to the model.""" - - huggingfacehub_api_token: Optional[str] = None class Config: """Configuration for this pydantic object.""" extra = Extra.forbid + @root_validator(pre=True) + def build_extra(cls, values: Dict[str, Any]) -> Dict[str, Any]: + """Build extra kwargs from additional params that were passed in.""" + all_required_field_names = get_pydantic_field_names(cls) + extra = values.get("model_kwargs", {}) + for field_name in list(values): + if field_name in extra: + raise ValueError(f"Found {field_name} supplied twice.") + if field_name not in all_required_field_names: + logger.warning( + f"""WARNING! {field_name} is not default parameter. + {field_name} was transferred to model_kwargs. + Please make sure that {field_name} is what you intended.""" + ) + extra[field_name] = values.pop(field_name) + + invalid_model_kwargs = all_required_field_names.intersection(extra.keys()) + if invalid_model_kwargs: + raise ValueError( + f"Parameters {invalid_model_kwargs} should be specified explicitly. " + f"Instead they were passed in as part of `model_kwargs` parameter." + ) + + values["model_kwargs"] = extra + if "endpoint_url" not in values and "repo_id" not in values: + raise ValueError( + "Please specify an `endpoint_url` or `repo_id` for the model." + ) + if "endpoint_url" in values and "repo_id" in values: + raise ValueError( + "Please specify either an `endpoint_url` OR a `repo_id`, not both." + ) + values["model"] = values.get("endpoint_url") or values.get("repo_id") + return values + @root_validator() def validate_environment(cls, values: Dict) -> Dict: - """Validate that api key and python package exists in environment.""" - huggingfacehub_api_token = get_from_dict_or_env( - values, "huggingfacehub_api_token", "HUGGINGFACEHUB_API_TOKEN" - ) + """Validate that package is installed and that the API token is valid.""" try: - from huggingface_hub.hf_api import HfApi - - try: - HfApi( - endpoint="https://huggingface.co", # Can be a Private Hub endpoint. - token=huggingfacehub_api_token, - ).whoami() - except Exception as e: - raise ValueError( - "Could not authenticate with huggingface_hub. " - "Please check your API token." - ) from e + from huggingface_hub import login except ImportError: raise ImportError( "Could not import huggingface_hub python package. " "Please install it with `pip install huggingface_hub`." ) - values["huggingfacehub_api_token"] = huggingfacehub_api_token + try: + huggingfacehub_api_token = get_from_dict_or_env( + values, "huggingfacehub_api_token", "HUGGINGFACEHUB_API_TOKEN" + ) + login(token=huggingfacehub_api_token) + except Exception as e: + raise ValueError( + "Could not authenticate with huggingface_hub. " + "Please check your API token." 
+ ) from e + + from huggingface_hub import AsyncInferenceClient, InferenceClient + + values["client"] = InferenceClient( + model=values["model"], + timeout=values["timeout"], + token=huggingfacehub_api_token, + **values["server_kwargs"], + ) + values["async_client"] = AsyncInferenceClient( + model=values["model"], + timeout=values["timeout"], + token=huggingfacehub_api_token, + **values["server_kwargs"], + ) + return values + @property + def _default_params(self) -> Dict[str, Any]: + """Get the default parameters for calling text generation inference API.""" + return { + "max_new_tokens": self.max_new_tokens, + "top_k": self.top_k, + "top_p": self.top_p, + "typical_p": self.typical_p, + "temperature": self.temperature, + "repetition_penalty": self.repetition_penalty, + "return_full_text": self.return_full_text, + "truncate": self.truncate, + "stop_sequences": self.stop_sequences, + "seed": self.seed, + "do_sample": self.do_sample, + "watermark": self.watermark, + **self.model_kwargs, + } + @property def _identifying_params(self) -> Mapping[str, Any]: """Get the identifying parameters.""" @@ -95,6 +228,13 @@ def _llm_type(self) -> str: """Return type of llm.""" return "huggingface_endpoint" + def _invocation_params( + self, runtime_stop: Optional[List[str]], **kwargs: Any + ) -> Dict[str, Any]: + params = {**self._default_params, **kwargs} + params["stop_sequences"] = params["stop_sequences"] + (runtime_stop or []) + return params + def _call( self, prompt: str, @@ -102,62 +242,129 @@ def _call( run_manager: Optional[CallbackManagerForLLMRun] = None, **kwargs: Any, ) -> str: - """Call out to HuggingFace Hub's inference endpoint. + """Call out to HuggingFace Hub's inference endpoint.""" + invocation_params = self._invocation_params(stop, **kwargs) + if self.streaming: + completion = "" + for chunk in self._stream(prompt, stop, run_manager, **invocation_params): + completion += chunk.text + return completion + else: + invocation_params["stop"] = invocation_params[ + "stop_sequences" + ] # porting 'stop_sequences' into the 'stop' argument + response = self.client.post( + json={"inputs": prompt, "parameters": invocation_params}, + stream=False, + task=self.task, + ) + response_text = json.loads(response.decode())[0]["generated_text"] - Args: - prompt: The prompt to pass into the model. - stop: Optional list of stop words to use when generating. + # Maybe the generation has stopped at one of the stop sequences: + # then we remove this stop sequence from the end of the generated text + for stop_seq in invocation_params["stop_sequences"]: + if response_text[-len(stop_seq) :] == stop_seq: + response_text = response_text[: -len(stop_seq)] + return response_text - Returns: - The string generated by the model. + async def _acall( + self, + prompt: str, + stop: Optional[List[str]] = None, + run_manager: Optional[AsyncCallbackManagerForLLMRun] = None, + **kwargs: Any, + ) -> str: + invocation_params = self._invocation_params(stop, **kwargs) + if self.streaming: + completion = "" + async for chunk in self._astream( + prompt, stop, run_manager, **invocation_params + ): + completion += chunk.text + return completion + else: + invocation_params["stop"] = invocation_params["stop_sequences"] + response = await self.async_client.post( + json={"inputs": prompt, "parameters": invocation_params}, + stream=False, + task=self.task, + ) + response_text = json.loads(response.decode())[0]["generated_text"] - Example: - .. 
code-block:: python + # Maybe the generation has stopped at one of the stop sequences: + # then remove this stop sequence from the end of the generated text + for stop_seq in invocation_params["stop_sequences"]: + if response_text[-len(stop_seq) :] == stop_seq: + response_text = response_text[: -len(stop_seq)] + return response_text - response = hf("Tell me a joke.") - """ - _model_kwargs = self.model_kwargs or {} + def _stream( + self, + prompt: str, + stop: Optional[List[str]] = None, + run_manager: Optional[CallbackManagerForLLMRun] = None, + **kwargs: Any, + ) -> Iterator[GenerationChunk]: + invocation_params = self._invocation_params(stop, **kwargs) - # payload samples - params = {**_model_kwargs, **kwargs} - parameter_payload = {"inputs": prompt, "parameters": params} + for response in self.client.text_generation( + prompt, **invocation_params, stream=True + ): + # identify stop sequence in generated text, if any + stop_seq_found: Optional[str] = None + for stop_seq in invocation_params["stop_sequences"]: + if stop_seq in response: + stop_seq_found = stop_seq - # HTTP headers for authorization - headers = { - "Authorization": f"Bearer {self.huggingfacehub_api_token}", - "Content-Type": "application/json", - } + # identify text to yield + text: Optional[str] = None + if stop_seq_found: + text = response[: response.index(stop_seq_found)] + else: + text = response - # send request - try: - response = requests.post( - self.endpoint_url, headers=headers, json=parameter_payload - ) - except requests.exceptions.RequestException as e: # This is the correct syntax - raise ValueError(f"Error raised by inference endpoint: {e}") - generated_text = response.json() - if "error" in generated_text: - raise ValueError( - f"Error raised by inference API: {generated_text['error']}" - ) - if self.task == "text-generation": - text = generated_text[0]["generated_text"] - # Remove prompt if included in generated text. - if text.startswith(prompt): - text = text[len(prompt) :] - elif self.task == "text2text-generation": - text = generated_text[0]["generated_text"] - elif self.task == "summarization": - text = generated_text[0]["summary_text"] - elif self.task == "conversational": - text = generated_text["response"][1] - else: - raise ValueError( - f"Got invalid task {self.task}, " - f"currently only {VALID_TASKS} are supported" - ) - if stop is not None: - # This is a bit hacky, but I can't figure out a better way to enforce - # stop tokens when making calls to huggingface_hub. 
- text = enforce_stop_tokens(text, stop) - return text + # yield text, if any + if text: + chunk = GenerationChunk(text=text) + yield chunk + if run_manager: + run_manager.on_llm_new_token(chunk.text) + + # break if stop sequence found + if stop_seq_found: + break + + async def _astream( + self, + prompt: str, + stop: Optional[List[str]] = None, + run_manager: Optional[AsyncCallbackManagerForLLMRun] = None, + **kwargs: Any, + ) -> AsyncIterator[GenerationChunk]: + invocation_params = self._invocation_params(stop, **kwargs) + async for response in await self.async_client.text_generation( + prompt, **invocation_params, stream=True + ): + # identify stop sequence in generated text, if any + stop_seq_found: Optional[str] = None + for stop_seq in invocation_params["stop_sequences"]: + if stop_seq in response: + stop_seq_found = stop_seq + + # identify text to yield + text: Optional[str] = None + if stop_seq_found: + text = response[: response.index(stop_seq_found)] + else: + text = response + + # yield text, if any + if text: + chunk = GenerationChunk(text=text) + yield chunk + if run_manager: + await run_manager.on_llm_new_token(chunk.text) + + # break if stop sequence found + if stop_seq_found: + break diff --git a/libs/community/langchain_community/llms/huggingface_hub.py b/libs/community/langchain_community/llms/huggingface_hub.py index 2d91231775231..2a5deaf766d61 100644 --- a/libs/community/langchain_community/llms/huggingface_hub.py +++ b/libs/community/langchain_community/llms/huggingface_hub.py @@ -1,6 +1,7 @@ import json from typing import Any, Dict, List, Mapping, Optional +from langchain_core._api.deprecation import deprecated from langchain_core.callbacks import CallbackManagerForLLMRun from langchain_core.language_models.llms import LLM from langchain_core.pydantic_v1 import Extra, root_validator @@ -19,8 +20,10 @@ } +@deprecated("0.0.21", removal="0.2.0", alternative="HuggingFaceEndpoint") class HuggingFaceHub(LLM): """HuggingFaceHub models. + ! This class is deprecated, you should use HuggingFaceEndpoint instead. 
To use, you should have the ``huggingface_hub`` python package installed, and the environment variable ``HUGGINGFACEHUB_API_TOKEN`` set with your API token, or pass diff --git a/libs/community/langchain_community/llms/huggingface_pipeline.py b/libs/community/langchain_community/llms/huggingface_pipeline.py index 388ba117c25ec..7a2b915054fb5 100644 --- a/libs/community/langchain_community/llms/huggingface_pipeline.py +++ b/libs/community/langchain_community/llms/huggingface_pipeline.py @@ -9,8 +9,6 @@ from langchain_core.outputs import Generation, LLMResult from langchain_core.pydantic_v1 import Extra -from langchain_community.llms.utils import enforce_stop_tokens - DEFAULT_MODEL_ID = "gpt2" DEFAULT_TASK = "text-generation" VALID_TASKS = ("text2text-generation", "text-generation", "summarization") @@ -201,7 +199,12 @@ def _generate( batch_prompts = prompts[i : i + self.batch_size] # Process batch of prompts - responses = self.pipeline(batch_prompts, **pipeline_kwargs) + responses = self.pipeline( + batch_prompts, + stop_sequence=stop, + return_full_text=False, + **pipeline_kwargs, + ) # Process each response in the batch for j, response in enumerate(responses): @@ -210,23 +213,7 @@ def _generate( response = response[0] if self.pipeline.task == "text-generation": - try: - from transformers.pipelines.text_generation import ReturnType - - remove_prompt = ( - self.pipeline._postprocess_params.get("return_type") - != ReturnType.NEW_TEXT - ) - except Exception as e: - logger.warning( - f"Unable to extract pipeline return_type. " - f"Received error:\n\n{e}" - ) - remove_prompt = True - if remove_prompt: - text = response["generated_text"][len(batch_prompts[j]) :] - else: - text = response["generated_text"] + text = response["generated_text"] elif self.pipeline.task == "text2text-generation": text = response["generated_text"] elif self.pipeline.task == "summarization": @@ -236,9 +223,6 @@ def _generate( f"Got invalid task {self.pipeline.task}, " f"currently only {VALID_TASKS} are supported" ) - if stop: - # Enforce stop tokens - text = enforce_stop_tokens(text, stop) # Append the processed text to results text_generations.append(text) diff --git a/libs/community/langchain_community/llms/huggingface_text_gen_inference.py b/libs/community/langchain_community/llms/huggingface_text_gen_inference.py index e03b6f7adcf83..9f56a949c6b94 100644 --- a/libs/community/langchain_community/llms/huggingface_text_gen_inference.py +++ b/libs/community/langchain_community/llms/huggingface_text_gen_inference.py @@ -1,6 +1,7 @@ import logging from typing import Any, AsyncIterator, Dict, Iterator, List, Optional +from langchain_core._api.deprecation import deprecated from langchain_core.callbacks import ( AsyncCallbackManagerForLLMRun, CallbackManagerForLLMRun, @@ -13,9 +14,11 @@ logger = logging.getLogger(__name__) +@deprecated("0.0.21", removal="0.2.0", alternative="HuggingFaceEndpoint") class HuggingFaceTextGenInference(LLM): """ HuggingFace text generation API. + ! This class is deprecated, you should use HuggingFaceEndpoint instead ! To use, you should have the `text-generation` python package installed and a text-generation server running. 
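A minimal migration sketch for code that used the two now-deprecated classes, assuming a local text-generation-inference server at `http://localhost:8010/` (the placeholder URL used in the docs above) and a valid `HUGGINGFACEHUB_API_TOKEN` in the environment; the parameter values are illustrative only, not mandated by this patch:

```python
from langchain_community.llms import HuggingFaceEndpoint

# Before: HuggingFaceTextGenInference(inference_server_url="http://localhost:8010/", ...)
# After: point the fused HuggingFaceEndpoint at the same server via `endpoint_url`.
tgi_llm = HuggingFaceEndpoint(
    endpoint_url="http://localhost:8010/",  # assumed local TGI server
    max_new_tokens=512,
    temperature=0.01,
)

# Before: HuggingFaceHub(repo_id="google/flan-t5-xxl", model_kwargs={"temperature": 0.5})
# After: pass `repo_id` directly; common generation kwargs are first-class fields.
hub_llm = HuggingFaceEndpoint(
    repo_id="google/flan-t5-xxl",
    max_new_tokens=64,
    temperature=0.5,
)

print(tgi_llm.invoke("What did foo say about bar?"))
print(hub_llm.invoke("Who won the FIFA World Cup in the year 1994?"))
```

Exactly one of `endpoint_url` or `repo_id` must be given; the new `build_extra` validator rejects calls that supply both or neither.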
diff --git a/libs/community/tests/integration_tests/llms/test_huggingface_endpoint.py b/libs/community/tests/integration_tests/llms/test_huggingface_endpoint.py index ca89d54cde775..11af7df374269 100644 --- a/libs/community/tests/integration_tests/llms/test_huggingface_endpoint.py +++ b/libs/community/tests/integration_tests/llms/test_huggingface_endpoint.py @@ -1,6 +1,5 @@ -"""Test HuggingFace API wrapper.""" +"""Test HuggingFace Endpoints.""" -import unittest from pathlib import Path import pytest @@ -10,51 +9,73 @@ from tests.integration_tests.llms.utils import assert_llm_equality -@unittest.skip( - "This test requires an inference endpoint. Tested with Hugging Face endpoints" -) -def test_huggingface_endpoint_text_generation() -> None: - """Test valid call to HuggingFace text generation model.""" +def test_huggingface_endpoint_call_error() -> None: + """Test valid call to HuggingFace that errors.""" + llm = HuggingFaceEndpoint(endpoint_url="", model_kwargs={"max_new_tokens": -1}) + with pytest.raises(ValueError): + llm("Say foo:") + + +def test_saving_loading_endpoint_llm(tmp_path: Path) -> None: + """Test saving/loading an HuggingFaceHub LLM.""" llm = HuggingFaceEndpoint( endpoint_url="", task="text-generation", model_kwargs={"max_new_tokens": 10} ) + llm.save(file_path=tmp_path / "hf.yaml") + loaded_llm = load_llm(tmp_path / "hf.yaml") + assert_llm_equality(llm, loaded_llm) + + +def test_huggingface_text_generation() -> None: + """Test valid call to HuggingFace text generation model.""" + llm = HuggingFaceEndpoint(repo_id="gpt2", model_kwargs={"max_new_tokens": 10}) output = llm("Say foo:") print(output) # noqa: T201 assert isinstance(output, str) -@unittest.skip( - "This test requires an inference endpoint. Tested with Hugging Face endpoints" -) -def test_huggingface_endpoint_text2text_generation() -> None: +def test_huggingface_text2text_generation() -> None: """Test valid call to HuggingFace text2text model.""" - llm = HuggingFaceEndpoint(endpoint_url="", task="text2text-generation") + llm = HuggingFaceEndpoint(repo_id="google/flan-t5-xl") output = llm("The capital of New York is") assert output == "Albany" -@unittest.skip( - "This test requires an inference endpoint. 
Tested with Hugging Face endpoints" -) -def test_huggingface_endpoint_summarization() -> None: +def test_huggingface_summarization() -> None: """Test valid call to HuggingFace summarization model.""" - llm = HuggingFaceEndpoint(endpoint_url="", task="summarization") + llm = HuggingFaceEndpoint(repo_id="facebook/bart-large-cnn") output = llm("Say foo:") assert isinstance(output, str) -def test_huggingface_endpoint_call_error() -> None: +def test_huggingface_call_error() -> None: """Test valid call to HuggingFace that errors.""" - llm = HuggingFaceEndpoint(model_kwargs={"max_new_tokens": -1}) + llm = HuggingFaceEndpoint(repo_id="gpt2", model_kwargs={"max_new_tokens": -1}) with pytest.raises(ValueError): llm("Say foo:") -def test_saving_loading_endpoint_llm(tmp_path: Path) -> None: - """Test saving/loading an HuggingFaceHub LLM.""" - llm = HuggingFaceEndpoint( - endpoint_url="", task="text-generation", model_kwargs={"max_new_tokens": 10} - ) +def test_saving_loading_llm(tmp_path: Path) -> None: + """Test saving/loading an HuggingFaceEndpoint LLM.""" + llm = HuggingFaceEndpoint(repo_id="gpt2", model_kwargs={"max_new_tokens": 10}) llm.save(file_path=tmp_path / "hf.yaml") loaded_llm = load_llm(tmp_path / "hf.yaml") assert_llm_equality(llm, loaded_llm) + + +def test_invocation_params_stop_sequences() -> None: + llm = HuggingFaceEndpoint() + assert llm._default_params["stop_sequences"] == [] + + runtime_stop = None + assert llm._invocation_params(runtime_stop)["stop_sequences"] == [] + assert llm._default_params["stop_sequences"] == [] + + runtime_stop = ["stop"] + assert llm._invocation_params(runtime_stop)["stop_sequences"] == ["stop"] + assert llm._default_params["stop_sequences"] == [] + + llm = HuggingFaceEndpoint(stop_sequences=["."]) + runtime_stop = ["stop"] + assert llm._invocation_params(runtime_stop)["stop_sequences"] == [".", "stop"] + assert llm._default_params["stop_sequences"] == ["."] From 86ae48b78152d9240d7c67e60a197cc3a99fc12f Mon Sep 17 00:00:00 2001 From: Pranav Agarwal <119924780+pranava-amzn@users.noreply.github.com> Date: Mon, 19 Feb 2024 10:36:37 -0800 Subject: [PATCH 06/12] experimental[minor]: Amazon Personalize support (#17436) ## Amazon Personalize support on Langchain This PR is a successor to this PR - https://github.com/langchain-ai/langchain/pull/13216 This PR introduces an integration with [Amazon Personalize](https://aws.amazon.com/personalize/) to help you to retrieve recommendations and use them in your natural language applications. This integration provides two new components: 1. An `AmazonPersonalize` client, that provides a wrapper around the Amazon Personalize API. 2. An `AmazonPersonalizeChain`, that provides a chain to pull in recommendations using the client, and then generating the response in natural language. We have added this to langchain_experimental since there was feedback from the previous PR about having this support in experimental rather than the core or community extensions. Here is some sample code to explain the usage. 
```python from langchain_experimental.recommenders import AmazonPersonalize from langchain_experimental.recommenders import AmazonPersonalizeChain from langchain.llms.bedrock import Bedrock recommender_arn = "" client = AmazonPersonalize( credentials_profile_name="default", region_name="us-west-2", recommender_arn=recommender_arn ) bedrock_llm = Bedrock( model_id="anthropic.claude-v2", region_name="us-west-2" ) chain = AmazonPersonalizeChain.from_llm( llm=bedrock_llm, client=client ) response = chain({'user_id': '1'}) ``` Reviewer: @3coins --- cookbook/amazon_personalize_how_to.ipynb | 284 ++++++++++++++++++ .../recommenders/__init__.py | 7 + .../recommenders/amazon_personalize.py | 195 ++++++++++++ .../recommenders/amazon_personalize_chain.py | 192 ++++++++++++ 4 files changed, 678 insertions(+) create mode 100644 cookbook/amazon_personalize_how_to.ipynb create mode 100644 libs/experimental/langchain_experimental/recommenders/__init__.py create mode 100644 libs/experimental/langchain_experimental/recommenders/amazon_personalize.py create mode 100644 libs/experimental/langchain_experimental/recommenders/amazon_personalize_chain.py diff --git a/cookbook/amazon_personalize_how_to.ipynb b/cookbook/amazon_personalize_how_to.ipynb new file mode 100644 index 0000000000000..7555e39d89494 --- /dev/null +++ b/cookbook/amazon_personalize_how_to.ipynb @@ -0,0 +1,284 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Amazon Personalize\n", + "\n", + "[Amazon Personalize](https://docs.aws.amazon.com/personalize/latest/dg/what-is-personalize.html) is a fully managed machine learning service that uses your data to generate item recommendations for your users. It can also generate user segments based on the users' affinity for certain items or item metadata.\n", + "\n", + "This notebook goes through how to use the Amazon Personalize Chain. You need an Amazon Personalize campaign_arn or a recommender_arn before you get started with the notebook below.\n", + "\n", + "Following is a [tutorial](https://github.com/aws-samples/retail-demo-store/blob/master/workshop/1-Personalization/Lab-1-Introduction-and-data-preparation.ipynb) to set up a campaign_arn/recommender_arn on Amazon Personalize. Once the campaign_arn/recommender_arn is set up, you can use it in the LangChain ecosystem. \n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. Install Dependencies" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "!pip install boto3" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2.
Sample Use-cases" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 2.1 [Use-case-1] Setup Amazon Personalize Client and retrieve recommendations" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_experimental.recommenders import AmazonPersonalize\n", + "\n", + "recommender_arn = \"\"\n", + "\n", + "client = AmazonPersonalize(\n", + " credentials_profile_name=\"default\",\n", + " region_name=\"us-west-2\",\n", + " recommender_arn=recommender_arn,\n", + ")\n", + "client.get_recommendations(user_id=\"1\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "source": [ + "### 2.2 [Use-case-2] Invoke Personalize Chain for summarizing results" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "outputs": [], + "source": [ + "from langchain.llms.bedrock import Bedrock\n", + "from langchain_experimental.recommenders import AmazonPersonalizeChain\n", + "\n", + "bedrock_llm = Bedrock(model_id=\"anthropic.claude-v2\", region_name=\"us-west-2\")\n", + "\n", + "# Create personalize chain\n", + "# Use return_direct=True if you do not want summary\n", + "chain = AmazonPersonalizeChain.from_llm(\n", + " llm=bedrock_llm, client=client, return_direct=False\n", + ")\n", + "response = chain({\"user_id\": \"1\"})\n", + "print(response)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 2.3 [Use-Case-3] Invoke Amazon Personalize Chain using your own prompt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.prompts.prompt import PromptTemplate\n", + "\n", + "RANDOM_PROMPT_QUERY = \"\"\"\n", + "You are a skilled publicist. Write a high-converting marketing email advertising several movies available in a video-on-demand streaming platform next week, \n", + " given the movie and user information below. Your email will leverage the power of storytelling and persuasive language. \n", + " The movies to recommend and their information is contained in the tag. \n", + " All movies in the tag must be recommended. Give a summary of the movies and why the human should watch them. \n", + " Put the email between tags.\n", + "\n", + " \n", + " {result} \n", + " \n", + "\n", + " Assistant:\n", + " \"\"\"\n", + "\n", + "RANDOM_PROMPT = PromptTemplate(input_variables=[\"result\"], template=RANDOM_PROMPT_QUERY)\n", + "\n", + "chain = AmazonPersonalizeChain.from_llm(\n", + " llm=bedrock_llm, client=client, return_direct=False, prompt_template=RANDOM_PROMPT\n", + ")\n", + "chain.run({\"user_id\": \"1\", \"item_id\": \"234\"})" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 2.4 [Use-case-4] Invoke Amazon Personalize in a Sequential Chain " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.chains import LLMChain, SequentialChain\n", + "\n", + "RANDOM_PROMPT_QUERY_2 = \"\"\"\n", + "You are a skilled publicist. Write a high-converting marketing email advertising several movies available in a video-on-demand streaming platform next week, \n", + " given the movie and user information below. Your email will leverage the power of storytelling and persuasive language. 
\n", + " You want the email to impress the user, so make it appealing to them.\n", + " The movies to recommend and their information is contained in the tag. \n", + " All movies in the tag must be recommended. Give a summary of the movies and why the human should watch them. \n", + " Put the email between tags.\n", + "\n", + " \n", + " {result}\n", + " \n", + "\n", + " Assistant:\n", + " \"\"\"\n", + "\n", + "RANDOM_PROMPT_2 = PromptTemplate(\n", + " input_variables=[\"result\"], template=RANDOM_PROMPT_QUERY_2\n", + ")\n", + "personalize_chain_instance = AmazonPersonalizeChain.from_llm(\n", + " llm=bedrock_llm, client=client, return_direct=True\n", + ")\n", + "random_chain_instance = LLMChain(llm=bedrock_llm, prompt=RANDOM_PROMPT_2)\n", + "overall_chain = SequentialChain(\n", + " chains=[personalize_chain_instance, random_chain_instance],\n", + " input_variables=[\"user_id\"],\n", + " verbose=True,\n", + ")\n", + "overall_chain.run({\"user_id\": \"1\", \"item_id\": \"234\"})" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "source": [ + "### 2.5 [Use-case-5] Invoke Amazon Personalize and retrieve metadata " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "outputs": [], + "source": [ + "recommender_arn = \"\"\n", + "metadata_column_names = [\n", + " \"\",\n", + " \"\",\n", + "]\n", + "metadataMap = {\"ITEMS\": metadata_column_names}\n", + "\n", + "client = AmazonPersonalize(\n", + " credentials_profile_name=\"default\",\n", + " region_name=\"us-west-2\",\n", + " recommender_arn=recommender_arn,\n", + ")\n", + "client.get_recommendations(user_id=\"1\", metadataColumns=metadataMap)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "source": [ + "### 2.6 [Use-Case 6] Invoke Personalize Chain with returned metadata for summarizing results" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "outputs": [], + "source": [ + "bedrock_llm = Bedrock(model_id=\"anthropic.claude-v2\", region_name=\"us-west-2\")\n", + "\n", + "# Create personalize chain\n", + "# Use return_direct=True if you do not want summary\n", + "chain = AmazonPersonalizeChain.from_llm(\n", + " llm=bedrock_llm, client=client, return_direct=False\n", + ")\n", + "response = chain({\"user_id\": \"1\", \"metadata_columns\": metadataMap})\n", + "print(response)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.7" + }, + "vscode": { + "interpreter": { + "hash": "15e58ce194949b77a891bd4339ce3d86a9bd138e905926019517993f97db9e6c" + } + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/libs/experimental/langchain_experimental/recommenders/__init__.py b/libs/experimental/langchain_experimental/recommenders/__init__.py new file mode 100644 index 0000000000000..ec06f5541894d --- /dev/null +++ b/libs/experimental/langchain_experimental/recommenders/__init__.py @@ -0,0 +1,7 @@ +"""Amazon Personalize primitives.""" +from 
langchain_experimental.recommenders.amazon_personalize import AmazonPersonalize +from langchain_experimental.recommenders.amazon_personalize_chain import ( + AmazonPersonalizeChain, +) + +__all__ = ["AmazonPersonalize", "AmazonPersonalizeChain"] diff --git a/libs/experimental/langchain_experimental/recommenders/amazon_personalize.py b/libs/experimental/langchain_experimental/recommenders/amazon_personalize.py new file mode 100644 index 0000000000000..b2300f0a19c3a --- /dev/null +++ b/libs/experimental/langchain_experimental/recommenders/amazon_personalize.py @@ -0,0 +1,195 @@ +from typing import Any, List, Mapping, Optional, Sequence + + +class AmazonPersonalize: + """Amazon Personalize Runtime wrapper for executing real-time operations: + https://docs.aws.amazon.com/personalize/latest/dg/API_Operations_Amazon_Personalize_Runtime.html + + Args: + campaign_arn: str, Optional: The Amazon Resource Name (ARN) of the campaign + to use for getting recommendations. + recommender_arn: str, Optional: The Amazon Resource Name (ARN) of the + recommender to use to get recommendations + client: Optional: boto3 client + credentials_profile_name: str, Optional :AWS profile name + region_name: str, Optional: AWS region, e.g., us-west-2 + + Example: + .. code-block:: python + + personalize_client = AmazonPersonalize ( + campaignArn='' ) + """ + + def __init__( + self, + campaign_arn: Optional[str] = None, + recommender_arn: Optional[str] = None, + client: Optional[Any] = None, + credentials_profile_name: Optional[str] = None, + region_name: Optional[str] = None, + ): + self.campaign_arn = campaign_arn + self.recommender_arn = recommender_arn + + if campaign_arn and recommender_arn: + raise ValueError( + "Cannot initialize AmazonPersonalize with both " + "campaign_arn and recommender_arn." + ) + + if not campaign_arn and not recommender_arn: + raise ValueError( + "Cannot initialize AmazonPersonalize. Provide one of " + "campaign_arn or recommender_arn" + ) + + try: + if client is not None: + self.client = client + else: + import boto3 + import botocore.config + + if credentials_profile_name is not None: + session = boto3.Session(profile_name=credentials_profile_name) + else: + # use default credentials + session = boto3.Session() + + client_params = {} + if region_name: + client_params["region_name"] = region_name + + service = "personalize-runtime" + session_config = botocore.config.Config(user_agent_extra="langchain") + client_params["config"] = session_config + self.client = session.client(service, **client_params) + + except ImportError: + raise ModuleNotFoundError( + "Could not import boto3 python package. " + "Please install it with `pip install boto3`." 
+ ) + + def get_recommendations( + self, + user_id: Optional[str] = None, + item_id: Optional[str] = None, + filter_arn: Optional[str] = None, + filter_values: Optional[Mapping[str, str]] = None, + num_results: Optional[int] = 10, + context: Optional[Mapping[str, str]] = None, + promotions: Optional[Sequence[Mapping[str, Any]]] = None, + metadata_columns: Optional[Mapping[str, Sequence[str]]] = None, + **kwargs: Any, + ) -> Mapping[str, Any]: + """Get recommendations from Amazon Personalize: + https://docs.aws.amazon.com/personalize/latest/dg/API_RS_GetRecommendations.html + + Args: + user_id: str, Optional: The user identifier + for which to retrieve recommendations + item_id: str, Optional: The item identifier + for which to retrieve recommendations + filter_arn: str, Optional: The ARN of the filter + to apply to the returned recommendations + filter_values: Mapping, Optional: The values + to use when filtering recommendations. + num_results: int, Optional: Default=10: The number of results to return + context: Mapping, Optional: The contextual metadata + to use when getting recommendations + promotions: Sequence, Optional: The promotions + to apply to the recommendation request. + metadata_columns: Mapping, Optional: The metadata Columns to be returned + as part of the response. + + Returns: + response: Mapping[str, Any]: Returns an itemList and recommendationId. + + Example: + .. code-block:: python + + personalize_client = AmazonPersonalize(campaignArn='' )\n + response = personalize_client.get_recommendations(user_id="1") + + """ + if not user_id and not item_id: + raise ValueError("One of user_id or item_id is required") + + if filter_arn: + kwargs["filterArn"] = filter_arn + if filter_values: + kwargs["filterValues"] = filter_values + if user_id: + kwargs["userId"] = user_id + if num_results: + kwargs["numResults"] = num_results + if context: + kwargs["context"] = context + if promotions: + kwargs["promotions"] = promotions + if item_id: + kwargs["itemId"] = item_id + if metadata_columns: + kwargs["metadataColumns"] = metadata_columns + if self.campaign_arn: + kwargs["campaignArn"] = self.campaign_arn + if self.recommender_arn: + kwargs["recommenderArn"] = self.recommender_arn + + return self.client.get_recommendations(**kwargs) + + def get_personalized_ranking( + self, + user_id: str, + input_list: List[str], + filter_arn: Optional[str] = None, + filter_values: Optional[Mapping[str, str]] = None, + context: Optional[Mapping[str, str]] = None, + metadata_columns: Optional[Mapping[str, Sequence[str]]] = None, + **kwargs: Any, + ) -> Mapping[str, Any]: + """Re-ranks a list of recommended items for the given user. + https://docs.aws.amazon.com/personalize/latest/dg/API_RS_GetPersonalizedRanking.html + + Args: + user_id: str, Required: The user identifier + for which to retrieve recommendations + input_list: List[str], Required: A list of items (by itemId) to rank + filter_arn: str, Optional: The ARN of the filter to apply + filter_values: Mapping, Optional: The values to use + when filtering recommendations. + context: Mapping, Optional: The contextual metadata + to use when getting recommendations + metadata_columns: Mapping, Optional: The metadata Columns to be returned + as part of the response. + + Returns: + response: Mapping[str, Any]: Returns personalizedRanking + and recommendationId. + + Example: + .. 
code-block:: python + +                personalize_client = AmazonPersonalize(campaignArn='')\n +                response = personalize_client.get_personalized_ranking(user_id="1", +                    input_list=["123", "256"]) + +        """ + +        if filter_arn: +            kwargs["filterArn"] = filter_arn +        if filter_values: +            kwargs["filterValues"] = filter_values +        if user_id: +            kwargs["userId"] = user_id +        if input_list: +            kwargs["inputList"] = input_list +        if context: +            kwargs["context"] = context +        if metadata_columns: +            kwargs["metadataColumns"] = metadata_columns +        kwargs["campaignArn"] = self.campaign_arn + +        return self.client.get_personalized_ranking(**kwargs) diff --git a/libs/experimental/langchain_experimental/recommenders/amazon_personalize_chain.py b/libs/experimental/langchain_experimental/recommenders/amazon_personalize_chain.py new file mode 100644 index 0000000000000..4c187a8006463 --- /dev/null +++ b/libs/experimental/langchain_experimental/recommenders/amazon_personalize_chain.py @@ -0,0 +1,192 @@ +from __future__ import annotations + +from typing import Any, Dict, List, Mapping, Optional, cast + +from langchain.callbacks.manager import ( +    CallbackManagerForChainRun, +) +from langchain.chains import LLMChain +from langchain.chains.base import Chain +from langchain.prompts.prompt import PromptTemplate +from langchain.schema.language_model import BaseLanguageModel + +from langchain_experimental.recommenders.amazon_personalize import AmazonPersonalize + +SUMMARIZE_PROMPT_QUERY = """ +Summarize the recommended items for a user from the items list in the <result> tag below. +Draw correlations among the items in the list and provide a summary. +    <result> +    {result} +    </result> +""" + +SUMMARIZE_PROMPT = PromptTemplate( +    input_variables=["result"], template=SUMMARIZE_PROMPT_QUERY +) + +INTERMEDIATE_STEPS_KEY = "intermediate_steps" + +# Input Key Names to be used +USER_ID_INPUT_KEY = "user_id" +ITEM_ID_INPUT_KEY = "item_id" +INPUT_LIST_INPUT_KEY = "input_list" +FILTER_ARN_INPUT_KEY = "filter_arn" +FILTER_VALUES_INPUT_KEY = "filter_values" +CONTEXT_INPUT_KEY = "context" +PROMOTIONS_INPUT_KEY = "promotions" +METADATA_COLUMNS_INPUT_KEY = "metadata_columns" +RESULT_OUTPUT_KEY = "result" + + +class AmazonPersonalizeChain(Chain): +    """Amazon Personalize Chain for retrieving recommendations +    from Amazon Personalize, and summarizing +    the recommendations in natural language. +    It will return only the raw recommendations if return_direct=True. +    Can also be used in sequential chains for working with +    the output of Amazon Personalize. + +    Example: +        .. code-block:: python + +        chain = AmazonPersonalizeChain.from_llm(llm=agent_llm, client=personalize_lg, +            return_direct=True)\n +        response = chain.run({'user_id': '1'})\n +        response = chain.run({'user_id': '1', 'item_id': '234'}) +    """ + +    client: AmazonPersonalize +    summarization_chain: LLMChain +    return_direct: bool = False +    return_intermediate_steps: bool = False +    is_ranking_recipe: bool = False + +    @property +    def input_keys(self) -> List[str]: +        """This returns an empty list since all input_keys are optional +        and none is required. + +        :meta private: +        """ +        return [] + +    @property +    def output_keys(self) -> List[str]: +        """Will always return the result key.
+ +        :meta private: +        """ +        return [RESULT_OUTPUT_KEY] + +    @classmethod +    def from_llm( +        cls, +        llm: BaseLanguageModel, +        client: AmazonPersonalize, +        prompt_template: PromptTemplate = SUMMARIZE_PROMPT, +        is_ranking_recipe: bool = False, +        **kwargs: Any, +    ) -> AmazonPersonalizeChain: +        """Initializes the Personalize chain with the LLM, the Personalize +        client, and the prompt to be used + +        Args: +            llm: BaseLanguageModel: The LLM to be used in the Chain +            client: AmazonPersonalize: The client created to support +                invoking AmazonPersonalize +            prompt_template: PromptTemplate: The prompt template which can be +                invoked with the output from Amazon Personalize +            is_ranking_recipe: bool: default: False: specifies +                if the trained recipe is USER_PERSONALIZED_RANKING + +        Example: +            .. code-block:: python + +                chain = AmazonPersonalizeChain.from_llm(llm=agent_llm, +                    client=personalize_lg, return_direct=True)\n +                response = chain.run({'user_id': '1'})\n +                response = chain.run({'user_id': '1', 'item_id': '234'}) + +                RANDOM_PROMPT = PromptTemplate(input_variables=["result"], +                    template="Summarize recommendations in {result}") +                chain = AmazonPersonalizeChain.from_llm(llm=agent_llm, +                    client=personalize_lg, prompt_template=RANDOM_PROMPT)\n +        """ +        summarization_chain = LLMChain(llm=llm, prompt=prompt_template) + +        return cls( +            summarization_chain=summarization_chain, +            client=client, +            is_ranking_recipe=is_ranking_recipe, +            **kwargs, +        ) + +    def _call( +        self, +        inputs: Mapping[str, Any], +        run_manager: Optional[CallbackManagerForChainRun] = None, +    ) -> Dict[str, Any]: +        """Retrieves recommendations by invoking Amazon Personalize, +        and invokes an LLM using the default/overridden +        prompt template with the output from Amazon Personalize. + +        Args: +            inputs: Mapping[str, Any]: Provide input identifiers in a map. +                For example - {'user_id': '1'} or +                {'user_id': '1', 'item_id': '123'}. You can also pass the +                filter_arn and filter_values as an +                input.
+ """ + _run_manager = run_manager or CallbackManagerForChainRun.get_noop_manager() + callbacks = _run_manager.get_child() + + user_id = inputs.get(USER_ID_INPUT_KEY) + item_id = inputs.get(ITEM_ID_INPUT_KEY) + input_list = inputs.get(INPUT_LIST_INPUT_KEY) + filter_arn = inputs.get(FILTER_ARN_INPUT_KEY) + filter_values = inputs.get(FILTER_VALUES_INPUT_KEY) + promotions = inputs.get(PROMOTIONS_INPUT_KEY) + context = inputs.get(CONTEXT_INPUT_KEY) + metadata_columns = inputs.get(METADATA_COLUMNS_INPUT_KEY) + + intermediate_steps: List = [] + intermediate_steps.append({"Calling Amazon Personalize"}) + + if self.is_ranking_recipe: + response = self.client.get_personalized_ranking( + user_id=str(user_id), + input_list=cast(List[str], input_list), + filter_arn=filter_arn, + filter_values=filter_values, + context=context, + metadata_columns=metadata_columns, + ) + else: + response = self.client.get_recommendations( + user_id=user_id, + item_id=item_id, + filter_arn=filter_arn, + filter_values=filter_values, + context=context, + promotions=promotions, + metadata_columns=metadata_columns, + ) + + _run_manager.on_text("Call to Amazon Personalize complete \n") + + if self.return_direct: + final_result = response + else: + result = self.summarization_chain( + {RESULT_OUTPUT_KEY: response}, callbacks=callbacks + ) + final_result = result[self.summarization_chain.output_key] + + intermediate_steps.append({"context": response}) + chain_result: Dict[str, Any] = {RESULT_OUTPUT_KEY: final_result} + if self.return_intermediate_steps: + chain_result[INTERMEDIATE_STEPS_KEY] = intermediate_steps + return chain_result + + @property + def _chain_type(self) -> str: + return "amazon_personalize_chain" From 6275d8b1bf88c6b02ec02dad91c3b09461b0ddf3 Mon Sep 17 00:00:00 2001 From: Christophe Bornet Date: Mon, 19 Feb 2024 19:47:38 +0100 Subject: [PATCH 07/12] docs: Fix AstraDBChatMessageHistory docstrings (#17740) --- .../chat_message_histories/astradb.py | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/libs/community/langchain_community/chat_message_histories/astradb.py b/libs/community/langchain_community/chat_message_histories/astradb.py index f820480ff26b7..5b118a0ac9f3c 100644 --- a/libs/community/langchain_community/chat_message_histories/astradb.py +++ b/libs/community/langchain_community/chat_message_histories/astradb.py @@ -26,16 +26,16 @@ class AstraDBChatMessageHistory(BaseChatMessageHistory): """Chat message history that stores history in Astra DB. - Args (only keyword-arguments accepted): + Args: session_id: arbitrary key that is used to store the messages of a single chat session. - collection_name (str): name of the Astra DB collection to create/use. - token (Optional[str]): API token for Astra DB usage. - api_endpoint (Optional[str]): full URL to the API endpoint, + collection_name: name of the Astra DB collection to create/use. + token: API token for Astra DB usage. + api_endpoint: full URL to the API endpoint, such as "https://-us-east1.apps.astra.datastax.com". - astra_db_client (Optional[Any]): *alternative to token+api_endpoint*, + astra_db_client: *alternative to token+api_endpoint*, you can pass an already-created 'astrapy.db.AstraDB' instance. - namespace (Optional[str]): namespace (aka keyspace) where the + namespace: namespace (aka keyspace) where the collection is created. Defaults to the database's "default namespace". 
""" @@ -51,7 +51,6 @@ def __init__( setup_mode: SetupMode = SetupMode.SYNC, pre_delete_collection: bool = False, ) -> None: - """Create an Astra DB chat message history.""" self.astra_env = _AstraDBCollectionEnvironment( collection_name=collection_name, token=token, @@ -96,7 +95,6 @@ def messages(self, messages: List[BaseMessage]) -> None: raise NotImplementedError("Use add_messages instead") async def aget_messages(self) -> List[BaseMessage]: - """Retrieve all session messages from DB""" await self.astra_env.aensure_db_setup() docs = self.async_collection.paginated_find( filter={ @@ -117,7 +115,6 @@ async def aget_messages(self) -> List[BaseMessage]: return messages def add_messages(self, messages: Sequence[BaseMessage]) -> None: - """Write a message to the table""" self.astra_env.ensure_db_setup() docs = [ { @@ -130,7 +127,6 @@ def add_messages(self, messages: Sequence[BaseMessage]) -> None: self.collection.chunked_insert_many(docs) async def aadd_messages(self, messages: Sequence[BaseMessage]) -> None: - """Write a message to the table""" await self.astra_env.aensure_db_setup() docs = [ { @@ -143,11 +139,9 @@ async def aadd_messages(self, messages: Sequence[BaseMessage]) -> None: await self.async_collection.chunked_insert_many(docs) def clear(self) -> None: - """Clear session memory from DB""" self.astra_env.ensure_db_setup() self.collection.delete_many(filter={"session_id": self.session_id}) async def aclear(self) -> None: - """Clear session memory from DB""" await self.astra_env.aensure_db_setup() await self.async_collection.delete_many(filter={"session_id": self.session_id}) From 919ebcc5966fd609d555ed26b8488827b3eac189 Mon Sep 17 00:00:00 2001 From: CogniJT <131272471+CogniJT@users.noreply.github.com> Date: Tue, 20 Feb 2024 00:24:13 +0530 Subject: [PATCH 08/12] community[minor]: CogniSwitch Agent Toolkit for LangChain (#17312) **Description**: CogniSwitch focusses on making GenAI usage more reliable. It abstracts out the complexity & decision making required for tuning processing, storage & retrieval. Using simple APIs documents / URLs can be processed into a Knowledge Graph that can then be used to answer questions. **Dependencies**: No dependencies. Just network calls & API key required **Tag maintainer**: @hwchase17 **Twitter handle**: https://github.com/CogniSwitch **Documentation**: Please check `docs/docs/integrations/toolkits/cogniswitch.ipynb` **Tests**: The usual tool & toolkits tests using `test_imports.py` PR has passed linting and testing before this submission. 
--------- Co-authored-by: Saicharan Sridhara <145636106+saiCogniswitch@users.noreply.github.com> --- .../integrations/toolkits/cogniswitch.ipynb | 326 ++++++++++++++ .../agent_toolkits/__init__.py | 2 + .../agent_toolkits/cogniswitch/__init__.py | 1 + .../agent_toolkits/cogniswitch/toolkit.py | 40 ++ .../langchain_community/tools/__init__.py | 38 ++ .../tools/cogniswitch/__init__.py | 1 + .../tools/cogniswitch/tool.py | 399 ++++++++++++++++++ .../unit_tests/agent_toolkits/test_imports.py | 1 + .../tests/unit_tests/tools/test_imports.py | 4 + .../tests/unit_tests/tools/test_public_api.py | 4 + 10 files changed, 816 insertions(+) create mode 100644 docs/docs/integrations/toolkits/cogniswitch.ipynb create mode 100644 libs/community/langchain_community/agent_toolkits/cogniswitch/__init__.py create mode 100644 libs/community/langchain_community/agent_toolkits/cogniswitch/toolkit.py create mode 100644 libs/community/langchain_community/tools/cogniswitch/__init__.py create mode 100644 libs/community/langchain_community/tools/cogniswitch/tool.py diff --git a/docs/docs/integrations/toolkits/cogniswitch.ipynb b/docs/docs/integrations/toolkits/cogniswitch.ipynb new file mode 100644 index 0000000000000..836f425cf6055 --- /dev/null +++ b/docs/docs/integrations/toolkits/cogniswitch.ipynb @@ -0,0 +1,326 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "19062701", + "metadata": {}, + "source": [ + "## Cogniswitch Tools\n", + "\n", + "**Use CogniSwitch to build production-ready applications that can consume, organize, and retrieve knowledge flawlessly. Using the framework of your choice, in this case LangChain, CogniSwitch helps alleviate the stress of decision making when it comes to choosing the right storage and retrieval formats. It also eradicates reliability issues and hallucinations in the responses that are generated. Get started by interacting with your knowledge in just two simple steps.**\n", + "\n", + "Visit [https://www.cogniswitch.ai/developer](https://www.cogniswitch.ai/developer?utm_source=langchain&utm_medium=langchainbuild&utm_id=dev) to register.\n\n", + "**Registration:** \n\n", + "- Sign up with your email and verify your registration. \n\n", + "- You will get a mail with a platform token and an OAuth token for using the services.\n\n\n", + "\n", + "**Step 1: Instantiate the toolkit and get the tools:**\n\n", + "- Instantiate the CogniSwitch toolkit with the CogniSwitch token, OpenAI API key, and OAuth token, and get the tools. \n", + "\n", + "**Step 2: Instantiate the agent with the tools and LLM:**\n", + "- Instantiate the agent executor with the list of CogniSwitch tools and the LLM.\n", + "\n", + "**Step 3: CogniSwitch Store Tool:** \n", + "\n", + "***CogniSwitch knowledge source file tool***\n", + "- Use the agent to upload a file by giving the file path (formats that are currently supported are .pdf, .docx, .doc, .txt, .html). \n", + "- The content from the file will be processed by CogniSwitch and stored in your knowledge store. \n", + "\n", + "***CogniSwitch knowledge source url tool***\n", + "- Use the agent to upload a URL. \n", + "- The content from the URL will be processed by CogniSwitch and stored in your knowledge store. \n", + "\n", + "**Step 4: CogniSwitch Status Tool:**\n", + "- Use the agent to know the status of the document uploaded with a document name.\n", + "- You can also check the status of document processing in the CogniSwitch console. \n", + "\n", + "**Step 5: CogniSwitch Answer Tool:**\n", + "- Use the agent to ask your question.\n", + "- You will get the answer from your knowledge as the response. \n" + ] + }, + { + "cell_type": "markdown", + "id": "1435b193", + "metadata": {}, + "source": [ + "### Import necessary libraries" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "8d86323b", + "metadata": {}, + "outputs": [], + "source": [ + "import warnings\n", + "\n", + "warnings.filterwarnings(\"ignore\")\n", + "\n", + "import os\n", + "\n", + "from langchain.agents.agent_toolkits import create_conversational_retrieval_agent\n", + "from langchain.chat_models import ChatOpenAI\n", + "from langchain_community.agent_toolkits import CogniswitchToolkit" + ] + }, + { + "cell_type": "markdown", + "id": "6e6acf0e", + "metadata": {}, + "source": [ + "### Cogniswitch platform token, OAuth token and OpenAI API key" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "3d2dfc9f", + "metadata": {}, + "outputs": [], + "source": [ + "cs_token = \"Your CogniSwitch token\"\n", + "OAI_token = \"Your OpenAI API token\"\n", + "oauth_token = \"Your CogniSwitch authentication token\"\n", + "\n", + "os.environ[\"OPENAI_API_KEY\"] = OAI_token" + ] + }, + { + "cell_type": "markdown", + "id": "320e02fc", + "metadata": {}, + "source": [ + "### Instantiate the cogniswitch toolkit with the credentials" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "89f58167", + "metadata": {}, + "outputs": [], + "source": [ + "cogniswitch_toolkit = CogniswitchToolkit(\n", + "    cs_token=cs_token, OAI_token=OAI_token, apiKey=oauth_token\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "16901682", + "metadata": {}, + "source": [ + "### Get the list of cogniswitch tools" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "288d07f6", + "metadata": {}, + "outputs": [], + "source": [ + "tool_lst = cogniswitch_toolkit.get_tools()" + ] + }, + { + "cell_type": "markdown", + "id": "4aae43a3", + "metadata": {}, + "source": [ + "### Instantiate the LLM" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "4d67e5bb", + "metadata": {}, + "outputs": [], + "source": [ + "llm = ChatOpenAI(\n", + "    temperature=0,\n", + "    openai_api_key=OAI_token,\n", + "    max_tokens=1500,\n", + "    model_name=\"gpt-3.5-turbo-0613\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "04179282", + "metadata": {}, + "source": [ + "### Create an agent executor" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "2153e758", + "metadata": {}, + "outputs": [], + "source": [ + "agent_executor = create_conversational_retrieval_agent(llm, tool_lst, verbose=False)" + ] + }, + { + "cell_type": "markdown", + "id": "42c9890e", + "metadata": {}, + "source": [ + "### Invoke the agent to upload a URL" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "794b4fba", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The URL https://cogniswitch.ai/developer has been uploaded successfully. The status of the document is currently being processed. 
You will receive an email notification once the processing is complete.\n" + ] + } + ], + "source": [ + "response = agent_executor.invoke(\"upload this url https://cogniswitch.ai/developer\")\n", + "\n", + "print(response[\"output\"])" + ] + }, + { + "cell_type": "markdown", + "id": "544fe8f9", + "metadata": {}, + "source": [ + "### Invoke the agent to upload a File" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "fd0addfc", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The file example_file.txt has been uploaded successfully. The status of the document is currently being processed. You will receive an email notification once the processing is complete.\n" + ] + } + ], + "source": [ + "response = agent_executor.invoke(\"upload this file example_file.txt\")\n", + "\n", + "print(response[\"output\"])" + ] + }, + { + "cell_type": "markdown", + "id": "02827e1b", + "metadata": {}, + "source": [ + "### Invoke the agent to get the status of a document" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "f424e6c5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The status of the document example_file.txt is as follows:\n", + "\n", + "- Created On: 2024-01-22T19:07:42.000+00:00\n", + "- Modified On: 2024-01-22T19:07:42.000+00:00\n", + "- Document Entry ID: 153\n", + "- Status: 0 (Processing)\n", + "- Original File Name: example_file.txt\n", + "- Saved File Name: 1705950460069example_file29393011.txt\n", + "\n", + "The document is currently being processed.\n" + ] + } + ], + "source": [ + "response = agent_executor.invoke(\"Tell me the status of this document example_file.txt\")\n", + "\n", + "print(response[\"output\"])" + ] + }, + { + "cell_type": "markdown", + "id": "0ba9aca9", + "metadata": {}, + "source": [ + "### Invoke the agent with query and get the answer" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "e73e963f", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CogniSwitch can help develop GenAI applications in several ways:\n", + "\n", + "1. Knowledge Extraction: CogniSwitch can extract knowledge from various sources such as documents, websites, and databases. It can analyze and store data from these sources, making it easier to access and utilize the information for GenAI applications.\n", + "\n", + "2. Natural Language Processing: CogniSwitch has advanced natural language processing capabilities. It can understand and interpret human language, allowing GenAI applications to interact with users in a more conversational and intuitive manner.\n", + "\n", + "3. Sentiment Analysis: CogniSwitch can analyze the sentiment of text data, such as customer reviews or social media posts. This can be useful in developing GenAI applications that can understand and respond to the emotions and opinions of users.\n", + "\n", + "4. Knowledge Base Integration: CogniSwitch can integrate with existing knowledge bases or create new ones. This allows GenAI applications to access a vast amount of information and provide accurate and relevant responses to user queries.\n", + "\n", + "5. Document Analysis: CogniSwitch can analyze documents and extract key information such as entities, relationships, and concepts. 
This can be valuable in developing GenAI applications that can understand and process large amounts of textual data.\n", + "\n", + "Overall, CogniSwitch provides a range of AI-powered capabilities that can enhance the development of GenAI applications by enabling knowledge extraction, natural language processing, sentiment analysis, knowledge base integration, and document analysis.\n" + ] + } + ], + "source": [ + "response = agent_executor.invoke(\"How can cogniswitch help develop GenAI applications?\")\n", + "\n", + "print(response[\"output\"])" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "langchain_repo", + "language": "python", + "name": "langchain_repo" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.17" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/libs/community/langchain_community/agent_toolkits/__init__.py b/libs/community/langchain_community/agent_toolkits/__init__.py index 3f6bf3033190d..bbb3820cb3fac 100644 --- a/libs/community/langchain_community/agent_toolkits/__init__.py +++ b/libs/community/langchain_community/agent_toolkits/__init__.py @@ -18,6 +18,7 @@ from langchain_community.agent_toolkits.azure_cognitive_services import ( AzureCognitiveServicesToolkit, ) +from langchain_community.agent_toolkits.cogniswitch.toolkit import CogniswitchToolkit from langchain_community.agent_toolkits.connery import ConneryToolkit from langchain_community.agent_toolkits.file_management.toolkit import ( FileManagementToolkit, @@ -51,6 +52,7 @@ "AINetworkToolkit", "AmadeusToolkit", "AzureCognitiveServicesToolkit", + "CogniswitchToolkit", "ConneryToolkit", "FileManagementToolkit", "GmailToolkit", diff --git a/libs/community/langchain_community/agent_toolkits/cogniswitch/__init__.py b/libs/community/langchain_community/agent_toolkits/cogniswitch/__init__.py new file mode 100644 index 0000000000000..df1d84976c49a --- /dev/null +++ b/libs/community/langchain_community/agent_toolkits/cogniswitch/__init__.py @@ -0,0 +1 @@ +"""CogniSwitch Toolkit""" diff --git a/libs/community/langchain_community/agent_toolkits/cogniswitch/toolkit.py b/libs/community/langchain_community/agent_toolkits/cogniswitch/toolkit.py new file mode 100644 index 0000000000000..36ec5ae0f3609 --- /dev/null +++ b/libs/community/langchain_community/agent_toolkits/cogniswitch/toolkit.py @@ -0,0 +1,40 @@ +from typing import List + +from langchain_community.agent_toolkits.base import BaseToolkit +from langchain_community.tools import BaseTool +from langchain_community.tools.cogniswitch.tool import ( + CogniswitchKnowledgeRequest, + CogniswitchKnowledgeSourceFile, + CogniswitchKnowledgeSourceURL, + CogniswitchKnowledgeStatus, +) + + +class CogniswitchToolkit(BaseToolkit): + """ + Toolkit for CogniSwitch. 
+ + Use the toolkit to get all the tools present in the cogniswitch and + use them to interact with your knowledge + """ + + cs_token: str # cogniswitch token + OAI_token: str # OpenAI API token + apiKey: str # Cogniswitch OAuth token + + def get_tools(self) -> List[BaseTool]: + """Get the tools in the toolkit.""" + return [ + CogniswitchKnowledgeStatus( + cs_token=self.cs_token, OAI_token=self.OAI_token, apiKey=self.apiKey + ), + CogniswitchKnowledgeRequest( + cs_token=self.cs_token, OAI_token=self.OAI_token, apiKey=self.apiKey + ), + CogniswitchKnowledgeSourceFile( + cs_token=self.cs_token, OAI_token=self.OAI_token, apiKey=self.apiKey + ), + CogniswitchKnowledgeSourceURL( + cs_token=self.cs_token, OAI_token=self.OAI_token, apiKey=self.apiKey + ), + ] diff --git a/libs/community/langchain_community/tools/__init__.py b/libs/community/langchain_community/tools/__init__.py index 3456ef10bc1e6..59ad157de5bf8 100644 --- a/libs/community/langchain_community/tools/__init__.py +++ b/libs/community/langchain_community/tools/__init__.py @@ -118,6 +118,32 @@ def _import_brave_search_tool() -> Any: return BraveSearch +def _import_cogniswitch_store_file_tool() -> Any: + from langchain_community.tools.cogniswitch.tool import ( + CogniswitchKnowledgeSourceFile, + ) + + return CogniswitchKnowledgeSourceFile + + +def _import_cogniswitch_store_url_tool() -> Any: + from langchain_community.tools.cogniswitch.tool import CogniswitchKnowledgeSourceURL + + return CogniswitchKnowledgeSourceURL + + +def _import_cogniswitch_answer_tool() -> Any: + from langchain_community.tools.cogniswitch.tool import CogniswitchKnowledgeRequest + + return CogniswitchKnowledgeRequest + + +def _import_cogniswitch_knowledge_status_tool() -> Any: + from langchain_community.tools.cogniswitch.tool import CogniswitchKnowledgeStatus + + return CogniswitchKnowledgeStatus + + def _import_connery_tool() -> Any: from langchain_community.tools.connery import ConneryAction @@ -803,6 +829,14 @@ def __getattr__(name: str) -> Any: return _import_bing_search_tool_BingSearchRun() elif name == "BraveSearch": return _import_brave_search_tool() + elif name == "CogniswitchKnowledgeSourceFile": + return _import_cogniswitch_store_file_tool() + elif name == "CogniswitchKnowledgeSourceURL": + return _import_cogniswitch_store_url_tool() + elif name == "CogniswitchKnowledgeRequest": + return _import_cogniswitch_answer_tool() + elif name == "CogniswitchKnowledgeStatus": + return _import_cogniswitch_knowledge_status_tool() elif name == "ConneryAction": return _import_connery_tool() elif name == "DuckDuckGoSearchResults": @@ -1043,6 +1077,10 @@ def __getattr__(name: str) -> Any: "BingSearchRun", "BraveSearch", "ClickTool", + "CogniswitchKnowledgeSourceFile", + "CogniswitchKnowledgeSourceURL", + "CogniswitchKnowledgeRequest", + "CogniswitchKnowledgeStatus", "ConneryAction", "CopyFileTool", "CurrentWebPageTool", diff --git a/libs/community/langchain_community/tools/cogniswitch/__init__.py b/libs/community/langchain_community/tools/cogniswitch/__init__.py new file mode 100644 index 0000000000000..3a89a8d7d3a9a --- /dev/null +++ b/libs/community/langchain_community/tools/cogniswitch/__init__.py @@ -0,0 +1 @@ +"Cogniswitch Tools" diff --git a/libs/community/langchain_community/tools/cogniswitch/tool.py b/libs/community/langchain_community/tools/cogniswitch/tool.py new file mode 100644 index 0000000000000..e2878e6ed544e --- /dev/null +++ b/libs/community/langchain_community/tools/cogniswitch/tool.py @@ -0,0 +1,399 @@ +from __future__ import annotations + +from typing 
import Any, Dict, Optional + +import requests +from langchain_core.callbacks import CallbackManagerForToolRun +from langchain_core.tools import BaseTool + + +class CogniswitchKnowledgeRequest(BaseTool): + """ + A tool for interacting with the Cogniswitch service to answer questions. + name: str = "cogniswitch_knowledge_request" + description: str = ( + "A wrapper around cogniswitch service to answer the question + from the knowledge base." + "Input should be a search query." + ) + """ + + name: str = "cogniswitch_knowledge_request" + description: str = """A wrapper around cogniswitch service to + answer the question from the knowledge base.""" + cs_token: str + OAI_token: str + apiKey: str + api_url = "https://api.cogniswitch.ai:8243/cs-api/0.0.1/cs/knowledgeRequest" + + def _run( + self, + query: str, + run_manager: Optional[CallbackManagerForToolRun] = None, + ) -> Dict[str, Any]: + """ + Use the tool to answer a query. + + Args: + query (str): Natural language query, + that you would like to ask to your knowledge graph. + run_manager (Optional[CallbackManagerForChainRun]): + Manager for chain run callbacks. + + Returns: + Dict[str, Any]: Output dictionary containing + the 'response' from the service. + """ + response = self.answer_cs(self.cs_token, self.OAI_token, query, self.apiKey) + return response + + def answer_cs(self, cs_token: str, OAI_token: str, query: str, apiKey: str) -> dict: + """ + Send a query to the Cogniswitch service and retrieve the response. + + Args: + cs_token (str): Cogniswitch token. + OAI_token (str): OpenAI token. + apiKey (str): OAuth token. + query (str): Query to be answered. + + Returns: + dict: Response JSON from the Cogniswitch service. + """ + if not cs_token: + raise ValueError("Missing cs_token") + if not OAI_token: + raise ValueError("Missing OpenAI token") + if not apiKey: + raise ValueError("Missing cogniswitch OAuth token") + if not query: + raise ValueError("Missing input query") + + headers = { + "apiKey": apiKey, + "platformToken": cs_token, + "openAIToken": OAI_token, + } + + data = {"query": query} + response = requests.post(self.api_url, headers=headers, verify=False, data=data) + return response.json() + + +class CogniswitchKnowledgeStatus(BaseTool): + """ + A cogniswitch tool for interacting with the Cogniswitch services to know the + status of the document or url uploaded. + name: str = "cogniswitch_knowledge_status" + description: str = ( + "A wrapper around cogniswitch services to know the status of + the document uploaded from a url or a file. " + "Input should be a file name or the url link" + ) + """ + + name: str = "cogniswitch_knowledge_status" + description: str = """A wrapper around cogniswitch services to know + the status of the document uploaded from a url or a file.""" + cs_token: str + OAI_token: str + apiKey: str + knowledge_status_url = ( + "https://api.cogniswitch.ai:8243/cs-api/0.0.1/cs/knowledgeSource/status" + ) + + def _run( + self, + document_name: str, + run_manager: Optional[CallbackManagerForToolRun] = None, + ) -> Dict[str, Any]: + """ + Use the tool to know the status of the document uploaded. + + Args: + document_name (str): name of the document or + the url uploaded + run_manager (Optional[CallbackManagerForChainRun]): + Manager for chain run callbacks. + + Returns: + Dict[str, Any]: Output dictionary containing + the 'response' from the service. 
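+
+        Example (illustrative sketch; the tokens below are placeholders):
+            .. code-block:: python
+
+                tool = CogniswitchKnowledgeStatus(
+                    cs_token="", OAI_token="", apiKey=""
+                )
+                tool.run("example_file.txt")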
+ """ + response = self.knowledge_status(document_name) + return response + + def knowledge_status(self, document_name: str) -> dict: + """ + Use this function to know the status of the document or the URL uploaded + Args: + document_name (str): The document name or the url that is uploaded. + + Returns: + dict: Response JSON from the Cogniswitch service. + """ + + params = {"docName": document_name, "platformToken": self.cs_token} + headers = { + "apiKey": self.apiKey, + "openAIToken": self.OAI_token, + "platformToken": self.cs_token, + } + response = requests.get( + self.knowledge_status_url, + headers=headers, + params=params, + verify=False, + ) + if response.status_code == 200: + source_info = response.json() + source_data = dict(source_info[-1]) + status = source_data.get("status") + if status == 0: + source_data["status"] = "SUCCESS" + elif status == 1: + source_data["status"] = "PROCESSING" + elif status == 2: + source_data["status"] = "UPLOADED" + elif status == 3: + source_data["status"] = "FAILURE" + elif status == 4: + source_data["status"] = "UPLOAD_FAILURE" + elif status == 5: + source_data["status"] = "REJECTED" + + if "filePath" in source_data.keys(): + source_data.pop("filePath") + if "savedFileName" in source_data.keys(): + source_data.pop("savedFileName") + if "integrationConfigId" in source_data.keys(): + source_data.pop("integrationConfigId") + if "metaData" in source_data.keys(): + source_data.pop("metaData") + if "docEntryId" in source_data.keys(): + source_data.pop("docEntryId") + return source_data + else: + # error_message = response.json()["message"] + return { + "message": response.status_code, + } + + +class CogniswitchKnowledgeSourceFile(BaseTool): + """ + A cogniswitch tool for interacting with the Cogniswitch services to store data. + name: str = "cogniswitch_knowledge_source_file" + description: str = ( + "This calls the CogniSwitch services to analyze & store data from a file. + If the input looks like a file path, assign that string value to file key. + Assign document name & description only if provided in input." + ) + """ + + name: str = "cogniswitch_knowledge_source_file" + description: str = """ + This calls the CogniSwitch services to analyze & store data from a file. + If the input looks like a file path, assign that string value to file key. + Assign document name & description only if provided in input. + """ + cs_token: str + OAI_token: str + apiKey: str + knowledgesource_file = ( + "https://api.cogniswitch.ai:8243/cs-api/0.0.1/cs/knowledgeSource/file" + ) + + def _run( + self, + file: Optional[str] = None, + document_name: Optional[str] = None, + document_description: Optional[str] = None, + run_manager: Optional[CallbackManagerForToolRun] = None, + ) -> Dict[str, Any]: + """ + Execute the tool to store the data given from a file. + This calls the CogniSwitch services to analyze & store data from a file. + If the input looks like a file path, assign that string value to file key. + Assign document name & description only if provided in input. + + Args: + file Optional[str]: The file path of your knowledge + document_name Optional[str]: Name of your knowledge document + document_description Optional[str]: Description of your knowledge document + run_manager (Optional[CallbackManagerForChainRun]): + Manager for chain run callbacks. + + Returns: + Dict[str, Any]: Output dictionary containing + the 'response' from the service. 
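+
+        Example (illustrative sketch; the tokens and file path below are
+        placeholders):
+            .. code-block:: python
+
+                tool = CogniswitchKnowledgeSourceFile(
+                    cs_token="", OAI_token="", apiKey=""
+                )
+                tool.run({"file": "example_file.txt"})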
+ """ + if not file: + return { + "message": "No input provided", + } + else: + response = self.store_data( + file=file, + document_name=document_name, + document_description=document_description, + ) + return response + + def store_data( + self, + file: Optional[str], + document_name: Optional[str], + document_description: Optional[str], + ) -> dict: + """ + Store data using the Cogniswitch service. + This calls the CogniSwitch services to analyze & store data from a file. + If the input looks like a file path, assign that string value to file key. + Assign document name & description only if provided in input. + + Args: + file (Optional[str]): file path of your file. + the current files supported by the files are + .txt, .pdf, .docx, .doc, .html + document_name (Optional[str]): Name of the document you are uploading. + document_description (Optional[str]): Description of the document. + + Returns: + dict: Response JSON from the Cogniswitch service. + """ + headers = { + "apiKey": self.apiKey, + "openAIToken": self.OAI_token, + "platformToken": self.cs_token, + } + data: Dict[str, Any] + if not document_name: + document_name = "" + if not document_description: + document_description = "" + + if file is not None: + files = {"file": open(file, "rb")} + + data = { + "documentName": document_name, + "documentDescription": document_description, + } + response = requests.post( + self.knowledgesource_file, + headers=headers, + verify=False, + data=data, + files=files, + ) + if response.status_code == 200: + return response.json() + else: + return {"message": "Bad Request"} + + +class CogniswitchKnowledgeSourceURL(BaseTool): + """ + A cogniswitch tool for interacting with the Cogniswitch services to store data. + name: str = "cogniswitch_knowledge_source_url" + description: str = ( + "This calls the CogniSwitch services to analyze & store data from a url. + the URL is provided in input, assign that value to the url key. + Assign document name & description only if provided in input" + ) + """ + + name: str = "cogniswitch_knowledge_source_url" + description: str = """ + This calls the CogniSwitch services to analyze & store data from a url. + the URL is provided in input, assign that value to the url key. + Assign document name & description only if provided in input""" + cs_token: str + OAI_token: str + apiKey: str + knowledgesource_url = ( + "https://api.cogniswitch.ai:8243/cs-api/0.0.1/cs/knowledgeSource/url" + ) + + def _run( + self, + url: Optional[str] = None, + document_name: Optional[str] = None, + document_description: Optional[str] = None, + run_manager: Optional[CallbackManagerForToolRun] = None, + ) -> Dict[str, Any]: + """ + Execute the tool to store the data given from a url. + This calls the CogniSwitch services to analyze & store data from a url. + the URL is provided in input, assign that value to the url key. + Assign document name & description only if provided in input. + + Args: + url Optional[str]: The website/url link of your knowledge + document_name Optional[str]: Name of your knowledge document + document_description Optional[str]: Description of your knowledge document + run_manager (Optional[CallbackManagerForChainRun]): + Manager for chain run callbacks. + + Returns: + Dict[str, Any]: Output dictionary containing + the 'response' from the service. 
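+
+        Example (illustrative sketch; the tokens below are placeholders):
+            .. code-block:: python
+
+                tool = CogniswitchKnowledgeSourceURL(
+                    cs_token="", OAI_token="", apiKey=""
+                )
+                tool.run({"url": "https://cogniswitch.ai/developer"})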
+        """ +        if not url: +            return { +                "message": "No input provided", +            } +        response = self.store_data( +            url=url, +            document_name=document_name, +            document_description=document_description, +        ) +        return response + +    def store_data( +        self, +        url: Optional[str], +        document_name: Optional[str], +        document_description: Optional[str], +    ) -> dict: +        """ +        Store data using the Cogniswitch service. +        This calls the CogniSwitch services to analyze & store data from a url. +        The URL is provided in the input; assign that value to the url key. +        Assign document name & description only if provided in input. + +        Args: +            url (Optional[str]): URL link. +            document_name (Optional[str]): Name of the document you are uploading. +            document_description (Optional[str]): Description of the document. + +        Returns: +            dict: Response JSON from the Cogniswitch service. +        """ +        headers = { +            "apiKey": self.apiKey, +            "openAIToken": self.OAI_token, +            "platformToken": self.cs_token, +        } +        data: Dict[str, Any] +        if not document_name: +            document_name = "" +        if not document_description: +            document_description = "" +        if not url: +            return { +                "message": "No input provided", +            } +        else: +            data = {"url": url} +        response = requests.post( +            self.knowledgesource_url, +            headers=headers, +            verify=False, +            data=data, +        ) +        if response.status_code == 200: +            return response.json() +        else: +            return {"message": "Bad Request"} diff --git a/libs/community/tests/unit_tests/agent_toolkits/test_imports.py b/libs/community/tests/unit_tests/agent_toolkits/test_imports.py index 3a7ca10efdf26..c2dbdd3833399 100644 --- a/libs/community/tests/unit_tests/agent_toolkits/test_imports.py +++ b/libs/community/tests/unit_tests/agent_toolkits/test_imports.py @@ -28,6 +28,7 @@ "create_pbi_chat_agent", "create_spark_sql_agent", "create_sql_agent", +    "CogniswitchToolkit", ] diff --git a/libs/community/tests/unit_tests/tools/test_imports.py b/libs/community/tests/unit_tests/tools/test_imports.py index 4bf70aa0842f9..95fd4315575a5 100644 --- a/libs/community/tests/unit_tests/tools/test_imports.py +++ b/libs/community/tests/unit_tests/tools/test_imports.py @@ -24,6 +24,10 @@ "BingSearchRun", "BraveSearch", "ClickTool", +    "CogniswitchKnowledgeSourceFile", +    "CogniswitchKnowledgeSourceURL", +    "CogniswitchKnowledgeRequest", +    "CogniswitchKnowledgeStatus", "ConneryAction", "CopyFileTool", "CurrentWebPageTool", diff --git a/libs/community/tests/unit_tests/tools/test_public_api.py b/libs/community/tests/unit_tests/tools/test_public_api.py index 31ea8327022e1..1595dd4710917 100644 --- a/libs/community/tests/unit_tests/tools/test_public_api.py +++ b/libs/community/tests/unit_tests/tools/test_public_api.py @@ -25,6 +25,10 @@ "BingSearchRun", "BraveSearch", "ClickTool", +    "CogniswitchKnowledgeSourceFile", +    "CogniswitchKnowledgeStatus", +    "CogniswitchKnowledgeSourceURL", +    "CogniswitchKnowledgeRequest", "ConneryAction", "CopyFileTool", "CurrentWebPageTool", From ea61302f71ee2a05f2e24b43372c34239b340697 Mon Sep 17 00:00:00 2001 From: Karim Lalani Date: Mon, 19 Feb 2024 12:54:52 -0600 Subject: [PATCH 09/12] community[patch]: bug fix - add empty metadata when metadata not provided (#17669) Code fix to include an empty metadata value in `aadd_texts` when metadata is not provided. 
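In effect (a minimal sketch of the new behavior; variable names follow the diff below):

```python
# Records written by aadd_texts now always carry a "metadata" field,
# defaulting to an empty value when the caller passes no metadatas.
data = {"text": "hello", "embedding": [0.1, 0.2, 0.3]}
metadatas = None  # caller provided no metadata
idx = 0
if metadatas is not None and idx < len(metadatas):
    data["metadata"] = metadatas[idx]
else:
    data["metadata"] = []  # empty placeholder, as in the patch
```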
--- libs/community/langchain_community/vectorstores/surrealdb.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/libs/community/langchain_community/vectorstores/surrealdb.py b/libs/community/langchain_community/vectorstores/surrealdb.py index ef65c5ec6b002..34f002305e1fd 100644 --- a/libs/community/langchain_community/vectorstores/surrealdb.py +++ b/libs/community/langchain_community/vectorstores/surrealdb.py @@ -116,6 +116,8 @@ async def aadd_texts( data = {"text": text, "embedding": embeddings[idx]} if metadatas is not None and idx < len(metadatas): data["metadata"] = metadatas[idx] # type: ignore[assignment] + else: + data["metadata"] = [] record = await self.sdb.create( self.collection, data, From ad285ca15c9f0bc14a90824717b8e990f19245f4 Mon Sep 17 00:00:00 2001 From: Bagatur <22008038+baskaryan@users.noreply.github.com> Date: Mon, 19 Feb 2024 12:13:33 -0700 Subject: [PATCH 10/12] community[patch]: Release 0.0.21 (#17750) --- libs/community/poetry.lock | 24 ++++++++++++++++++++---- libs/community/pyproject.toml | 4 ++-- 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/libs/community/poetry.lock b/libs/community/poetry.lock index 1a1dd4ec4cb52..be5c69eebb2ad 100644 --- a/libs/community/poetry.lock +++ b/libs/community/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. [[package]] name = "aenum" @@ -3140,7 +3140,6 @@ files = [ {file = "jq-1.6.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:227b178b22a7f91ae88525810441791b1ca1fc71c86f03190911793be15cec3d"}, {file = "jq-1.6.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:780eb6383fbae12afa819ef676fc93e1548ae4b076c004a393af26a04b460742"}, {file = "jq-1.6.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:08ded6467f4ef89fec35b2bf310f210f8cd13fbd9d80e521500889edf8d22441"}, - {file = "jq-1.6.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:49e44ed677713f4115bd5bf2dbae23baa4cd503be350e12a1c1f506b0687848f"}, {file = "jq-1.6.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:984f33862af285ad3e41e23179ac4795f1701822473e1a26bf87ff023e5a89ea"}, {file = "jq-1.6.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f42264fafc6166efb5611b5d4cb01058887d050a6c19334f6a3f8a13bb369df5"}, {file = "jq-1.6.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a67154f150aaf76cc1294032ed588436eb002097dd4fd1e283824bf753a05080"}, @@ -3224,6 +3223,7 @@ optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, !=3.6.*" files = [ {file = "jsonpointer-2.4-py2.py3-none-any.whl", hash = "sha256:15d51bba20eea3165644553647711d150376234112651b4f1811022aecad7d7a"}, + {file = "jsonpointer-2.4.tar.gz", hash = "sha256:585cee82b70211fa9e6043b7bb89db6e1aa49524340dde8ad6b63206ea689d88"}, ] [[package]] @@ -3650,7 +3650,7 @@ files = [ [[package]] name = "langchain-core" -version = "0.1.23" +version = "0.1.24" description = "Building applications with LLMs through composability" optional = false python-versions = ">=3.8.1,<4.0" @@ -5457,6 +5457,8 @@ files = [ {file = "psycopg2-2.9.9-cp310-cp310-win_amd64.whl", hash = "sha256:426f9f29bde126913a20a96ff8ce7d73fd8a216cfb323b1f04da402d452853c3"}, {file = "psycopg2-2.9.9-cp311-cp311-win32.whl", hash = "sha256:ade01303ccf7ae12c356a5e10911c9e1c51136003a9a1d92f7aa9d010fb98372"}, {file = 
"psycopg2-2.9.9-cp311-cp311-win_amd64.whl", hash = "sha256:121081ea2e76729acfb0673ff33755e8703d45e926e416cb59bae3a86c6a4981"}, + {file = "psycopg2-2.9.9-cp312-cp312-win32.whl", hash = "sha256:d735786acc7dd25815e89cc4ad529a43af779db2e25aa7c626de864127e5a024"}, + {file = "psycopg2-2.9.9-cp312-cp312-win_amd64.whl", hash = "sha256:a7653d00b732afb6fc597e29c50ad28087dcb4fbfb28e86092277a559ae4e693"}, {file = "psycopg2-2.9.9-cp37-cp37m-win32.whl", hash = "sha256:5e0d98cade4f0e0304d7d6f25bbfbc5bd186e07b38eac65379309c4ca3193efa"}, {file = "psycopg2-2.9.9-cp37-cp37m-win_amd64.whl", hash = "sha256:7e2dacf8b009a1c1e843b5213a87f7c544b2b042476ed7755be813eaf4e8347a"}, {file = "psycopg2-2.9.9-cp38-cp38-win32.whl", hash = "sha256:ff432630e510709564c01dafdbe996cb552e0b9f3f065eb89bdce5bd31fabf4c"}, @@ -5499,6 +5501,7 @@ files = [ {file = "psycopg2_binary-2.9.9-cp311-cp311-win32.whl", hash = "sha256:dc4926288b2a3e9fd7b50dc6a1909a13bbdadfc67d93f3374d984e56f885579d"}, {file = "psycopg2_binary-2.9.9-cp311-cp311-win_amd64.whl", hash = "sha256:b76bedd166805480ab069612119ea636f5ab8f8771e640ae103e05a4aae3e417"}, {file = "psycopg2_binary-2.9.9-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:8532fd6e6e2dc57bcb3bc90b079c60de896d2128c5d9d6f24a63875a95a088cf"}, + {file = "psycopg2_binary-2.9.9-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b0605eaed3eb239e87df0d5e3c6489daae3f7388d455d0c0b4df899519c6a38d"}, {file = "psycopg2_binary-2.9.9-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8f8544b092a29a6ddd72f3556a9fcf249ec412e10ad28be6a0c0d948924f2212"}, {file = "psycopg2_binary-2.9.9-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2d423c8d8a3c82d08fe8af900ad5b613ce3632a1249fd6a223941d0735fce493"}, {file = "psycopg2_binary-2.9.9-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2e5afae772c00980525f6d6ecf7cbca55676296b580c0e6abb407f15f3706996"}, @@ -5507,6 +5510,8 @@ files = [ {file = "psycopg2_binary-2.9.9-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:cb16c65dcb648d0a43a2521f2f0a2300f40639f6f8c1ecbc662141e4e3e1ee07"}, {file = "psycopg2_binary-2.9.9-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:911dda9c487075abd54e644ccdf5e5c16773470a6a5d3826fda76699410066fb"}, {file = "psycopg2_binary-2.9.9-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:57fede879f08d23c85140a360c6a77709113efd1c993923c59fde17aa27599fe"}, + {file = "psycopg2_binary-2.9.9-cp312-cp312-win32.whl", hash = "sha256:64cf30263844fa208851ebb13b0732ce674d8ec6a0c86a4e160495d299ba3c93"}, + {file = "psycopg2_binary-2.9.9-cp312-cp312-win_amd64.whl", hash = "sha256:81ff62668af011f9a48787564ab7eded4e9fb17a4a6a74af5ffa6a457400d2ab"}, {file = "psycopg2_binary-2.9.9-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:2293b001e319ab0d869d660a704942c9e2cce19745262a8aba2115ef41a0a42a"}, {file = "psycopg2_binary-2.9.9-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:03ef7df18daf2c4c07e2695e8cfd5ee7f748a1d54d802330985a78d2a5a6dca9"}, {file = "psycopg2_binary-2.9.9-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0a602ea5aff39bb9fac6308e9c9d82b9a35c2bf288e184a816002c9fae930b77"}, @@ -6490,6 +6495,7 @@ files = [ {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"}, + {file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"}, {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"}, {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"}, {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"}, @@ -6497,8 +6503,16 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"}, + {file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"}, {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"}, {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, + {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, + {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, + {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, + {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, + {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, + {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, + {file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"}, {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"}, @@ -6515,6 +6529,7 @@ files = [ {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"}, + {file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"}, {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"}, {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"}, {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"}, @@ -6522,6 +6537,7 @@ files = [ {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"}, + {file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"}, {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"}, {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"}, {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, @@ -9146,4 +9162,4 @@ extended-testing = ["aiosqlite", "aleph-alpha-client", "anthropic", "arxiv", "as [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<4.0" -content-hash = "a012fa21f1c46644a7efbfd0c9174c92fa8b183b7b0fe1d3ae6ed57797de43e9" +content-hash = "5fdd9b2eb766411463fa27e19433daf5d5325f2af01ddd93b6a594e3e02a31de" diff --git a/libs/community/pyproject.toml b/libs/community/pyproject.toml index 7df0de6149d3b..abf8dc89b6384 100644 --- a/libs/community/pyproject.toml +++ b/libs/community/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "langchain-community" -version = "0.0.20" +version = "0.0.21" description = "Community contributed LangChain integrations." authors = [] license = "MIT" @@ -9,7 +9,7 @@ repository = "https://github.com/langchain-ai/langchain" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" -langchain-core = ">=0.1.21,<0.2" +langchain-core = ">=0.1.24,<0.2" SQLAlchemy = ">=1.4,<3" requests = "^2" PyYAML = ">=5.3" From a9d3c100a20634a257898594db7c1cdfdb60af9e Mon Sep 17 00:00:00 2001 From: Bagatur <22008038+baskaryan@users.noreply.github.com> Date: Mon, 19 Feb 2024 12:22:31 -0700 Subject: [PATCH 11/12] infra: PR template nits (#17752) --- .github/PULL_REQUEST_TEMPLATE.md | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 4d86dac6a59cf..8d776064019a7 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -1,19 +1,24 @@ Thank you for contributing to LangChain! -Checklist: - -- [ ] PR title: Please title your PR "package: description", where "package" is whichever of langchain, community, core, experimental, etc. is being modified. 
Use "docs: ..." for purely docs changes, "templates: ..." for template changes, "infra: ..." for CI changes. +- [ ] **PR title**: "package: description" + - Where "package" is whichever of langchain, community, core, experimental, etc. is being modified. Use "docs: ..." for purely docs changes, "templates: ..." for template changes, "infra: ..." for CI changes. - Example: "community: add foobar LLM" -- [ ] PR message: **Delete this entire template message** and replace it with the following bulleted list + + +- [ ] **PR message**: ***Delete this entire checklist*** and replace with - **Description:** a description of the change - **Issue:** the issue # it fixes, if applicable - **Dependencies:** any dependencies required for this change - **Twitter handle:** if your PR gets announced, and you'd like a mention, we'll gladly shout you out! -- [ ] Pass lint and test: Run `make format`, `make lint` and `make test` from the root of the package(s) you've modified to check that you're passing lint and testing. See contribution guidelines for more information on how to write/run tests, lint, etc: https://python.langchain.com/docs/contributing/ -- [ ] Add tests and docs: If you're adding a new integration, please include + + +- [ ] **Add tests and docs**: If you're adding a new integration, please include 1. a test for the integration, preferably unit tests that do not rely on network access, 2. an example notebook showing its use. It lives in `docs/docs/integrations` directory. + +- [ ] **Lint and test**: Run `make format`, `make lint` and `make test` from the root of the package(s) you've modified. See contribution guidelines for more: https://python.langchain.com/docs/contributing/ + Additional guidelines: - Make sure optional dependencies are imported within a function. - Please do not add dependencies to pyproject.toml files (even optional ones) unless they are required for unit tests. From 441448372d5dc649bfd0b8069ea18c18373a0341 Mon Sep 17 00:00:00 2001 From: Bagatur <22008038+baskaryan@users.noreply.github.com> Date: Mon, 19 Feb 2024 12:27:37 -0700 Subject: [PATCH 12/12] langchain[patch]: Release 0.1.8 (#17751) --- libs/langchain/poetry.lock | 35 +++++++++++++++++++++++++++++++---- libs/langchain/pyproject.toml | 4 ++-- 2 files changed, 33 insertions(+), 6 deletions(-) diff --git a/libs/langchain/poetry.lock b/libs/langchain/poetry.lock index a8e98f8c65b43..a34dda564d618 100644 --- a/libs/langchain/poetry.lock +++ b/libs/langchain/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. 
[[package]] name = "aiodns" @@ -3049,7 +3049,6 @@ files = [ {file = "jq-1.6.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:227b178b22a7f91ae88525810441791b1ca1fc71c86f03190911793be15cec3d"}, {file = "jq-1.6.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:780eb6383fbae12afa819ef676fc93e1548ae4b076c004a393af26a04b460742"}, {file = "jq-1.6.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:08ded6467f4ef89fec35b2bf310f210f8cd13fbd9d80e521500889edf8d22441"}, - {file = "jq-1.6.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:49e44ed677713f4115bd5bf2dbae23baa4cd503be350e12a1c1f506b0687848f"}, {file = "jq-1.6.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:984f33862af285ad3e41e23179ac4795f1701822473e1a26bf87ff023e5a89ea"}, {file = "jq-1.6.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f42264fafc6166efb5611b5d4cb01058887d050a6c19334f6a3f8a13bb369df5"}, {file = "jq-1.6.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a67154f150aaf76cc1294032ed588436eb002097dd4fd1e283824bf753a05080"}, @@ -3133,6 +3132,7 @@ optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, !=3.6.*" files = [ {file = "jsonpointer-2.4-py2.py3-none-any.whl", hash = "sha256:15d51bba20eea3165644553647711d150376234112651b4f1811022aecad7d7a"}, + {file = "jsonpointer-2.4.tar.gz", hash = "sha256:585cee82b70211fa9e6043b7bb89db6e1aa49524340dde8ad6b63206ea689d88"}, ] [[package]] @@ -3474,7 +3474,7 @@ url = "../community" [[package]] name = "langchain-core" -version = "0.1.23" +version = "0.1.24" description = "Building applications with LLMs through composability" optional = false python-versions = ">=3.8.1,<4.0" @@ -3743,6 +3743,16 @@ files = [ {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5bbe06f8eeafd38e5d0a4894ffec89378b6c6a625ff57e3028921f8ff59318ac"}, {file = "MarkupSafe-2.1.3-cp311-cp311-win32.whl", hash = "sha256:dd15ff04ffd7e05ffcb7fe79f1b98041b8ea30ae9234aed2a9168b5797c3effb"}, {file = "MarkupSafe-2.1.3-cp311-cp311-win_amd64.whl", hash = "sha256:134da1eca9ec0ae528110ccc9e48041e0828d79f24121a1a146161103c76e686"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:f698de3fd0c4e6972b92290a45bd9b1536bffe8c6759c62471efaa8acb4c37bc"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:aa57bd9cf8ae831a362185ee444e15a93ecb2e344c8e52e4d721ea3ab6ef1823"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ffcc3f7c66b5f5b7931a5aa68fc9cecc51e685ef90282f4a82f0f5e9b704ad11"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47d4f1c5f80fc62fdd7777d0d40a2e9dda0a05883ab11374334f6c4de38adffd"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1f67c7038d560d92149c060157d623c542173016c4babc0c1913cca0564b9939"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:9aad3c1755095ce347e26488214ef77e0485a3c34a50c5a5e2471dff60b9dd9c"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:14ff806850827afd6b07a5f32bd917fb7f45b046ba40c57abdb636674a8b559c"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8f9293864fe09b8149f0cc42ce56e3f0e54de883a9de90cd427f191c346eb2e1"}, + {file = 
"MarkupSafe-2.1.3-cp312-cp312-win32.whl", hash = "sha256:715d3562f79d540f251b99ebd6d8baa547118974341db04f5ad06d5ea3eb8007"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-win_amd64.whl", hash = "sha256:1b8dd8c3fd14349433c79fa8abeb573a55fc0fdd769133baac1f5e07abf54aeb"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8e254ae696c88d98da6555f5ace2279cf7cd5b3f52be2b5cf97feafe883b58d2"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb0932dc158471523c9637e807d9bfb93e06a95cbf010f1a38b98623b929ef2b"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9402b03f1a1b4dc4c19845e5c749e3ab82d5078d16a2a4c2cd2df62d57bb0707"}, @@ -5269,6 +5279,8 @@ files = [ {file = "psycopg2-2.9.9-cp310-cp310-win_amd64.whl", hash = "sha256:426f9f29bde126913a20a96ff8ce7d73fd8a216cfb323b1f04da402d452853c3"}, {file = "psycopg2-2.9.9-cp311-cp311-win32.whl", hash = "sha256:ade01303ccf7ae12c356a5e10911c9e1c51136003a9a1d92f7aa9d010fb98372"}, {file = "psycopg2-2.9.9-cp311-cp311-win_amd64.whl", hash = "sha256:121081ea2e76729acfb0673ff33755e8703d45e926e416cb59bae3a86c6a4981"}, + {file = "psycopg2-2.9.9-cp312-cp312-win32.whl", hash = "sha256:d735786acc7dd25815e89cc4ad529a43af779db2e25aa7c626de864127e5a024"}, + {file = "psycopg2-2.9.9-cp312-cp312-win_amd64.whl", hash = "sha256:a7653d00b732afb6fc597e29c50ad28087dcb4fbfb28e86092277a559ae4e693"}, {file = "psycopg2-2.9.9-cp37-cp37m-win32.whl", hash = "sha256:5e0d98cade4f0e0304d7d6f25bbfbc5bd186e07b38eac65379309c4ca3193efa"}, {file = "psycopg2-2.9.9-cp37-cp37m-win_amd64.whl", hash = "sha256:7e2dacf8b009a1c1e843b5213a87f7c544b2b042476ed7755be813eaf4e8347a"}, {file = "psycopg2-2.9.9-cp38-cp38-win32.whl", hash = "sha256:ff432630e510709564c01dafdbe996cb552e0b9f3f065eb89bdce5bd31fabf4c"}, @@ -5311,6 +5323,7 @@ files = [ {file = "psycopg2_binary-2.9.9-cp311-cp311-win32.whl", hash = "sha256:dc4926288b2a3e9fd7b50dc6a1909a13bbdadfc67d93f3374d984e56f885579d"}, {file = "psycopg2_binary-2.9.9-cp311-cp311-win_amd64.whl", hash = "sha256:b76bedd166805480ab069612119ea636f5ab8f8771e640ae103e05a4aae3e417"}, {file = "psycopg2_binary-2.9.9-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:8532fd6e6e2dc57bcb3bc90b079c60de896d2128c5d9d6f24a63875a95a088cf"}, + {file = "psycopg2_binary-2.9.9-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b0605eaed3eb239e87df0d5e3c6489daae3f7388d455d0c0b4df899519c6a38d"}, {file = "psycopg2_binary-2.9.9-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8f8544b092a29a6ddd72f3556a9fcf249ec412e10ad28be6a0c0d948924f2212"}, {file = "psycopg2_binary-2.9.9-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2d423c8d8a3c82d08fe8af900ad5b613ce3632a1249fd6a223941d0735fce493"}, {file = "psycopg2_binary-2.9.9-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2e5afae772c00980525f6d6ecf7cbca55676296b580c0e6abb407f15f3706996"}, @@ -5319,6 +5332,8 @@ files = [ {file = "psycopg2_binary-2.9.9-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:cb16c65dcb648d0a43a2521f2f0a2300f40639f6f8c1ecbc662141e4e3e1ee07"}, {file = "psycopg2_binary-2.9.9-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:911dda9c487075abd54e644ccdf5e5c16773470a6a5d3826fda76699410066fb"}, {file = "psycopg2_binary-2.9.9-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:57fede879f08d23c85140a360c6a77709113efd1c993923c59fde17aa27599fe"}, + {file = "psycopg2_binary-2.9.9-cp312-cp312-win32.whl", hash = 
"sha256:64cf30263844fa208851ebb13b0732ce674d8ec6a0c86a4e160495d299ba3c93"}, + {file = "psycopg2_binary-2.9.9-cp312-cp312-win_amd64.whl", hash = "sha256:81ff62668af011f9a48787564ab7eded4e9fb17a4a6a74af5ffa6a457400d2ab"}, {file = "psycopg2_binary-2.9.9-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:2293b001e319ab0d869d660a704942c9e2cce19745262a8aba2115ef41a0a42a"}, {file = "psycopg2_binary-2.9.9-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:03ef7df18daf2c4c07e2695e8cfd5ee7f748a1d54d802330985a78d2a5a6dca9"}, {file = "psycopg2_binary-2.9.9-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0a602ea5aff39bb9fac6308e9c9d82b9a35c2bf288e184a816002c9fae930b77"}, @@ -5791,6 +5806,7 @@ files = [ {file = "pymongo-4.5.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6422b6763b016f2ef2beedded0e546d6aa6ba87910f9244d86e0ac7690f75c96"}, {file = "pymongo-4.5.0-cp312-cp312-win32.whl", hash = "sha256:77cfff95c1fafd09e940b3fdcb7b65f11442662fad611d0e69b4dd5d17a81c60"}, {file = "pymongo-4.5.0-cp312-cp312-win_amd64.whl", hash = "sha256:e57d859b972c75ee44ea2ef4758f12821243e99de814030f69a3decb2aa86807"}, + {file = "pymongo-4.5.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8443f3a8ab2d929efa761c6ebce39a6c1dca1c9ac186ebf11b62c8fe1aef53f4"}, {file = "pymongo-4.5.0-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:2b0176f9233a5927084c79ff80b51bd70bfd57e4f3d564f50f80238e797f0c8a"}, {file = "pymongo-4.5.0-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:89b3f2da57a27913d15d2a07d58482f33d0a5b28abd20b8e643ab4d625e36257"}, {file = "pymongo-4.5.0-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:5caee7bd08c3d36ec54617832b44985bd70c4cbd77c5b313de6f7fce0bb34f93"}, @@ -6307,6 +6323,7 @@ files = [ {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"}, + {file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"}, {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"}, {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"}, {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"}, @@ -6314,8 +6331,16 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"}, + {file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"}, {file = 
"PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"}, {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, + {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, + {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, + {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, + {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, + {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, + {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, + {file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"}, {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"}, @@ -6332,6 +6357,7 @@ files = [ {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"}, + {file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"}, {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"}, {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"}, {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"}, @@ -6339,6 +6365,7 @@ files = [ {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"}, + {file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"}, {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = 
"sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"}, {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"}, {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, @@ -9123,4 +9150,4 @@ text-helpers = ["chardet"] [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<4.0" -content-hash = "e3419e64eee15fa3e93c00e2b76d4a29d9b08c4299291ec3fc65802b2aede5c0" +content-hash = "fe87b11389305796e6942af6b3e86632d8ba7fdc75b2ab93d7a9a566e3c71086" diff --git a/libs/langchain/pyproject.toml b/libs/langchain/pyproject.toml index 89387d7391033..2c9ec410757a2 100644 --- a/libs/langchain/pyproject.toml +++ b/libs/langchain/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "langchain" -version = "0.1.7" +version = "0.1.8" description = "Building applications with LLMs through composability" authors = [] license = "MIT" @@ -12,7 +12,7 @@ langchain-server = "langchain.server:main" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" -langchain-core = ">=0.1.22,<0.2" +langchain-core = ">=0.1.24,<0.2" langchain-community = ">=0.0.20,<0.1" langsmith = "^0.1.0" pydantic = ">=1,<3"