Merge branch 'master' into law_2
fzowl authored Apr 23, 2024
2 parents 37d3a38 + ed98060 commit 6469e07
Showing 21 changed files with 103 additions and 73 deletions.
2 changes: 1 addition & 1 deletion cookbook/Semi_Structured_RAG.ipynb
@@ -75,7 +75,7 @@
"\n",
"Apply to the [`LLaMA2`](https://arxiv.org/pdf/2307.09288.pdf) paper. \n",
"\n",
"We use the Unstructured [`partition_pdf`](https://unstructured-io.github.io/unstructured/bricks/partition.html#partition-pdf), which segments a PDF document by using a layout model. \n",
"We use the Unstructured [`partition_pdf`](https://unstructured-io.github.io/unstructured/core/partition.html#partition-pdf), which segments a PDF document by using a layout model. \n",
"\n",
"This layout model makes it possible to extract elements, such as tables, from pdfs. \n",
"\n",
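For orientation, `partition_pdf` is typically invoked along these lines (a minimal sketch, assuming a local copy of the paper and the `unstructured` PDF extras installed; the flags are illustrative, not lifted from this notebook):

```python
from unstructured.partition.pdf import partition_pdf

# Segment the PDF with a layout model so tables come back as distinct elements.
elements = partition_pdf(
    filename="llama2.pdf",        # assumed local download of the paper
    infer_table_structure=True,   # preserve table structure in element metadata
    strategy="hi_res",            # layout-model-based partitioning
)
print({type(el).__name__ for el in elements})  # e.g. {'NarrativeText', 'Table', ...}
```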
7 changes: 5 additions & 2 deletions docs/.local_build.sh
@@ -19,6 +19,9 @@ poetry run python scripts/copy_templates.py
wget -q https://raw.githubusercontent.com/langchain-ai/langserve/main/README.md -O docs/langserve.md
wget -q https://raw.githubusercontent.com/langchain-ai/langgraph/main/README.md -O docs/langgraph.md

+yarn

-poetry run quarto preview docs
+poetry run quarto render docs
+poetry run python scripts/generate_api_reference_links.py --docs_dir docs

-yarn
yarn start
2 changes: 1 addition & 1 deletion docs/api_reference/guide_imports.json

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions docs/docs/integrations/document_loaders/google_drive.ipynb
@@ -50,7 +50,7 @@
},
"outputs": [],
"source": [
"from langchain_community.document_loaders import GoogleDriveLoader"
"from langchain_google_community import GoogleDriveLoader"
]
},
{
@@ -339,7 +339,7 @@
"metadata": {},
"outputs": [],
"source": [
"from langchain_community.document_loaders import GoogleDriveLoader\n",
"from langchain_google_community import GoogleDriveLoader\n",
"\n",
"loader = GoogleDriveLoader(\n",
" folder_id=folder_id,\n",
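The new import path resolves from the standalone Google package rather than `langchain-community`. A quick usage sketch (assumes `langchain-google-community` is installed, Drive credentials are configured, and the folder id is a placeholder):

```python
# pip install -U langchain-google-community
from langchain_google_community import GoogleDriveLoader

loader = GoogleDriveLoader(
    folder_id="<your-folder-id>",  # placeholder; any accessible Drive folder
    recursive=False,
)
docs = loader.load()
print(len(docs))
```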
26 changes: 11 additions & 15 deletions docs/docs/integrations/graphs/memgraph.ipynb
@@ -19,26 +19,22 @@
"\n",
"To complete this tutorial, you will need [Docker](https://www.docker.com/get-started/) and [Python 3.x](https://www.python.org/) installed.\n",
"\n",
"Ensure you have a running `Memgraph` instance. You can download and run it in a local Docker container by executing the following script:\n",
"Ensure you have a running Memgraph instance. To quickly run Memgraph Platform (Memgraph database + MAGE library + Memgraph Lab) for the first time, do the following:\n",
"\n",
"On Linux/MacOS:\n",
"```\n",
"docker run \\\n",
" -it \\\n",
" -p 7687:7687 \\\n",
" -p 7444:7444 \\\n",
" -p 3000:3000 \\\n",
" -e MEMGRAPH=\"--bolt-server-name-for-init=Neo4j/\" \\\n",
" -v mg_lib:/var/lib/memgraph memgraph/memgraph-platform\n",
"curl https://install.memgraph.com | sh\n",
"```\n",
"\n",
"You will need to wait a few seconds for the database to start. If the process is completed successfully, you should see something like this:\n",
"On Windows:\n",
"```\n",
"mgconsole X.X\n",
"Connected to 'memgraph://127.0.0.1:7687'\n",
"Type :help for shell usage\n",
"Quit the shell by typing Ctrl-D(eof) or :quit\n",
"memgraph>\n",
"iwr https://windows.memgraph.com | iex\n",
"```\n",
"\n",
"Both commands run a script that downloads a Docker Compose file to your system, builds and starts `memgraph-mage` and `memgraph-lab` Docker services in two separate containers. \n",
"\n",
"Read more about the installation process on [Memgraph documentation](https://memgraph.com/docs/getting-started/install-memgraph).\n",
"\n",
"Now you can start playing with `Memgraph`!"
]
},
@@ -89,7 +85,7 @@
"id": "95ba37a4",
"metadata": {},
"source": [
"We're utilizing the Python library [GQLAlchemy](https://github.com/memgraph/gqlalchemy) to establish a connection between our Memgraph database and Python script. To execute queries, we can set up a Memgraph instance as follows:"
"We're utilizing the Python library [GQLAlchemy](https://github.com/memgraph/gqlalchemy) to establish a connection between our Memgraph database and Python script. You can establish the connection to a running Memgraph instance with the Neo4j driver as well, since it's compatible with Memgraph. To execute queries with GQLAlchemy, we can set up a Memgraph instance as follows:"
]
},
{
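A minimal GQLAlchemy connection sketch, assuming a Memgraph instance listening on the default Bolt port 7687:

```python
from gqlalchemy import Memgraph

# Connect to the running instance (these are the defaults, shown explicitly).
memgraph = Memgraph(host="127.0.0.1", port=7687)

# Write a node, then read it back.
memgraph.execute("CREATE (:Character {name: 'Jon Snow'})")
results = memgraph.execute_and_fetch("MATCH (c:Character) RETURN c.name AS name")
print([r["name"] for r in results])
```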
18 changes: 12 additions & 6 deletions docs/scripts/generate_api_reference_links.py
@@ -13,10 +13,10 @@
_BASE_URL = "https://api.python.langchain.com/en/latest/"

# Regular expression to match Python code blocks
-code_block_re = re.compile(r"^(```python\n)(.*?)(```\n)", re.DOTALL | re.MULTILINE)
+code_block_re = re.compile(r"^(```\s?python\n)(.*?)(```)", re.DOTALL | re.MULTILINE)
# Regular expression to match langchain import lines
_IMPORT_RE = re.compile(
r"from\s+(langchain\.\w+(\.\w+)*?)\s+import\s+"
r"from\s+(langchain(?:_\w+)?(?:\.\w+)*?)\s+import\s+"
r"((?:\w+(?:,\s*)?)*" # Match zero or more words separated by a comma+optional ws
r"(?:\s*\(.*?\))?)", # Match optional parentheses block
re.DOTALL, # Match newlines as well
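The widened pattern now matches partner-package imports such as `langchain_community` and `langchain_core`, not only `langchain.*` submodules; switching the inner groups to non-capturing is also why `replacer` below reads `group(2)` where it used to read `group(3)`. A standalone check of the new pattern (the sample imports are illustrative):

```python
import re

_IMPORT_RE = re.compile(
    r"from\s+(langchain(?:_\w+)?(?:\.\w+)*?)\s+import\s+"
    r"((?:\w+(?:,\s*)?)*"
    r"(?:\s*\(.*?\))?)",
    re.DOTALL,
)

sample = """
from langchain.chains import LLMChain
from langchain_community.chat_models import ChatOllama
from langchain_core.prompts import ChatPromptTemplate
"""
for m in _IMPORT_RE.finditer(sample):
    print(m.group(1), "->", m.group(2))
# langchain.chains -> LLMChain
# langchain_community.chat_models -> ChatOllama
# langchain_core.prompts -> ChatPromptTemplate
```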
@@ -64,13 +64,14 @@ def main():
    global_imports = {}

    for file in find_files(args.docs_dir):
-        print(f"Adding links for imports in {file}")  # noqa: T201
        file_imports = replace_imports(file)

        if file_imports:
            # Use relative file path as key
            relative_path = (
-                os.path.relpath(file, _DOCS_DIR).replace(".mdx", "").replace(".md", "")
+                os.path.relpath(file, args.docs_dir)
+                .replace(".mdx", "/")
+                .replace(".md", "/")
            )

            doc_url = f"https://python.langchain.com/docs/{relative_path}"
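The key change: paths are now computed relative to the `--docs_dir` argument instead of the hard-coded `_DOCS_DIR`, and extensions map to a trailing slash so generated links match the site's URL scheme. A small sketch of the transformation (paths are hypothetical):

```python
import os

docs_dir = "docs/docs"  # hypothetical --docs_dir value
file = "docs/docs/integrations/document_loaders/google_drive.mdx"

relative_path = (
    os.path.relpath(file, docs_dir)
    .replace(".mdx", "/")
    .replace(".md", "/")
)
print(f"https://python.langchain.com/docs/{relative_path}")
# https://python.langchain.com/docs/integrations/document_loaders/google_drive/
```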
@@ -122,8 +123,10 @@ def replacer(match):
        imports = []
        for import_match in _IMPORT_RE.finditer(code):
            module = import_match.group(1)
+            if "pydantic_v1" in module:
+                continue
            imports_str = (
-                import_match.group(3).replace("(\n", "").replace("\n)", "")
+                import_match.group(2).replace("(\n", "").replace("\n)", "")
            )  # Handle newlines within parentheses
            # remove any newline and spaces, then split by comma
            imported_classes = [
@@ -140,7 +143,8 @@ def replacer(match):
                except ImportError as e:
                    logger.warning(f"Failed to load for class {class_name}, {e}")
                    continue
-
+                if len(module_path.split(".")) < 2:
+                    continue
                url = (
                    _BASE_URL
                    + module_path.split(".")[1]
Expand Down Expand Up @@ -174,6 +178,8 @@ def replacer(match):
# Use re.sub to replace each Python code block
data = code_block_re.sub(replacer, data)

if all_imports:
print(f"Adding {len(all_imports)} links for imports in {file}") # noqa: T201
with open(file, "w") as f:
f.write(data)
return all_imports
5 changes: 4 additions & 1 deletion docs/vercel_build.sh
@@ -13,7 +13,9 @@ export PATH=$PATH:$(pwd)/quarto-1.3.450/bin/
python3 -m venv .venv
source .venv/bin/activate
python3 -m pip install --upgrade pip
-python3 -m pip install -r vercel_requirements.txt
+python3 -m pip install --upgrade uv
+python3 -m uv pip install -r vercel_requirements.txt
+python3 -m uv pip install -e $(ls ../libs/partners | grep -vE "airbyte|ibm|.md" | xargs -I {} echo "../libs/partners/{}")

# autogenerate integrations tables
python3 scripts/model_feat_table.py
@@ -33,3 +35,4 @@ python3 scripts/resolve_local_links.py docs/langgraph.md https://github.com/lang

# render
quarto render docs/
+python3 scripts/generate_api_reference_links.py --docs_dir docs
7 changes: 7 additions & 0 deletions docs/vercel_requirements.txt
@@ -1,4 +1,11 @@
-e ../libs/langchain
-e ../libs/community
-e ../libs/core
-e ../libs/experimental
-e ../libs/text-splitters
langchain-cohere
langchain-astradb
langchain-nvidia-ai-endpoints
langchain-elasticsearch
langchain-postgres
urllib3==1.26.18
15 changes: 2 additions & 13 deletions libs/community/langchain_community/agent_toolkits/base.py
@@ -1,15 +1,4 @@
"""Toolkits for agents."""
-from abc import ABC, abstractmethod
-from typing import List
+from langchain_core.tools import BaseToolkit

-from langchain_core.pydantic_v1 import BaseModel
-
-from langchain_community.tools import BaseTool
-
-
-class BaseToolkit(BaseModel, ABC):
-    """Base Toolkit representing a collection of related tools."""
-
-    @abstractmethod
-    def get_tools(self) -> List[BaseTool]:
-        """Get the tools in the toolkit."""
+
+__all__ = ["BaseToolkit"]
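The module body shrinks to a re-export, so existing imports keep resolving. A quick sanity-check sketch (assumes `langchain-community` is installed):

```python
from langchain_community.agent_toolkits.base import BaseToolkit as CommunityBaseToolkit
from langchain_core.tools import BaseToolkit as CoreBaseToolkit

# After this change both import paths name the same class object.
assert CommunityBaseToolkit is CoreBaseToolkit
```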
libs/community/langchain_community/chat_message_histories/cassandra.py
@@ -3,6 +3,7 @@

import json
import typing
+import uuid
from typing import List

if typing.TYPE_CHECKING:
@@ -41,32 +42,47 @@ def __init__(
        ttl_seconds: typing.Optional[int] = DEFAULT_TTL_SECONDS,
    ) -> None:
        try:
-            from cassio.history import StoredBlobHistory
+            from cassio.table import ClusteredCassandraTable
        except (ImportError, ModuleNotFoundError):
            raise ImportError(
                "Could not import cassio python package. "
                "Please install it with `pip install cassio`."
            )
        self.session_id = session_id
        self.ttl_seconds = ttl_seconds
-        self.blob_history = StoredBlobHistory(session, keyspace, table_name)
+        self.table = ClusteredCassandraTable(
+            session=session,
+            keyspace=keyspace,
+            table=table_name,
+            ttl_seconds=ttl_seconds,
+            primary_key_type=["TEXT", "TIMEUUID"],
+            ordering_in_partition="DESC",
+        )

    @property
    def messages(self) -> List[BaseMessage]:  # type: ignore
        """Retrieve all session messages from DB"""
-        message_blobs = self.blob_history.retrieve(
-            self.session_id,
-        )
+        # Rows come back newest-first (DESC ordering); reverse into chronological order
+        message_blobs = [
+            row["body_blob"]
+            for row in self.table.get_partition(
+                partition_id=self.session_id,
+            )
+        ][::-1]
        items = [json.loads(message_blob) for message_blob in message_blobs]
        messages = messages_from_dict(items)
        return messages

    def add_message(self, message: BaseMessage) -> None:
        """Write a message to the table"""
-        self.blob_history.store(
-            self.session_id, json.dumps(message_to_dict(message)), self.ttl_seconds
+        this_row_id = uuid.uuid1()
+        self.table.put(
+            partition_id=self.session_id,
+            row_id=this_row_id,
+            body_blob=json.dumps(message_to_dict(message)),
+            ttl_seconds=self.ttl_seconds,
        )

    def clear(self) -> None:
        """Clear session memory from DB"""
-        self.blob_history.clear_session_id(self.session_id)
+        self.table.delete_partition(self.session_id)
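For orientation, a usage sketch of the migrated history class (hedged: assumes a reachable Cassandra cluster, an existing keyspace, and `cassio` plus `cassandra-driver` installed; identifiers are illustrative):

```python
from cassandra.cluster import Cluster
from langchain_community.chat_message_histories import CassandraChatMessageHistory
from langchain_core.messages import AIMessage, HumanMessage

session = Cluster(["127.0.0.1"]).connect()
history = CassandraChatMessageHistory(
    session_id="user-42",      # illustrative session id
    session=session,
    keyspace="chat_keyspace",  # assumed to exist
)

history.add_message(HumanMessage(content="Hi there"))
history.add_message(AIMessage(content="Hello!"))
print([m.content for m in history.messages])  # chronological order
history.clear()
```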
10 changes: 9 additions & 1 deletion libs/core/langchain_core/tools.py
@@ -22,7 +22,7 @@
import inspect
import uuid
import warnings
-from abc import abstractmethod
+from abc import ABC, abstractmethod
from functools import partial
from inspect import signature
from typing import Any, Awaitable, Callable, Dict, List, Optional, Tuple, Type, Union
@@ -1038,3 +1038,11 @@ def render_text_description_and_args(tools: List[BaseTool]) -> str:
        args_schema = str(tool.args)
        tool_strings.append(f"{tool.name}: {tool.description}, args: {args_schema}")
    return "\n".join(tool_strings)


+class BaseToolkit(BaseModel, ABC):
+    """Base Toolkit representing a collection of related tools."""
+
+    @abstractmethod
+    def get_tools(self) -> List[BaseTool]:
+        """Get the tools in the toolkit."""
42 changes: 25 additions & 17 deletions libs/langchain/langchain/adapters/openai.py
@@ -1,20 +1,28 @@
-from langchain_community.adapters.openai import (
-    Chat,
-    ChatCompletion,
-    ChatCompletionChunk,
-    ChatCompletions,
-    Choice,
-    ChoiceChunk,
-    Completions,
-    IndexableBaseModel,
-    chat,
-    convert_dict_to_message,
-    convert_message_to_dict,
-    convert_messages_for_finetuning,
-    convert_openai_messages,
-)
-
-__all__ = [
+import warnings
+
+from langchain_core._api import LangChainDeprecationWarning
+
+from langchain.utils.interactive_env import is_interactive_env
+
+
+def __getattr__(name: str) -> None:
+    # If not in interactive env, raise warning.
+    from langchain_community.adapters import openai
+
+    if not is_interactive_env():
+        warnings.warn(
+            "Importing from langchain is deprecated. Importing from "
+            "langchain will no longer be supported as of langchain==0.2.0. "
+            "Instead of `from langchain.adapters.openai import {name}` "
+            "Use `from langchain_community.adapters.openai import {name}`."
+            "To install langchain-community run `pip install -U langchain-community`.",
+            category=LangChainDeprecationWarning,
+        )
+
+    return getattr(openai, name)
+
+
+__all__ = [  # noqa: F822
    "IndexableBaseModel",
    "Choice",
    "ChatCompletions",
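The replacement body is a module-level `__getattr__` (PEP 562): any attribute not found in the module falls through to the hook, which emits a deprecation warning when the session is non-interactive and forwards the lookup to `langchain_community`. A stripped-down sketch of the pattern (module names are hypothetical):

```python
# old_pkg/adapters.py: hypothetical deprecated shim module
import warnings


def __getattr__(name: str):
    # Invoked only when `name` is missing from this module's namespace.
    from new_pkg import adapters as _new  # hypothetical new home

    warnings.warn(
        f"old_pkg.adapters.{name} is deprecated; import it from new_pkg.adapters.",
        DeprecationWarning,
        stacklevel=2,
    )
    return getattr(_new, name)
```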
File renamed without changes.
5 changes: 0 additions & 5 deletions libs/partners/astradb/.gitignore

This file was deleted.

File renamed without changes.
File renamed without changes.
1 change: 0 additions & 1 deletion libs/partners/elasticsearch/.gitignore

This file was deleted.

File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
