Merge branch 'master' into hmnfalahi/get_python_function_required_args

langchain-ai · Apr 24, 2024 · b5e8f09 · b5e8f09
2 parents c4012c2 + 9111d3a
commit b5e8f09
Show file tree

Hide file tree

Showing 47 changed files with 810 additions and 427 deletions.
diff --git a/.github/workflows/scheduled_test.yml b/.github/workflows/scheduled_test.yml
@@ -19,11 +19,11 @@ jobs:
         working-directory:
           - "libs/partners/openai"
           - "libs/partners/anthropic"
-          # - "libs/partners/ai21"  # standard-tests broken
+          - "libs/partners/ai21"
           - "libs/partners/fireworks"
-          # - "libs/partners/groq"  # rate-limited
+          - "libs/partners/groq"
           - "libs/partners/mistralai"
-          # - "libs/partners/together"  # rate-limited
+          - "libs/partners/together"
     name: Python ${{ matrix.python-version }} - ${{ matrix.working-directory }}
     steps:
       - uses: actions/checkout@v4

diff --git a/docs/api_reference/guide_imports.json b/docs/api_reference/guide_imports.json
diff --git a/docs/docs/guides/productionization/safety/hugging_face_prompt_injection.ipynb b/docs/docs/guides/productionization/safety/hugging_face_prompt_injection.ipynb
@@ -9,7 +9,7 @@
     "\n",
     "This notebook shows how to prevent prompt injection attacks using the text classification model from `HuggingFace`.\n",
     "\n",
-    "By default, it uses a *[laiyer/deberta-v3-base-prompt-injection](https://huggingface.co/laiyer/deberta-v3-base-prompt-injection)* model trained to identify prompt injections. \n",
+    "By default, it uses a *[protectai/deberta-v3-base-prompt-injection-v2](https://huggingface.co/protectai/deberta-v3-base-prompt-injection-v2)* model trained to identify prompt injections. \n",
     "\n",
     "In this notebook, we will use the ONNX version of the model to speed up the inference. "
    ]
@@ -49,11 +49,15 @@
     "from optimum.onnxruntime import ORTModelForSequenceClassification\n",
     "from transformers import AutoTokenizer, pipeline\n",
     "\n",
-    "# Using https://huggingface.co/laiyer/deberta-v3-base-prompt-injection\n",
-    "model_path = \"laiyer/deberta-v3-base-prompt-injection\"\n",
-    "tokenizer = AutoTokenizer.from_pretrained(model_path)\n",
-    "tokenizer.model_input_names = [\"input_ids\", \"attention_mask\"]  # Hack to run the model\n",
-    "model = ORTModelForSequenceClassification.from_pretrained(model_path, subfolder=\"onnx\")\n",
+    "# Using https://huggingface.co/protectai/deberta-v3-base-prompt-injection-v2\n",
+    "model_path = \"laiyer/deberta-v3-base-prompt-injection-v2\"\n",
+    "revision = None  # We recommend specifiying the revision to avoid breaking changes or supply chain attacks\n",
+    "tokenizer = AutoTokenizer.from_pretrained(\n",
+    "    model_path, revision=revision, model_input_names=[\"input_ids\", \"attention_mask\"]\n",
+    ")\n",
+    "model = ORTModelForSequenceClassification.from_pretrained(\n",
+    "    model_path, revision=revision, subfolder=\"onnx\"\n",
+    ")\n",
     "\n",
     "classifier = pipeline(\n",
     "    \"text-classification\",\n",

diff --git a/docs/docs/integrations/vectorstores/neo4jvector.ipynb b/docs/docs/integrations/vectorstores/neo4jvector.ipynb
@@ -8,7 +8,7 @@
     "\n",
     ">[Neo4j](https://neo4j.com/) is an open-source graph database with integrated support for vector similarity search\n",
     "\n",
-    "It supports:\n",
+    "It supports:\n\n",
     "- approximate nearest neighbor search\n",
     "- Euclidean similarity and cosine similarity\n",
     "- Hybrid search combining vector and keyword searches\n",

diff --git a/docs/docs/modules/data_connection/retrievers/custom_retriever.ipynb b/docs/docs/modules/data_connection/retrievers/custom_retriever.ipynb
@@ -98,7 +98,7 @@
     "    ) -> List[Document]:\n",
     "        \"\"\"Sync implementations for retriever.\"\"\"\n",
     "        matching_documents = []\n",
-    "        for document in documents:\n",
+    "        for document in self.documents:\n",
     "            if len(matching_documents) > self.k:\n",
     "                return matching_documents\n",
     "\n",

diff --git a/docs/docs/use_cases/question_answering/chat_history.ipynb b/docs/docs/use_cases/question_answering/chat_history.ipynb
@@ -403,16 +403,13 @@
    "outputs": [],
    "source": [
     "import bs4\n",
-    "from langchain import hub\n",
     "from langchain.chains import create_history_aware_retriever, create_retrieval_chain\n",
     "from langchain.chains.combine_documents import create_stuff_documents_chain\n",
     "from langchain_chroma import Chroma\n",
     "from langchain_community.chat_message_histories import ChatMessageHistory\n",
     "from langchain_community.document_loaders import WebBaseLoader\n",
     "from langchain_core.chat_history import BaseChatMessageHistory\n",
-    "from langchain_core.output_parsers import StrOutputParser\n",
     "from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder\n",
-    "from langchain_core.runnables import RunnablePassthrough\n",
     "from langchain_core.runnables.history import RunnableWithMessageHistory\n",
     "from langchain_openai import ChatOpenAI, OpenAIEmbeddings\n",
     "from langchain_text_splitters import RecursiveCharacterTextSplitter\n",

diff --git a/libs/community/langchain_community/callbacks/tracers/comet.py b/libs/community/langchain_community/callbacks/tracers/comet.py
@@ -70,37 +70,40 @@ def _initialize_comet_modules(self) -> None:
         self._flush: Callable[[], None] = comet_llm_api.flush
 
     def _persist_run(self, run: "Run") -> None:
+        run_dict: Dict[str, Any] = run.dict()
         chain_ = self._chains_map[run.id]
-        chain_.set_outputs(outputs=run.outputs)
+        chain_.set_outputs(outputs=run_dict["outputs"])
         self._chain_api.log_chain(chain_)
 
     def _process_start_trace(self, run: "Run") -> None:
+        run_dict: Dict[str, Any] = run.dict()
         if not run.parent_run_id:
             # This is the first run, which maps to a chain
             chain_: "Chain" = self._chain.Chain(
-                inputs=run.inputs,
+                inputs=run_dict["inputs"],
                 metadata=None,
                 experiment_info=self._experiment_info.get(),
             )
             self._chains_map[run.id] = chain_
         else:
             span: "Span" = self._span.Span(
-                inputs=run.inputs,
+                inputs=run_dict["inputs"],
                 category=_get_run_type(run),
-                metadata=run.extra,
+                metadata=run_dict["extra"],
                 name=run.name,
             )
             span.__api__start__(self._chains_map[run.parent_run_id])
             self._chains_map[run.id] = self._chains_map[run.parent_run_id]
             self._span_map[run.id] = span
 
     def _process_end_trace(self, run: "Run") -> None:
+        run_dict: Dict[str, Any] = run.dict()
         if not run.parent_run_id:
             pass
             # Langchain will call _persist_run for us
         else:
             span = self._span_map[run.id]
-            span.set_outputs(outputs=run.outputs)
+            span.set_outputs(outputs=run_dict["outputs"])
             span.__api__end__()
 
     def flush(self) -> None:

diff --git a/libs/community/langchain_community/chat_message_histories/file.py b/libs/community/langchain_community/chat_message_histories/file.py
@@ -1,45 +1,5 @@
-import json
-import logging
-from pathlib import Path
-from typing import List
+from langchain_core.chat_history import FileChatMessageHistory
 
-from langchain_core.chat_history import BaseChatMessageHistory
-from langchain_core.messages import (
-    BaseMessage,
-    messages_from_dict,
-    messages_to_dict,
-)
-
-logger = logging.getLogger(__name__)
-
-
-class FileChatMessageHistory(BaseChatMessageHistory):
-    """
-    Chat message history that stores history in a local file.
-
-    Args:
-        file_path: path of the local file to store the messages.
-    """
-
-    def __init__(self, file_path: str):
-        self.file_path = Path(file_path)
-        if not self.file_path.exists():
-            self.file_path.touch()
-            self.file_path.write_text(json.dumps([]))
-
-    @property
-    def messages(self) -> List[BaseMessage]:  # type: ignore
-        """Retrieve the messages from the local file"""
-        items = json.loads(self.file_path.read_text())
-        messages = messages_from_dict(items)
-        return messages
-
-    def add_message(self, message: BaseMessage) -> None:
-        """Append the message to the record in the local file"""
-        messages = messages_to_dict(self.messages)
-        messages.append(messages_to_dict([message])[0])
-        self.file_path.write_text(json.dumps(messages))
-
-    def clear(self) -> None:
-        """Clear session memory from the local file"""
-        self.file_path.write_text(json.dumps([]))
+__all__ = [
+    "FileChatMessageHistory",
+]
diff --git a/libs/community/langchain_community/chat_message_histories/in_memory.py b/libs/community/langchain_community/chat_message_histories/in_memory.py
@@ -1,31 +1,5 @@
-from typing import List, Sequence
+from langchain_core.chat_history import InMemoryChatMessageHistory as ChatMessageHistory
 
-from langchain_core.chat_history import BaseChatMessageHistory
-from langchain_core.messages import BaseMessage
-from langchain_core.pydantic_v1 import BaseModel, Field
-
-
-class ChatMessageHistory(BaseChatMessageHistory, BaseModel):
-    """In memory implementation of chat message history.
-
-    Stores messages in an in memory list.
-    """
-
-    messages: List[BaseMessage] = Field(default_factory=list)
-
-    async def aget_messages(self) -> List[BaseMessage]:
-        return self.messages
-
-    def add_message(self, message: BaseMessage) -> None:
-        """Add a self-created message to the store"""
-        self.messages.append(message)
-
-    async def aadd_messages(self, messages: Sequence[BaseMessage]) -> None:
-        """Add messages to the store"""
-        self.add_messages(messages)
-
-    def clear(self) -> None:
-        self.messages = []
-
-    async def aclear(self) -> None:
-        self.clear()
+__all__ = [
+    "ChatMessageHistory",
+]
diff --git a/libs/community/langchain_community/chat_models/bedrock.py b/libs/community/langchain_community/chat_models/bedrock.py
@@ -193,7 +193,13 @@ def format_messages(
         )
 
 
-_message_type_lookups = {"human": "user", "ai": "assistant"}
+_message_type_lookups = {
+    "human": "user",
+    "ai": "assistant",
+    "AIMessageChunk": "assistant",
+    "HumanMessageChunk": "user",
+    "function": "user",
+}
 
 
 @deprecated(

diff --git a/libs/community/langchain_community/llms/llamafile.py b/libs/community/langchain_community/llms/llamafile.py
@@ -139,6 +139,7 @@ def _param_fieldnames(self) -> List[str]:
             "streaming",
             "tags",
             "verbose",
+            "custom_get_token_ids",
         ]
         attrs = [
             k for k in get_pydantic_field_names(self.__class__) if k not in ignore_keys

diff --git a/libs/core/langchain_core/_api/beta_decorator.py b/libs/core/langchain_core/_api/beta_decorator.py
@@ -121,7 +121,7 @@ async def awarning_emitting_wrapper(*args: Any, **kwargs: Any) -> Any:
             if not _obj_type:
                 _obj_type = "class"
             wrapped = obj.__init__  # type: ignore
-            _name = _name or obj.__name__
+            _name = _name or obj.__qualname__
             old_doc = obj.__doc__
 
             def finalize(wrapper: Callable[..., Any], new_doc: str) -> T:
@@ -147,10 +147,11 @@ def warn_if_direct_instance(
                 return cast(T, obj)
 
         elif isinstance(obj, property):
+            # note(erick): this block doesn't seem to be used?
             if not _obj_type:
                 _obj_type = "attribute"
             wrapped = None
-            _name = _name or obj.fget.__name__
+            _name = _name or obj.fget.__qualname__
             old_doc = obj.__doc__
 
             class _beta_property(property):
@@ -189,10 +190,12 @@ def finalize(wrapper: Callable[..., Any], new_doc: str) -> Any:
                 )
 
         else:
+            _name = _name or obj.__qualname__
             if not _obj_type:
-                _obj_type = "function"
+                # edge case: when a function is within another function
+                # within a test, this will call it a "method" not a "function"
+                _obj_type = "function" if "." not in _name else "method"
             wrapped = obj
-            _name = _name or obj.__name__
             old_doc = wrapped.__doc__
 
             def finalize(wrapper: Callable[..., Any], new_doc: str) -> T:

diff --git a/libs/core/langchain_core/caches.py b/libs/core/langchain_core/caches.py
@@ -22,7 +22,7 @@
 from __future__ import annotations
 
 from abc import ABC, abstractmethod
-from typing import Any, Optional, Sequence
+from typing import Any, Dict, Optional, Sequence, Tuple
 
 from langchain_core.outputs import Generation
 from langchain_core.runnables import run_in_executor
@@ -105,3 +105,37 @@ async def aupdate(
     async def aclear(self, **kwargs: Any) -> None:
         """Clear cache that can take additional keyword arguments."""
         return await run_in_executor(None, self.clear, **kwargs)
+
+
+class InMemoryCache(BaseCache):
+    """Cache that stores things in memory."""
+
+    def __init__(self) -> None:
+        """Initialize with empty cache."""
+        self._cache: Dict[Tuple[str, str], RETURN_VAL_TYPE] = {}
+
+    def lookup(self, prompt: str, llm_string: str) -> Optional[RETURN_VAL_TYPE]:
+        """Look up based on prompt and llm_string."""
+        return self._cache.get((prompt, llm_string), None)
+
+    def update(self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE) -> None:
+        """Update cache based on prompt and llm_string."""
+        self._cache[(prompt, llm_string)] = return_val
+
+    def clear(self, **kwargs: Any) -> None:
+        """Clear cache."""
+        self._cache = {}
+
+    async def alookup(self, prompt: str, llm_string: str) -> Optional[RETURN_VAL_TYPE]:
+        """Look up based on prompt and llm_string."""
+        return self.lookup(prompt, llm_string)
+
+    async def aupdate(
+        self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE
+    ) -> None:
+        """Update cache based on prompt and llm_string."""
+        self.update(prompt, llm_string, return_val)
+
+    async def aclear(self, **kwargs: Any) -> None:
+        """Clear cache."""
+        self.clear()