anthropic[major]: support python 3.13 (#27916)

Last week Anthropic released version 0.39.0 of its python sdk, which enabled support for Python 3.13. This release deleted a legacy `client.count_tokens` method, which we currently access during init of the `Anthropic` LLM. Anthropic has replaced this functionality with the [client.beta.messages.count_tokens() API](anthropics/anthropic-sdk-python#726). To enable support for `anthropic >= 0.39.0` and Python 3.13, here we drop support for the legacy token counting method, and add support for the new method via `ChatAnthropic.get_num_tokens_from_messages`. To fully support the token counting API, we update the signature of `get_num_tokens_from_message` to accept tools everywhere. --------- Co-authored-by: Bagatur <[email protected]>
langchain-ai · Nov 12, 2024 · 1538ee1 · 1538ee1
1 parent 759b6ed
commit 1538ee1
Show file tree

Hide file tree

Showing 14 changed files with 534 additions and 542 deletions.
diff --git a/.github/scripts/check_diff.py b/.github/scripts/check_diff.py
@@ -37,7 +37,6 @@
 PY_312_MAX_PACKAGES = [
     f"libs/partners/{integration}"
     for integration in [
-        "anthropic",
         "chroma",
         "couchbase",
         "huggingface",

diff --git a/libs/community/langchain_community/chat_models/anyscale.py b/libs/community/langchain_community/chat_models/anyscale.py
@@ -5,10 +5,22 @@
 import logging
 import os
 import sys
-from typing import TYPE_CHECKING, Any, Dict, Optional, Set
+import warnings
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Callable,
+    Dict,
+    Optional,
+    Sequence,
+    Set,
+    Type,
+    Union,
+)
 
 import requests
 from langchain_core.messages import BaseMessage
+from langchain_core.tools import BaseTool
 from langchain_core.utils import convert_to_secret_str, get_from_dict_or_env
 from pydantic import Field, SecretStr, model_validator
 
@@ -197,10 +209,20 @@ def _get_encoding_model(self) -> tuple[str, tiktoken.Encoding]:
             encoding = tiktoken_.get_encoding(model)
         return model, encoding
 
-    def get_num_tokens_from_messages(self, messages: list[BaseMessage]) -> int:
+    def get_num_tokens_from_messages(
+        self,
+        messages: list[BaseMessage],
+        tools: Optional[
+            Sequence[Union[Dict[str, Any], Type, Callable, BaseTool]]
+        ] = None,
+    ) -> int:
         """Calculate num tokens with tiktoken package.
         Official documentation: https://github.com/openai/openai-cookbook/blob/main/examples/How_to_format_inputs_to_ChatGPT_models.ipynb
         """
+        if tools is not None:
+            warnings.warn(
+                "Counting tokens in tool schemas is not yet supported. Ignoring tools."
+            )
         if sys.version_info[1] <= 7:
             return super().get_num_tokens_from_messages(messages)
         model, encoding = self._get_encoding_model()

diff --git a/libs/community/langchain_community/chat_models/everlyai.py b/libs/community/langchain_community/chat_models/everlyai.py
@@ -4,9 +4,21 @@
 
 import logging
 import sys
-from typing import TYPE_CHECKING, Any, Dict, Optional, Set
+import warnings
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Callable,
+    Dict,
+    Optional,
+    Sequence,
+    Set,
+    Type,
+    Union,
+)
 
 from langchain_core.messages import BaseMessage
+from langchain_core.tools import BaseTool
 from langchain_core.utils import convert_to_secret_str, get_from_dict_or_env
 from pydantic import Field, model_validator
 
@@ -138,11 +150,21 @@ def _get_encoding_model(self) -> tuple[str, tiktoken.Encoding]:
             encoding = tiktoken_.get_encoding(model)
         return model, encoding
 
-    def get_num_tokens_from_messages(self, messages: list[BaseMessage]) -> int:
+    def get_num_tokens_from_messages(
+        self,
+        messages: list[BaseMessage],
+        tools: Optional[
+            Sequence[Union[Dict[str, Any], Type, Callable, BaseTool]]
+        ] = None,
+    ) -> int:
         """Calculate num tokens with tiktoken package.
 
         Official documentation: https://github.com/openai/openai-cookbook/blob/
         main/examples/How_to_format_inputs_to_ChatGPT_models.ipynb"""
+        if tools is not None:
+            warnings.warn(
+                "Counting tokens in tool schemas is not yet supported. Ignoring tools."
+            )
         if sys.version_info[1] <= 7:
             return super().get_num_tokens_from_messages(messages)
         model, encoding = self._get_encoding_model()

diff --git a/libs/community/langchain_community/chat_models/openai.py b/libs/community/langchain_community/chat_models/openai.py
@@ -46,6 +46,7 @@
 )
 from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResult
 from langchain_core.runnables import Runnable
+from langchain_core.tools import BaseTool
 from langchain_core.utils import (
     get_from_dict_or_env,
     get_pydantic_field_names,
@@ -644,11 +645,21 @@ def get_token_ids(self, text: str) -> List[int]:
         _, encoding_model = self._get_encoding_model()
         return encoding_model.encode(text)
 
-    def get_num_tokens_from_messages(self, messages: List[BaseMessage]) -> int:
+    def get_num_tokens_from_messages(
+        self,
+        messages: List[BaseMessage],
+        tools: Optional[
+            Sequence[Union[Dict[str, Any], Type, Callable, BaseTool]]
+        ] = None,
+    ) -> int:
         """Calculate num tokens for gpt-3.5-turbo and gpt-4 with tiktoken package.
 
         Official documentation: https://github.com/openai/openai-cookbook/blob/
         main/examples/How_to_format_inputs_to_ChatGPT_models.ipynb"""
+        if tools is not None:
+            warnings.warn(
+                "Counting tokens in tool schemas is not yet supported. Ignoring tools."
+            )
         if sys.version_info[1] <= 7:
             return super().get_num_tokens_from_messages(messages)
         model, encoding = self._get_encoding_model()

diff --git a/libs/core/langchain_core/language_models/base.py b/libs/core/langchain_core/language_models/base.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import warnings
 from abc import ABC, abstractmethod
 from collections.abc import Mapping, Sequence
 from functools import cache
@@ -364,17 +365,31 @@ def get_num_tokens(self, text: str) -> int:
         """
         return len(self.get_token_ids(text))
 
-    def get_num_tokens_from_messages(self, messages: list[BaseMessage]) -> int:
+    def get_num_tokens_from_messages(
+        self,
+        messages: list[BaseMessage],
+        tools: Optional[Sequence] = None,
+    ) -> int:
         """Get the number of tokens in the messages.
 
         Useful for checking if an input fits in a model's context window.
 
+        **Note**: the base implementation of get_num_tokens_from_messages ignores
+        tool schemas.
+
         Args:
             messages: The message inputs to tokenize.
+            tools: If provided, sequence of dict, BaseModel, function, or BaseTools
+                to be converted to tool schemas.
 
         Returns:
             The sum of the number of tokens across the messages.
         """
+        if tools is not None:
+            warnings.warn(
+                "Counting tokens in tool schemas is not yet supported. Ignoring tools.",
+                stacklevel=2,
+            )
         return sum([self.get_num_tokens(get_buffer_string([m])) for m in messages])
 
     @classmethod

diff --git a/libs/core/tests/unit_tests/messages/test_utils.py b/libs/core/tests/unit_tests/messages/test_utils.py
@@ -1,5 +1,8 @@
 import base64
 import json
+import typing
+from collections.abc import Sequence
+from typing import Any, Callable, Optional, Union
 
 import pytest
 
@@ -19,6 +22,7 @@
     merge_message_runs,
     trim_messages,
 )
+from langchain_core.tools import BaseTool
 
 
 @pytest.mark.parametrize("msg_cls", [HumanMessage, AIMessage, SystemMessage])
@@ -431,7 +435,15 @@ def dummy_token_counter(messages: list[BaseMessage]) -> int:
 
 
 class FakeTokenCountingModel(FakeChatModel):
-    def get_num_tokens_from_messages(self, messages: list[BaseMessage]) -> int:
+    def get_num_tokens_from_messages(
+        self,
+        messages: list[BaseMessage],
+        tools: Optional[
+            Sequence[
+                Union[typing.Dict[str, Any], type, Callable, BaseTool]  # noqa: UP006
+            ]
+        ] = None,
+    ) -> int:
         return dummy_token_counter(messages)
 
 

diff --git a/libs/langchain/tests/unit_tests/chat_models/test_base.py b/libs/langchain/tests/unit_tests/chat_models/test_base.py
@@ -3,7 +3,6 @@
 
 import pytest
 from langchain_core.language_models import BaseChatModel
-from langchain_core.messages import HumanMessage
 from langchain_core.prompts import ChatPromptTemplate
 from langchain_core.runnables import RunnableConfig, RunnableSequence
 from pydantic import SecretStr
@@ -180,9 +179,6 @@ def test_configurable_with_default() -> None:
     )
 
     assert model_with_config.model == "claude-3-sonnet-20240229"  # type: ignore[attr-defined]
-    # Anthropic defaults to using `transformers` for token counting.
-    with pytest.raises(ImportError):
-        model_with_config.get_num_tokens_from_messages([(HumanMessage("foo"))])  # type: ignore[attr-defined]
 
     assert model_with_config.model_dump() == {  # type: ignore[attr-defined]
         "name": None,

diff --git a/libs/partners/anthropic/langchain_anthropic/chat_models.py b/libs/partners/anthropic/langchain_anthropic/chat_models.py
@@ -21,7 +21,7 @@
 )
 
 import anthropic
-from langchain_core._api import deprecated
+from langchain_core._api import beta, deprecated
 from langchain_core.callbacks import (
     AsyncCallbackManagerForLLMRun,
     CallbackManagerForLLMRun,
@@ -1113,6 +1113,41 @@ class AnswerWithJustification(BaseModel):
         else:
             return llm | output_parser
 
+    @beta()
+    def get_num_tokens_from_messages(
+        self,
+        messages: List[BaseMessage],
+        tools: Optional[
+            Sequence[Union[Dict[str, Any], Type, Callable, BaseTool]]
+        ] = None,
+    ) -> int:
+        """Count tokens in a sequence of input messages.
+
+        Args:
+            messages: The message inputs to tokenize.
+            tools: If provided, sequence of dict, BaseModel, function, or BaseTools
+                to be converted to tool schemas.
+
+        .. versionchanged:: 0.3.0
+
+                Uses Anthropic's token counting API to count tokens in messages. See:
+                https://docs.anthropic.com/en/docs/build-with-claude/token-counting
+        """
+        formatted_system, formatted_messages = _format_messages(messages)
+        kwargs: Dict[str, Any] = {}
+        if isinstance(formatted_system, str):
+            kwargs["system"] = formatted_system
+        if tools:
+            kwargs["tools"] = [convert_to_anthropic_tool(tool) for tool in tools]
+
+        response = self._client.beta.messages.count_tokens(
+            betas=["token-counting-2024-11-01"],
+            model=self.model,
+            messages=formatted_messages,  # type: ignore[arg-type]
+            **kwargs,
+        )
+        return response.input_tokens
+
 
 class AnthropicTool(TypedDict):
     """Anthropic tool definition."""

diff --git a/libs/partners/anthropic/langchain_anthropic/llms.py b/libs/partners/anthropic/langchain_anthropic/llms.py
@@ -109,7 +109,6 @@ def validate_environment(self) -> Self:
         )
         self.HUMAN_PROMPT = anthropic.HUMAN_PROMPT
         self.AI_PROMPT = anthropic.AI_PROMPT
-        self.count_tokens = self.client.count_tokens
         return self
 
     @property
@@ -375,9 +374,11 @@ async def _astream(
 
     def get_num_tokens(self, text: str) -> int:
         """Calculate number of tokens."""
-        if not self.count_tokens:
-            raise NameError("Please ensure the anthropic package is loaded")
-        return self.count_tokens(text)
+        raise NotImplementedError(
+            "Anthropic's legacy count_tokens method was removed in anthropic 0.39.0 "
+            "and langchain-anthropic 0.3.0. Please use "
+            "ChatAnthropic.get_num_tokens_from_messages instead."
+        )
 
 
 @deprecated(since="0.1.0", removal="0.3.0", alternative="AnthropicLLM")