Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

community: add reka chat model integration #27379

Merged
merged 48 commits into from
Nov 15, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
Show all changes
48 commits
Select commit Hold shift + click to select a range
58858ce
Integration test
findalexli Sep 10, 2024
6f1d259
Linting fix
findalexli Sep 10, 2024
5ecad6d
code formatter changes
findalexli Sep 11, 2024
e0637a5
Ruff linting check
findalexli Sep 12, 2024
e936317
Unit test update
findalexli Sep 12, 2024
a7fe379
Included dependency
findalexli Oct 10, 2024
cf807af
Fixed linting
findalexli Oct 11, 2024
6973ad0
Track changes to tool integration
findalexli Oct 14, 2024
241f8eb
Merge branch 'langchain-ai:master' into master
findalexli Oct 15, 2024
64adf02
Fix linting
findalexli Oct 15, 2024
1d25d13
Update notebook with tool use example
findalexli Oct 15, 2024
9da7eed
Merge branch 'master' into master
findalexli Oct 15, 2024
6471ad1
Tool use enforce a version
findalexli Oct 15, 2024
90d52f9
Merge branch 'master' into master
efriis Oct 16, 2024
ca4cf2e
use pytest mark requires reka-api
findalexli Oct 16, 2024
142f4ca
Lint and mark test requiring reka-api
findalexli Oct 16, 2024
43972a8
Update reka import in notebook
findalexli Oct 16, 2024
eacde83
Import order update to pass unit test
findalexli Oct 16, 2024
00ff647
Merge branch 'master' into master
findalexli Oct 18, 2024
1b66b63
Merge branch 'master' into master
vbarda Oct 21, 2024
2e94393
lint
vbarda Oct 24, 2024
0158f76
Merge branch 'master' into master
vbarda Oct 24, 2024
72446fa
remove extra whitespace
vbarda Oct 24, 2024
3bdc725
Remove unused model list
findalexli Oct 25, 2024
20caad1
Merge branch 'master' into master
findalexli Oct 25, 2024
2c5c898
Merge branch 'master' into master
findalexli Oct 30, 2024
e0c7eec
Combine reka chat into one class
findalexli Nov 1, 2024
5343ec5
Unit and integration test with system messages
findalexli Nov 1, 2024
541f0aa
Notebook doc update (pip, e2e agent)
findalexli Nov 1, 2024
05c427c
Lint/format
findalexli Nov 1, 2024
f82e2c4
Merge branch 'master' into master
findalexli Nov 1, 2024
3489b99
Linted notebook
findalexli Nov 5, 2024
b59fb7f
add chatke in test import
findalexli Nov 5, 2024
c7d75a2
add tiktoken token count method
findalexli Nov 5, 2024
d6f0ef4
Add required header in doc notebook
findalexli Nov 6, 2024
c72aefb
token count update
findalexli Nov 6, 2024
a229d20
Type consistency for token counter
findalexli Nov 8, 2024
5939ac6
Merge branch 'master' into master
vbarda Nov 8, 2024
33ad1d2
Merge branch 'master' into master
vbarda Nov 14, 2024
88a24fe
remove from deps
vbarda Nov 15, 2024
d126131
remove scheduled
vbarda Nov 15, 2024
b395905
skip
vbarda Nov 15, 2024
21e62f4
lint
vbarda Nov 15, 2024
b3ea9de
skip
vbarda Nov 15, 2024
6874313
update
vbarda Nov 15, 2024
4eab208
fix link
vbarda Nov 15, 2024
5b9f422
Merge branch 'master' into master
vbarda Nov 15, 2024
e1e79cf
update concepts link
vbarda Nov 15, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 72 additions & 12 deletions docs/docs/integrations/chat/reka.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"# Initialize a client"
"## Instantiation"
]
},
{
Expand Down Expand Up @@ -110,7 +110,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -123,21 +123,21 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"# Single turn text message"
"## Invocation"
]
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"AIMessage(content=' Hello! How can I help you today? If you have a question, need assistance, or just want to chat, feel free to let me know. Have a great day!\\n\\n', additional_kwargs={}, response_metadata={}, id='run-b40e505a-5110-451a-92e6-a2a34988472c-0')"
"AIMessage(content=' Hello! How can I help you today? If you have a question, need assistance, or just want to chat, feel free to let me know. Have a great day!\\n\\n', additional_kwargs={}, response_metadata={}, id='run-61522ec2-0587-4fd5-a492-5b205fd8860c-0')"
]
},
"execution_count": 3,
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -155,14 +155,14 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" The image shows an indoor setting with no visible weather elements. It features a cat on a desk licking a computer keyboard. The background includes a computer monitor, a desk with a few items like a pen holder and a mobile phone, and a glimpse of a window with blinds partially drawn.\n"
" The image shows an indoor setting with no visible windows or natural light, and there are no indicators of weather conditions. The focus is on a cat sitting on a computer keyboard, and the background includes a computer monitor and various office supplies.\n"
]
}
],
Expand Down Expand Up @@ -193,18 +193,18 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" The first image shows two German Shepherds, one adult and one puppy, in a grassy field. The adult dog is carrying a large stick in its mouth, indicating playfulness or a game being played. The background features a natural, leafy environment, suggesting an outdoor setting conducive to activities like running or training.\n",
" The first image features two German Shepherds, one adult and one puppy, in a vibrant, lush green setting. The adult dog is carrying a large stick in its mouth, running through what appears to be a grassy field, with the puppy following close behind. Both dogs exhibit striking physical characteristics typical of the breed, such as pointed ears and dense fur.\n",
"\n",
"The second image features a close-up of a single cat with striking blue eyes, set against a background of dry leaves or grass. The cat has a calm and somewhat intense expression, with its fur neatly groomed and whiskers prominently visible. The focus is on the cat's face, capturing its serene demeanor in a quiet, natural outdoor setting.\n",
"The second image shows a close-up of a single cat with striking blue eyes, likely a breed like the Siberian or Maine Coon, in a natural outdoor setting. The cat's fur is lighter, possibly a mix of white and gray, and it has a more subdued expression compared to the dogs. The background is blurred, suggesting a focus on the cat's face.\n",
"\n",
"The main differences lie in the subjects (dogs vs. cat) and their expressions (playful vs. serene), as well as the composition and focus of the images (outdoor play vs. close-up portrait).\n"
"Overall, the differences lie in the subjects (two dogs vs. one cat), the setting (lush, vibrant grassy field vs. a more muted outdoor background), and the overall mood and activity depicted (playful and active vs. serene and focused).\n"
]
}
],
Expand All @@ -230,6 +230,52 @@
"print(response.content)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Chaining"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"AIMessage(content=' Ich liebe Programmieren.\\n\\n', additional_kwargs={}, response_metadata={}, id='run-ffc4ace1-b73a-4fb3-ad0f-57e60a0f9b8d-0')"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from langchain_core.prompts import ChatPromptTemplate\n",
"\n",
"prompt = ChatPromptTemplate(\n",
" [\n",
" (\n",
" \"system\",\n",
" \"You are a helpful assistant that translates {input_language} to {output_language}.\",\n",
" ),\n",
" (\"human\", \"{input}\"),\n",
" ]\n",
")\n",
"\n",
"chain = prompt | model\n",
"chain.invoke(\n",
" {\n",
" \"input_language\": \"English\",\n",
" \"output_language\": \"German\",\n",
" \"input\": \"I love programming.\",\n",
" }\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
Expand Down Expand Up @@ -507,6 +553,20 @@
" print(chunk)\n",
" print(\"----\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## API reference"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"https://docs.reka.ai/quick-start"
]
}
],
"metadata": {
Expand Down
1 change: 1 addition & 0 deletions libs/community/extended_testing_deps.txt
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ telethon>=1.28.5,<2
tidb-vector>=0.0.3,<1.0.0
timescale-vector==0.0.1
tqdm>=4.48.0
tiktoken>=0.8.0
tree-sitter>=0.20.2,<0.21
tree-sitter-languages>=1.8.0,<2
upstash-redis>=1.1.0,<2
Expand Down
31 changes: 25 additions & 6 deletions libs/community/langchain_community/chat_models/reka.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
Union,
)

import tiktoken
from langchain_core.callbacks import (
AsyncCallbackManagerForLLMRun,
CallbackManagerForLLMRun,
Expand Down Expand Up @@ -156,7 +155,9 @@ class ChatReka(BaseChatModel):
reka_api_key: Optional[str] = None
model_kwargs: Dict[str, Any] = Field(default_factory=dict)
model_config = ConfigDict(extra="forbid")
_tiktoken_encoder = None
token_counter: Optional[
Union[Callable[[list[BaseMessage]], int], Callable[[BaseMessage], int]]
] = None

@model_validator(mode="before")
@classmethod
Expand Down Expand Up @@ -329,11 +330,29 @@ async def _agenerate(

return ChatResult(generations=[ChatGeneration(message=message)])

def get_num_tokens(self, text: str) -> int:
def get_num_tokens(self, input: Union[str, BaseMessage, List[BaseMessage]]) -> int:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks like the input type here does not match the one in the token_counter callable (the linter is failing).

"""Calculate number of tokens."""
if self._tiktoken_encoder is None:
self._tiktoken_encoder = tiktoken.get_encoding("cl100k_base")
return len(self._tiktoken_encoder.encode(text))
# Initialize encoder if not already set

if self.token_counter is None:
try:
import tiktoken
except ImportError:
raise ImportError(
"Could not import tiktoken python package. "
"Please install it with `pip install tiktoken`."
)
encoding = tiktoken.get_encoding("cl100k_base")

if isinstance(input, str):
return len(encoding.encode(input))
elif isinstance(input, BaseMessage):
return len(encoding.encode(input.content))
elif isinstance(input, list):
return sum(len(encoding.encode(msg.content)) for msg in input)
raise ValueError(f"Got unexpected type for input: {type(input)}")

return self.token_counter(input)

def bind_tools(
self,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
"ChatVertexAI",
"ChatYandexGPT",
"ChatYuan2",
"ChatReKa",
"ChatReka",
"ChatZhipuAI",
"ErnieBotChat",
"FakeListChatModel",
Expand Down
37 changes: 30 additions & 7 deletions libs/community/tests/unit_tests/chat_models/test_reka.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
from unittest.mock import MagicMock, patch

import pytest
import tiktoken
from langchain_core.messages import AIMessage, BaseMessage, HumanMessage, SystemMessage
from pydantic import ValidationError

Expand Down Expand Up @@ -305,20 +304,44 @@ def test_multiple_system_messages_error() -> None:
convert_to_reka_messages(messages)


@pytest.mark.requires("tiktoken")
@pytest.mark.requires("reka")
def test_get_num_tokens() -> None:
"""Test that token counting works correctly."""
"""Test that token counting works correctly for different input types."""
llm = ChatReka()
import tiktoken

# Test basic text
encoding = tiktoken.get_encoding("cl100k_base")

# Test string input
text = "Hello, world!"
expected_tokens = len(tiktoken.get_encoding("cl100k_base").encode(text))
expected_tokens = len(encoding.encode(text))
assert llm.get_num_tokens(text) == expected_tokens

# Test empty string
# Test BaseMessage input
message = HumanMessage(content="What is the weather like today?")
expected_tokens = len(encoding.encode(message.content))
assert llm.get_num_tokens(message) == expected_tokens

# Test List[BaseMessage] input
messages = [
SystemMessage(content="You are a helpful assistant."),
HumanMessage(content="Hi!"),
AIMessage(content="Hello! How can I help you today?"),
]
expected_tokens = sum(len(encoding.encode(msg.content)) for msg in messages)
assert llm.get_num_tokens(messages) == expected_tokens

# Test empty inputs
assert llm.get_num_tokens("") == 0
assert llm.get_num_tokens(HumanMessage(content="")) == 0
assert llm.get_num_tokens([]) == 0

# Test longer text with special characters
# Test complex text with special characters
complex_text = "Hello 🌍! This is a test of the token counting"
expected_tokens = len(tiktoken.get_encoding("cl100k_base").encode(complex_text))
expected_tokens = len(encoding.encode(complex_text))
assert llm.get_num_tokens(complex_text) == expected_tokens

# Test invalid input type
with pytest.raises(ValueError, match="Got unexpected type for input:"):
llm.get_num_tokens(123) # type: ignore
Loading