Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

standard-tests[patch]: add tests for runnables as tools and streaming usage metadata #24153

Merged
merged 3 commits into from
Jul 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions libs/partners/ai21/tests/integration_tests/test_standard.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,14 +33,18 @@ def chat_model_params(self) -> dict:
"model": "j2-ultra",
}

@pytest.mark.xfail(reason="Streaming is not supported for Jurassic models.")
def test_stream(self, model: BaseChatModel) -> None:
    """Run the inherited ``test_stream`` check as an expected failure.

    The override exists only to attach the ``xfail`` mark; the body simply
    delegates to the parent implementation. A single mark is kept so the
    reported reason is unambiguous and matches the other streaming
    overrides in this class.

    Args:
        model: Chat model instance under test.
    """
    super().test_stream(model)

@pytest.mark.xfail(reason="Streaming is not supported for Jurassic models.")
async def test_astream(self, model: BaseChatModel) -> None:
    """Run the inherited async ``test_astream`` check as an expected failure.

    The override exists only to attach the ``xfail`` mark; the body awaits
    the parent implementation. A single mark is kept so the reported reason
    is unambiguous and matches the other streaming overrides in this class.

    Args:
        model: Chat model instance under test.
    """
    await super().test_astream(model)

@pytest.mark.xfail(reason="Streaming is not supported for Jurassic models.")
def test_usage_metadata_streaming(self, model: BaseChatModel) -> None:
    """Delegate to the parent streaming-usage test, marked as expected to fail.

    Args:
        model: Chat model instance under test.
    """
    super().test_usage_metadata_streaming(model)


class TestAI21Jamba(BaseTestAI21):
@property
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import os
from typing import Type

import pytest
from langchain_core.language_models import BaseChatModel
from langchain_standard_tests.integration_tests import ChatModelIntegrationTests

Expand Down Expand Up @@ -30,3 +31,7 @@ def chat_model_params(self) -> dict:
"azure_endpoint": OPENAI_API_BASE,
"api_key": OPENAI_API_KEY,
}

@pytest.mark.xfail(reason="Not yet supported.")
def test_usage_metadata_streaming(self, model: BaseChatModel) -> None:
    """Delegate to the parent streaming-usage test, marked as expected to fail.

    Args:
        model: Chat model instance under test.
    """
    super().test_usage_metadata_streaming(model)
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def chat_model_class(self) -> Type[BaseChatModel]:

@property
def chat_model_params(self) -> dict:
    """Return the parameters for the chat model under test.

    ``stream_usage`` is switched on alongside the model name.
    NOTE(review): presumably this is what lets the streaming usage-metadata
    test observe token counts -- confirm against the model's documentation.

    Returns:
        A dict with the model name and ``stream_usage`` set to ``True``.
    """
    # A single return: an earlier bare {"model": "gpt-4o"} return would make
    # the stream_usage variant unreachable.
    return {"model": "gpt-4o", "stream_usage": True}

@property
def supports_image_inputs(self) -> bool:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,7 @@ def chat_model_params(self) -> dict:
@pytest.mark.xfail(reason="May not call a tool.")
def test_tool_calling_with_no_arguments(self, model: BaseChatModel) -> None:
    """Delegate to the parent no-argument tool-calling test, expected to fail.

    Args:
        model: Chat model instance under test.
    """
    super().test_tool_calling_with_no_arguments(model)

@pytest.mark.xfail(reason="Not yet supported.")
def test_usage_metadata_streaming(self, model: BaseChatModel) -> None:
    """Delegate to the parent streaming-usage test, marked as expected to fail.

    Args:
        model: Chat model instance under test.
    """
    super().test_usage_metadata_streaming(model)
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

import httpx
import pytest
from langchain_core.language_models import BaseChatModel
from langchain_core.language_models import BaseChatModel, GenericFakeChatModel
from langchain_core.messages import (
AIMessage,
AIMessageChunk,
Expand All @@ -14,6 +14,8 @@
SystemMessage,
ToolMessage,
)
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_core.tools import tool

Expand Down Expand Up @@ -129,6 +131,19 @@ def test_usage_metadata(self, model: BaseChatModel) -> None:
assert isinstance(result.usage_metadata["output_tokens"], int)
assert isinstance(result.usage_metadata["total_tokens"], int)

def test_usage_metadata_streaming(self, model: BaseChatModel) -> None:
    """Check that streamed chunks add up to a message with usage metadata.

    Streams a reply to "Hello", requires every chunk to be an
    ``AIMessageChunk``, sums the chunks, and then asserts that the summed
    message carries integer ``input_tokens``, ``output_tokens`` and
    ``total_tokens`` entries. The test is skipped when
    ``self.returns_usage_metadata`` is falsy.

    Args:
        model: Chat model instance under test.
    """
    if not self.returns_usage_metadata:
        pytest.skip("Not implemented.")

    aggregate: Optional[BaseMessageChunk] = None
    for piece in model.stream("Hello"):
        assert isinstance(piece, AIMessageChunk)
        if aggregate is None:
            aggregate = piece
        else:
            aggregate = aggregate + piece

    # Also fails if the stream produced no chunks (aggregate stays None).
    assert isinstance(aggregate, AIMessageChunk)
    usage = aggregate.usage_metadata
    assert usage is not None
    for key in ("input_tokens", "output_tokens", "total_tokens"):
        assert isinstance(usage[key], int)

def test_stop_sequence(self, model: BaseChatModel) -> None:
result = model.invoke("hi", stop=["you"])
assert isinstance(result, AIMessage)
Expand Down Expand Up @@ -171,6 +186,23 @@ def test_tool_calling_with_no_arguments(self, model: BaseChatModel) -> None:
assert isinstance(full, AIMessage)
_validate_tool_call_message_no_args(full)

def test_bind_runnables_as_tools(self, model: BaseChatModel) -> None:
    """Check that a runnable chain converted to a tool can be bound and called.

    A prompt | fake-LLM | string-parser chain is converted with ``as_tool()``
    and bound to the model under test. The model is then asked to use the
    tool, and the first tool call it emits must carry a truthy
    ``answer_style`` argument. Only the emitted tool call is inspected; the
    wrapped chain itself is not invoked here. The test is skipped when
    ``self.has_tool_calling`` is falsy.

    Args:
        model: Chat model instance under test.
    """
    if not self.has_tool_calling:
        pytest.skip("Test requires tool calling.")

    style_prompt = ChatPromptTemplate.from_messages(
        [("human", "Hello. Please respond in the style of {answer_style}.")]
    )
    fake_llm = GenericFakeChatModel(messages=iter(["hello matey"]))
    pipeline = style_prompt | fake_llm | StrOutputParser()
    bound_model = model.bind_tools([pipeline.as_tool()])

    response = bound_model.invoke(
        "Using the tool, ask a Pirate how it would say hello."
    )
    assert isinstance(response, AIMessage)
    assert response.tool_calls
    first_call = response.tool_calls[0]
    assert first_call["args"].get("answer_style")

def test_structured_output(self, model: BaseChatModel) -> None:
if not self.has_tool_calling:
pytest.skip("Test requires tool calling.")
Expand Down
Loading