From efbfc54ded449a4dc30f577971e30923c74a5a89 Mon Sep 17 00:00:00 2001
From: Prithvi Kannan <46332835+prithvikannan@users.noreply.github.com>
Date: Wed, 11 Dec 2024 10:48:28 -0800
Subject: [PATCH] Port databricks-langchain integration tests (#26)

* Port databricks-langchain integration tests

Signed-off-by: Prithvi Kannan

* format

Signed-off-by: Prithvi Kannan

* only unit tests in ci

Signed-off-by: Prithvi Kannan

* update import

Signed-off-by: Prithvi Kannan

* lint

Signed-off-by: Prithvi Kannan

---------

Signed-off-by: Prithvi Kannan
---
 .github/workflows/main.yml                    |   2 +-
 .../tests/integration_tests/__init__.py       |   0
 .../integration_tests/test_chat_models.py     | 420 ++++++++++++++++++
 .../tests/integration_tests/test_compile.py   |   7 +
 .../integration_tests/test_embeddings.py      |  29 ++
 .../integration_tests/test_vectorstore.py     |  46 ++
 .../langchain/tests/unit_tests/__init__.py    |   0
 .../{ => unit_tests}/test_chat_models.py      |   0
 .../tests/{ => unit_tests}/test_embeddings.py |   0
 .../tests/{ => unit_tests}/test_genie.py      |   0
 .../{ => unit_tests}/test_vectorstores.py     |   0
 11 files changed, 503 insertions(+), 1 deletion(-)
 create mode 100644 integrations/langchain/tests/integration_tests/__init__.py
 create mode 100644 integrations/langchain/tests/integration_tests/test_chat_models.py
 create mode 100644 integrations/langchain/tests/integration_tests/test_compile.py
 create mode 100644 integrations/langchain/tests/integration_tests/test_embeddings.py
 create mode 100644 integrations/langchain/tests/integration_tests/test_vectorstore.py
 create mode 100644 integrations/langchain/tests/unit_tests/__init__.py
 rename integrations/langchain/tests/{ => unit_tests}/test_chat_models.py (100%)
 rename integrations/langchain/tests/{ => unit_tests}/test_embeddings.py (100%)
 rename integrations/langchain/tests/{ => unit_tests}/test_genie.py (100%)
 rename integrations/langchain/tests/{ => unit_tests}/test_vectorstores.py (100%)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index fa5e819..08781c6 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -68,4 +68,4 @@ jobs:
           pip install integrations/langchain[dev]
       - name: Run tests
         run: |
-          pytest integrations/langchain/tests
+          pytest integrations/langchain/tests/unit_tests
diff --git a/integrations/langchain/tests/integration_tests/__init__.py b/integrations/langchain/tests/integration_tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/integrations/langchain/tests/integration_tests/test_chat_models.py b/integrations/langchain/tests/integration_tests/test_chat_models.py
new file mode 100644
index 0000000..cbce738
--- /dev/null
+++ b/integrations/langchain/tests/integration_tests/test_chat_models.py
@@ -0,0 +1,420 @@
+"""
+This file contains the integration test for the ChatDatabricks class.
+
+We run the integration tests nightly via the trusted CI/CD system defined in
+a private repository, so that the tests run securely. With this design, the
+integration tests are not intended to be run manually by OSS contributors.
+If you want to update the ChatDatabricks implementation and you think the
+corresponding integration test needs to be updated, please contact the
+maintainers of the repository to verify the changes.
+"""
+
+from typing import Annotated
+from unittest import mock
+
+import pytest
+from langchain.agents import AgentExecutor, create_tool_calling_agent
+from langchain_core.callbacks.base import BaseCallbackHandler
+from langchain_core.messages import (
+    AIMessage,
+    AIMessageChunk,
+    HumanMessage,
+    SystemMessage,
+    ToolMessage,
+)
+from langchain_core.output_parsers import StrOutputParser
+from langchain_core.prompts import ChatPromptTemplate
+from langchain_core.tools import tool
+from langgraph.checkpoint.memory import MemorySaver
+from langgraph.graph import START, StateGraph
+from langgraph.graph.message import add_messages
+from langgraph.prebuilt import ToolNode, create_react_agent, tools_condition
+from pydantic import BaseModel, Field
+from typing_extensions import TypedDict
+
+from databricks_langchain.chat_models import ChatDatabricks
+
+_TEST_ENDPOINT = "databricks-meta-llama-3-70b-instruct"
+
+
+def test_chat_databricks_invoke():
+    chat = ChatDatabricks(endpoint=_TEST_ENDPOINT, temperature=0, max_tokens=10, stop=["Java"])
+
+    response = chat.invoke("How to learn Java? Start the response by 'To learn Java,'")
+    assert isinstance(response, AIMessage)
+    assert response.content == "To learn "
+    assert response.response_metadata["prompt_tokens"] == 24
+    assert response.response_metadata["completion_tokens"] == 3
+    assert response.response_metadata["total_tokens"] == 27
+
+    response = chat.invoke("How to learn Python? Start the response by 'To learn Python,'")
+    assert response.content.startswith("To learn Python,")
+    assert len(response.content.split(" ")) <= 15  # Give some margin for tokenization difference
+
+    # Call with a system message
+    response = chat.invoke(
+        [
+            ("system", "You are helpful programming tutor."),
+            ("user", "How to learn Python? Start the response by 'To learn Python,'"),
+        ]
+    )
+    assert response.content.startswith("To learn Python,")
+
+    # Call with message history
+    response = chat.invoke(
+        [
+            SystemMessage(content="You are helpful sports coach."),
+            HumanMessage(content="How to swim better?"),
+            AIMessage(content="You need more and more practice.", id="12345"),
+            HumanMessage(content="No, I need more tips."),
+        ]
+    )
+    assert response.content is not None
+
+
+def test_chat_databricks_invoke_multiple_completions():
+    chat = ChatDatabricks(
+        endpoint=_TEST_ENDPOINT,
+        temperature=0.5,
+        n=3,
+        max_tokens=10,
+    )
+    response = chat.invoke("How to learn Python?")
+    assert isinstance(response, AIMessage)
+
+
+def test_chat_databricks_stream():
+    class FakeCallbackHandler(BaseCallbackHandler):
+        def __init__(self):
+            self.chunk_counts = 0
+
+        def on_llm_new_token(self, *args, **kwargs):
+            self.chunk_counts += 1
+
+    callback = FakeCallbackHandler()
+
+    chat = ChatDatabricks(
+        endpoint=_TEST_ENDPOINT,
+        temperature=0,
+        stop=["Python"],
+        max_tokens=100,
+    )
+
+    chunks = list(chat.stream("How to learn Python?", config={"callbacks": [callback]}))
+    assert len(chunks) > 0
+    assert all(isinstance(chunk, AIMessageChunk) for chunk in chunks)
+    assert all("Python" not in chunk.content for chunk in chunks)
+    assert callback.chunk_counts == len(chunks)
+
+    last_chunk = chunks[-1]
+    assert last_chunk.response_metadata["finish_reason"] == "stop"
+
+
+def test_chat_databricks_stream_with_usage():
+    class FakeCallbackHandler(BaseCallbackHandler):
+        def __init__(self):
+            self.chunk_counts = 0
+
+        def on_llm_new_token(self, *args, **kwargs):
+            self.chunk_counts += 1
+
+    callback = FakeCallbackHandler()
+
+    chat = ChatDatabricks(
+        endpoint=_TEST_ENDPOINT,
+        temperature=0,
+        stop=["Python"],
+        max_tokens=100,
+        stream_usage=True,
+    )
+
+    chunks = list(chat.stream("How to learn Python?", config={"callbacks": [callback]}))
+    assert len(chunks) > 0
+    assert all(isinstance(chunk, AIMessageChunk) for chunk in chunks)
+    assert all("Python" not in chunk.content for chunk in chunks)
+    assert callback.chunk_counts == len(chunks)
+
+    last_chunk = chunks[-1]
+    assert last_chunk.response_metadata["finish_reason"] == "stop"
+    assert last_chunk.usage_metadata is not None
+    assert last_chunk.usage_metadata["input_tokens"] > 0
+    assert last_chunk.usage_metadata["output_tokens"] > 0
+    assert last_chunk.usage_metadata["total_tokens"] > 0
+
+
+@pytest.mark.asyncio
+async def test_chat_databricks_ainvoke():
+    chat = ChatDatabricks(
+        endpoint=_TEST_ENDPOINT,
+        temperature=0,
+        max_tokens=10,
+    )
+
+    response = await chat.ainvoke("How to learn Python? Start the response by 'To learn Python,'")
+    assert isinstance(response, AIMessage)
+    assert response.content.startswith("To learn Python,")
+
+
+async def test_chat_databricks_astream():
+    chat = ChatDatabricks(
+        endpoint=_TEST_ENDPOINT,
+        temperature=0,
+        max_tokens=10,
+    )
+    chunk_count = 0
+    async for chunk in chat.astream("How to learn Python?"):
+        assert isinstance(chunk, AIMessageChunk)
+        chunk_count += 1
+    assert chunk_count > 0
+
+
+@pytest.mark.asyncio
+async def test_chat_databricks_abatch():
+    chat = ChatDatabricks(
+        endpoint=_TEST_ENDPOINT,
+        temperature=0,
+        max_tokens=10,
+    )
+
+    responses = await chat.abatch(
+        [
+            "How to learn Python?",
+            "How to learn Java?",
+            "How to learn C++?",
+        ]
+    )
+    assert len(responses) == 3
+    assert all(isinstance(response, AIMessage) for response in responses)
+
+
+@pytest.mark.parametrize("tool_choice", [None, "auto", "required", "any", "none"])
+def test_chat_databricks_tool_calls(tool_choice):
+    chat = ChatDatabricks(
+        endpoint=_TEST_ENDPOINT,
+        temperature=0,
+        max_tokens=100,
+    )
+
+    class GetWeather(BaseModel):
+        """Get the current weather in a given location"""
+
+        location: str = Field(..., description="The city and state, e.g. San Francisco, CA")
+
+    llm_with_tools = chat.bind_tools([GetWeather], tool_choice=tool_choice)
+    question = "Which is the current weather in Los Angeles, CA?"
+    response = llm_with_tools.invoke(question)
+
+    if tool_choice == "none":
+        assert response.tool_calls == []
+        return
+
+    assert response.tool_calls == [
+        {
+            "name": "GetWeather",
+            "args": {"location": "Los Angeles, CA"},
+            "id": mock.ANY,
+            "type": "tool_call",
+        }
+    ]
+
+    tool_msg = ToolMessage(
+        "GetWeather",
+        tool_call_id=response.additional_kwargs["tool_calls"][0]["id"],
+    )
+    response = llm_with_tools.invoke(
+        [
+            HumanMessage(question),
+            response,
+            tool_msg,
+            HumanMessage("What about San Francisco, CA?"),
+        ]
+    )
+
+    assert response.tool_calls == [
+        {
+            "name": "GetWeather",
+            "args": {"location": "San Francisco, CA"},
+            "id": mock.ANY,
+            "type": "tool_call",
+        }
+    ]
+
+
+# Pydantic-based schema
+class AnswerWithJustification(BaseModel):
+    """An answer to the user question along with justification for the answer."""
+
+    answer: str = Field(description="The answer to the user question.")
+    justification: str = Field(description="The justification for the answer.")
+
+
+# Raw JSON schema
+JSON_SCHEMA = {
+    "title": "AnswerWithJustification",
+    "description": "An answer to the user question along with justification.",
+    "type": "object",
+    "properties": {
+        "answer": {
+            "type": "string",
+            "description": "The answer to the user question.",
+        },
+        "justification": {
+            "type": "string",
+            "description": "The justification for the answer.",
+        },
+    },
+    "required": ["answer", "justification"],
+}
+
+
+@pytest.mark.parametrize("schema", [AnswerWithJustification, JSON_SCHEMA, None])
+@pytest.mark.parametrize("method", ["function_calling", "json_mode"])
+def test_chat_databricks_with_structured_output(schema, method):
+    llm = ChatDatabricks(endpoint=_TEST_ENDPOINT)
+
+    if schema is None and method == "function_calling":
+        pytest.skip("Cannot use function_calling without schema")
+
+    structured_llm = llm.with_structured_output(schema, method=method)
+
+    if method == "function_calling":
+        prompt = "What day comes two days after Monday?"
+    else:
+        prompt = (
+            "What day comes two days after Monday? Return in JSON format with key "
+            "'answer' for the answer and 'justification' for the justification."
+        )
+
+    response = structured_llm.invoke(prompt)
+
+    if schema == AnswerWithJustification:
+        assert response.answer == "Wednesday"
+        assert response.justification is not None
+    else:
+        assert response["answer"] == "Wednesday"
+        assert response["justification"] is not None
+
+    # Invoke with raw output
+    structured_llm = llm.with_structured_output(schema, method=method, include_raw=True)
+    response_with_raw = structured_llm.invoke(prompt)
+    assert isinstance(response_with_raw["raw"], AIMessage)
+
+
+def test_chat_databricks_runnable_sequence():
+    chat = ChatDatabricks(
+        endpoint=_TEST_ENDPOINT,
+        temperature=0,
+        max_tokens=100,
+    )
+
+    prompt = ChatPromptTemplate.from_template("tell me a joke about {topic}")
+    chain = prompt | chat | StrOutputParser()
+
+    response = chain.invoke({"topic": "chicken"})
+    assert "chicken" in response
+
+
+@tool
+def add(a: int, b: int) -> int:
+    """Add two integers.
+
+    Args:
+        a: First integer
+        b: Second integer
+    """
+    return a + b
+
+
+@tool
+def multiply(a: int, b: int) -> int:
+    """Multiply two integers.
+
+    Args:
+        a: First integer
+        b: Second integer
+    """
+    return a * b
+
+
+def test_chat_databricks_agent_executor():
+    model = ChatDatabricks(
+        endpoint=_TEST_ENDPOINT,
+        temperature=0,
+        max_tokens=100,
+    )
+    tools = [add, multiply]
+    prompt = ChatPromptTemplate.from_messages(
+        [
+            ("system", "You are a helpful assistant"),
+            ("human", "{input}"),
+            ("placeholder", "{agent_scratchpad}"),
+        ]
+    )
+
+    agent = create_tool_calling_agent(model, tools, prompt)
+    agent_executor = AgentExecutor(agent=agent, tools=tools)
+
+    response = agent_executor.invoke({"input": "What is (10 + 5) * 3?"})
+    assert "45" in response["output"]
+
+
+def test_chat_databricks_langgraph():
+    model = ChatDatabricks(
+        endpoint=_TEST_ENDPOINT,
+        temperature=0,
+        max_tokens=100,
+    )
+    tools = [add, multiply]
+
+    app = create_react_agent(model, tools)
+    response = app.invoke({"messages": [("human", "What is (10 + 5) * 3?")]})
+    assert "45" in response["messages"][-1].content
+
+
+def test_chat_databricks_langgraph_with_memory():
+    class State(TypedDict):
+        messages: Annotated[list, add_messages]
+
+    tools = [add, multiply]
+    llm = ChatDatabricks(
+        endpoint=_TEST_ENDPOINT,
+        temperature=0,
+        max_tokens=100,
+    )
+    llm_with_tools = llm.bind_tools(tools)
+
+    def chatbot(state: State):
+        return {"messages": [llm_with_tools.invoke(state["messages"])]}
+
+    graph_builder = StateGraph(State)
+    graph_builder.add_node("chatbot", chatbot)
+
+    tool_node = ToolNode(tools=tools)
+    graph_builder.add_node("tools", tool_node)
+    graph_builder.add_conditional_edges("chatbot", tools_condition)
+    # Any time a tool is called, we return to the chatbot to decide the next step
+    graph_builder.add_edge("tools", "chatbot")
+    graph_builder.add_edge(START, "chatbot")
+
+    graph = graph_builder.compile(checkpointer=MemorySaver())
+
+    response = graph.invoke(
+        {"messages": [("user", "What is (10 + 5) * 3?")]},
+        config={"configurable": {"thread_id": "1"}},
+    )
+    assert "45" in response["messages"][-1].content
+
+    response = graph.invoke(
+        {"messages": [("user", "Subtract 5 from it")]},
+        config={"configurable": {"thread_id": "1"}},
+    )
+
+    # Interestingly, the agent sometimes mistakes the subtraction for addition:(
+    # In such a case, the agent asks for a retry, so we need one more step.
+    if "Let me try again." in response["messages"][-1].content:
+        response = graph.invoke(
+            {"messages": [("user", "Ok, try again")]},
+            config={"configurable": {"thread_id": "1"}},
+        )
+
+    assert "40" in response["messages"][-1].content
diff --git a/integrations/langchain/tests/integration_tests/test_compile.py b/integrations/langchain/tests/integration_tests/test_compile.py
new file mode 100644
index 0000000..33ecccd
--- /dev/null
+++ b/integrations/langchain/tests/integration_tests/test_compile.py
@@ -0,0 +1,7 @@
+import pytest
+
+
+@pytest.mark.compile
+def test_placeholder() -> None:
+    """Used for compiling integration tests without running any real tests."""
+    pass
diff --git a/integrations/langchain/tests/integration_tests/test_embeddings.py b/integrations/langchain/tests/integration_tests/test_embeddings.py
new file mode 100644
index 0000000..3abda59
--- /dev/null
+++ b/integrations/langchain/tests/integration_tests/test_embeddings.py
@@ -0,0 +1,29 @@
+"""
+This file contains the integration test for the DatabricksEmbeddings class.
+
+We run the integration tests nightly via the trusted CI/CD system defined in
+a private repository, so that the tests run securely. With this design, the
+integration tests are not intended to be run manually by OSS contributors.
+If you want to update the DatabricksEmbeddings implementation and you think
+the corresponding integration test needs to be updated, please contact the
+maintainers of the repository to verify the changes.
+"""
+
+from databricks_langchain import DatabricksEmbeddings
+
+_TEST_ENDPOINT = "databricks-bge-large-en"
+
+
+def test_embedding_documents() -> None:
+    documents = ["foo bar"]
+    embedding = DatabricksEmbeddings(endpoint=_TEST_ENDPOINT)
+    output = embedding.embed_documents(documents)
+    assert len(output) == 1
+    assert len(output[0]) > 0
+
+
+def test_embedding_query() -> None:
+    document = "foo bar"
+    embedding = DatabricksEmbeddings(endpoint=_TEST_ENDPOINT)
+    output = embedding.embed_query(document)
+    assert len(output) > 0
diff --git a/integrations/langchain/tests/integration_tests/test_vectorstore.py b/integrations/langchain/tests/integration_tests/test_vectorstore.py
new file mode 100644
index 0000000..767deb7
--- /dev/null
+++ b/integrations/langchain/tests/integration_tests/test_vectorstore.py
@@ -0,0 +1,46 @@
+"""
+This file contains the integration test for the DatabricksVectorSearch class.
+
+We run the integration tests nightly via the trusted CI/CD system defined in
+a private repository, so that the tests run securely. With this design, the
+integration tests are not intended to be run manually by OSS contributors.
+If you want to update the DatabricksVectorSearch implementation and you think
+the corresponding integration test needs to be updated, please contact the
+maintainers of the repository to verify the changes.
+"""
+
+import os
+from datetime import timedelta
+
+import pytest
+from databricks.sdk import WorkspaceClient
+from databricks.sdk.service.jobs import RunLifecycleStateV2State, TerminationTypeType
+
+
+@pytest.mark.timeout(3600)
+def test_vectorstore():
+    """
+    We run the integration tests for the vector store via a Databricks Workflow,
+    because the setup is too complex to run within a single Python file.
+    Therefore, this test simply triggers the workflow by calling the REST API.
+    """
+    test_job_id = os.getenv("VS_TEST_JOB_ID")
+    if not test_job_id:
+        raise RuntimeError("Please set the environment variable VS_TEST_JOB_ID")
+
+    w = WorkspaceClient()
+
+    # Check if there is any ongoing job run
+    run_list = list(w.jobs.list_runs(job_id=test_job_id, active_only=True))
+    no_active_run = len(run_list) == 0
+    assert no_active_run, "There is an ongoing job run. Please wait for it to complete."
+
+    # Trigger the workflow
+    response = w.jobs.run_now(job_id=test_job_id)
+    job_url = f"{w.config.host}/jobs/{test_job_id}/runs/{response.run_id}"
+    print(f"Started the job at {job_url}")  # noqa: T201
+
+    # Wait for the job to complete
+    result = response.result(timeout=timedelta(seconds=3600))
+    assert result.status.state == RunLifecycleStateV2State.TERMINATED
+    assert result.status.termination_details.type == TerminationTypeType.SUCCESS
diff --git a/integrations/langchain/tests/unit_tests/__init__.py b/integrations/langchain/tests/unit_tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/integrations/langchain/tests/test_chat_models.py b/integrations/langchain/tests/unit_tests/test_chat_models.py
similarity index 100%
rename from integrations/langchain/tests/test_chat_models.py
rename to integrations/langchain/tests/unit_tests/test_chat_models.py
diff --git a/integrations/langchain/tests/test_embeddings.py b/integrations/langchain/tests/unit_tests/test_embeddings.py
similarity index 100%
rename from integrations/langchain/tests/test_embeddings.py
rename to integrations/langchain/tests/unit_tests/test_embeddings.py
diff --git a/integrations/langchain/tests/test_genie.py b/integrations/langchain/tests/unit_tests/test_genie.py
similarity index 100%
rename from integrations/langchain/tests/test_genie.py
rename to integrations/langchain/tests/unit_tests/test_genie.py
diff --git a/integrations/langchain/tests/test_vectorstores.py b/integrations/langchain/tests/unit_tests/test_vectorstores.py
similarity index 100%
rename from integrations/langchain/tests/test_vectorstores.py
rename to integrations/langchain/tests/unit_tests/test_vectorstores.py