diff --git a/.github/workflows/_lint.yml b/.github/workflows/_lint.yml index 2e21b948..b02f7603 100644 --- a/.github/workflows/_lint.yml +++ b/.github/workflows/_lint.yml @@ -69,7 +69,7 @@ jobs: # It doesn't matter how you change it, any change will cause a cache-bust. working-directory: ${{ inputs.working-directory }} run: | - poetry install --with lint,typing + poetry install --with lint,typing --all-extras - name: Install langchain editable working-directory: ${{ inputs.working-directory }} @@ -88,7 +88,6 @@ jobs: ${{ env.WORKDIR }}/.mypy_cache key: mypy-lint-${{ runner.os }}-${{ runner.arch }}-py${{ matrix.python-version }}-${{ inputs.working-directory }}-${{ hashFiles(format('{0}/poetry.lock', inputs.working-directory)) }} - - name: Analysing the code with our lint working-directory: ${{ inputs.working-directory }} run: | diff --git a/libs/community/langchain_google_community/documentai_warehouse.py b/libs/community/langchain_google_community/documentai_warehouse.py index bdf5672b..a7a8ac21 100644 --- a/libs/community/langchain_google_community/documentai_warehouse.py +++ b/libs/community/langchain_google_community/documentai_warehouse.py @@ -40,7 +40,7 @@ class DocumentAIWarehouseRetriever(BaseRetriever): If nothing is provided, all documents in the project will be searched.""" qa_size_limit: int = 5 """The limit on the number of documents returned.""" - client: "DocumentServiceClient" = None #: :meta private: + client: "DocumentServiceClient" = None # type:ignore[assignment] #: :meta private: @model_validator(mode="before") @classmethod diff --git a/libs/vertexai/langchain_google_vertexai/_anthropic_utils.py b/libs/vertexai/langchain_google_vertexai/_anthropic_utils.py index cb12c633..fd3721d2 100644 --- a/libs/vertexai/langchain_google_vertexai/_anthropic_utils.py +++ b/libs/vertexai/langchain_google_vertexai/_anthropic_utils.py @@ -59,6 +59,74 @@ def _format_image(image_url: str) -> Dict: } +def _format_message_anthropic(message: Union[HumanMessage, AIMessage]): + role = _message_type_lookups[message.type] + content: List[Dict[str, Any]] = [] + + if isinstance(message.content, str): + if not message.content.strip(): + return None + content.append({"type": "text", "text": message.content}) + elif isinstance(message.content, list): + for block in message.content: + if isinstance(block, str): + # Only add non-empty strings for now as empty ones are not + # accepted. + # https://github.com/anthropics/anthropic-sdk-python/issues/461 + if not block.strip(): + continue + content.append({"type": "text", "text": block}) + + if isinstance(block, dict): + if "type" not in block: + raise ValueError("Dict content block must have a type key") + + new_block = {} + + for copy_attr in ["type", "cache_control"]: + if copy_attr in block: + new_block[copy_attr] = block[copy_attr] + + if block["type"] == "text": + text: str = block.get("text", "") + # Only add non-empty strings for now as empty ones are not + # accepted. + # https://github.com/anthropics/anthropic-sdk-python/issues/461 + if text.strip(): + new_block["text"] = text + content.append(new_block) + continue + + if block["type"] == "image_url": + # convert format + new_block["source"] = _format_image(block["image_url"]["url"]) + content.append(new_block) + continue + + if block["type"] == "tool_use": + # If a tool_call with the same id as a tool_use content block + # exists, the tool_call is preferred. + if isinstance(message, AIMessage) and message.tool_calls: + is_unique = block["id"] not in [ + tc["id"] for tc in message.tool_calls + ] + if not is_unique: + continue + + # all other block types + content.append(block) + else: + raise ValueError("Message should be a str, list of str or list of dicts") + + # adding all tool calls + if isinstance(message, AIMessage) and message.tool_calls: + for tc in message.tool_calls: + tu = cast(Dict[str, Any], _lc_tool_call_to_anthropic_tool_use_block(tc)) + content.append(tu) + + return {"role": role, "content": content} + + def _format_messages_anthropic( messages: List[BaseMessage], ) -> Tuple[Optional[str], List[Dict]]: @@ -79,81 +147,11 @@ def _format_messages_anthropic( system_message = message.content continue - role = _message_type_lookups[message.type] - content: Union[str, List] - - if not isinstance(message.content, str): - # parse as dict - assert isinstance( - message.content, list - ), "Anthropic message content must be str or list of dicts" - - # populate content - content = [] - for item in message.content: - if isinstance(item, str): - content.append( - { - "type": "text", - "text": item, - } - ) - elif isinstance(item, dict): - if "type" not in item: - raise ValueError("Dict content item must have a type key") - elif item["type"] == "image_url": - # convert format - source = _format_image(item["image_url"]["url"]) - content.append( - { - "type": "image", - "source": source, - } - ) - elif item["type"] == "tool_use": - # If a tool_call with the same id as a tool_use content block - # exists, the tool_call is preferred. - if isinstance(message, AIMessage) and item["id"] in [ - tc["id"] for tc in message.tool_calls - ]: - overlapping = [ - tc - for tc in message.tool_calls - if tc["id"] == item["id"] - ] - content.extend( - _lc_tool_calls_to_anthropic_tool_use_blocks(overlapping) - ) - else: - item.pop("text", None) - content.append(item) - elif item["type"] == "text": - text = item.get("text", "") - # Only add non-empty strings for now as empty ones are not - # accepted. - # https://github.com/anthropics/anthropic-sdk-python/issues/461 - if text.strip(): - content.append({"type": "text", "text": text}) - else: - content.append(item) - else: - raise ValueError( - f"Content items must be str or dict, instead was: {type(item)}" - ) - elif isinstance(message, AIMessage) and message.tool_calls: - content = ( - [] - if not message.content - else [{"type": "text", "text": message.content}] - ) - # Note: Anthropic can't have invalid tool calls as presently defined, - # since the model already returns dicts args not JSON strings, and invalid - # tool calls are those with invalid JSON for args. - content += _lc_tool_calls_to_anthropic_tool_use_blocks(message.tool_calls) - else: - content = message.content + fm = _format_message_anthropic(message) + if not fm: + continue + formatted_messages.append(fm) - formatted_messages.append({"role": role, "content": content}) return system_message, formatted_messages @@ -186,7 +184,7 @@ def _merge_messages( """Merge runs of human/tool messages into single human messages with content blocks.""" # noqa: E501 merged: list = [] for curr in messages: - curr = curr.copy(deep=True) + curr = curr.model_copy(deep=True) if isinstance(curr, ToolMessage): if isinstance(curr.content, list) and all( isinstance(block, dict) and block.get("type") == "tool_result" @@ -226,20 +224,15 @@ class _AnthropicToolUse(TypedDict): id: str -def _lc_tool_calls_to_anthropic_tool_use_blocks( - tool_calls: List[ToolCall], -) -> List[_AnthropicToolUse]: - blocks = [] - for tool_call in tool_calls: - blocks.append( - _AnthropicToolUse( - type="tool_use", - name=tool_call["name"], - input=tool_call["args"], - id=cast(str, tool_call["id"]), - ) - ) - return blocks +def _lc_tool_call_to_anthropic_tool_use_block( + tool_call: ToolCall, +) -> _AnthropicToolUse: + return _AnthropicToolUse( + type="tool_use", + name=tool_call["name"], + input=tool_call["args"], + id=cast(str, tool_call["id"]), + ) def _make_message_chunk_from_anthropic_event( diff --git a/libs/vertexai/tests/unit_tests/test_anthropic_utils.py b/libs/vertexai/tests/unit_tests/test_anthropic_utils.py new file mode 100644 index 00000000..7f83bfb3 --- /dev/null +++ b/libs/vertexai/tests/unit_tests/test_anthropic_utils.py @@ -0,0 +1,480 @@ +import pytest +from langchain_core.messages import ( + AIMessage, + HumanMessage, + SystemMessage, + ToolMessage, +) +from langchain_core.messages.tool import tool_call as create_tool_call + +from langchain_google_vertexai.model_garden import _format_messages_anthropic + + +@pytest.mark.parametrize( + "source_history, expected_sm, expected_history", + [ + ( + [ + AIMessage( + content="", + ), + ], + None, + [], + ), + ( + [ + AIMessage( + content=[], + ), + ], + None, + [], + ), + ( + [ + AIMessage( + content=[""], + ), + ], + None, + [], + ), + ( + [ + AIMessage( + content=["", "Mike age is 30"], + ), + ], + None, + [ + { + "role": "assistant", + "content": [ + { + "type": "text", + "text": "Mike age is 30", + } + ], + } + ], + ), + ( + [ + AIMessage( + content="Mike age is 30", + ), + ], + None, + [ + { + "role": "assistant", + "content": [ + { + "type": "text", + "text": "Mike age is 30", + } + ], + } + ], + ), + ( + [ + AIMessage( + content=[{"type": "text", "text": "Mike age is 30"}], + ), + ], + None, + [ + { + "role": "assistant", + "content": [ + { + "type": "text", + "text": "Mike age is 30", + } + ], + } + ], + ), + ( + [ + AIMessage( + content=[{"type": "text", "text": ""}], + ), + ], + None, + [], + ), + ( + [ + SystemMessage(content="test1"), + AIMessage( + content="Mike age is 30", + ), + ], + "test1", + [ + { + "role": "assistant", + "content": [ + { + "type": "text", + "text": "Mike age is 30", + } + ], + } + ], + ), + ( + [ + AIMessage( + content=["Mike age is 30", "Arthur age is 30"], + ), + ], + None, + [ + { + "role": "assistant", + "content": [ + {"type": "text", "text": "Mike age is 30"}, + {"type": "text", "text": "Arthur age is 30"}, + ], + } + ], + ), + ( + [ + HumanMessage( + content=[ + { + "type": "image_url", + "image_url": {"url": "data:image/png;base64,/9j/4AAQSk"}, + } + ], + ), + ], + None, + [ + { + "role": "user", + "content": [ + { + "type": "image_url", + "source": { + "type": "base64", + "media_type": "image/png", + "data": "/9j/4AAQSk", + }, + }, + ], + } + ], + ), + ( + [ + AIMessage( + content="", + tool_calls=[ + create_tool_call( + name="Information", + args={"name": "Ben"}, + id="00000000-0000-0000-0000-00000000000", + ), + ], + ), + ], + None, + [ + { + "role": "assistant", + "content": [ + { + "type": "tool_use", + "name": "Information", + "input": {"name": "Ben"}, + "id": "00000000-0000-0000-0000-00000000000", + } + ], + } + ], + ), + ( + [ + AIMessage( + content=[], + tool_calls=[ + create_tool_call( + name="Information", + args={"name": "Ben"}, + id="00000000-0000-0000-0000-00000000000", + ), + ], + ), + ], + None, + [ + { + "role": "assistant", + "content": [ + { + "type": "tool_use", + "name": "Information", + "input": {"name": "Ben"}, + "id": "00000000-0000-0000-0000-00000000000", + } + ], + } + ], + ), + ( + [ + AIMessage( + content="Mike age is 30", + tool_calls=[ + create_tool_call( + name="Information", + args={"name": "Ben"}, + id="00000000-0000-0000-0000-00000000000", + ), + ], + ), + ], + None, + [ + { + "role": "assistant", + "content": [ + {"type": "text", "text": "Mike age is 30"}, + { + "type": "tool_use", + "name": "Information", + "input": {"name": "Ben"}, + "id": "00000000-0000-0000-0000-00000000000", + }, + ], + } + ], + ), + ( + [ + AIMessage( + content=["Mike age is 30", "Arthur age is 30"], + tool_calls=[ + create_tool_call( + name="Information", + args={"name": "Ben"}, + id="00000000-0000-0000-0000-00000000000", + ), + ], + ), + ], + None, + [ + { + "role": "assistant", + "content": [ + {"type": "text", "text": "Mike age is 30"}, + {"type": "text", "text": "Arthur age is 30"}, + { + "type": "tool_use", + "name": "Information", + "input": {"name": "Ben"}, + "id": "00000000-0000-0000-0000-00000000000", + }, + ], + } + ], + ), + ( + [ + AIMessage( + content=["Mike age is 30"], + tool_calls=[ + create_tool_call( + name="Information", + args={"name": "Rob"}, + id="00000000-0000-0000-0000-00000000000", + ), + ], + ), + AIMessage( + content=["Arthur age is 30"], + tool_calls=[ + create_tool_call( + name="Information", + args={"name": "Ben"}, + id="00000000-0000-0000-0000-00000000000", + ), + ], + ), + ], + None, + [ + { + "role": "assistant", + "content": [ + {"type": "text", "text": "Mike age is 30"}, + { + "type": "tool_use", + "name": "Information", + "input": {"name": "Rob"}, + "id": "00000000-0000-0000-0000-00000000000", + }, + ], + }, + { + "role": "assistant", + "content": [ + {"type": "text", "text": "Arthur age is 30"}, + { + "type": "tool_use", + "name": "Information", + "input": {"name": "Ben"}, + "id": "00000000-0000-0000-0000-00000000000", + }, + ], + }, + ], + ), + ( + [ + AIMessage( + content=[ + { + "type": "tool_use", + "name": "Information", + "input": {"name": "Ben"}, + "id": "0", + } + ], + tool_calls=[ + create_tool_call( + name="Information", + args={"name": "Rob"}, + id="0", + ), + ], + ), + ], + None, + [ + { + "role": "assistant", + "content": [ + { + "type": "tool_use", + "name": "Information", + "input": {"name": "Rob"}, + "id": "0", + }, + ], + }, + ], + ), + ( + [ + AIMessage( + content=[ + { + "type": "tool_use", + "name": "Information", + "input": {"name": "Ben"}, + "id": "0", + } + ], + tool_calls=[ + create_tool_call( + name="Information", + args={"name": "Rob"}, + id="1", + ), + ], + ), + ], + None, + [ + { + "role": "assistant", + "content": [ + { + "type": "tool_use", + "name": "Information", + "input": {"name": "Ben"}, + "id": "0", + }, + { + "type": "tool_use", + "name": "Information", + "input": {"name": "Rob"}, + "id": "1", + }, + ], + }, + ], + ), + ( + [ + AIMessage( + content=[ + { + "type": "tool_use", + "name": "Information", + "cache_control": { + "type": "ephemeral", + }, + "input": {"name": "Ben"}, + "id": "0", + }, + ], + ), + ], + None, + [ + { + "role": "assistant", + "content": [ + { + "type": "tool_use", + "name": "Information", + "cache_control": { + "type": "ephemeral", + }, + "input": {"name": "Ben"}, + "id": "0", + }, + ], + }, + ], + ), + ( + [ + ToolMessage( + content="test", + tool_call_id="0", + ), + ], + None, + [ + { + "role": "user", + "content": [ + { + "type": "tool_result", + "tool_use_id": "0", + "content": "test", + }, + ], + }, + ], + ), + ], +) +def test_format_messages_anthropic( + source_history, expected_sm, expected_history +) -> None: + sm, result_history = _format_messages_anthropic(source_history) + + for result, expected in zip(result_history, expected_history): + assert result == expected + assert sm == expected_sm