From 2410b812a6494d94c797944db9f6e652832e95a3 Mon Sep 17 00:00:00 2001 From: maxmet91 Date: Sun, 24 Nov 2024 10:07:50 +0000 Subject: [PATCH] genai[feat]: add filetype detection for images from bytes and implement new multimodal test --- .../langchain_google_genai/_image_utils.py | 7 +++++ libs/genai/poetry.lock | 13 ++++++++- libs/genai/pyproject.toml | 1 + .../integration_tests/test_chat_models.py | 27 +++++++++++++++++++ 4 files changed, 47 insertions(+), 1 deletion(-) diff --git a/libs/genai/langchain_google_genai/_image_utils.py b/libs/genai/langchain_google_genai/_image_utils.py index a0baba49..4fabcd84 100644 --- a/libs/genai/langchain_google_genai/_image_utils.py +++ b/libs/genai/langchain_google_genai/_image_utils.py @@ -9,6 +9,7 @@ from urllib.parse import urlparse import requests +import filetype from google.ai.generativelanguage_v1beta.types import Part @@ -87,7 +88,13 @@ def load_part(self, image_string: str) -> Part: raise ValueError(msg) inline_data: Dict[str, Any] = {"data": bytes_} + mime_type, _ = mimetypes.guess_type(image_string) + if not mime_type: + kind = filetype.guess(bytes_) + if kind: + mime_type = kind.mime + if mime_type: inline_data["mime_type"] = mime_type diff --git a/libs/genai/poetry.lock b/libs/genai/poetry.lock index 4f0f797f..e3d68883 100644 --- a/libs/genai/poetry.lock +++ b/libs/genai/poetry.lock @@ -211,6 +211,17 @@ files = [ [package.extras] test = ["pytest (>=6)"] +[[package]] +name = "filetype" +version = "1.2.0" +description = "Infer file type and MIME type of any file/buffer. No external dependencies." +optional = false +python-versions = "*" +files = [ + {file = "filetype-1.2.0-py2.py3-none-any.whl", hash = "sha256:7ce71b6880181241cf7ac8697a2f1eb6a8bd9b429f7ad6d27b8db9ba5f1c2d25"}, + {file = "filetype-1.2.0.tar.gz", hash = "sha256:66b56cd6474bf41d8c54660347d37afcc3f7d1970648de365c102ef77548aadb"}, +] + [[package]] name = "freezegun" version = "1.5.1" @@ -1470,4 +1481,4 @@ watchmedo = ["PyYAML (>=3.10)"] [metadata] lock-version = "2.0" python-versions = ">=3.9,<4.0" -content-hash = "988b5a557506419d3750476d499929c8e3d3f7a0f37f4ca8c5ec5064227f2671" +content-hash = "07647d94d5cbab0a6be38f492be4a97837ad4164d1437d9a11723235a34c9e45" diff --git a/libs/genai/pyproject.toml b/libs/genai/pyproject.toml index 3f3e4105..a5344385 100644 --- a/libs/genai/pyproject.toml +++ b/libs/genai/pyproject.toml @@ -15,6 +15,7 @@ python = ">=3.9,<4.0" langchain-core = ">=0.3.15,<0.4" google-generativeai = "^0.8.0" pydantic = ">=2,<3" +filetype = "^1.2.0" [tool.poetry.group.test] optional = true diff --git a/libs/genai/tests/integration_tests/test_chat_models.py b/libs/genai/tests/integration_tests/test_chat_models.py index 225ffa79..3a15b9d4 100644 --- a/libs/genai/tests/integration_tests/test_chat_models.py +++ b/libs/genai/tests/integration_tests/test_chat_models.py @@ -158,6 +158,33 @@ def test_chat_google_genai_invoke_multimodal() -> None: assert len(chunk.content.strip()) > 0 +def test_chat_google_genai_invoke_multimodal_by_url() -> None: + messages: list = [ + HumanMessage( + content=[ + { + "type": "text", + "text": "Guess what's in this picture! You have 3 guesses.", + }, + { + "type": "image_url", + "image_url": "https://picsum.photos/seed/picsum/200/300", + }, + ] + ), + ] + llm = ChatGoogleGenerativeAI(model=_VISION_MODEL) + response = llm.invoke(messages) + assert isinstance(response.content, str) + assert len(response.content.strip()) > 0 + + # Try streaming + for chunk in llm.stream(messages): + print(chunk) # noqa: T201 + assert isinstance(chunk.content, str) + assert len(chunk.content.strip()) > 0 + + def test_chat_google_genai_invoke_multimodal_multiple_messages() -> None: messages: list = [ HumanMessage(content="Hi there"),