Skip to content

Commit

Permalink
Merge branch 'main' into main
Browse files Browse the repository at this point in the history
  • Loading branch information
lkuligin authored Dec 2, 2024
2 parents 81a2fdc + b33d7b6 commit 1a1d787
Show file tree
Hide file tree
Showing 22 changed files with 1,276 additions and 741 deletions.
3 changes: 1 addition & 2 deletions .github/workflows/_lint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ jobs:
# It doesn't matter how you change it, any change will cause a cache-bust.
working-directory: ${{ inputs.working-directory }}
run: |
poetry install --with lint,typing
poetry install --with lint,typing --all-extras
- name: Install langchain editable
working-directory: ${{ inputs.working-directory }}
Expand All @@ -88,7 +88,6 @@ jobs:
${{ env.WORKDIR }}/.mypy_cache
key: mypy-lint-${{ runner.os }}-${{ runner.arch }}-py${{ matrix.python-version }}-${{ inputs.working-directory }}-${{ hashFiles(format('{0}/poetry.lock', inputs.working-directory)) }}


- name: Analysing the code with our lint
working-directory: ${{ inputs.working-directory }}
run: |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ class DocumentAIWarehouseRetriever(BaseRetriever):
If nothing is provided, all documents in the project will be searched."""
qa_size_limit: int = 5
"""The limit on the number of documents returned."""
client: "DocumentServiceClient" = None #: :meta private:
client: "DocumentServiceClient" = None # type:ignore[assignment] #: :meta private:

@model_validator(mode="before")
@classmethod
Expand Down
7 changes: 7 additions & 0 deletions libs/community/langchain_google_community/drive.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ class GoogleDriveLoader(BaseLoader, BaseModel):
"""Path to the credentials file."""
token_path: Path = Path.home() / ".credentials" / "token.json"
"""Path to the token file."""
credentials: Any = None
"""Your own google credentials created via your own mechanism"""
folder_id: Optional[str] = None
"""The folder id to load from."""
document_ids: Optional[List[str]] = None
Expand Down Expand Up @@ -276,6 +278,11 @@ def _load_credentials(self) -> Any:
if self.token_path.exists():
creds = Credentials.from_authorized_user_file(str(self.token_path), SCOPES)

if self.credentials:
# use whatever was passed to us
creds = self.credentials
return creds

if not creds or not creds.valid:
if creds and creds.expired and creds.refresh_token:
creds.refresh(Request())
Expand Down
2 changes: 1 addition & 1 deletion libs/community/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "langchain-google-community"
version = "2.0.2"
version = "2.0.3"
description = "An integration package connecting miscellaneous Google's products and LangChain"
authors = []
readme = "README.md"
Expand Down
7 changes: 7 additions & 0 deletions libs/genai/langchain_google_genai/_image_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from typing import Any, Dict
from urllib.parse import urlparse

import filetype # type: ignore[import]
import requests
from google.ai.generativelanguage_v1beta.types import Part

Expand Down Expand Up @@ -87,7 +88,13 @@ def load_part(self, image_string: str) -> Part:
raise ValueError(msg)

inline_data: Dict[str, Any] = {"data": bytes_}

mime_type, _ = mimetypes.guess_type(image_string)
if not mime_type:
kind = filetype.guess(bytes_)
if kind:
mime_type = kind.mime

if mime_type:
inline_data["mime_type"] = mime_type

Expand Down
13 changes: 12 additions & 1 deletion libs/genai/poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion libs/genai/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "langchain-google-genai"
version = "2.0.5"
version = "2.0.6"
description = "An integration package connecting Google's genai package and LangChain"
authors = []
readme = "README.md"
Expand All @@ -15,6 +15,7 @@ python = ">=3.9,<4.0"
langchain-core = ">=0.3.15,<0.4"
google-generativeai = "^0.8.0"
pydantic = ">=2,<3"
filetype = "^1.2.0"

[tool.poetry.group.test]
optional = true
Expand Down
27 changes: 27 additions & 0 deletions libs/genai/tests/integration_tests/test_chat_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,33 @@ def test_chat_google_genai_invoke_multimodal() -> None:
assert len(chunk.content.strip()) > 0


def test_chat_google_genai_invoke_multimodal_by_url() -> None:
    """Multimodal invoke/stream where the image part is supplied as a URL."""
    prompt_text = "Guess what's in this picture! You have 3 guesses."
    picture_url = "https://picsum.photos/seed/picsum/200/300"
    messages: list = [
        HumanMessage(
            content=[
                {"type": "text", "text": prompt_text},
                {"type": "image_url", "image_url": picture_url},
            ]
        ),
    ]
    llm = ChatGoogleGenerativeAI(model=_VISION_MODEL)

    # Non-streaming invocation should produce a non-empty string answer.
    response = llm.invoke(messages)
    assert isinstance(response.content, str)
    assert len(response.content.strip()) > 0

    # Streaming should also yield non-empty string chunks.
    for chunk in llm.stream(messages):
        print(chunk)  # noqa: T201
        assert isinstance(chunk.content, str)
        assert len(chunk.content.strip()) > 0


def test_chat_google_genai_invoke_multimodal_multiple_messages() -> None:
messages: list = [
HumanMessage(content="Hi there"),
Expand Down
175 changes: 85 additions & 90 deletions libs/vertexai/langchain_google_vertexai/_anthropic_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,9 @@
from pydantic import BaseModel

if TYPE_CHECKING:
from anthropic.types import RawMessageStreamEvent # type: ignore
from anthropic.types import (
RawMessageStreamEvent, # type: ignore[unused-ignore, import-not-found]
)

_message_type_lookups = {
"human": "user",
Expand All @@ -57,6 +59,74 @@ def _format_image(image_url: str) -> Dict:
}


def _format_message_anthropic(
    message: Union[HumanMessage, AIMessage],
) -> Optional[Dict[str, Any]]:
    """Convert a LangChain human/AI message into an Anthropic message dict.

    Args:
        message: The message to convert; its role is derived from
            ``message.type`` via ``_message_type_lookups``.

    Returns:
        A ``{"role": ..., "content": [...]}`` dict, or ``None`` when the
        message carries no usable content (an empty/whitespace-only string),
        because the Anthropic API rejects empty text blocks.

    Raises:
        ValueError: If the content is neither ``str`` nor ``list``, or a dict
            content block lacks a ``"type"`` key.
    """
    role = _message_type_lookups[message.type]
    content: List[Dict[str, Any]] = []

    if isinstance(message.content, str):
        if not message.content.strip():
            # Whole message is empty text — drop it entirely.
            return None
        content.append({"type": "text", "text": message.content})
    elif isinstance(message.content, list):
        for block in message.content:
            if isinstance(block, str):
                # Only add non-empty strings for now as empty ones are not
                # accepted.
                # https://github.com/anthropics/anthropic-sdk-python/issues/461
                if not block.strip():
                    continue
                content.append({"type": "text", "text": block})

            if isinstance(block, dict):
                if "type" not in block:
                    raise ValueError("Dict content block must have a type key")

                # Rebuild the block, carrying over only the attributes that
                # apply to every Anthropic block type.
                new_block = {}

                for copy_attr in ["type", "cache_control"]:
                    if copy_attr in block:
                        new_block[copy_attr] = block[copy_attr]

                if block["type"] == "text":
                    text: str = block.get("text", "")
                    # Only add non-empty strings for now as empty ones are not
                    # accepted.
                    # https://github.com/anthropics/anthropic-sdk-python/issues/461
                    if text.strip():
                        new_block["text"] = text
                        content.append(new_block)
                    continue

                if block["type"] == "image_url":
                    # convert format
                    new_block["source"] = _format_image(block["image_url"]["url"])
                    content.append(new_block)
                    continue

                if block["type"] == "tool_use":
                    # If a tool_call with the same id as a tool_use content block
                    # exists, the tool_call is preferred.
                    if isinstance(message, AIMessage) and message.tool_calls:
                        is_unique = block["id"] not in [
                            tc["id"] for tc in message.tool_calls
                        ]
                        if not is_unique:
                            continue

                # all other block types
                content.append(block)
    else:
        raise ValueError("Message should be a str, list of str or list of dicts")

    # adding all tool calls
    if isinstance(message, AIMessage) and message.tool_calls:
        for tc in message.tool_calls:
            tu = cast(Dict[str, Any], _lc_tool_call_to_anthropic_tool_use_block(tc))
            content.append(tu)

    return {"role": role, "content": content}


def _format_messages_anthropic(
messages: List[BaseMessage],
) -> Tuple[Optional[str], List[Dict]]:
Expand All @@ -77,81 +147,11 @@ def _format_messages_anthropic(
system_message = message.content
continue

role = _message_type_lookups[message.type]
content: Union[str, List]

if not isinstance(message.content, str):
# parse as dict
assert isinstance(
message.content, list
), "Anthropic message content must be str or list of dicts"

# populate content
content = []
for item in message.content:
if isinstance(item, str):
content.append(
{
"type": "text",
"text": item,
}
)
elif isinstance(item, dict):
if "type" not in item:
raise ValueError("Dict content item must have a type key")
elif item["type"] == "image_url":
# convert format
source = _format_image(item["image_url"]["url"])
content.append(
{
"type": "image",
"source": source,
}
)
elif item["type"] == "tool_use":
# If a tool_call with the same id as a tool_use content block
# exists, the tool_call is preferred.
if isinstance(message, AIMessage) and item["id"] in [
tc["id"] for tc in message.tool_calls
]:
overlapping = [
tc
for tc in message.tool_calls
if tc["id"] == item["id"]
]
content.extend(
_lc_tool_calls_to_anthropic_tool_use_blocks(overlapping)
)
else:
item.pop("text", None)
content.append(item)
elif item["type"] == "text":
text = item.get("text", "")
# Only add non-empty strings for now as empty ones are not
# accepted.
# https://github.com/anthropics/anthropic-sdk-python/issues/461
if text.strip():
content.append({"type": "text", "text": text})
else:
content.append(item)
else:
raise ValueError(
f"Content items must be str or dict, instead was: {type(item)}"
)
elif isinstance(message, AIMessage) and message.tool_calls:
content = (
[]
if not message.content
else [{"type": "text", "text": message.content}]
)
# Note: Anthropic can't have invalid tool calls as presently defined,
# since the model already returns dicts args not JSON strings, and invalid
# tool calls are those with invalid JSON for args.
content += _lc_tool_calls_to_anthropic_tool_use_blocks(message.tool_calls)
else:
content = message.content
fm = _format_message_anthropic(message)
if not fm:
continue
formatted_messages.append(fm)

formatted_messages.append({"role": role, "content": content})
return system_message, formatted_messages


Expand Down Expand Up @@ -184,7 +184,7 @@ def _merge_messages(
"""Merge runs of human/tool messages into single human messages with content blocks.""" # noqa: E501
merged: list = []
for curr in messages:
curr = curr.copy(deep=True)
curr = curr.model_copy(deep=True)
if isinstance(curr, ToolMessage):
if isinstance(curr.content, list) and all(
isinstance(block, dict) and block.get("type") == "tool_result"
Expand Down Expand Up @@ -224,20 +224,15 @@ class _AnthropicToolUse(TypedDict):
id: str


def _lc_tool_calls_to_anthropic_tool_use_blocks(
tool_calls: List[ToolCall],
) -> List[_AnthropicToolUse]:
blocks = []
for tool_call in tool_calls:
blocks.append(
_AnthropicToolUse(
type="tool_use",
name=tool_call["name"],
input=tool_call["args"],
id=cast(str, tool_call["id"]),
)
)
return blocks
def _lc_tool_call_to_anthropic_tool_use_block(
    tool_call: ToolCall,
) -> _AnthropicToolUse:
    """Convert a single LangChain tool call into an Anthropic tool_use block."""
    block: _AnthropicToolUse = {
        "type": "tool_use",
        "name": tool_call["name"],
        "input": tool_call["args"],
        "id": cast(str, tool_call["id"]),
    }
    return block


def _make_message_chunk_from_anthropic_event(
Expand Down
7 changes: 6 additions & 1 deletion libs/vertexai/langchain_google_vertexai/_image_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import os
import re
from enum import Enum
from functools import cached_property
from typing import Dict, Optional, Union
from urllib.parse import urlparse

Expand Down Expand Up @@ -43,6 +44,10 @@ def __init__(
"""
self._project = project

    @cached_property
    def _storage_client(self) -> storage.Client:
        """Memoized GCS client bound to this instance's project.

        ``cached_property`` ensures the ``storage.Client`` is constructed at
        most once per instance instead of on every blob access.
        """
        return storage.Client(project=self._project)

def load_bytes(self, image_string: str) -> bytes:
"""Routes to the correct loader based on the image_string.
Expand Down Expand Up @@ -198,7 +203,7 @@ def _blob_from_gcs(self, gcs_uri: str) -> storage.Blob:
storage.Blob
"""

gcs_client = storage.Client(project=self._project)
gcs_client = self._storage_client
blob = storage.Blob.from_string(gcs_uri, gcs_client)
blob.reload(client=gcs_client)
return blob
Expand Down
2 changes: 1 addition & 1 deletion libs/vertexai/langchain_google_vertexai/model_garden.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ def __init__(self, **kwargs: Any) -> None:

@model_validator(mode="after")
def validate_environment(self) -> Self:
from anthropic import ( # type: ignore
from anthropic import ( # type: ignore[unused-ignore, import-not-found]
AnthropicVertex,
AsyncAnthropicVertex,
)
Expand Down
Loading

0 comments on commit 1a1d787

Please sign in to comment.