Skip to content

Commit

Permalink
migrating code from community
Browse files Browse the repository at this point in the history
  • Loading branch information
lkuligin committed Mar 26, 2024
1 parent 8a0d616 commit 7782a49
Show file tree
Hide file tree
Showing 20 changed files with 1,333 additions and 12 deletions.
27 changes: 24 additions & 3 deletions libs/community/langchain_google_community/__init__.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,32 @@
from libs.community.langchain_google_community.drive import GoogleDriveLoader

from langchain_google_community.bigquery import BigQueryLoader
from langchain_google_community.bigquery_vector_search import BigQueryVectorSearch
from langchain_google_community.docai import DocAIParser, DocAIParsingResults
from langchain_google_community.documentai_warehouse import DocumentAIWarehouseRetriever
from langchain_google_community.gcs_directory import GCSDirectoryLoader
from langchain_google_community.gcs_file import GCSFileLoader
from langchain_google_community.gmail.loader import GMailLoader
from langchain_google_community.gmail.toolkit import GmailToolkit
from langchain_google_community.google_speech_to_text import GoogleSpeechToTextLoader
from langchain_google_community.googledrive import GoogleDriveLoader
from langchain_google_community.google_speech_to_text import SpeechToTextLoader
from langchain_google_community.places_api import (
GooglePlacesAPIWrapper,
GooglePlacesTool,
)
from langchain_google_community.search import (
GoogleSearchAPIWrapper,
GoogleSearchResults,
GoogleSearchRun,
)
from langchain_google_community.texttospeech import TextToSpeechTool
from langchain_google_community.translate import GoogleTranslateTransformer
from langchain_google_community.vertex_ai_search import (
VertexAIMultiTurnSearchRetriever,
VertexAISearchRetriever,
)

__all__ = [
"BigQueryLoader",
"BigQueryVectorSearch",
"DocAIParser",
"DocAIParsingResults",
Expand All @@ -22,7 +36,14 @@
"GMailLoader",
"GmailToolkit",
"GoogleDriveLoader",
"GoogleSpeechToTextLoader",
"GooglePlacesAPIWrapper",
"GooglePlacesTool",
"GoogleSearchAPIWrapper",
"GoogleSearchResults",
"GoogleSearchRun",
"GoogleTranslateTransformer",
"SpeechToTextLoader",
"TextToSpeechTool",
"VertexAIMultiTurnSearchRetriever",
"VertexAISearchRetriever",
]
94 changes: 94 additions & 0 deletions libs/community/langchain_google_community/bigquery.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
from __future__ import annotations

from typing import TYPE_CHECKING, List, Optional

from langchain_core.document_loaders import BaseLoader
from langchain_core.documents import Document

from langchain_google_community._utils import get_client_info

if TYPE_CHECKING:
from google.auth.credentials import Credentials


class BigQueryLoader(BaseLoader):
"""Load from the Google Cloud Platform `BigQuery`.
Each document represents one row of the result. The `page_content_columns`
are written into the `page_content` of the document. The `metadata_columns`
are written into the `metadata` of the document. By default, all columns
are written into the `page_content` and none into the `metadata`.
"""

def __init__(
self,
query: str,
project: Optional[str] = None,
page_content_columns: Optional[List[str]] = None,
metadata_columns: Optional[List[str]] = None,
credentials: Optional[Credentials] = None,
):
"""Initialize BigQuery document loader.
Args:
query: The query to run in BigQuery.
project: Optional. The project to run the query in.
page_content_columns: Optional. The columns to write into the `page_content`
of the document.
metadata_columns: Optional. The columns to write into the `metadata` of the
document.
credentials : google.auth.credentials.Credentials, optional
Credentials for accessing Google APIs. Use this parameter to override
default credentials, such as to use Compute Engine
(`google.auth.compute_engine.Credentials`) or Service Account
(`google.oauth2.service_account.Credentials`) credentials directly.
"""
self.query = query
self.project = project
self.page_content_columns = page_content_columns
self.metadata_columns = metadata_columns
self.credentials = credentials

def load(self) -> List[Document]:
try:
from google.cloud import bigquery
except ImportError as ex:
raise ImportError(
"Could not import google-cloud-bigquery python package. "
"Please install it with `pip install google-cloud-bigquery`."
) from ex

bq_client = bigquery.Client(
credentials=self.credentials,
project=self.project,
client_info=get_client_info(module="bigquery"),
)
if not bq_client.project:
error_desc = (
"GCP project for Big Query is not set! Either provide a "
"`project` argument during BigQueryLoader instantiation, "
"or set a default project with `gcloud config set project` "
"command."
)
raise ValueError(error_desc)
query_result = bq_client.query(self.query).result()
docs: List[Document] = []

page_content_columns = self.page_content_columns
metadata_columns = self.metadata_columns

if page_content_columns is None:
page_content_columns = [column.name for column in query_result.schema]
if metadata_columns is None:
metadata_columns = []

for row in query_result:
page_content = "\n".join(
f"{k}: {v}" for k, v in row.items() if k in page_content_columns
)
metadata = {k: v for k, v in row.items() if k in metadata_columns}
doc = Document(page_content=page_content, metadata=metadata)
docs.append(doc)

return docs
37 changes: 37 additions & 0 deletions libs/community/langchain_google_community/gmail/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
"""Base class for Gmail tools."""
from __future__ import annotations

from typing import TYPE_CHECKING

from langchain_core.pydantic_v1 import Field
from langchain_core.tools import BaseTool

from langchain_google_community.gmail.utils import build_resource_service

if TYPE_CHECKING:
# This is for linting and IDE typehints
from googleapiclient.discovery import Resource
else:
try:
# We do this so pydantic can resolve the types when instantiating
from googleapiclient.discovery import Resource
except ImportError:
pass


class GmailBaseTool(BaseTool):
"""Base class for Gmail tools."""

api_resource: Resource = Field(default_factory=build_resource_service)

@classmethod
def from_api_resource(cls, api_resource: Resource) -> "GmailBaseTool":
"""Create a tool from an api resource.
Args:
api_resource: The api resource to use.
Returns:
A tool.
"""
return cls(service=api_resource)
87 changes: 87 additions & 0 deletions libs/community/langchain_google_community/gmail/create_draft.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
import base64
from email.message import EmailMessage
from typing import List, Optional, Type

from langchain_core.callbacks import CallbackManagerForToolRun
from langchain_core.pydantic_v1 import BaseModel, Field

from langchain_google_community.gmail.base import GmailBaseTool


class CreateDraftSchema(BaseModel):
"""Input for CreateDraftTool."""

message: str = Field(
...,
description="The message to include in the draft.",
)
to: List[str] = Field(
...,
description="The list of recipients.",
)
subject: str = Field(
...,
description="The subject of the message.",
)
cc: Optional[List[str]] = Field(
None,
description="The list of CC recipients.",
)
bcc: Optional[List[str]] = Field(
None,
description="The list of BCC recipients.",
)


class GmailCreateDraft(GmailBaseTool):
"""Tool that creates a draft email for Gmail."""

name: str = "create_gmail_draft"
description: str = (
"Use this tool to create a draft email with the provided message fields."
)
args_schema: Type[CreateDraftSchema] = CreateDraftSchema

def _prepare_draft_message(
self,
message: str,
to: List[str],
subject: str,
cc: Optional[List[str]] = None,
bcc: Optional[List[str]] = None,
) -> dict:
draft_message = EmailMessage()
draft_message.set_content(message)

draft_message["To"] = ", ".join(to)
draft_message["Subject"] = subject
if cc is not None:
draft_message["Cc"] = ", ".join(cc)

if bcc is not None:
draft_message["Bcc"] = ", ".join(bcc)

encoded_message = base64.urlsafe_b64encode(draft_message.as_bytes()).decode()
return {"message": {"raw": encoded_message}}

def _run(
self,
message: str,
to: List[str],
subject: str,
cc: Optional[List[str]] = None,
bcc: Optional[List[str]] = None,
run_manager: Optional[CallbackManagerForToolRun] = None,
) -> str:
try:
create_message = self._prepare_draft_message(message, to, subject, cc, bcc)
draft = (
self.api_resource.users()
.drafts()
.create(userId="me", body=create_message)
.execute()
)
output = f'Draft created. Draft Id: {draft["id"]}'
return output
except Exception as e:
raise Exception(f"An error occurred: {e}")
70 changes: 70 additions & 0 deletions libs/community/langchain_google_community/gmail/get_message.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
import base64
import email
from typing import Dict, Optional, Type

from langchain_core.callbacks import CallbackManagerForToolRun
from langchain_core.pydantic_v1 import BaseModel, Field

from langchain_google_community.gmail.base import GmailBaseTool
from langchain_google_community.gmail.utils import clean_email_body


class SearchArgsSchema(BaseModel):
"""Input for GetMessageTool."""

message_id: str = Field(
...,
description="The unique ID of the email message, retrieved from a search.",
)


class GmailGetMessage(GmailBaseTool):
"""Tool that gets a message by ID from Gmail."""

name: str = "get_gmail_message"
description: str = (
"Use this tool to fetch an email by message ID."
" Returns the thread ID, snippet, body, subject, and sender."
)
args_schema: Type[SearchArgsSchema] = SearchArgsSchema

def _run(
self,
message_id: str,
run_manager: Optional[CallbackManagerForToolRun] = None,
) -> Dict:
"""Run the tool."""
query = (
self.api_resource.users()
.messages()
.get(userId="me", format="raw", id=message_id)
)
message_data = query.execute()
raw_message = base64.urlsafe_b64decode(message_data["raw"])

email_msg = email.message_from_bytes(raw_message)

subject = email_msg["Subject"]
sender = email_msg["From"]

message_body = ""
if email_msg.is_multipart():
for part in email_msg.walk():
ctype = part.get_content_type()
cdispo = str(part.get("Content-Disposition"))
if ctype == "text/plain" and "attachment" not in cdispo:
message_body = part.get_payload(decode=True).decode("utf-8")
break
else:
message_body = email_msg.get_payload(decode=True).decode("utf-8")

body = clean_email_body(message_body)

return {
"id": message_id,
"threadId": message_data["threadId"],
"snippet": message_data["snippet"],
"body": body,
"subject": subject,
"sender": sender,
}
48 changes: 48 additions & 0 deletions libs/community/langchain_google_community/gmail/get_thread.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
from typing import Dict, Optional, Type

from langchain_core.callbacks import CallbackManagerForToolRun
from langchain_core.pydantic_v1 import BaseModel, Field

from langchain_google_community.gmail.base import GmailBaseTool


class GetThreadSchema(BaseModel):
"""Input for GetMessageTool."""

# From https://support.google.com/mail/answer/7190?hl=en
thread_id: str = Field(
...,
description="The thread ID.",
)


class GmailGetThread(GmailBaseTool):
"""Tool that gets a thread by ID from Gmail."""

name: str = "get_gmail_thread"
description: str = (
"Use this tool to search for email messages."
" The input must be a valid Gmail query."
" The output is a JSON list of messages."
)
args_schema: Type[GetThreadSchema] = GetThreadSchema

def _run(
self,
thread_id: str,
run_manager: Optional[CallbackManagerForToolRun] = None,
) -> Dict:
"""Run the tool."""
query = self.api_resource.users().threads().get(userId="me", id=thread_id)
thread_data = query.execute()
if not isinstance(thread_data, dict):
raise ValueError("The output of the query must be a list.")
messages = thread_data["messages"]
thread_data["messages"] = []
keys_to_keep = ["id", "snippet", "snippet"]
# TODO: Parse body.
for message in messages:
thread_data["messages"].append(
{k: message[k] for k in keys_to_keep if k in message}
)
return thread_data
Loading

0 comments on commit 7782a49

Please sign in to comment.