from langchain_google_community.bigquery import BigQueryLoader
from langchain_google_community.bigquery_vector_search import BigQueryVectorSearch
from langchain_google_community.docai import DocAIParser, DocAIParsingResults
from langchain_google_community.documentai_warehouse import DocumentAIWarehouseRetriever
# Bug fix: the import below previously used the repository-relative path
# `libs.community.langchain_google_community.drive`, which only resolves when
# running from the monorepo root and breaks for any installed copy of the
# package. Use the package-absolute module path like every other import here.
from langchain_google_community.drive import GoogleDriveLoader
from langchain_google_community.gcs_file import GCSFileLoader
from langchain_google_community.gmail.loader import GMailLoader
from langchain_google_community.gmail.toolkit import GmailToolkit
from langchain_google_community.google_speech_to_text import SpeechToTextLoader
from langchain_google_community.places_api import (
    GooglePlacesAPIWrapper,
    GooglePlacesTool,
)
from langchain_google_community.search import (
    GoogleSearchAPIWrapper,
    GoogleSearchResults,
    GoogleSearchRun,
)
from langchain_google_community.texttospeech import TextToSpeechTool
from langchain_google_community.translate import GoogleTranslateTransformer
from langchain_google_community.vertex_ai_search import (
    VertexAIMultiTurnSearchRetriever,
    VertexAISearchRetriever,
)

__all__ = [
    "BigQueryLoader",
    "BigQueryVectorSearch",
    "DocAIParser",
    "DocAIParsingResults",
    "DocumentAIWarehouseRetriever",
    "GCSFileLoader",
    "GMailLoader",
    "GmailToolkit",
    "GoogleDriveLoader",
    "GooglePlacesAPIWrapper",
    "GooglePlacesTool",
    "GoogleSearchAPIWrapper",
    "GoogleSearchResults",
    "GoogleSearchRun",
    "GoogleTranslateTransformer",
    "SpeechToTextLoader",
    "TextToSpeechTool",
    "VertexAIMultiTurnSearchRetriever",
    "VertexAISearchRetriever",
]
from __future__ import annotations

from typing import TYPE_CHECKING, Iterator, List, Optional

from langchain_core.document_loaders import BaseLoader
from langchain_core.documents import Document

from langchain_google_community._utils import get_client_info

if TYPE_CHECKING:
    from google.auth.credentials import Credentials


class BigQueryLoader(BaseLoader):
    """Load rows from Google Cloud `BigQuery` as documents.

    Each document represents one row of the query result. The
    ``page_content_columns`` are written into the ``page_content`` of the
    document. The ``metadata_columns`` are written into the ``metadata`` of
    the document. By default, all columns are written into the
    ``page_content`` and none into the ``metadata``.
    """

    def __init__(
        self,
        query: str,
        project: Optional[str] = None,
        page_content_columns: Optional[List[str]] = None,
        metadata_columns: Optional[List[str]] = None,
        credentials: Optional[Credentials] = None,
    ):
        """Initialize BigQuery document loader.

        Args:
            query: The query to run in BigQuery.
            project: Optional. The project to run the query in.
            page_content_columns: Optional. The columns to write into the
                `page_content` of the document.
            metadata_columns: Optional. The columns to write into the
                `metadata` of the document.
            credentials : google.auth.credentials.Credentials, optional
                Credentials for accessing Google APIs. Use this parameter to
                override default credentials, such as to use Compute Engine
                (`google.auth.compute_engine.Credentials`) or Service Account
                (`google.oauth2.service_account.Credentials`) credentials
                directly.
        """
        self.query = query
        self.project = project
        self.page_content_columns = page_content_columns
        self.metadata_columns = metadata_columns
        self.credentials = credentials

    def lazy_load(self) -> Iterator[Document]:
        """Yield one document per result row without materializing the list.

        Raises:
            ImportError: If ``google-cloud-bigquery`` is not installed.
            ValueError: If no GCP project is configured for the client.
        """
        try:
            from google.cloud import bigquery
        except ImportError as ex:
            raise ImportError(
                "Could not import google-cloud-bigquery python package. "
                "Please install it with `pip install google-cloud-bigquery`."
            ) from ex

        bq_client = bigquery.Client(
            credentials=self.credentials,
            project=self.project,
            client_info=get_client_info(module="bigquery"),
        )
        if not bq_client.project:
            error_desc = (
                "GCP project for Big Query is not set! Either provide a "
                "`project` argument during BigQueryLoader instantiation, "
                "or set a default project with `gcloud config set project` "
                "command."
            )
            raise ValueError(error_desc)
        query_result = bq_client.query(self.query).result()

        page_content_columns = self.page_content_columns
        metadata_columns = self.metadata_columns

        # Defaults: every column goes into page content, none into metadata.
        # `is None` (not falsiness) so an explicit empty list is respected.
        if page_content_columns is None:
            page_content_columns = [column.name for column in query_result.schema]
        if metadata_columns is None:
            metadata_columns = []

        for row in query_result:
            page_content = "\n".join(
                f"{k}: {v}" for k, v in row.items() if k in page_content_columns
            )
            metadata = {k: v for k, v in row.items() if k in metadata_columns}
            yield Document(page_content=page_content, metadata=metadata)

    def load(self) -> List[Document]:
        """Eagerly load all rows; see :meth:`lazy_load` for streaming."""
        return list(self.lazy_load())
"""Base class for Gmail tools."""
from __future__ import annotations

from typing import TYPE_CHECKING

from langchain_core.pydantic_v1 import Field
from langchain_core.tools import BaseTool

from langchain_google_community.gmail.utils import build_resource_service

if TYPE_CHECKING:
    # This is for linting and IDE typehints
    from googleapiclient.discovery import Resource
else:
    try:
        # We do this so pydantic can resolve the types when instantiating
        from googleapiclient.discovery import Resource
    except ImportError:
        pass


class GmailBaseTool(BaseTool):
    """Base class for Gmail tools."""

    # Built lazily from local credentials (token.json / credentials.json)
    # when not supplied explicitly.
    api_resource: Resource = Field(default_factory=build_resource_service)

    @classmethod
    def from_api_resource(cls, api_resource: Resource) -> "GmailBaseTool":
        """Create a tool from an api resource.

        Args:
            api_resource: The api resource to use.

        Returns:
            A tool.
        """
        # Bug fix: the keyword must match the declared field name
        # (``api_resource``). The previous ``cls(service=api_resource)`` did
        # not correspond to any field, so the supplied resource was ignored
        # and the default factory ran instead.
        return cls(api_resource=api_resource)
import base64
from email.message import EmailMessage
from typing import List, Optional, Type

from langchain_core.callbacks import CallbackManagerForToolRun
from langchain_core.pydantic_v1 import BaseModel, Field

from langchain_google_community.gmail.base import GmailBaseTool


class CreateDraftSchema(BaseModel):
    """Input for CreateDraftTool."""

    message: str = Field(
        ...,
        description="The message to include in the draft.",
    )
    to: List[str] = Field(
        ...,
        description="The list of recipients.",
    )
    subject: str = Field(
        ...,
        description="The subject of the message.",
    )
    cc: Optional[List[str]] = Field(
        None,
        description="The list of CC recipients.",
    )
    bcc: Optional[List[str]] = Field(
        None,
        description="The list of BCC recipients.",
    )


class GmailCreateDraft(GmailBaseTool):
    """Tool that creates a draft email for Gmail."""

    name: str = "create_gmail_draft"
    description: str = (
        "Use this tool to create a draft email with the provided message fields."
    )
    args_schema: Type[CreateDraftSchema] = CreateDraftSchema

    def _prepare_draft_message(
        self,
        message: str,
        to: List[str],
        subject: str,
        cc: Optional[List[str]] = None,
        bcc: Optional[List[str]] = None,
    ) -> dict:
        """Build the Gmail API draft payload.

        The message is assembled as an RFC 5322 email and base64url-encoded,
        which is the wire format the Gmail drafts endpoint expects.
        """
        draft_message = EmailMessage()
        draft_message.set_content(message)

        draft_message["To"] = ", ".join(to)
        draft_message["Subject"] = subject
        if cc is not None:
            draft_message["Cc"] = ", ".join(cc)

        if bcc is not None:
            draft_message["Bcc"] = ", ".join(bcc)

        encoded_message = base64.urlsafe_b64encode(draft_message.as_bytes()).decode()
        return {"message": {"raw": encoded_message}}

    def _run(
        self,
        message: str,
        to: List[str],
        subject: str,
        cc: Optional[List[str]] = None,
        bcc: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> str:
        """Create the draft via the Gmail API and return its draft id."""
        try:
            create_message = self._prepare_draft_message(message, to, subject, cc, bcc)
            draft = (
                self.api_resource.users()
                .drafts()
                .create(userId="me", body=create_message)
                .execute()
            )
            output = f'Draft created. Draft Id: {draft["id"]}'
            return output
        except Exception as e:
            # Chain the original exception so the API failure's traceback
            # is preserved instead of being swallowed.
            raise Exception(f"An error occurred: {e}") from e
import base64
import email
from typing import Dict, Optional, Type

from langchain_core.callbacks import CallbackManagerForToolRun
from langchain_core.pydantic_v1 import BaseModel, Field

from langchain_google_community.gmail.base import GmailBaseTool
from langchain_google_community.gmail.utils import clean_email_body


class SearchArgsSchema(BaseModel):
    """Input for GetMessageTool."""

    message_id: str = Field(
        ...,
        description="The unique ID of the email message, retrieved from a search.",
    )


class GmailGetMessage(GmailBaseTool):
    """Tool that gets a message by ID from Gmail."""

    name: str = "get_gmail_message"
    description: str = (
        "Use this tool to fetch an email by message ID."
        " Returns the thread ID, snippet, body, subject, and sender."
    )
    args_schema: Type[SearchArgsSchema] = SearchArgsSchema

    @staticmethod
    def _decode_payload(payload: bytes) -> str:
        """Decode a MIME payload, falling back to latin-1 for non-UTF-8 mail.

        Consistent with GmailSearch._parse_messages; without the fallback a
        legacy-encoded message raised UnicodeDecodeError.
        """
        try:
            return payload.decode("utf-8")
        except UnicodeDecodeError:
            return payload.decode("latin-1")

    def _run(
        self,
        message_id: str,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> Dict:
        """Run the tool."""
        query = (
            self.api_resource.users()
            .messages()
            .get(userId="me", format="raw", id=message_id)
        )
        message_data = query.execute()
        # format="raw" returns the whole RFC 5322 message base64url-encoded.
        raw_message = base64.urlsafe_b64decode(message_data["raw"])

        email_msg = email.message_from_bytes(raw_message)

        subject = email_msg["Subject"]
        sender = email_msg["From"]

        message_body = ""
        if email_msg.is_multipart():
            # Use the first text/plain part that is not an attachment.
            for part in email_msg.walk():
                ctype = part.get_content_type()
                cdispo = str(part.get("Content-Disposition"))
                if ctype == "text/plain" and "attachment" not in cdispo:
                    message_body = self._decode_payload(part.get_payload(decode=True))
                    break
        else:
            message_body = self._decode_payload(email_msg.get_payload(decode=True))

        body = clean_email_body(message_body)

        return {
            "id": message_id,
            "threadId": message_data["threadId"],
            "snippet": message_data["snippet"],
            "body": body,
            "subject": subject,
            "sender": sender,
        }
from typing import Dict, Optional, Type

from langchain_core.callbacks import CallbackManagerForToolRun
from langchain_core.pydantic_v1 import BaseModel, Field

from langchain_google_community.gmail.base import GmailBaseTool


class GetThreadSchema(BaseModel):
    """Input for GetThreadTool."""

    # From https://support.google.com/mail/answer/7190?hl=en
    thread_id: str = Field(
        ...,
        description="The thread ID.",
    )


class GmailGetThread(GmailBaseTool):
    """Tool that gets a thread by ID from Gmail."""

    name: str = "get_gmail_thread"
    # Bug fix: the previous description was copy-pasted from the search tool
    # ("The input must be a valid Gmail query"), which misled the calling LLM
    # about this tool's input contract.
    description: str = (
        "Use this tool to fetch an entire email thread by thread ID."
        " The output contains the thread data and, for each of its"
        " messages, the message id and snippet."
    )
    args_schema: Type[GetThreadSchema] = GetThreadSchema

    def _run(
        self,
        thread_id: str,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> Dict:
        """Run the tool."""
        query = self.api_resource.users().threads().get(userId="me", id=thread_id)
        thread_data = query.execute()
        if not isinstance(thread_data, dict):
            # Bug fix: the message previously said "must be a list" although
            # the check (and the API response) is for a dict.
            raise ValueError("The output of the query must be a dict.")
        messages = thread_data["messages"]
        thread_data["messages"] = []
        # Bug fix: "snippet" was listed twice in keys_to_keep.
        keys_to_keep = ["id", "snippet"]
        # TODO: Parse body.
        for message in messages:
            thread_data["messages"].append(
                {k: message[k] for k in keys_to_keep if k in message}
            )
        return thread_data
import base64
import email
from enum import Enum
from typing import Any, Dict, List, Optional, Type

from langchain_core.callbacks import CallbackManagerForToolRun
from langchain_core.pydantic_v1 import BaseModel, Field

from langchain_google_community.gmail.base import GmailBaseTool
from langchain_google_community.gmail.utils import clean_email_body


class Resource(str, Enum):
    """Enumerator of Resources to search."""

    THREADS = "threads"
    MESSAGES = "messages"


class SearchArgsSchema(BaseModel):
    """Input for SearchGmailTool."""

    # From https://support.google.com/mail/answer/7190?hl=en
    query: str = Field(
        ...,
        description="The Gmail query. Example filters include from:sender,"
        " to:recipient, subject:subject, -filtered_term,"
        " in:folder, is:important|read|starred, after:year/mo/date, "
        "before:year/mo/date, label:label_name"
        ' "exact phrase".'
        " Search newer/older than using d (day), m (month), and y (year): "
        "newer_than:2d, older_than:1y."
        " Attachments with extension example: filename:pdf. Multiple term"
        " matching example: from:amy OR from:david.",
    )
    resource: Resource = Field(
        default=Resource.MESSAGES,
        description="Whether to search for threads or messages.",
    )
    max_results: int = Field(
        default=10,
        description="The maximum number of results to return.",
    )


class GmailSearch(GmailBaseTool):
    """Tool that searches for messages or threads in Gmail."""

    name: str = "search_gmail"
    description: str = (
        "Use this tool to search for email messages or threads."
        " The input must be a valid Gmail query."
        " The output is a JSON list of the requested resource."
    )
    args_schema: Type[SearchArgsSchema] = SearchArgsSchema

    def _parse_threads(self, threads: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Attach per-message id/snippet to each thread in the result list."""
        results = []
        for thread in threads:
            thread_id = thread["id"]
            thread_data = (
                self.api_resource.users()
                .threads()
                .get(userId="me", id=thread_id)
                .execute()
            )
            messages = thread_data["messages"]
            thread["messages"] = []
            for message in messages:
                snippet = message["snippet"]
                thread["messages"].append({"snippet": snippet, "id": message["id"]})
            results.append(thread)

        return results

    def _parse_messages(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Fetch each message raw and extract subject, sender, and plain body."""
        results = []
        for message in messages:
            message_id = message["id"]
            message_data = (
                self.api_resource.users()
                .messages()
                .get(userId="me", format="raw", id=message_id)
                .execute()
            )

            raw_message = base64.urlsafe_b64decode(message_data["raw"])

            email_msg = email.message_from_bytes(raw_message)

            subject = email_msg["Subject"]
            sender = email_msg["From"]

            message_body = ""
            if email_msg.is_multipart():
                # Use the first non-attachment text/plain part.
                for part in email_msg.walk():
                    ctype = part.get_content_type()
                    cdispo = str(part.get("Content-Disposition"))
                    if ctype == "text/plain" and "attachment" not in cdispo:
                        try:
                            message_body = part.get_payload(decode=True).decode("utf-8")
                        except UnicodeDecodeError:
                            message_body = part.get_payload(decode=True).decode(
                                "latin-1"
                            )
                        break
            else:
                message_body = email_msg.get_payload(decode=True).decode("utf-8")

            body = clean_email_body(message_body)

            results.append(
                {
                    "id": message["id"],
                    "threadId": message_data["threadId"],
                    "snippet": message_data["snippet"],
                    "body": body,
                    "subject": subject,
                    "sender": sender,
                }
            )
        return results

    def _run(
        self,
        query: str,
        resource: Resource = Resource.MESSAGES,
        max_results: int = 10,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> List[Dict[str, Any]]:
        """Run the tool."""
        # Bug fix: the previous implementation always called
        # users().messages().list(...) and then read ``resource.value`` from
        # the response. A messages.list response only contains a "messages"
        # key, so searching with Resource.THREADS always produced an empty
        # list. Query the collection that matches the requested resource.
        if resource == Resource.THREADS:
            collection = self.api_resource.users().threads()
        elif resource == Resource.MESSAGES:
            collection = self.api_resource.users().messages()
        else:
            raise NotImplementedError(f"Resource of type {resource} not implemented.")

        results = (
            collection.list(userId="me", q=query, maxResults=max_results)
            .execute()
            .get(resource.value, [])
        )
        if resource == Resource.THREADS:
            return self._parse_threads(results)
        return self._parse_messages(results)
"""Send Gmail messages."""
import base64
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from typing import Any, Dict, List, Optional, Type, Union

from langchain_core.callbacks import CallbackManagerForToolRun
from langchain_core.pydantic_v1 import BaseModel, Field

from langchain_google_community.gmail.base import GmailBaseTool


class SendMessageSchema(BaseModel):
    """Input for SendMessageTool."""

    message: str = Field(
        ...,
        description="The message to send.",
    )
    to: Union[str, List[str]] = Field(
        ...,
        description="The list of recipients.",
    )
    subject: str = Field(
        ...,
        description="The subject of the message.",
    )
    cc: Optional[Union[str, List[str]]] = Field(
        None,
        description="The list of CC recipients.",
    )
    bcc: Optional[Union[str, List[str]]] = Field(
        None,
        description="The list of BCC recipients.",
    )


class GmailSendMessage(GmailBaseTool):
    """Tool that sends a message to Gmail."""

    name: str = "send_gmail_message"
    description: str = (
        "Use this tool to send email messages." " The input is the message, recipients"
    )
    args_schema: Type[SendMessageSchema] = SendMessageSchema

    def _prepare_message(
        self,
        message: str,
        to: Union[str, List[str]],
        subject: str,
        cc: Optional[Union[str, List[str]]] = None,
        bcc: Optional[Union[str, List[str]]] = None,
    ) -> Dict[str, Any]:
        """Create a message for an email.

        Single-address strings are normalized to one-element lists before
        being joined into comma-separated header values. The assembled MIME
        message is base64url-encoded as the Gmail API requires.
        """
        mime_message = MIMEMultipart()
        # The message body is sent as HTML.
        mime_message.attach(MIMEText(message, "html"))

        mime_message["To"] = ", ".join(to if isinstance(to, list) else [to])
        mime_message["Subject"] = subject
        if cc is not None:
            mime_message["Cc"] = ", ".join(cc if isinstance(cc, list) else [cc])

        if bcc is not None:
            mime_message["Bcc"] = ", ".join(bcc if isinstance(bcc, list) else [bcc])

        encoded_message = base64.urlsafe_b64encode(mime_message.as_bytes()).decode()
        return {"raw": encoded_message}

    def _run(
        self,
        message: str,
        to: Union[str, List[str]],
        subject: str,
        cc: Optional[Union[str, List[str]]] = None,
        bcc: Optional[Union[str, List[str]]] = None,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> str:
        """Run the tool."""
        try:
            create_message = self._prepare_message(message, to, subject, cc=cc, bcc=bcc)
            send_message = (
                self.api_resource.users()
                .messages()
                .send(userId="me", body=create_message)
            )
            sent_message = send_message.execute()
            return f'Message sent. Message Id: {sent_message["id"]}'
        except Exception as error:
            # Chain the original exception so the API failure's traceback
            # is preserved instead of being swallowed.
            raise Exception(f"An error occurred: {error}") from error
"""Gmail tool utils."""
from __future__ import annotations

import logging
import os
from typing import TYPE_CHECKING, List, Optional, Tuple

if TYPE_CHECKING:
    from google.auth.transport.requests import Request
    from google.oauth2.credentials import Credentials
    from google_auth_oauthlib.flow import InstalledAppFlow
    from googleapiclient.discovery import Resource
    from googleapiclient.discovery import build as build_resource

logger = logging.getLogger(__name__)


def import_google() -> Tuple[Request, Credentials]:
    """Import google libraries.

    Returns:
        Tuple[Request, Credentials]: Request and Credentials classes.

    Raises:
        ImportError: If ``google-auth-httplib2`` is not installed.
    """
    # google-auth-httplib2
    try:
        from google.auth.transport.requests import Request  # noqa: F401
        from google.oauth2.credentials import Credentials  # noqa: F401
    except ImportError as e:
        # Chain the underlying ImportError for easier debugging.
        raise ImportError(
            "You need to install google-auth-httplib2 to use this toolkit. "
            "Try running pip install --upgrade google-auth-httplib2"
        ) from e
    return Request, Credentials


def import_installed_app_flow() -> InstalledAppFlow:
    """Import InstalledAppFlow class.

    Returns:
        InstalledAppFlow: InstalledAppFlow class.

    Raises:
        ImportError: If ``google-auth-oauthlib`` is not installed.
    """
    try:
        from google_auth_oauthlib.flow import InstalledAppFlow
    except ImportError as e:
        raise ImportError(
            "You need to install google-auth-oauthlib to use this toolkit. "
            "Try running pip install --upgrade google-auth-oauthlib"
        ) from e
    return InstalledAppFlow


def import_googleapiclient_resource_builder() -> build_resource:
    """Import googleapiclient.discovery.build function.

    Returns:
        build_resource: googleapiclient.discovery.build function.

    Raises:
        ImportError: If ``google-api-python-client`` is not installed.
    """
    try:
        from googleapiclient.discovery import build
    except ImportError as e:
        raise ImportError(
            "You need to install googleapiclient to use this toolkit. "
            "Try running pip install --upgrade google-api-python-client"
        ) from e
    return build


# Full-mailbox scope; override via `scopes` for least-privilege access.
DEFAULT_SCOPES = ["https://mail.google.com/"]
DEFAULT_CREDS_TOKEN_FILE = "token.json"
DEFAULT_CLIENT_SECRETS_FILE = "credentials.json"


def get_gmail_credentials(
    token_file: Optional[str] = None,
    client_secrets_file: Optional[str] = None,
    scopes: Optional[List[str]] = None,
) -> Credentials:
    """Get credentials for the Gmail API, running the OAuth flow if needed."""
    # From https://developers.google.com/gmail/api/quickstart/python
    Request, Credentials = import_google()
    InstalledAppFlow = import_installed_app_flow()
    creds = None
    scopes = scopes or DEFAULT_SCOPES
    token_file = token_file or DEFAULT_CREDS_TOKEN_FILE
    client_secrets_file = client_secrets_file or DEFAULT_CLIENT_SECRETS_FILE
    # The file token.json stores the user's access and refresh tokens, and is
    # created automatically when the authorization flow completes for the first
    # time.
    if os.path.exists(token_file):
        creds = Credentials.from_authorized_user_file(token_file, scopes)
    # If there are no (valid) credentials available, let the user log in.
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            # https://developers.google.com/gmail/api/quickstart/python#authorize_credentials_for_a_desktop_application # noqa
            flow = InstalledAppFlow.from_client_secrets_file(
                client_secrets_file, scopes
            )
            creds = flow.run_local_server(port=0)
        # Save the credentials for the next run
        with open(token_file, "w") as token:
            token.write(creds.to_json())
    return creds


def build_resource_service(
    credentials: Optional[Credentials] = None,
    service_name: str = "gmail",
    service_version: str = "v1",
) -> Resource:
    """Build a Gmail service."""
    credentials = credentials or get_gmail_credentials()
    builder = import_googleapiclient_resource_builder()
    return builder(service_name, service_version, credentials=credentials)


def clean_email_body(body: str) -> str:
    """Strip HTML from an email body; best-effort, never raises.

    Falls back to ``str(body)`` when BeautifulSoup is missing or parsing
    fails, so callers always get a string back.
    """
    try:
        from bs4 import BeautifulSoup

        try:
            soup = BeautifulSoup(str(body), "html.parser")
            body = soup.get_text()
            return str(body)
        except Exception as e:
            logger.error(e)
            return str(body)
    except ImportError:
        logger.warning("BeautifulSoup not installed. Skipping cleaning.")
        return str(body)
"""Chain that calls the Google Places API."""

import logging
from typing import Any, Dict, Optional, Type

from langchain_core.callbacks import CallbackManagerForToolRun
from langchain_core.pydantic_v1 import BaseModel, Extra, Field, root_validator
from langchain_core.tools import BaseTool
from langchain_core.utils import get_from_dict_or_env


class GooglePlacesAPIWrapper(BaseModel):
    """Wrapper around the Google Places API.

    To use, you should have the ``googlemaps`` python package installed,
    **an API key for the Google Maps platform**, and the environment variable
    ``GPLACES_API_KEY`` set with your API key, or pass ``gplaces_api_key``
    as a named parameter to the constructor.

    By default, this returns all the results for the input query.
    You can use the ``top_k_results`` argument to limit the number of results.

    Example:
        .. code-block:: python

            from langchain_google_community import GooglePlacesAPIWrapper
            gplaceapi = GooglePlacesAPIWrapper()
    """

    gplaces_api_key: Optional[str] = None
    google_map_client: Any  #: :meta private:
    top_k_results: Optional[int] = None

    class Config:
        """Configuration for this pydantic object."""

        extra = Extra.forbid
        arbitrary_types_allowed = True

    @root_validator()
    def validate_environment(cls, values: Dict) -> Dict:
        """Validate that api key is in your environment variable."""
        gplaces_api_key = get_from_dict_or_env(
            values, "gplaces_api_key", "GPLACES_API_KEY"
        )
        values["gplaces_api_key"] = gplaces_api_key
        try:
            import googlemaps

            values["google_map_client"] = googlemaps.Client(gplaces_api_key)
        except ImportError:
            raise ImportError(
                "Could not import googlemaps python package. "
                "Please install it with `pip install googlemaps`."
            )
        return values

    def run(self, query: str) -> str:
        """Run a Places text search and return formatted details per match."""
        search_results = self.google_map_client.places(query)["results"]
        num_to_return = len(search_results)

        places = []

        if num_to_return == 0:
            return "Google Places did not find any places that match the description"

        # Cap at top_k_results when configured; otherwise return everything.
        num_to_return = (
            num_to_return
            if self.top_k_results is None
            else min(num_to_return, self.top_k_results)
        )

        for result in search_results[:num_to_return]:
            details = self.fetch_place_details(result["place_id"])

            if details is not None:
                places.append(details)

        return "\n".join([f"{i+1}. {item}" for i, item in enumerate(places)])

    def fetch_place_details(self, place_id: str) -> Optional[str]:
        """Fetch and format details for one place; None on API failure."""
        try:
            place_details = self.google_map_client.place(place_id)
            place_details["place_id"] = place_id
            formatted_details = self.format_place_details(place_details)
            return formatted_details
        except Exception as e:
            logging.error(f"An Error occurred while fetching place details: {e}")
            return None

    def format_place_details(self, place_details: Dict[str, Any]) -> Optional[str]:
        """Render a place-details response as a short text block; None on error."""
        try:
            # Missing fields default to "Unknown" rather than raising.
            name = place_details.get("result", {}).get("name", "Unknown")
            address = place_details.get("result", {}).get(
                "formatted_address", "Unknown"
            )
            phone_number = place_details.get("result", {}).get(
                "formatted_phone_number", "Unknown"
            )
            website = place_details.get("result", {}).get("website", "Unknown")
            place_id = place_details.get("result", {}).get("place_id", "Unknown")

            formatted_details = (
                f"{name}\nAddress: {address}\n"
                f"Google place ID: {place_id}\n"
                f"Phone: {phone_number}\nWebsite: {website}\n\n"
            )
            return formatted_details
        except Exception as e:
            logging.error(f"An error occurred while formatting place details: {e}")
            return None


class GooglePlacesSchema(BaseModel):
    """Input for GooglePlacesTool."""

    query: str = Field(..., description="Query for google maps")


class GooglePlacesTool(BaseTool):
    """Tool that queries the Google places API."""

    name: str = "google_places"
    # Typo fix: "discover addressed" -> "discover addresses".
    description: str = (
        "A wrapper around Google Places. "
        "Useful for when you need to validate or "
        "discover addresses from ambiguous text. "
        "Input should be a search query."
    )
    api_wrapper: GooglePlacesAPIWrapper = Field(default_factory=GooglePlacesAPIWrapper)
    args_schema: Type[BaseModel] = GooglePlacesSchema

    def _run(
        self,
        query: str,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> str:
        """Use the tool."""
        return self.api_wrapper.run(query)
+ + query: str = Field(..., description="Query for google maps") + + +class GooglePlacesTool(BaseTool): + """Tool that queries the Google places API.""" + + name: str = "google_places" + description: str = ( + "A wrapper around Google Places. " + "Useful for when you need to validate or " + "discover addressed from ambiguous text. " + "Input should be a search query." + ) + api_wrapper: GooglePlacesAPIWrapper = Field(default_factory=GooglePlacesAPIWrapper) + args_schema: Type[BaseModel] = GooglePlacesSchema + + def _run( + self, + query: str, + run_manager: Optional[CallbackManagerForToolRun] = None, + ) -> str: + """Use the tool.""" + return self.api_wrapper.run(query) diff --git a/libs/community/langchain_google_community/search.py b/libs/community/langchain_google_community/search.py new file mode 100644 index 000000000..1d8f32623 --- /dev/null +++ b/libs/community/langchain_google_community/search.py @@ -0,0 +1,180 @@ +"""Util that calls Google Search.""" +from typing import Any, Dict, List, Optional + +from langchain_core.callbacks import CallbackManagerForToolRun +from langchain_core.pydantic_v1 import BaseModel, Extra, root_validator +from langchain_core.tools import BaseTool +from langchain_core.utils import get_from_dict_or_env + + +class GoogleSearchAPIWrapper(BaseModel): + """Wrapper for Google Search API. + + Adapted from: Instructions adapted from https://stackoverflow.com/questions/ + 37083058/ + programmatically-searching-google-in-python-using-custom-search + + TODO: DOCS for using it + 1. Install google-api-python-client + - If you don't already have a Google account, sign up. + - If you have never created a Google APIs Console project, + read the Managing Projects page and create a project in the Google API Console. + - Install the library using pip install google-api-python-client + + 2. Enable the Custom Search API + - Navigate to the APIs & Services→Dashboard panel in Cloud Console. + - Click Enable APIs and Services. 
+    - Search for Custom Search API and click on it.
+    - Click Enable.
+    URL for it: https://console.cloud.google.com/apis/library/customsearch.googleapis
+    .com
+
+    3. To create an API key:
+    - Navigate to the APIs & Services → Credentials panel in Cloud Console.
+    - Select Create credentials, then select API key from the drop-down menu.
+    - The API key created dialog box displays your newly created key.
+    - You now have an API_KEY
+
+    Alternatively, you can just generate an API key here:
+    https://developers.google.com/custom-search/docs/paid_element#api_key
+
+    4. Setup Custom Search Engine so you can search the entire web
+    - Create a custom search engine here: https://programmablesearchengine.google.com/.
+    - In `What to search`, pick the `Search the entire Web` option.
+    After search engine is created, you can click on it and find `Search engine ID`
+    on the Overview page.
+
+    """
+
+    search_engine: Any  #: :meta private:
+    google_api_key: Optional[str] = None
+    google_cse_id: Optional[str] = None
+    k: int = 10
+    siterestrict: bool = False
+
+    class Config:
+        """Configuration for this pydantic object."""
+
+        extra = Extra.forbid
+
+    def _google_search_results(self, search_term: str, **kwargs: Any) -> List[dict]:
+        cse = self.search_engine.cse()
+        if self.siterestrict:
+            cse = cse.siterestrict()
+        res = cse.list(q=search_term, cx=self.google_cse_id, **kwargs).execute()
+        return res.get("items", [])
+
+    @root_validator()
+    def validate_environment(cls, values: Dict) -> Dict:
+        """Validate that api key and python package exists in environment."""
+        google_api_key = get_from_dict_or_env(
+            values, "google_api_key", "GOOGLE_API_KEY"
+        )
+        values["google_api_key"] = google_api_key
+
+        google_cse_id = get_from_dict_or_env(values, "google_cse_id", "GOOGLE_CSE_ID")
+        values["google_cse_id"] = google_cse_id
+
+        try:
+            from googleapiclient.discovery import build
+
+        except ImportError:
+            raise ImportError(
+                "google-api-python-client is not installed.
" + "Please install it with `pip install google-api-python-client" + ">=2.100.0`" + ) + + service = build("customsearch", "v1", developerKey=google_api_key) + values["search_engine"] = service + + return values + + def run(self, query: str) -> str: + """Run query through GoogleSearch and parse result.""" + snippets = [] + results = self._google_search_results(query, num=self.k) + if len(results) == 0: + return "No good Google Search Result was found" + for result in results: + if "snippet" in result: + snippets.append(result["snippet"]) + + return " ".join(snippets) + + def results( + self, + query: str, + num_results: int, + search_params: Optional[Dict[str, str]] = None, + ) -> List[Dict]: + """Run query through GoogleSearch and return metadata. + + Args: + query: The query to search for. + num_results: The number of results to return. + search_params: Parameters to be passed on search + + Returns: + A list of dictionaries with the following keys: + snippet - The description of the result. + title - The title of the result. + link - The link to the result. + """ + metadata_results = [] + results = self._google_search_results( + query, num=num_results, **(search_params or {}) + ) + if len(results) == 0: + return [{"Result": "No good Google Search Result was found"}] + for result in results: + metadata_result = { + "title": result["title"], + "link": result["link"], + } + if "snippet" in result: + metadata_result["snippet"] = result["snippet"] + metadata_results.append(metadata_result) + + return metadata_results + + +class GoogleSearchRun(BaseTool): + """Tool that queries the Google search API.""" + + name: str = "google_search" + description: str = ( + "A wrapper around Google Search. " + "Useful for when you need to answer questions about current events. " + "Input should be a search query." 
+ ) + api_wrapper: GoogleSearchAPIWrapper + + def _run( + self, + query: str, + run_manager: Optional[CallbackManagerForToolRun] = None, + ) -> str: + """Use the tool.""" + return self.api_wrapper.run(query) + + +class GoogleSearchResults(BaseTool): + """Tool that queries the Google Search API and gets back json.""" + + name: str = "google_search_results_json" + description: str = ( + "A wrapper around Google Search. " + "Useful for when you need to answer questions about current events. " + "Input should be a search query. Output is a JSON array of the query results" + ) + num_results: int = 4 + api_wrapper: GoogleSearchAPIWrapper + + def _run( + self, + query: str, + run_manager: Optional[CallbackManagerForToolRun] = None, + ) -> str: + """Use the tool.""" + return str(self.api_wrapper.results(query, self.num_results)) diff --git a/libs/community/langchain_google_community/texttospeech.py b/libs/community/langchain_google_community/texttospeech.py new file mode 100644 index 000000000..8d6afcd4e --- /dev/null +++ b/libs/community/langchain_google_community/texttospeech.py @@ -0,0 +1,91 @@ +from __future__ import annotations + +import tempfile +from typing import TYPE_CHECKING, Any, Optional + +from langchain_core.callbacks import CallbackManagerForToolRun +from langchain_core.tools import BaseTool + +from langchain_google_community._utils import get_client_info + +if TYPE_CHECKING: + from google.cloud import texttospeech + + +def _import_google_cloud_texttospeech() -> Any: + try: + from google.cloud import texttospeech + except ImportError as e: + raise ImportError( + "Cannot import google.cloud.texttospeech, please install " + "`pip install google-cloud-texttospeech`." 
+ ) from e + return texttospeech + + +def _encoding_file_extension_map(encoding: texttospeech.AudioEncoding) -> Optional[str]: + texttospeech = _import_google_cloud_texttospeech() + + ENCODING_FILE_EXTENSION_MAP = { + texttospeech.AudioEncoding.LINEAR16: ".wav", + texttospeech.AudioEncoding.MP3: ".mp3", + texttospeech.AudioEncoding.OGG_OPUS: ".ogg", + texttospeech.AudioEncoding.MULAW: ".wav", + texttospeech.AudioEncoding.ALAW: ".wav", + } + return ENCODING_FILE_EXTENSION_MAP.get(encoding) + + +class TextToSpeechTool(BaseTool): + """Tool that queries the Google Cloud Text to Speech API. + + In order to set this up, follow instructions at: + https://cloud.google.com/text-to-speech/docs/before-you-begin + """ + + name: str = "google_cloud_texttospeech" + description: str = ( + "A wrapper around Google Cloud Text-to-Speech. " + "Useful for when you need to synthesize audio from text. " + "It supports multiple languages, including English, German, Polish, " + "Spanish, Italian, French, Portuguese, and Hindi. 
" + ) + + _client: Any + + def __init__(self, **kwargs: Any) -> None: + """Initializes private fields.""" + texttospeech = _import_google_cloud_texttospeech() + + super().__init__(**kwargs) + + self._client = texttospeech.TextToSpeechClient( + client_info=get_client_info(module="text-to-speech") + ) + + def _run( + self, + input_text: str, + language_code: str = "en-US", + ssml_gender: Optional[texttospeech.SsmlVoiceGender] = None, + audio_encoding: Optional[texttospeech.AudioEncoding] = None, + run_manager: Optional[CallbackManagerForToolRun] = None, + ) -> str: + """Use the tool.""" + texttospeech = _import_google_cloud_texttospeech() + ssml_gender = ssml_gender or texttospeech.SsmlVoiceGender.NEUTRAL + audio_encoding = audio_encoding or texttospeech.AudioEncoding.MP3 + + response = self._client.synthesize_speech( + input=texttospeech.SynthesisInput(text=input_text), + voice=texttospeech.VoiceSelectionParams( + language_code=language_code, ssml_gender=ssml_gender + ), + audio_config=texttospeech.AudioConfig(audio_encoding=audio_encoding), + ) + + suffix = _encoding_file_extension_map(audio_encoding) + + with tempfile.NamedTemporaryFile(mode="bx", suffix=suffix, delete=False) as f: + f.write(response.audio_content) + return f.name diff --git a/libs/community/langchain_google_community/translate.py b/libs/community/langchain_google_community/translate.py new file mode 100644 index 000000000..1d4f6be0f --- /dev/null +++ b/libs/community/langchain_google_community/translate.py @@ -0,0 +1,107 @@ +from typing import Any, Optional, Sequence + +from langchain_core.documents import BaseDocumentTransformer, Document + +from langchain_google_community._utils import get_client_info + + +class GoogleTranslateTransformer(BaseDocumentTransformer): + """Translate text documents using Google Cloud Translation.""" + + def __init__( + self, + project_id: str, + *, + location: str = "global", + model_id: Optional[str] = None, + glossary_id: Optional[str] = None, + api_endpoint: 
Optional[str] = None, + ) -> None: + """ + Arguments: + project_id: Google Cloud Project ID. + location: (Optional) Translate model location. + model_id: (Optional) Translate model ID to use. + glossary_id: (Optional) Translate glossary ID to use. + api_endpoint: (Optional) Regional endpoint to use. + """ + try: + from google.api_core.client_options import ClientOptions + from google.cloud import translate + except ImportError as exc: + raise ImportError( + "Install Google Cloud Translate to use this parser." + "(pip install google-cloud-translate)" + ) from exc + + self.project_id = project_id + self.location = location + self.model_id = model_id + self.glossary_id = glossary_id + + self._client = translate.TranslationServiceClient( + client_info=get_client_info("translate"), + client_options=( + ClientOptions(api_endpoint=api_endpoint) if api_endpoint else None + ), + ) + self._parent_path = self._client.common_location_path(project_id, location) + # For some reason, there's no `model_path()` method for the client. + self._model_path = ( + f"{self._parent_path}/models/{model_id}" if model_id else None + ) + self._glossary_path = ( + self._client.glossary_path(project_id, location, glossary_id) + if glossary_id + else None + ) + + def transform_documents( + self, documents: Sequence[Document], **kwargs: Any + ) -> Sequence[Document]: + """Translate text documents using Google Translate. + + Arguments: + source_language_code: ISO 639 language code of the input document. + target_language_code: ISO 639 language code of the output document. + For supported languages, refer to: + https://cloud.google.com/translate/docs/languages + mime_type: (Optional) Media Type of input text. + Options: `text/plain`, `text/html` + """ + try: + from google.cloud import translate + except ImportError as exc: + raise ImportError( + "Install Google Cloud Translate to use this parser." 
+ "(pip install google-cloud-translate)" + ) from exc + + response = self._client.translate_text( + request=translate.TranslateTextRequest( + contents=[doc.page_content for doc in documents], + parent=self._parent_path, + model=self._model_path, + glossary_config=translate.TranslateTextGlossaryConfig( + glossary=self._glossary_path + ), + source_language_code=kwargs.get("source_language_code", None), + target_language_code=kwargs.get("target_language_code"), + mime_type=kwargs.get("mime_type", "text/plain"), + ) + ) + + # If using a glossary, the translations will be in `glossary_translations`. + translations = response.glossary_translations or response.translations + + return [ + Document( + page_content=translation.translated_text, + metadata={ + **doc.metadata, + "model": translation.model, + "detected_language_code": translation.detected_language_code, + }, + ) + for doc, translation in zip(documents, translations) + ] diff --git a/libs/community/poetry.lock b/libs/community/poetry.lock index edac23ab4..19c690e19 100644 --- a/libs/community/poetry.lock +++ b/libs/community/poetry.lock @@ -3295,4 +3295,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<4.0" -content-hash = "5e339be1a11064883f6b1f42169906cae8148cd644a0bc1ae83e7b435eadabe8" +content-hash = "9e3366ba702f7e80faa37e18c049a162269860cbc5a23d095560b52f35b9fe7f" diff --git a/libs/community/pyproject.toml b/libs/community/pyproject.toml index 9d9a3674a..51145d635 100644 --- a/libs/community/pyproject.toml +++ b/libs/community/pyproject.toml @@ -29,6 +29,7 @@ pytest-watcher = "^0.3.4" pytest-asyncio = "^0.21.1" google-cloud-documentai = "^2.24.2" google-cloud-documentai-toolbox = "^0.13.3a0" +google-cloud-bigquery = "^3.19.0" [tool.poetry.group.codespell] optional = true diff --git a/libs/community/tests/integration_tests/test_bigquery.py b/libs/community/tests/integration_tests/test_bigquery.py new file mode 
100644
index 000000000..332bc1fc5
--- /dev/null
+++ b/libs/community/tests/integration_tests/test_bigquery.py
@@ -0,0 +1,38 @@
+from langchain_google_community.bigquery import BigQueryLoader
+
+
+def test_bigquery_loader_no_options() -> None:
+    loader = BigQueryLoader("SELECT 1 AS a, 2 AS b")
+    docs = loader.load()
+
+    assert len(docs) == 1
+    assert docs[0].page_content == "a: 1\nb: 2"
+    assert docs[0].metadata == {}
+
+
+def test_bigquery_loader_page_content_columns() -> None:
+    loader = BigQueryLoader(
+        "SELECT 1 AS a, 2 AS b UNION ALL SELECT 3 AS a, 4 AS b",
+        page_content_columns=["a"],
+    )
+    docs = loader.load()
+
+    assert len(docs) == 2
+    assert docs[0].page_content == "a: 1"
+    assert docs[0].metadata == {}
+
+    assert docs[1].page_content == "a: 3"
+    assert docs[1].metadata == {}
+
+
+def test_bigquery_loader_metadata_columns() -> None:
+    loader = BigQueryLoader(
+        "SELECT 1 AS a, 2 AS b",
+        page_content_columns=["a"],
+        metadata_columns=["b"],
+    )
+    docs = loader.load()
+
+    assert len(docs) == 1
+    assert docs[0].page_content == "a: 1"
+    assert docs[0].metadata == {"b": 2}
diff --git a/libs/community/tests/integration_tests/test_googlesearch_api.py b/libs/community/tests/integration_tests/test_googlesearch_api.py
new file mode 100644
index 000000000..88326b9c5
--- /dev/null
+++ b/libs/community/tests/integration_tests/test_googlesearch_api.py
@@ -0,0 +1,35 @@
+"""Integration test for Google Search API Wrapper."""
+import pytest
+
+from langchain_google_community.search import GoogleSearchAPIWrapper
+
+
+@pytest.mark.skip(reason="CI/CD not ready.")
+def test_call() -> None:
+    """Test that call gives the correct answer."""
+    search = GoogleSearchAPIWrapper()
+    output = search.run("What was Obama's first name?")
+    assert "Barack Hussein Obama II" in output
+
+
+@pytest.mark.skip(reason="CI/CD not ready.")
+def test_no_result_call() -> None:
+    """Test that call gives no result."""
+    search = GoogleSearchAPIWrapper()
+    output = search.run(
+        
"NORESULTCALL_NORESULTCALL_NORESULTCALL_NORESULTCALL_NORESULTCALL_NORESULTCALL" + ) + print(type(output)) # noqa: T201 + assert "No good Google Search Result was found" == output + + +@pytest.mark.skip(reason="CI/CD not ready.") +def test_result_with_params_call() -> None: + """Test that call gives the correct answer with extra params.""" + search = GoogleSearchAPIWrapper() + output = search.results( + query="What was Obama's first name?", + num_results=5, + search_params={"cr": "us", "safe": "active"}, + ) + assert len(output)