diff --git a/.scripts/community_split/libs/community/langchain_community/agent_toolkits/conversational_retrieval/openai_functions.py b/.scripts/community_split/libs/community/langchain_community/agent_toolkits/conversational_retrieval/openai_functions.py new file mode 100644 index 0000000000000..e1c2c40bcfb88 --- /dev/null +++ b/.scripts/community_split/libs/community/langchain_community/agent_toolkits/conversational_retrieval/openai_functions.py @@ -0,0 +1,88 @@ +from __future__ import annotations + +from typing import Any, List, Optional, TYPE_CHECKING + +from langchain_core.language_models import BaseLanguageModel +from langchain_core.memory import BaseMemory +from langchain_core.messages import SystemMessage +from langchain_core.prompts.chat import MessagesPlaceholder +from langchain_core.tools import BaseTool + +if TYPE_CHECKING: + from langchain.agents.agent import AgentExecutor + + +def _get_default_system_message() -> SystemMessage: + return SystemMessage( + content=( + "Do your best to answer the questions. " + "Feel free to use any tools available to look up " + "relevant information, only if necessary" + ) + ) + +def create_conversational_retrieval_agent( + llm: BaseLanguageModel, + tools: List[BaseTool], + remember_intermediate_steps: bool = True, + memory_key: str = "chat_history", + system_message: Optional[SystemMessage] = None, + verbose: bool = False, + max_token_limit: int = 2000, + **kwargs: Any, +) -> AgentExecutor: + """A convenience method for creating a conversational retrieval agent. + + Args: + llm: The language model to use, should be ChatOpenAI + tools: A list of tools the agent has access to + remember_intermediate_steps: Whether the agent should remember intermediate + steps or not. Intermediate steps refer to prior action/observation + pairs from previous questions. The benefit of remembering these is if + there is relevant information in there, the agent can use it to answer + follow up questions. The downside is it will take up more tokens. + memory_key: The name of the memory key in the prompt. + system_message: The system message to use. By default, a basic one will + be used. + verbose: Whether or not the final AgentExecutor should be verbose or not, + defaults to False. + max_token_limit: The max number of tokens to keep around in memory. + Defaults to 2000. 
+ + Returns: + An agent executor initialized appropriately + """ + from langchain.agents.agent import AgentExecutor + from langchain.agents.openai_functions_agent.agent_token_buffer_memory import ( + AgentTokenBufferMemory, + ) + from langchain.agents.openai_functions_agent.base import OpenAIFunctionsAgent + from langchain.memory.token_buffer import ConversationTokenBufferMemory + + if remember_intermediate_steps: + memory: BaseMemory = AgentTokenBufferMemory( + memory_key=memory_key, llm=llm, max_token_limit=max_token_limit + ) + else: + memory = ConversationTokenBufferMemory( + memory_key=memory_key, + return_messages=True, + output_key="output", + llm=llm, + max_token_limit=max_token_limit, + ) + + _system_message = system_message or _get_default_system_message() + prompt = OpenAIFunctionsAgent.create_prompt( + system_message=_system_message, + extra_prompt_messages=[MessagesPlaceholder(variable_name=memory_key)], + ) + agent = OpenAIFunctionsAgent(llm=llm, tools=tools, prompt=prompt) + return AgentExecutor( + agent=agent, + tools=tools, + memory=memory, + verbose=verbose, + return_intermediate_steps=remember_intermediate_steps, + **kwargs, + ) diff --git a/.scripts/community_split/libs/community/langchain_community/agent_toolkits/json/base.py b/.scripts/community_split/libs/community/langchain_community/agent_toolkits/json/base.py new file mode 100644 index 0000000000000..0d9f10d14c6c5 --- /dev/null +++ b/.scripts/community_split/libs/community/langchain_community/agent_toolkits/json/base.py @@ -0,0 +1,53 @@ +"""Json agent.""" +from __future__ import annotations +from typing import Any, Dict, List, Optional, TYPE_CHECKING + +from langchain_core.callbacks import BaseCallbackManager +from langchain_core.language_models import BaseLanguageModel + +from langchain_community.agent_toolkits.json.prompt import JSON_PREFIX, JSON_SUFFIX +from langchain_community.agent_toolkits.json.toolkit import JsonToolkit + +if TYPE_CHECKING: + from langchain.agents.agent import AgentExecutor + + +def create_json_agent( + llm: BaseLanguageModel, + toolkit: JsonToolkit, + callback_manager: Optional[BaseCallbackManager] = None, + prefix: str = JSON_PREFIX, + suffix: str = JSON_SUFFIX, + format_instructions: Optional[str] = None, + input_variables: Optional[List[str]] = None, + verbose: bool = False, + agent_executor_kwargs: Optional[Dict[str, Any]] = None, + **kwargs: Any, +) -> AgentExecutor: + """Construct a json agent from an LLM and tools.""" + from langchain.agents.agent import AgentExecutor + from langchain.agents.mrkl.base import ZeroShotAgent + from langchain.chains.llm import LLMChain + tools = toolkit.get_tools() + prompt_params = {"format_instructions": format_instructions} if format_instructions is not None else {} + prompt = ZeroShotAgent.create_prompt( + tools, + prefix=prefix, + suffix=suffix, + input_variables=input_variables, + **prompt_params, + ) + llm_chain = LLMChain( + llm=llm, + prompt=prompt, + callback_manager=callback_manager, + ) + tool_names = [tool.name for tool in tools] + agent = ZeroShotAgent(llm_chain=llm_chain, allowed_tools=tool_names, **kwargs) + return AgentExecutor.from_agent_and_tools( + agent=agent, + tools=tools, + callback_manager=callback_manager, + verbose=verbose, + **(agent_executor_kwargs or {}), + ) diff --git a/.scripts/community_split/libs/community/langchain_community/agent_toolkits/nla/tool.py b/.scripts/community_split/libs/community/langchain_community/agent_toolkits/nla/tool.py new file mode 100644 index 0000000000000..7d94b2f9bca5d --- /dev/null +++ 
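For reference, a minimal sketch of how create_conversational_retrieval_agent above might be wired up. ChatOpenAI, the toy placeholder tool, and the example question are assumptions; in practice the tool list would wrap a retriever over your documents, and the model must support OpenAI function calling.

.. code-block:: python

    from langchain_core.tools import Tool
    from langchain_openai import ChatOpenAI

    from langchain_community.agent_toolkits.conversational_retrieval.openai_functions import (
        create_conversational_retrieval_agent,
    )

    # Placeholder tool -- in practice, wrap a retriever over your documents.
    search_docs = Tool(
        name="search_company_docs",
        description="Searches internal documents and returns relevant passages.",
        func=lambda query: "No matching documents found.",
    )

    llm = ChatOpenAI(temperature=0)  # needs an OpenAI function-calling chat model
    agent_executor = create_conversational_retrieval_agent(llm, [search_docs], verbose=True)
    result = agent_executor.invoke({"input": "What does the travel policy say about flights?"})
    print(result["output"])
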
b/.scripts/community_split/libs/community/langchain_community/agent_toolkits/nla/tool.py @@ -0,0 +1,57 @@ +"""Tool for interacting with a single API with natural language definition.""" + +from __future__ import annotations +from typing import Any, Optional, TYPE_CHECKING + +from langchain_core.language_models import BaseLanguageModel +from langchain_core.tools import Tool + +from langchain_community.tools.openapi.utils.api_models import APIOperation +from langchain_community.tools.openapi.utils.openapi_utils import OpenAPISpec +from langchain_community.utilities.requests import Requests + +if TYPE_CHECKING: + from langchain.chains.api.openapi.chain import OpenAPIEndpointChain + + +class NLATool(Tool): + """Natural Language API Tool.""" + + @classmethod + def from_open_api_endpoint_chain( + cls, chain: OpenAPIEndpointChain, api_title: str + ) -> "NLATool": + """Convert an endpoint chain to an API endpoint tool.""" + expanded_name = ( + f'{api_title.replace(" ", "_")}.{chain.api_operation.operation_id}' + ) + description = ( + f"I'm an AI from {api_title}. Instruct what you want," + " and I'll assist via an API with description:" + f" {chain.api_operation.description}" + ) + return cls(name=expanded_name, func=chain.run, description=description) + + @classmethod + def from_llm_and_method( + cls, + llm: BaseLanguageModel, + path: str, + method: str, + spec: OpenAPISpec, + requests: Optional[Requests] = None, + verbose: bool = False, + return_intermediate_steps: bool = False, + **kwargs: Any, + ) -> "NLATool": + """Instantiate the tool from the specified path and method.""" + api_operation = APIOperation.from_openapi_spec(spec, path, method) + chain = OpenAPIEndpointChain.from_api_operation( + api_operation, + llm, + requests=requests, + verbose=verbose, + return_intermediate_steps=return_intermediate_steps, + **kwargs, + ) + return cls.from_open_api_endpoint_chain(chain, spec.info.title) diff --git a/.scripts/community_split/libs/community/langchain_community/agent_toolkits/openapi/base.py b/.scripts/community_split/libs/community/langchain_community/agent_toolkits/openapi/base.py new file mode 100644 index 0000000000000..f8dc14d85f3ff --- /dev/null +++ b/.scripts/community_split/libs/community/langchain_community/agent_toolkits/openapi/base.py @@ -0,0 +1,77 @@ +"""OpenAPI spec agent.""" +from __future__ import annotations +from typing import Any, Dict, List, Optional, TYPE_CHECKING + +from langchain_core.callbacks import BaseCallbackManager +from langchain_core.language_models import BaseLanguageModel + +from langchain_community.agent_toolkits.openapi.prompt import ( + OPENAPI_PREFIX, + OPENAPI_SUFFIX, +) +from langchain_community.agent_toolkits.openapi.toolkit import OpenAPIToolkit + +if TYPE_CHECKING: + from langchain.agents.agent import AgentExecutor + + +def create_openapi_agent( + llm: BaseLanguageModel, + toolkit: OpenAPIToolkit, + callback_manager: Optional[BaseCallbackManager] = None, + prefix: str = OPENAPI_PREFIX, + suffix: str = OPENAPI_SUFFIX, + format_instructions: Optional[str] = None, + input_variables: Optional[List[str]] = None, + max_iterations: Optional[int] = 15, + max_execution_time: Optional[float] = None, + early_stopping_method: str = "force", + verbose: bool = False, + return_intermediate_steps: bool = False, + agent_executor_kwargs: Optional[Dict[str, Any]] = None, + **kwargs: Any, +) -> AgentExecutor: + """Construct an OpenAPI agent from an LLM and tools. 
+ + *Security Note*: When creating an OpenAPI agent, check the permissions + and capabilities of the underlying toolkit. + + For example, if the default implementation of OpenAPIToolkit + uses the RequestsToolkit which contains tools to make arbitrary + network requests against any URL (e.g., GET, POST, PATCH, PUT, DELETE), + + Control access to who can submit issue requests using this toolkit and + what network access it has. + + See https://python.langchain.com/docs/security for more information. + """ + from langchain.agents.agent import AgentExecutor + from langchain.agents.mrkl.base import ZeroShotAgent + from langchain.chains.llm import LLMChain + tools = toolkit.get_tools() + prompt_params = {"format_instructions": format_instructions} if format_instructions is not None else {} + prompt = ZeroShotAgent.create_prompt( + tools, + prefix=prefix, + suffix=suffix, + input_variables=input_variables, + **prompt_params + ) + llm_chain = LLMChain( + llm=llm, + prompt=prompt, + callback_manager=callback_manager, + ) + tool_names = [tool.name for tool in tools] + agent = ZeroShotAgent(llm_chain=llm_chain, allowed_tools=tool_names, **kwargs) + return AgentExecutor.from_agent_and_tools( + agent=agent, + tools=tools, + callback_manager=callback_manager, + verbose=verbose, + return_intermediate_steps=return_intermediate_steps, + max_iterations=max_iterations, + max_execution_time=max_execution_time, + early_stopping_method=early_stopping_method, + **(agent_executor_kwargs or {}), + ) diff --git a/.scripts/community_split/libs/community/langchain_community/agent_toolkits/openapi/planner.py b/.scripts/community_split/libs/community/langchain_community/agent_toolkits/openapi/planner.py new file mode 100644 index 0000000000000..9912de670a070 --- /dev/null +++ b/.scripts/community_split/libs/community/langchain_community/agent_toolkits/openapi/planner.py @@ -0,0 +1,366 @@ +"""Agent that interacts with OpenAPI APIs via a hierarchical planning approach.""" +import json +import re +from functools import partial +from typing import Any, Callable, Dict, List, Optional, TYPE_CHECKING + +import yaml +from langchain_core.callbacks import BaseCallbackManager +from langchain_core.language_models import BaseLanguageModel +from langchain_core.prompts import BasePromptTemplate, PromptTemplate +from langchain_core.pydantic_v1 import Field +from langchain_core.tools import BaseTool, Tool +from langchain_openai.llms import OpenAI + +from langchain_community.agent_toolkits.openapi.planner_prompt import ( + API_CONTROLLER_PROMPT, + API_CONTROLLER_TOOL_DESCRIPTION, + API_CONTROLLER_TOOL_NAME, + API_ORCHESTRATOR_PROMPT, + API_PLANNER_PROMPT, + API_PLANNER_TOOL_DESCRIPTION, + API_PLANNER_TOOL_NAME, + PARSING_DELETE_PROMPT, + PARSING_GET_PROMPT, + PARSING_PATCH_PROMPT, + PARSING_POST_PROMPT, + PARSING_PUT_PROMPT, + REQUESTS_DELETE_TOOL_DESCRIPTION, + REQUESTS_GET_TOOL_DESCRIPTION, + REQUESTS_PATCH_TOOL_DESCRIPTION, + REQUESTS_POST_TOOL_DESCRIPTION, + REQUESTS_PUT_TOOL_DESCRIPTION, +) +from langchain_community.agent_toolkits.openapi.spec import ReducedOpenAPISpec +from langchain_community.output_parsers.json import parse_json_markdown +from langchain_community.tools.requests.tool import BaseRequestsTool +from langchain_community.utilities.requests import RequestsWrapper + +if TYPE_CHECKING: + from langchain.agents.agent import AgentExecutor + from langchain.chains.llm import LLMChain + from langchain.memory import ReadOnlySharedMemory + +# +# Requests tools with LLM-instructed extraction of truncated responses. 
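The toolkit-driven create_openapi_agent just defined in agent_toolkits/openapi/base.py (as opposed to the planner variant that follows) is typically used with an OpenAPIToolkit built via from_llm, which appears later in this change set. A hedged sketch; the spec filename and max_value_length are assumptions:

.. code-block:: python

    import yaml
    from langchain_openai.llms import OpenAI

    from langchain_community.agent_toolkits.openapi.base import create_openapi_agent
    from langchain_community.agent_toolkits.openapi.toolkit import OpenAPIToolkit
    from langchain_community.tools.json.tool import JsonSpec
    from langchain_community.utilities.requests import TextRequestsWrapper

    with open("openapi.yml") as f:  # hypothetical spec file
        raw_spec = yaml.safe_load(f)

    llm = OpenAI(temperature=0)
    toolkit = OpenAPIToolkit.from_llm(
        llm,
        JsonSpec(dict_=raw_spec, max_value_length=4000),
        TextRequestsWrapper(),  # scope headers / network access appropriately
        verbose=False,
    )
    agent = create_openapi_agent(llm, toolkit, verbose=True)
    agent.run("What endpoints does this API expose?")
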
+# +# Of course, truncating so bluntly may lose a lot of valuable +# information in the response. +# However, the goal for now is to have only a single inference step. +MAX_RESPONSE_LENGTH = 5000 +"""Maximum length of the response to be returned.""" + + +def _get_default_llm_chain(prompt: BasePromptTemplate) -> LLMChain: + from langchain.chains.llm import LLMChain + return LLMChain( + llm=OpenAI(), + prompt=prompt, + ) + + +def _get_default_llm_chain_factory( + prompt: BasePromptTemplate, +) -> Callable[[], LLMChain]: + """Returns a default LLMChain factory.""" + return partial(_get_default_llm_chain, prompt) + + +class RequestsGetToolWithParsing(BaseRequestsTool, BaseTool): + """Requests GET tool with LLM-instructed extraction of truncated responses.""" + + name: str = "requests_get" + """Tool name.""" + description = REQUESTS_GET_TOOL_DESCRIPTION + """Tool description.""" + response_length: Optional[int] = MAX_RESPONSE_LENGTH + """Maximum length of the response to be returned.""" + llm_chain: Any = Field( + default_factory=_get_default_llm_chain_factory(PARSING_GET_PROMPT) + ) + """LLMChain used to extract the response.""" + + def _run(self, text: str) -> str: + try: + data = parse_json_markdown(text) + except json.JSONDecodeError as e: + raise e + data_params = data.get("params") + response = self.requests_wrapper.get(data["url"], params=data_params) + response = response[: self.response_length] + return self.llm_chain.predict( + response=response, instructions=data["output_instructions"] + ).strip() + + async def _arun(self, text: str) -> str: + raise NotImplementedError() + + +class RequestsPostToolWithParsing(BaseRequestsTool, BaseTool): + """Requests POST tool with LLM-instructed extraction of truncated responses.""" + + name: str = "requests_post" + """Tool name.""" + description = REQUESTS_POST_TOOL_DESCRIPTION + """Tool description.""" + response_length: Optional[int] = MAX_RESPONSE_LENGTH + """Maximum length of the response to be returned.""" + llm_chain: Any = Field( + default_factory=_get_default_llm_chain_factory(PARSING_POST_PROMPT) + ) + """LLMChain used to extract the response.""" + + def _run(self, text: str) -> str: + try: + data = parse_json_markdown(text) + except json.JSONDecodeError as e: + raise e + response = self.requests_wrapper.post(data["url"], data["data"]) + response = response[: self.response_length] + return self.llm_chain.predict( + response=response, instructions=data["output_instructions"] + ).strip() + + async def _arun(self, text: str) -> str: + raise NotImplementedError() + + +class RequestsPatchToolWithParsing(BaseRequestsTool, BaseTool): + """Requests PATCH tool with LLM-instructed extraction of truncated responses.""" + + name: str = "requests_patch" + """Tool name.""" + description = REQUESTS_PATCH_TOOL_DESCRIPTION + """Tool description.""" + response_length: Optional[int] = MAX_RESPONSE_LENGTH + """Maximum length of the response to be returned.""" + llm_chain: Any = Field( + default_factory=_get_default_llm_chain_factory(PARSING_PATCH_PROMPT) + ) + """LLMChain used to extract the response.""" + + def _run(self, text: str) -> str: + try: + data = parse_json_markdown(text) + except json.JSONDecodeError as e: + raise e + response = self.requests_wrapper.patch(data["url"], data["data"]) + response = response[: self.response_length] + return self.llm_chain.predict( + response=response, instructions=data["output_instructions"] + ).strip() + + async def _arun(self, text: str) -> str: + raise NotImplementedError() + + +class 
RequestsPutToolWithParsing(BaseRequestsTool, BaseTool): + """Requests PUT tool with LLM-instructed extraction of truncated responses.""" + + name: str = "requests_put" + """Tool name.""" + description = REQUESTS_PUT_TOOL_DESCRIPTION + """Tool description.""" + response_length: Optional[int] = MAX_RESPONSE_LENGTH + """Maximum length of the response to be returned.""" + llm_chain: Any = Field( + default_factory=_get_default_llm_chain_factory(PARSING_PUT_PROMPT) + ) + """LLMChain used to extract the response.""" + + def _run(self, text: str) -> str: + try: + data = parse_json_markdown(text) + except json.JSONDecodeError as e: + raise e + response = self.requests_wrapper.put(data["url"], data["data"]) + response = response[: self.response_length] + return self.llm_chain.predict( + response=response, instructions=data["output_instructions"] + ).strip() + + async def _arun(self, text: str) -> str: + raise NotImplementedError() + + +class RequestsDeleteToolWithParsing(BaseRequestsTool, BaseTool): + """A tool that sends a DELETE request and parses the response.""" + + name: str = "requests_delete" + """The name of the tool.""" + description = REQUESTS_DELETE_TOOL_DESCRIPTION + """The description of the tool.""" + + response_length: Optional[int] = MAX_RESPONSE_LENGTH + """The maximum length of the response.""" + llm_chain: Any = Field( + default_factory=_get_default_llm_chain_factory(PARSING_DELETE_PROMPT) + ) + """The LLM chain used to parse the response.""" + + def _run(self, text: str) -> str: + try: + data = parse_json_markdown(text) + except json.JSONDecodeError as e: + raise e + response = self.requests_wrapper.delete(data["url"]) + response = response[: self.response_length] + return self.llm_chain.predict( + response=response, instructions=data["output_instructions"] + ).strip() + + async def _arun(self, text: str) -> str: + raise NotImplementedError() + + +# +# Orchestrator, planner, controller. 
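Each of the *WithParsing request tools defined above takes a single string input that must parse (via parse_json_markdown) to a JSON object with a url, optional request parameters or body, and output_instructions telling the LLM what to extract from the truncated response. Roughly, the kind of hypothetical inputs the controller agent is expected to produce:

.. code-block:: python

    # requests_get: "params" is optional and sent as query parameters
    {"url": "https://api.example.com/v1/items",
     "params": {"limit": "5"},
     "output_instructions": "Extract the id and name of each item"}

    # requests_post / requests_put / requests_patch: a "data" body instead of "params"
    {"url": "https://api.example.com/v1/items",
     "data": {"name": "New item"},
     "output_instructions": "Extract the id of the created item"}

    # requests_delete: only "url" and "output_instructions" are read
    {"url": "https://api.example.com/v1/items/42",
     "output_instructions": "Confirm whether the deletion succeeded"}
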
+# +def _create_api_planner_tool( + api_spec: ReducedOpenAPISpec, llm: BaseLanguageModel +) -> Tool: + from langchain.chains.llm import LLMChain + endpoint_descriptions = [ + f"{name} {description}" for name, description, _ in api_spec.endpoints + ] + prompt = PromptTemplate( + template=API_PLANNER_PROMPT, + input_variables=["query"], + partial_variables={"endpoints": "- " + "- ".join(endpoint_descriptions)}, + ) + chain = LLMChain(llm=llm, prompt=prompt) + tool = Tool( + name=API_PLANNER_TOOL_NAME, + description=API_PLANNER_TOOL_DESCRIPTION, + func=chain.run, + ) + return tool + + +def _create_api_controller_agent( + api_url: str, + api_docs: str, + requests_wrapper: RequestsWrapper, + llm: BaseLanguageModel, +) -> AgentExecutor: + from langchain.agents.mrkl.base import ZeroShotAgent + from langchain.agents.agent import AgentExecutor + from langchain.chains.llm import LLMChain + get_llm_chain = LLMChain(llm=llm, prompt=PARSING_GET_PROMPT) + post_llm_chain = LLMChain(llm=llm, prompt=PARSING_POST_PROMPT) + tools: List[BaseTool] = [ + RequestsGetToolWithParsing( + requests_wrapper=requests_wrapper, llm_chain=get_llm_chain + ), + RequestsPostToolWithParsing( + requests_wrapper=requests_wrapper, llm_chain=post_llm_chain + ), + ] + prompt = PromptTemplate( + template=API_CONTROLLER_PROMPT, + input_variables=["input", "agent_scratchpad"], + partial_variables={ + "api_url": api_url, + "api_docs": api_docs, + "tool_names": ", ".join([tool.name for tool in tools]), + "tool_descriptions": "\n".join( + [f"{tool.name}: {tool.description}" for tool in tools] + ), + }, + ) + agent = ZeroShotAgent( + llm_chain=LLMChain(llm=llm, prompt=prompt), + allowed_tools=[tool.name for tool in tools], + ) + return AgentExecutor.from_agent_and_tools(agent=agent, tools=tools, verbose=True) + + +def _create_api_controller_tool( + api_spec: ReducedOpenAPISpec, + requests_wrapper: RequestsWrapper, + llm: BaseLanguageModel, +) -> Tool: + """Expose controller as a tool. + + The tool is invoked with a plan from the planner, and dynamically + creates a controller agent with relevant documentation only to + constrain the context. + """ + + base_url = api_spec.servers[0]["url"] # TODO: do better. 
+ + def _create_and_run_api_controller_agent(plan_str: str) -> str: + pattern = r"\b(GET|POST|PATCH|DELETE)\s+(/\S+)*" + matches = re.findall(pattern, plan_str) + endpoint_names = [ + "{method} {route}".format(method=method, route=route.split("?")[0]) + for method, route in matches + ] + docs_str = "" + for endpoint_name in endpoint_names: + found_match = False + for name, _, docs in api_spec.endpoints: + regex_name = re.compile(re.sub("\{.*?\}", ".*", name)) + if regex_name.match(endpoint_name): + found_match = True + docs_str += f"== Docs for {endpoint_name} == \n{yaml.dump(docs)}\n" + if not found_match: + raise ValueError(f"{endpoint_name} endpoint does not exist.") + + agent = _create_api_controller_agent(base_url, docs_str, requests_wrapper, llm) + return agent.run(plan_str) + + return Tool( + name=API_CONTROLLER_TOOL_NAME, + func=_create_and_run_api_controller_agent, + description=API_CONTROLLER_TOOL_DESCRIPTION, + ) + + +def create_openapi_agent( + api_spec: ReducedOpenAPISpec, + requests_wrapper: RequestsWrapper, + llm: BaseLanguageModel, + shared_memory: Optional[ReadOnlySharedMemory] = None, + callback_manager: Optional[BaseCallbackManager] = None, + verbose: bool = True, + agent_executor_kwargs: Optional[Dict[str, Any]] = None, + **kwargs: Any, +) -> AgentExecutor: + """Instantiate OpenAI API planner and controller for a given spec. + + Inject credentials via requests_wrapper. + + We use a top-level "orchestrator" agent to invoke the planner and controller, + rather than a top-level planner + that invokes a controller with its plan. This is to keep the planner simple. + """ + from langchain.agents.mrkl.base import ZeroShotAgent + from langchain.agents.agent import AgentExecutor + from langchain.chains.llm import LLMChain + tools = [ + _create_api_planner_tool(api_spec, llm), + _create_api_controller_tool(api_spec, requests_wrapper, llm), + ] + prompt = PromptTemplate( + template=API_ORCHESTRATOR_PROMPT, + input_variables=["input", "agent_scratchpad"], + partial_variables={ + "tool_names": ", ".join([tool.name for tool in tools]), + "tool_descriptions": "\n".join( + [f"{tool.name}: {tool.description}" for tool in tools] + ), + }, + ) + agent = ZeroShotAgent( + llm_chain=LLMChain(llm=llm, prompt=prompt, memory=shared_memory), + allowed_tools=[tool.name for tool in tools], + **kwargs, + ) + return AgentExecutor.from_agent_and_tools( + agent=agent, + tools=tools, + callback_manager=callback_manager, + verbose=verbose, + **(agent_executor_kwargs or {}), + ) diff --git a/.scripts/community_split/libs/community/langchain_community/agent_toolkits/openapi/toolkit.py b/.scripts/community_split/libs/community/langchain_community/agent_toolkits/openapi/toolkit.py new file mode 100644 index 0000000000000..5b7f3fcbd5239 --- /dev/null +++ b/.scripts/community_split/libs/community/langchain_community/agent_toolkits/openapi/toolkit.py @@ -0,0 +1,90 @@ +"""Requests toolkit.""" +from __future__ import annotations + +from typing import Any, List + +from langchain_core.language_models import BaseLanguageModel +from langchain_core.tools import Tool + +from langchain_community.agent_toolkits.base import BaseToolkit +from langchain_community.agent_toolkits.json.base import create_json_agent +from langchain_community.agent_toolkits.json.toolkit import JsonToolkit +from langchain_community.agent_toolkits.openapi.prompt import DESCRIPTION +from langchain_community.tools import BaseTool +from langchain_community.tools.json.tool import JsonSpec +from langchain_community.tools.requests.tool import ( + 
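The planner-based create_openapi_agent above is typically fed a reduced spec plus an authenticated RequestsWrapper. A hedged sketch; the spec file and auth header are placeholders, and reduce_openapi_spec is assumed to come from the spec module in this package:

.. code-block:: python

    import yaml
    from langchain_openai.llms import OpenAI

    from langchain_community.agent_toolkits.openapi import planner
    from langchain_community.agent_toolkits.openapi.spec import reduce_openapi_spec
    from langchain_community.utilities.requests import RequestsWrapper

    with open("openapi.yml") as f:  # hypothetical spec file
        raw_spec = yaml.safe_load(f)
    reduced_spec = reduce_openapi_spec(raw_spec)

    requests_wrapper = RequestsWrapper(headers={"Authorization": "Bearer <token>"})
    llm = OpenAI(temperature=0)
    agent = planner.create_openapi_agent(reduced_spec, requests_wrapper, llm)
    agent.run("Make a plan to list items, then fetch details for the first one")
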
RequestsDeleteTool, + RequestsGetTool, + RequestsPatchTool, + RequestsPostTool, + RequestsPutTool, +) +from langchain_community.utilities.requests import TextRequestsWrapper + + +class RequestsToolkit(BaseToolkit): + """Toolkit for making REST requests. + + *Security Note*: This toolkit contains tools to make GET, POST, PATCH, PUT, + and DELETE requests to an API. + + Exercise care in who is allowed to use this toolkit. If exposing + to end users, consider that users will be able to make arbitrary + requests on behalf of the server hosting the code. For example, + users could ask the server to make a request to a private API + that is only accessible from the server. + + Control access to who can submit issue requests using this toolkit and + what network access it has. + + See https://python.langchain.com/docs/security for more information. + """ + + requests_wrapper: TextRequestsWrapper + + def get_tools(self) -> List[BaseTool]: + """Return a list of tools.""" + return [ + RequestsGetTool(requests_wrapper=self.requests_wrapper), + RequestsPostTool(requests_wrapper=self.requests_wrapper), + RequestsPatchTool(requests_wrapper=self.requests_wrapper), + RequestsPutTool(requests_wrapper=self.requests_wrapper), + RequestsDeleteTool(requests_wrapper=self.requests_wrapper), + ] + + +class OpenAPIToolkit(BaseToolkit): + """Toolkit for interacting with an OpenAPI API. + + *Security Note*: This toolkit contains tools that can read and modify + the state of a service; e.g., by creating, deleting, or updating, + reading underlying data. + + For example, this toolkit can be used to delete data exposed via + an OpenAPI compliant API. + """ + + json_agent: Any + requests_wrapper: TextRequestsWrapper + + def get_tools(self) -> List[BaseTool]: + """Get the tools in the toolkit.""" + json_agent_tool = Tool( + name="json_explorer", + func=self.json_agent.run, + description=DESCRIPTION, + ) + request_toolkit = RequestsToolkit(requests_wrapper=self.requests_wrapper) + return [*request_toolkit.get_tools(), json_agent_tool] + + @classmethod + def from_llm( + cls, + llm: BaseLanguageModel, + json_spec: JsonSpec, + requests_wrapper: TextRequestsWrapper, + **kwargs: Any, + ) -> OpenAPIToolkit: + """Create json agent from llm, then initialize.""" + json_agent = create_json_agent(llm, JsonToolkit(spec=json_spec), **kwargs) + return cls(json_agent=json_agent, requests_wrapper=requests_wrapper) diff --git a/.scripts/community_split/libs/community/langchain_community/agent_toolkits/powerbi/base.py b/.scripts/community_split/libs/community/langchain_community/agent_toolkits/powerbi/base.py new file mode 100644 index 0000000000000..ef70ee7b43e6f --- /dev/null +++ b/.scripts/community_split/libs/community/langchain_community/agent_toolkits/powerbi/base.py @@ -0,0 +1,68 @@ +"""Power BI agent.""" +from __future__ import annotations + +from typing import Any, Dict, List, Optional, TYPE_CHECKING + +from langchain_core.callbacks import BaseCallbackManager +from langchain_core.language_models import BaseLanguageModel + +from langchain_community.agent_toolkits.powerbi.prompt import ( + POWERBI_PREFIX, + POWERBI_SUFFIX, +) +from langchain_community.agent_toolkits.powerbi.toolkit import PowerBIToolkit +from langchain_community.utilities.powerbi import PowerBIDataset + +if TYPE_CHECKING: + from langchain.agents import AgentExecutor + + +def create_pbi_agent( + llm: BaseLanguageModel, + toolkit: Optional[PowerBIToolkit] = None, + powerbi: Optional[PowerBIDataset] = None, + callback_manager: Optional[BaseCallbackManager] = None, + 
prefix: str = POWERBI_PREFIX, + suffix: str = POWERBI_SUFFIX, + format_instructions: Optional[str] = None, + examples: Optional[str] = None, + input_variables: Optional[List[str]] = None, + top_k: int = 10, + verbose: bool = False, + agent_executor_kwargs: Optional[Dict[str, Any]] = None, + **kwargs: Any, +) -> AgentExecutor: + """Construct a Power BI agent from an LLM and tools.""" + from langchain.agents.mrkl.base import ZeroShotAgent + from langchain.agents import AgentExecutor + from langchain.chains.llm import LLMChain + if toolkit is None: + if powerbi is None: + raise ValueError("Must provide either a toolkit or powerbi dataset") + toolkit = PowerBIToolkit(powerbi=powerbi, llm=llm, examples=examples) + tools = toolkit.get_tools() + tables = powerbi.table_names if powerbi else toolkit.powerbi.table_names + prompt_params = {"format_instructions": format_instructions} if format_instructions is not None else {} + agent = ZeroShotAgent( + llm_chain=LLMChain( + llm=llm, + prompt=ZeroShotAgent.create_prompt( + tools, + prefix=prefix.format(top_k=top_k).format(tables=tables), + suffix=suffix, + input_variables=input_variables, + **prompt_params, + ), + callback_manager=callback_manager, # type: ignore + verbose=verbose, + ), + allowed_tools=[tool.name for tool in tools], + **kwargs, + ) + return AgentExecutor.from_agent_and_tools( + agent=agent, + tools=tools, + callback_manager=callback_manager, + verbose=verbose, + **(agent_executor_kwargs or {}), + ) diff --git a/.scripts/community_split/libs/community/langchain_community/agent_toolkits/powerbi/chat_base.py b/.scripts/community_split/libs/community/langchain_community/agent_toolkits/powerbi/chat_base.py new file mode 100644 index 0000000000000..0171e0371354a --- /dev/null +++ b/.scripts/community_split/libs/community/langchain_community/agent_toolkits/powerbi/chat_base.py @@ -0,0 +1,69 @@ +"""Power BI agent.""" +from __future__ import annotations +from typing import Any, Dict, List, Optional, TYPE_CHECKING + +from langchain_core.callbacks import BaseCallbackManager +from langchain_core.language_models.chat_models import BaseChatModel + +from langchain_community.agent_toolkits.powerbi.prompt import ( + POWERBI_CHAT_PREFIX, + POWERBI_CHAT_SUFFIX, +) +from langchain_community.agent_toolkits.powerbi.toolkit import PowerBIToolkit +from langchain_community.utilities.powerbi import PowerBIDataset + +if TYPE_CHECKING: + from langchain.agents import AgentExecutor + from langchain.agents.agent import AgentOutputParser + from langchain.memory.chat_memory import BaseChatMemory + + +def create_pbi_chat_agent( + llm: BaseChatModel, + toolkit: Optional[PowerBIToolkit] = None, + powerbi: Optional[PowerBIDataset] = None, + callback_manager: Optional[BaseCallbackManager] = None, + output_parser: Optional[AgentOutputParser] = None, + prefix: str = POWERBI_CHAT_PREFIX, + suffix: str = POWERBI_CHAT_SUFFIX, + examples: Optional[str] = None, + input_variables: Optional[List[str]] = None, + memory: Optional[BaseChatMemory] = None, + top_k: int = 10, + verbose: bool = False, + agent_executor_kwargs: Optional[Dict[str, Any]] = None, + **kwargs: Any, +) -> AgentExecutor: + """Construct a Power BI agent from a Chat LLM and tools. + + If you supply only a toolkit and no Power BI dataset, the same LLM is used for both. 
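The non-chat create_pbi_agent defined just above can be exercised roughly as follows; the dataset id, table names, and Azure credential are placeholders, and PowerBIDataset is assumed to accept them as shown:

.. code-block:: python

    from azure.identity import DefaultAzureCredential
    from langchain_openai.llms import OpenAI

    from langchain_community.agent_toolkits.powerbi.base import create_pbi_agent
    from langchain_community.utilities.powerbi import PowerBIDataset

    dataset = PowerBIDataset(
        dataset_id="<dataset-id>",           # placeholder
        table_names=["Sales", "Customers"],  # placeholder
        credential=DefaultAzureCredential(),
    )
    llm = OpenAI(temperature=0)
    # Passing powerbi= lets the factory build the PowerBIToolkit internally.
    agent = create_pbi_agent(llm=llm, powerbi=dataset, verbose=True)
    agent.run("How many rows are in the Sales table?")
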
+ """ + from langchain.agents import AgentExecutor + from langchain.agents.conversational_chat.base import ConversationalChatAgent + from langchain.memory import ConversationBufferMemory + if toolkit is None: + if powerbi is None: + raise ValueError("Must provide either a toolkit or powerbi dataset") + toolkit = PowerBIToolkit(powerbi=powerbi, llm=llm, examples=examples) + tools = toolkit.get_tools() + tables = powerbi.table_names if powerbi else toolkit.powerbi.table_names + agent = ConversationalChatAgent.from_llm_and_tools( + llm=llm, + tools=tools, + system_message=prefix.format(top_k=top_k).format(tables=tables), + human_message=suffix, + input_variables=input_variables, + callback_manager=callback_manager, + output_parser=output_parser, + verbose=verbose, + **kwargs, + ) + return AgentExecutor.from_agent_and_tools( + agent=agent, + tools=tools, + callback_manager=callback_manager, + memory=memory + or ConversationBufferMemory(memory_key="chat_history", return_messages=True), + verbose=verbose, + **(agent_executor_kwargs or {}), + ) diff --git a/.scripts/community_split/libs/community/langchain_community/agent_toolkits/powerbi/toolkit.py b/.scripts/community_split/libs/community/langchain_community/agent_toolkits/powerbi/toolkit.py new file mode 100644 index 0000000000000..bf4969b889cea --- /dev/null +++ b/.scripts/community_split/libs/community/langchain_community/agent_toolkits/powerbi/toolkit.py @@ -0,0 +1,106 @@ +"""Toolkit for interacting with a Power BI dataset.""" +from __future__ import annotations +from typing import List, Optional, Union, TYPE_CHECKING + +from langchain_core.callbacks import BaseCallbackManager +from langchain_core.language_models import BaseLanguageModel +from langchain_core.language_models.chat_models import BaseChatModel +from langchain_core.prompts import PromptTemplate +from langchain_core.prompts.chat import ( + ChatPromptTemplate, + HumanMessagePromptTemplate, + SystemMessagePromptTemplate, +) +from langchain_core.pydantic_v1 import Field + +from langchain_community.agent_toolkits.base import BaseToolkit +from langchain_community.tools import BaseTool +from langchain_community.tools.powerbi.prompt import ( + QUESTION_TO_QUERY_BASE, + SINGLE_QUESTION_TO_QUERY, + USER_INPUT, +) +from langchain_community.tools.powerbi.tool import ( + InfoPowerBITool, + ListPowerBITool, + QueryPowerBITool, +) +from langchain_community.utilities.powerbi import PowerBIDataset + +if TYPE_CHECKING: + from langchain.chains.llm import LLMChain + + +class PowerBIToolkit(BaseToolkit): + """Toolkit for interacting with Power BI dataset. + + *Security Note*: This toolkit interacts with an external service. + + Control access to who can use this toolkit. + + Make sure that the capabilities given by this toolkit to the calling + code are appropriately scoped to the application. + + See https://python.langchain.com/docs/security for more information. 
+ """ + + powerbi: PowerBIDataset = Field(exclude=True) + llm: Union[BaseLanguageModel, BaseChatModel] = Field(exclude=True) + examples: Optional[str] = None + max_iterations: int = 5 + callback_manager: Optional[BaseCallbackManager] = None + output_token_limit: Optional[int] = None + tiktoken_model_name: Optional[str] = None + + class Config: + """Configuration for this pydantic object.""" + + arbitrary_types_allowed = True + + def get_tools(self) -> List[BaseTool]: + """Get the tools in the toolkit.""" + return [ + QueryPowerBITool( + llm_chain=self._get_chain(), + powerbi=self.powerbi, + examples=self.examples, + max_iterations=self.max_iterations, + output_token_limit=self.output_token_limit, + tiktoken_model_name=self.tiktoken_model_name, + ), + InfoPowerBITool(powerbi=self.powerbi), + ListPowerBITool(powerbi=self.powerbi), + ] + + def _get_chain(self) -> LLMChain: + """Construct the chain based on the callback manager and model type.""" + from langchain.chains.llm import LLMChain + if isinstance(self.llm, BaseLanguageModel): + return LLMChain( + llm=self.llm, + callback_manager=self.callback_manager + if self.callback_manager + else None, + prompt=PromptTemplate( + template=SINGLE_QUESTION_TO_QUERY, + input_variables=["tool_input", "tables", "schemas", "examples"], + ), + ) + + system_prompt = SystemMessagePromptTemplate( + prompt=PromptTemplate( + template=QUESTION_TO_QUERY_BASE, + input_variables=["tables", "schemas", "examples"], + ) + ) + human_prompt = HumanMessagePromptTemplate( + prompt=PromptTemplate( + template=USER_INPUT, + input_variables=["tool_input"], + ) + ) + return LLMChain( + llm=self.llm, + callback_manager=self.callback_manager if self.callback_manager else None, + prompt=ChatPromptTemplate.from_messages([system_prompt, human_prompt]), + ) diff --git a/.scripts/community_split/libs/community/langchain_community/agent_toolkits/spark_sql/base.py b/.scripts/community_split/libs/community/langchain_community/agent_toolkits/spark_sql/base.py new file mode 100644 index 0000000000000..8b5cd9d9078e7 --- /dev/null +++ b/.scripts/community_split/libs/community/langchain_community/agent_toolkits/spark_sql/base.py @@ -0,0 +1,64 @@ +"""Spark SQL agent.""" +from __future__ import annotations +from typing import Any, Dict, List, Optional, TYPE_CHECKING + +from langchain_core.callbacks import BaseCallbackManager, Callbacks +from langchain_core.language_models import BaseLanguageModel + +from langchain_community.agent_toolkits.spark_sql.prompt import SQL_PREFIX, SQL_SUFFIX +from langchain_community.agent_toolkits.spark_sql.toolkit import SparkSQLToolkit + +if TYPE_CHECKING: + from langchain.agents.agent import AgentExecutor + + +def create_spark_sql_agent( + llm: BaseLanguageModel, + toolkit: SparkSQLToolkit, + callback_manager: Optional[BaseCallbackManager] = None, + callbacks: Callbacks = None, + prefix: str = SQL_PREFIX, + suffix: str = SQL_SUFFIX, + format_instructions: Optional[str] = None, + input_variables: Optional[List[str]] = None, + top_k: int = 10, + max_iterations: Optional[int] = 15, + max_execution_time: Optional[float] = None, + early_stopping_method: str = "force", + verbose: bool = False, + agent_executor_kwargs: Optional[Dict[str, Any]] = None, + **kwargs: Any, +) -> AgentExecutor: + """Construct a Spark SQL agent from an LLM and tools.""" + from langchain.agents.agent import AgentExecutor + from langchain.agents.mrkl.base import ZeroShotAgent + from langchain.chains.llm import LLMChain + tools = toolkit.get_tools() + prefix = prefix.format(top_k=top_k) + 
prompt_params = {"format_instructions": format_instructions} if format_instructions is not None else {} + prompt = ZeroShotAgent.create_prompt( + tools, + prefix=prefix, + suffix=suffix, + input_variables=input_variables, + **prompt_params, + ) + llm_chain = LLMChain( + llm=llm, + prompt=prompt, + callback_manager=callback_manager, + callbacks=callbacks, + ) + tool_names = [tool.name for tool in tools] + agent = ZeroShotAgent(llm_chain=llm_chain, allowed_tools=tool_names, **kwargs) + return AgentExecutor.from_agent_and_tools( + agent=agent, + tools=tools, + callback_manager=callback_manager, + callbacks=callbacks, + verbose=verbose, + max_iterations=max_iterations, + max_execution_time=max_execution_time, + early_stopping_method=early_stopping_method, + **(agent_executor_kwargs or {}), + ) diff --git a/.scripts/community_split/libs/community/langchain_community/agent_toolkits/sql/base.py b/.scripts/community_split/libs/community/langchain_community/agent_toolkits/sql/base.py new file mode 100644 index 0000000000000..bab14e6497aa0 --- /dev/null +++ b/.scripts/community_split/libs/community/langchain_community/agent_toolkits/sql/base.py @@ -0,0 +1,102 @@ +"""SQL agent.""" +from __future__ import annotations +from typing import Any, Dict, List, Optional, Sequence, TYPE_CHECKING + +from langchain_core.callbacks import BaseCallbackManager +from langchain_core.language_models import BaseLanguageModel +from langchain_core.messages import AIMessage, SystemMessage +from langchain_core.prompts.chat import ( + ChatPromptTemplate, + HumanMessagePromptTemplate, + MessagesPlaceholder, +) + +from langchain_community.agent_toolkits.sql.prompt import ( + SQL_FUNCTIONS_SUFFIX, + SQL_PREFIX, + SQL_SUFFIX, +) +from langchain_community.agent_toolkits.sql.toolkit import SQLDatabaseToolkit +from langchain_community.tools import BaseTool + +if TYPE_CHECKING: + from langchain.agents.agent import AgentExecutor + from langchain.agents.agent_types import AgentType + + +def create_sql_agent( + llm: BaseLanguageModel, + toolkit: SQLDatabaseToolkit, + agent_type: Optional[AgentType] = None, + callback_manager: Optional[BaseCallbackManager] = None, + prefix: str = SQL_PREFIX, + suffix: Optional[str] = None, + format_instructions: Optional[str] = None, + input_variables: Optional[List[str]] = None, + top_k: int = 10, + max_iterations: Optional[int] = 15, + max_execution_time: Optional[float] = None, + early_stopping_method: str = "force", + verbose: bool = False, + agent_executor_kwargs: Optional[Dict[str, Any]] = None, + extra_tools: Sequence[BaseTool] = (), + **kwargs: Any, +) -> AgentExecutor: + """Construct an SQL agent from an LLM and tools.""" + from langchain.agents.agent import AgentExecutor, BaseSingleActionAgent + from langchain.agents.agent_types import AgentType + from langchain.agents.mrkl.base import ZeroShotAgent + from langchain.agents.openai_functions_agent.base import OpenAIFunctionsAgent + from langchain.chains.llm import LLMChain + agent_type = agent_type or AgentType.ZERO_SHOT_REACT_DESCRIPTION + tools = toolkit.get_tools() + list(extra_tools) + prefix = prefix.format(dialect=toolkit.dialect, top_k=top_k) + agent: BaseSingleActionAgent + + if agent_type == AgentType.ZERO_SHOT_REACT_DESCRIPTION: + prompt_params = {"format_instructions": format_instructions} if format_instructions is not None else {} + prompt = ZeroShotAgent.create_prompt( + tools, + prefix=prefix, + suffix=suffix or SQL_SUFFIX, + input_variables=input_variables, + **prompt_params, + ) + llm_chain = LLMChain( + llm=llm, + prompt=prompt, 
+ callback_manager=callback_manager, + ) + tool_names = [tool.name for tool in tools] + agent = ZeroShotAgent(llm_chain=llm_chain, allowed_tools=tool_names, **kwargs) + + elif agent_type == AgentType.OPENAI_FUNCTIONS: + messages = [ + SystemMessage(content=prefix), + HumanMessagePromptTemplate.from_template("{input}"), + AIMessage(content=suffix or SQL_FUNCTIONS_SUFFIX), + MessagesPlaceholder(variable_name="agent_scratchpad"), + ] + input_variables = ["input", "agent_scratchpad"] + _prompt = ChatPromptTemplate(input_variables=input_variables, messages=messages) + + agent = OpenAIFunctionsAgent( + llm=llm, + prompt=_prompt, + tools=tools, + callback_manager=callback_manager, + **kwargs, + ) + else: + raise ValueError(f"Agent type {agent_type} not supported at the moment.") + + return AgentExecutor.from_agent_and_tools( + agent=agent, + tools=tools, + callback_manager=callback_manager, + verbose=verbose, + max_iterations=max_iterations, + max_execution_time=max_execution_time, + early_stopping_method=early_stopping_method, + **(agent_executor_kwargs or {}), + ) diff --git a/.scripts/community_split/libs/community/langchain_community/agent_toolkits/vectorstore/base.py b/.scripts/community_split/libs/community/langchain_community/agent_toolkits/vectorstore/base.py new file mode 100644 index 0000000000000..3e25a06b3065b --- /dev/null +++ b/.scripts/community_split/libs/community/langchain_community/agent_toolkits/vectorstore/base.py @@ -0,0 +1,103 @@ +"""VectorStore agent.""" +from __future__ import annotations +from typing import Any, Dict, Optional, TYPE_CHECKING + +from langchain_core.callbacks import BaseCallbackManager +from langchain_core.language_models import BaseLanguageModel + +from langchain_community.agent_toolkits.vectorstore.prompt import PREFIX, ROUTER_PREFIX +from langchain_community.agent_toolkits.vectorstore.toolkit import ( + VectorStoreRouterToolkit, + VectorStoreToolkit, +) + +if TYPE_CHECKING: + from langchain.agents.agent import AgentExecutor + + +def create_vectorstore_agent( + llm: BaseLanguageModel, + toolkit: VectorStoreToolkit, + callback_manager: Optional[BaseCallbackManager] = None, + prefix: str = PREFIX, + verbose: bool = False, + agent_executor_kwargs: Optional[Dict[str, Any]] = None, + **kwargs: Any, +) -> AgentExecutor: + """Construct a VectorStore agent from an LLM and tools. + + Args: + llm (BaseLanguageModel): LLM that will be used by the agent + toolkit (VectorStoreToolkit): Set of tools for the agent + callback_manager (Optional[BaseCallbackManager], optional): Object to handle the callback [ Defaults to None. ] + prefix (str, optional): The prefix prompt for the agent. If not provided uses default PREFIX. + verbose (bool, optional): If you want to see the content of the scratchpad. [ Defaults to False ] + agent_executor_kwargs (Optional[Dict[str, Any]], optional): If there is any other parameter you want to send to the agent. [ Defaults to None ] + **kwargs: Additional named parameters to pass to the ZeroShotAgent. + + Returns: + AgentExecutor: Returns a callable AgentExecutor object. 
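The SQL agent factory (create_sql_agent in agent_toolkits/sql/base.py above) can be used roughly like this; the database path and question are placeholders:

.. code-block:: python

    from langchain_openai.llms import OpenAI

    from langchain_community.agent_toolkits.sql.base import create_sql_agent
    from langchain_community.agent_toolkits.sql.toolkit import SQLDatabaseToolkit
    from langchain_community.utilities.sql_database import SQLDatabase

    db = SQLDatabase.from_uri("sqlite:///Chinook.db")  # placeholder database
    llm = OpenAI(temperature=0)
    toolkit = SQLDatabaseToolkit(db=db, llm=llm)

    # agent_type defaults to ZERO_SHOT_REACT_DESCRIPTION; pass
    # AgentType.OPENAI_FUNCTIONS with a chat model to use the functions agent.
    agent = create_sql_agent(llm=llm, toolkit=toolkit, verbose=True)
    agent.run("Which table has the most rows?")
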
Either you can call it or use run method with the query to get the response + """ # noqa: E501 + from langchain.agents.agent import AgentExecutor + from langchain.agents.mrkl.base import ZeroShotAgent + from langchain.chains.llm import LLMChain + tools = toolkit.get_tools() + prompt = ZeroShotAgent.create_prompt(tools, prefix=prefix) + llm_chain = LLMChain( + llm=llm, + prompt=prompt, + callback_manager=callback_manager, + ) + tool_names = [tool.name for tool in tools] + agent = ZeroShotAgent(llm_chain=llm_chain, allowed_tools=tool_names, **kwargs) + return AgentExecutor.from_agent_and_tools( + agent=agent, + tools=tools, + callback_manager=callback_manager, + verbose=verbose, + **(agent_executor_kwargs or {}), + ) + + +def create_vectorstore_router_agent( + llm: BaseLanguageModel, + toolkit: VectorStoreRouterToolkit, + callback_manager: Optional[BaseCallbackManager] = None, + prefix: str = ROUTER_PREFIX, + verbose: bool = False, + agent_executor_kwargs: Optional[Dict[str, Any]] = None, + **kwargs: Any, +) -> AgentExecutor: + """Construct a VectorStore router agent from an LLM and tools. + + Args: + llm (BaseLanguageModel): LLM that will be used by the agent + toolkit (VectorStoreRouterToolkit): Set of tools for the agent which have routing capability with multiple vector stores + callback_manager (Optional[BaseCallbackManager], optional): Object to handle the callback [ Defaults to None. ] + prefix (str, optional): The prefix prompt for the router agent. If not provided uses default ROUTER_PREFIX. + verbose (bool, optional): If you want to see the content of the scratchpad. [ Defaults to False ] + agent_executor_kwargs (Optional[Dict[str, Any]], optional): If there is any other parameter you want to send to the agent. [ Defaults to None ] + **kwargs: Additional named parameters to pass to the ZeroShotAgent. + + Returns: + AgentExecutor: Returns a callable AgentExecutor object. Either you can call it or use run method with the query to get the response. + """ # noqa: E501 + from langchain.agents.agent import AgentExecutor + from langchain.agents.mrkl.base import ZeroShotAgent + from langchain.chains.llm import LLMChain + tools = toolkit.get_tools() + prompt = ZeroShotAgent.create_prompt(tools, prefix=prefix) + llm_chain = LLMChain( + llm=llm, + prompt=prompt, + callback_manager=callback_manager, + ) + tool_names = [tool.name for tool in tools] + agent = ZeroShotAgent(llm_chain=llm_chain, allowed_tools=tool_names, **kwargs) + return AgentExecutor.from_agent_and_tools( + agent=agent, + tools=tools, + callback_manager=callback_manager, + verbose=verbose, + **(agent_executor_kwargs or {}), + ) diff --git a/.scripts/community_split/libs/community/langchain_community/document_loaders/base.py b/.scripts/community_split/libs/community/langchain_community/document_loaders/base.py new file mode 100644 index 0000000000000..770cae744295e --- /dev/null +++ b/.scripts/community_split/libs/community/langchain_community/document_loaders/base.py @@ -0,0 +1,101 @@ +"""Abstract interface for document loader implementations.""" +from __future__ import annotations +from abc import ABC, abstractmethod +from typing import Iterator, List, Optional, TYPE_CHECKING + +from langchain_core.documents import Document + +from langchain_community.document_loaders.blob_loaders import Blob + +if TYPE_CHECKING: + from langchain.text_splitter import TextSplitter + + +class BaseLoader(ABC): + """Interface for Document Loader. 
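For the vector-store agent factories above (agent_toolkits/vectorstore/base.py), a hedged sketch using an in-memory FAISS store; it assumes faiss-cpu is installed and that VectorStoreInfo/VectorStoreToolkit accept the fields shown:

.. code-block:: python

    from langchain_community.agent_toolkits.vectorstore.base import create_vectorstore_agent
    from langchain_community.agent_toolkits.vectorstore.toolkit import (
        VectorStoreInfo,
        VectorStoreToolkit,
    )
    from langchain_community.vectorstores import FAISS
    from langchain_openai import OpenAIEmbeddings
    from langchain_openai.llms import OpenAI

    store = FAISS.from_texts(
        ["The warranty period is 24 months.", "Support is available on weekdays."],
        OpenAIEmbeddings(),
    )
    info = VectorStoreInfo(
        name="product_faq",
        description="Answers questions about the product warranty and support.",
        vectorstore=store,
    )
    llm = OpenAI(temperature=0)
    agent = create_vectorstore_agent(
        llm=llm,
        toolkit=VectorStoreToolkit(vectorstore_info=info, llm=llm),
        verbose=True,
    )
    agent.run("How long is the warranty?")
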
+ + Implementations should implement the lazy-loading method using generators + to avoid loading all Documents into memory at once. + + The `load` method will remain as is for backwards compatibility, but its + implementation should be just `list(self.lazy_load())`. + """ + + # Sub-classes should implement this method + # as return list(self.lazy_load()). + # This method returns a List which is materialized in memory. + @abstractmethod + def load(self) -> List[Document]: + """Load data into Document objects.""" + + def load_and_split( + self, text_splitter: Optional[TextSplitter] = None + ) -> List[Document]: + """Load Documents and split into chunks. Chunks are returned as Documents. + + Args: + text_splitter: TextSplitter instance to use for splitting documents. + Defaults to RecursiveCharacterTextSplitter. + + Returns: + List of Documents. + """ + from langchain.text_splitter import RecursiveCharacterTextSplitter + + if text_splitter is None: + _text_splitter: TextSplitter = RecursiveCharacterTextSplitter() + else: + _text_splitter = text_splitter + docs = self.load() + return _text_splitter.split_documents(docs) + + # Attention: This method will be upgraded into an abstractmethod once it's + # implemented in all the existing subclasses. + def lazy_load( + self, + ) -> Iterator[Document]: + """A lazy loader for Documents.""" + raise NotImplementedError( + f"{self.__class__.__name__} does not implement lazy_load()" + ) + + +class BaseBlobParser(ABC): + """Abstract interface for blob parsers. + + A blob parser provides a way to parse raw data stored in a blob into one + or more documents. + + The parser can be composed with blob loaders, making it easy to reuse + a parser independent of how the blob was originally loaded. + """ + + @abstractmethod + def lazy_parse(self, blob: Blob) -> Iterator[Document]: + """Lazy parsing interface. + + Subclasses are required to implement this method. + + Args: + blob: Blob instance + + Returns: + Generator of documents + """ + + def parse(self, blob: Blob) -> List[Document]: + """Eagerly parse the blob into a document or documents. + + This is a convenience method for interactive development environment. + + Production applications should favor the lazy_parse method instead. + + Subclasses should generally not over-ride this parse method. + + Args: + blob: Blob instance + + Returns: + List of documents + """ + return list(self.lazy_parse(blob)) diff --git a/.scripts/community_split/libs/community/langchain_community/document_loaders/generic.py b/.scripts/community_split/libs/community/langchain_community/document_loaders/generic.py new file mode 100644 index 0000000000000..8028dafab5775 --- /dev/null +++ b/.scripts/community_split/libs/community/langchain_community/document_loaders/generic.py @@ -0,0 +1,182 @@ +from __future__ import annotations + +from pathlib import Path +from typing import Any, Iterator, List, Literal, Optional, Sequence, Union, \ + TYPE_CHECKING + +from langchain_core.documents import Document + +from langchain_community.document_loaders.base import BaseBlobParser, BaseLoader +from langchain_community.document_loaders.blob_loaders import ( + BlobLoader, + FileSystemBlobLoader, +) +from langchain_community.document_loaders.parsers.registry import get_parser + +if TYPE_CHECKING: + from langchain.text_splitter import TextSplitter + +_PathLike = Union[str, Path] + +DEFAULT = Literal["default"] + + +class GenericLoader(BaseLoader): + """Generic Document Loader. 
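The BaseLoader contract above can be met with a very small subclass; a toy sketch (the file path handling and the line-per-document split are arbitrary choices):

.. code-block:: python

    from typing import Iterator, List

    from langchain_core.documents import Document

    from langchain_community.document_loaders.base import BaseLoader


    class TextLinesLoader(BaseLoader):
        """Toy loader: yields one Document per non-empty line of a text file."""

        def __init__(self, path: str) -> None:
            self.path = path

        def lazy_load(self) -> Iterator[Document]:
            with open(self.path, encoding="utf-8") as f:
                for i, line in enumerate(f):
                    if line.strip():
                        yield Document(
                            page_content=line.strip(),
                            metadata={"source": self.path, "line": i},
                        )

        def load(self) -> List[Document]:
            # As the interface suggests, load() just materializes lazy_load().
            return list(self.lazy_load())
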
+ + A generic document loader that allows combining an arbitrary blob loader with + a blob parser. + + Examples: + + Parse a specific PDF file: + + .. code-block:: python + + from langchain_community.document_loaders import GenericLoader + from langchain_community.document_loaders.parsers.pdf import PyPDFParser + + # Recursively load all text files in a directory. + loader = GenericLoader.from_filesystem( + "my_lovely_pdf.pdf", + parser=PyPDFParser() + ) + + .. code-block:: python + + from langchain_community.document_loaders import GenericLoader + from langchain_community.document_loaders.blob_loaders import FileSystemBlobLoader + + + loader = GenericLoader.from_filesystem( + path="path/to/directory", + glob="**/[!.]*", + suffixes=[".pdf"], + show_progress=True, + ) + + docs = loader.lazy_load() + next(docs) + + Example instantiations to change which files are loaded: + + .. code-block:: python + + # Recursively load all text files in a directory. + loader = GenericLoader.from_filesystem("/path/to/dir", glob="**/*.txt") + + # Recursively load all non-hidden files in a directory. + loader = GenericLoader.from_filesystem("/path/to/dir", glob="**/[!.]*") + + # Load all files in a directory without recursion. + loader = GenericLoader.from_filesystem("/path/to/dir", glob="*") + + Example instantiations to change which parser is used: + + .. code-block:: python + + from langchain_community.document_loaders.parsers.pdf import PyPDFParser + + # Recursively load all text files in a directory. + loader = GenericLoader.from_filesystem( + "/path/to/dir", + glob="**/*.pdf", + parser=PyPDFParser() + ) + + """ + + def __init__( + self, + blob_loader: BlobLoader, + blob_parser: BaseBlobParser, + ) -> None: + """A generic document loader. + + Args: + blob_loader: A blob loader which knows how to yield blobs + blob_parser: A blob parser which knows how to parse blobs into documents + """ + self.blob_loader = blob_loader + self.blob_parser = blob_parser + + def lazy_load( + self, + ) -> Iterator[Document]: + """Load documents lazily. Use this when working at a large scale.""" + for blob in self.blob_loader.yield_blobs(): + yield from self.blob_parser.lazy_parse(blob) + + def load(self) -> List[Document]: + """Load all documents.""" + return list(self.lazy_load()) + + def load_and_split( + self, text_splitter: Optional[TextSplitter] = None + ) -> List[Document]: + """Load all documents and split them into sentences.""" + raise NotImplementedError( + "Loading and splitting is not yet implemented for generic loaders. " + "When they will be implemented they will be added via the initializer. " + "This method should not be used going forward." + ) + + @classmethod + def from_filesystem( + cls, + path: _PathLike, + *, + glob: str = "**/[!.]*", + exclude: Sequence[str] = (), + suffixes: Optional[Sequence[str]] = None, + show_progress: bool = False, + parser: Union[DEFAULT, BaseBlobParser] = "default", + parser_kwargs: Optional[dict] = None, + ) -> GenericLoader: + """Create a generic document loader using a filesystem blob loader. + + Args: + path: The path to the directory to load documents from OR the path to a + single file to load. If this is a file, glob, exclude, suffixes + will be ignored. + glob: The glob pattern to use to find documents. + suffixes: The suffixes to use to filter documents. If None, all files + matching the glob will be loaded. + exclude: A list of patterns to exclude from the loader. + show_progress: Whether to show a progress bar or not (requires tqdm). 
+ Proxies to the file system loader. + parser: A blob parser which knows how to parse blobs into documents, + will instantiate a default parser if not provided. + The default can be overridden by either passing a parser or + setting the class attribute `blob_parser` (the latter + should be used with inheritance). + parser_kwargs: Keyword arguments to pass to the parser. + + Returns: + A generic document loader. + """ + blob_loader = FileSystemBlobLoader( + path, + glob=glob, + exclude=exclude, + suffixes=suffixes, + show_progress=show_progress, + ) + if isinstance(parser, str): + if parser == "default": + try: + # If there is an implementation of get_parser on the class, use it. + blob_parser = cls.get_parser(**(parser_kwargs or {})) + except NotImplementedError: + # if not then use the global registry. + blob_parser = get_parser(parser) + else: + blob_parser = get_parser(parser) + else: + blob_parser = parser + return cls(blob_loader, blob_parser) + + @staticmethod + def get_parser(**kwargs: Any) -> BaseBlobParser: + """Override this method to associate a default parser with the class.""" + raise NotImplementedError() diff --git a/.scripts/community_split/libs/community/langchain_community/document_loaders/parsers/language/language_parser.py b/.scripts/community_split/libs/community/langchain_community/document_loaders/parsers/language/language_parser.py new file mode 100644 index 0000000000000..b87ca9006575f --- /dev/null +++ b/.scripts/community_split/libs/community/langchain_community/document_loaders/parsers/language/language_parser.py @@ -0,0 +1,157 @@ +from __future__ import annotations + +from typing import Any, Dict, Iterator, Optional, TYPE_CHECKING + +from langchain_core.documents import Document + +from langchain_community.document_loaders.base import BaseBlobParser +from langchain_community.document_loaders.blob_loaders import Blob +from langchain_community.document_loaders.parsers.language.cobol import CobolSegmenter +from langchain_community.document_loaders.parsers.language.javascript import ( + JavaScriptSegmenter, +) +from langchain_community.document_loaders.parsers.language.python import PythonSegmenter + +if TYPE_CHECKING: + from langchain.text_splitter import Language + +try: + from langchain.text_splitter import Language + LANGUAGE_EXTENSIONS: Dict[str, str] = { + "py": Language.PYTHON, + "js": Language.JS, + "cobol": Language.COBOL, + } + + LANGUAGE_SEGMENTERS: Dict[str, Any] = { + Language.PYTHON: PythonSegmenter, + Language.JS: JavaScriptSegmenter, + Language.COBOL: CobolSegmenter, + } +except ImportError: + LANGUAGE_EXTENSIONS = {} + LANGUAGE_SEGMENTERS = {} + + +class LanguageParser(BaseBlobParser): + """Parse using the respective programming language syntax. + + Each top-level function and class in the code is loaded into separate documents. + Furthermore, an extra document is generated, containing the remaining top-level code + that excludes the already segmented functions and classes. + + This approach can potentially improve the accuracy of QA models over source code. + + Currently, the supported languages for code parsing are Python and JavaScript. + + The language used for parsing can be configured, along with the minimum number of + lines required to activate the splitting based on syntax. + + Examples: + + .. 
code-block:: python
+
+            from langchain.text_splitter import Language
+            from langchain_community.document_loaders.generic import GenericLoader
+            from langchain_community.document_loaders.parsers import LanguageParser
+
+            loader = GenericLoader.from_filesystem(
+                "./code",
+                glob="**/*",
+                suffixes=[".py", ".js"],
+                parser=LanguageParser()
+            )
+            docs = loader.load()
+
+        Example instantiations to manually select the language:
+
+        .. code-block:: python
+
+            from langchain.text_splitter import Language
+
+            loader = GenericLoader.from_filesystem(
+                "./code",
+                glob="**/*",
+                suffixes=[".py"],
+                parser=LanguageParser(language=Language.PYTHON)
+            )
+
+        Example instantiations to set number of lines threshold:
+
+        .. code-block:: python
+
+            loader = GenericLoader.from_filesystem(
+                "./code",
+                glob="**/*",
+                suffixes=[".py"],
+                parser=LanguageParser(parser_threshold=200)
+            )
+    """
+
+    def __init__(self, language: Optional[Language] = None, parser_threshold: int = 0):
+        """
+        Language parser that splits code using the respective language syntax.
+
+        Args:
+            language: If None (default), it will try to infer language from source.
+            parser_threshold: Minimum lines needed to activate parsing (0 by default).
+        """
+        self.language = language
+        self.parser_threshold = parser_threshold
+
+    def lazy_parse(self, blob: Blob) -> Iterator[Document]:
+        code = blob.as_string()
+
+        language = self.language or (
+            LANGUAGE_EXTENSIONS.get(blob.source.rsplit(".", 1)[-1])
+            if isinstance(blob.source, str)
+            else None
+        )
+
+        if language is None:
+            yield Document(
+                page_content=code,
+                metadata={
+                    "source": blob.source,
+                },
+            )
+            return
+
+        if self.parser_threshold >= len(code.splitlines()):
+            yield Document(
+                page_content=code,
+                metadata={
+                    "source": blob.source,
+                    "language": language,
+                },
+            )
+            return
+
+        self.Segmenter = LANGUAGE_SEGMENTERS[language]
+        segmenter = self.Segmenter(blob.as_string())
+        if not segmenter.is_valid():
+            yield Document(
+                page_content=code,
+                metadata={
+                    "source": blob.source,
+                },
+            )
+            return
+
+        for functions_classes in segmenter.extract_functions_classes():
+            yield Document(
+                page_content=functions_classes,
+                metadata={
+                    "source": blob.source,
+                    "content_type": "functions_classes",
+                    "language": language,
+                },
+            )
+        yield Document(
+            page_content=segmenter.simplify_code(),
+            metadata={
+                "source": blob.source,
+                "content_type": "simplified_code",
+                "language": language,
+            },
+        ) diff --git a/.scripts/community_split/libs/community/langchain_community/document_loaders/telegram.py b/.scripts/community_split/libs/community/langchain_community/document_loaders/telegram.py new file mode 100644 index 0000000000000..b4b796b6abc6e --- /dev/null +++ b/.scripts/community_split/libs/community/langchain_community/document_loaders/telegram.py @@ -0,0 +1,262 @@
+from __future__ import annotations
+
+import asyncio
+import json
+from pathlib import Path
+from typing import TYPE_CHECKING, Dict, List, Optional, Union
+
+from langchain_core.documents import Document
+
+from langchain_community.document_loaders.base import BaseLoader
+
+if TYPE_CHECKING:
+    import pandas as pd
+    from telethon.hints import EntityLike
+
+
+def concatenate_rows(row: dict) -> str:
+    """Combine message information in a readable format ready to be used."""
+    date = row["date"]
+    sender = row["from"]
+    text = row["text"]
+    return f"{sender} on {date}: {text}\n\n"
+
+
+class TelegramChatFileLoader(BaseLoader):
+    """Load from `Telegram chat` dump."""
+
+    def __init__(self, path: str):
+        """Initialize with a path."""
+        self.file_path
= path + + def load(self) -> List[Document]: + """Load documents.""" + p = Path(self.file_path) + + with open(p, encoding="utf8") as f: + d = json.load(f) + + text = "".join( + concatenate_rows(message) + for message in d["messages"] + if message["type"] == "message" and isinstance(message["text"], str) + ) + metadata = {"source": str(p)} + + return [Document(page_content=text, metadata=metadata)] + + +def text_to_docs(text: Union[str, List[str]]) -> List[Document]: + """Convert a string or list of strings to a list of Documents with metadata.""" + from langchain.text_splitter import RecursiveCharacterTextSplitter + if isinstance(text, str): + # Take a single string as one page + text = [text] + page_docs = [Document(page_content=page) for page in text] + + # Add page numbers as metadata + for i, doc in enumerate(page_docs): + doc.metadata["page"] = i + 1 + + # Split pages into chunks + doc_chunks = [] + + for doc in page_docs: + text_splitter = RecursiveCharacterTextSplitter( + chunk_size=800, + separators=["\n\n", "\n", ".", "!", "?", ",", " ", ""], + chunk_overlap=20, + ) + chunks = text_splitter.split_text(doc.page_content) + for i, chunk in enumerate(chunks): + doc = Document( + page_content=chunk, metadata={"page": doc.metadata["page"], "chunk": i} + ) + # Add sources a metadata + doc.metadata["source"] = f"{doc.metadata['page']}-{doc.metadata['chunk']}" + doc_chunks.append(doc) + return doc_chunks + + +class TelegramChatApiLoader(BaseLoader): + """Load `Telegram` chat json directory dump.""" + + def __init__( + self, + chat_entity: Optional[EntityLike] = None, + api_id: Optional[int] = None, + api_hash: Optional[str] = None, + username: Optional[str] = None, + file_path: str = "telegram_data.json", + ): + """Initialize with API parameters. + + Args: + chat_entity: The chat entity to fetch data from. + api_id: The API ID. + api_hash: The API hash. + username: The username. + file_path: The file path to save the data to. Defaults to + "telegram_data.json". + """ + self.chat_entity = chat_entity + self.api_id = api_id + self.api_hash = api_hash + self.username = username + self.file_path = file_path + + async def fetch_data_from_telegram(self) -> None: + """Fetch data from Telegram API and save it as a JSON file.""" + from telethon.sync import TelegramClient + + data = [] + async with TelegramClient(self.username, self.api_id, self.api_hash) as client: + async for message in client.iter_messages(self.chat_entity): + is_reply = message.reply_to is not None + reply_to_id = message.reply_to.reply_to_msg_id if is_reply else None + data.append( + { + "sender_id": message.sender_id, + "text": message.text, + "date": message.date.isoformat(), + "message.id": message.id, + "is_reply": is_reply, + "reply_to_id": reply_to_id, + } + ) + + with open(self.file_path, "w", encoding="utf-8") as f: + json.dump(data, f, ensure_ascii=False, indent=4) + + def _get_message_threads(self, data: pd.DataFrame) -> dict: + """Create a dictionary of message threads from the given data. + + Args: + data (pd.DataFrame): A DataFrame containing the conversation \ + data with columns: + - message.sender_id + - text + - date + - message.id + - is_reply + - reply_to_id + + Returns: + dict: A dictionary where the key is the parent message ID and \ + the value is a list of message IDs in ascending order. + """ + + def find_replies(parent_id: int, reply_data: pd.DataFrame) -> List[int]: + """ + Recursively find all replies to a given parent message ID. + + Args: + parent_id (int): The parent message ID. 
+ reply_data (pd.DataFrame): A DataFrame containing reply messages. + + Returns: + list: A list of message IDs that are replies to the parent message ID. + """ + # Find direct replies to the parent message ID + direct_replies = reply_data[reply_data["reply_to_id"] == parent_id][ + "message.id" + ].tolist() + + # Recursively find replies to the direct replies + all_replies = [] + for reply_id in direct_replies: + all_replies += [reply_id] + find_replies(reply_id, reply_data) + + return all_replies + + # Filter out parent messages + parent_messages = data[~data["is_reply"]] + + # Filter out reply messages and drop rows with NaN in 'reply_to_id' + reply_messages = data[data["is_reply"]].dropna(subset=["reply_to_id"]) + + # Convert 'reply_to_id' to integer + reply_messages["reply_to_id"] = reply_messages["reply_to_id"].astype(int) + + # Create a dictionary of message threads with parent message IDs as keys and \ + # lists of reply message IDs as values + message_threads = { + parent_id: [parent_id] + find_replies(parent_id, reply_messages) + for parent_id in parent_messages["message.id"] + } + + return message_threads + + def _combine_message_texts( + self, message_threads: Dict[int, List[int]], data: pd.DataFrame + ) -> str: + """ + Combine the message texts for each parent message ID based \ + on the list of message threads. + + Args: + message_threads (dict): A dictionary where the key is the parent message \ + ID and the value is a list of message IDs in ascending order. + data (pd.DataFrame): A DataFrame containing the conversation data: + - message.sender_id + - text + - date + - message.id + - is_reply + - reply_to_id + + Returns: + str: A combined string of message texts sorted by date. + """ + combined_text = "" + + # Iterate through sorted parent message IDs + for parent_id, message_ids in message_threads.items(): + # Get the message texts for the message IDs and sort them by date + message_texts = ( + data[data["message.id"].isin(message_ids)] + .sort_values(by="date")["text"] + .tolist() + ) + message_texts = [str(elem) for elem in message_texts] + + # Combine the message texts + combined_text += " ".join(message_texts) + ".\n" + + return combined_text.strip() + + def load(self) -> List[Document]: + """Load documents.""" + + if self.chat_entity is not None: + try: + import nest_asyncio + + nest_asyncio.apply() + asyncio.run(self.fetch_data_from_telegram()) + except ImportError: + raise ImportError( + """`nest_asyncio` package not found. + please install with `pip install nest_asyncio` + """ + ) + + p = Path(self.file_path) + + with open(p, encoding="utf8") as f: + d = json.load(f) + try: + import pandas as pd + except ImportError: + raise ImportError( + """`pandas` package not found. 
+ please install with `pip install pandas` + """ + ) + normalized_messages = pd.json_normalize(d) + df = pd.DataFrame(normalized_messages) + + message_threads = self._get_message_threads(df) + combined_texts = self._combine_message_texts(message_threads, df) + + return text_to_docs(combined_texts) diff --git a/.scripts/community_split/libs/community/langchain_community/document_transformers/openai_functions.py b/.scripts/community_split/libs/community/langchain_community/document_transformers/openai_functions.py new file mode 100644 index 0000000000000..b796e6db87b5e --- /dev/null +++ b/.scripts/community_split/libs/community/langchain_community/document_transformers/openai_functions.py @@ -0,0 +1,140 @@ +"""Document transformers that use OpenAI Functions models""" +from typing import Any, Dict, Optional, Sequence, Type, Union + +from langchain_core.documents import BaseDocumentTransformer, Document +from langchain_core.language_models import BaseLanguageModel +from langchain_core.prompts import ChatPromptTemplate +from langchain_core.pydantic_v1 import BaseModel + + +class OpenAIMetadataTagger(BaseDocumentTransformer, BaseModel): + """Extract metadata tags from document contents using OpenAI functions. + + Example: + .. code-block:: python + + from langchain_community.chat_models import ChatOpenAI + from langchain_community.document_transformers import OpenAIMetadataTagger + from langchain_core.documents import Document + + schema = { + "properties": { + "movie_title": { "type": "string" }, + "critic": { "type": "string" }, + "tone": { + "type": "string", + "enum": ["positive", "negative"] + }, + "rating": { + "type": "integer", + "description": "The number of stars the critic rated the movie" + } + }, + "required": ["movie_title", "critic", "tone"] + } + + # Must be an OpenAI model that supports functions + llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo-0613") + tagging_chain = create_tagging_chain(schema, llm) + document_transformer = OpenAIMetadataTagger(tagging_chain=tagging_chain) + original_documents = [ + Document(page_content="Review of The Bee Movie\nBy Roger Ebert\n\nThis is the greatest movie ever made. 4 out of 5 stars."), + Document(page_content="Review of The Godfather\nBy Anonymous\n\nThis movie was super boring. 
1 out of 5 stars.", metadata={"reliable": False}), + ] + + enhanced_documents = document_transformer.transform_documents(original_documents) + """ # noqa: E501 + + tagging_chain: Any + """The chain used to extract metadata from each document.""" + + def transform_documents( + self, documents: Sequence[Document], **kwargs: Any + ) -> Sequence[Document]: + """Automatically extract and populate metadata + for each document according to the provided schema.""" + + new_documents = [] + + for document in documents: + extracted_metadata: Dict = self.tagging_chain.run(document.page_content) # type: ignore[assignment] # noqa: E501 + new_document = Document( + page_content=document.page_content, + metadata={**extracted_metadata, **document.metadata}, + ) + new_documents.append(new_document) + return new_documents + + async def atransform_documents( + self, documents: Sequence[Document], **kwargs: Any + ) -> Sequence[Document]: + raise NotImplementedError + + +def create_metadata_tagger( + metadata_schema: Union[Dict[str, Any], Type[BaseModel]], + llm: BaseLanguageModel, + prompt: Optional[ChatPromptTemplate] = None, + *, + tagging_chain_kwargs: Optional[Dict] = None, +) -> OpenAIMetadataTagger: + """Create a DocumentTransformer that uses an OpenAI function chain to automatically + tag documents with metadata based on their content and an input schema. + + Args: + metadata_schema: Either a dictionary or pydantic.BaseModel class. If a dictionary + is passed in, it's assumed to already be a valid JsonSchema. + For best results, pydantic.BaseModels should have docstrings describing what + the schema represents and descriptions for the parameters. + llm: Language model to use, assumed to support the OpenAI function-calling API. + Defaults to use "gpt-3.5-turbo-0613" + prompt: BasePromptTemplate to pass to the model. + + Returns: + An LLMChain that will pass the given function to the model. + + Example: + .. code-block:: python + + from langchain_community.chat_models import ChatOpenAI + from langchain_community.document_transformers import create_metadata_tagger + from langchain_core.documents import Document + + schema = { + "properties": { + "movie_title": { "type": "string" }, + "critic": { "type": "string" }, + "tone": { + "type": "string", + "enum": ["positive", "negative"] + }, + "rating": { + "type": "integer", + "description": "The number of stars the critic rated the movie" + } + }, + "required": ["movie_title", "critic", "tone"] + } + + # Must be an OpenAI model that supports functions + llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo-0613") + + document_transformer = create_metadata_tagger(schema, llm) + original_documents = [ + Document(page_content="Review of The Bee Movie\nBy Roger Ebert\n\nThis is the greatest movie ever made. 4 out of 5 stars."), + Document(page_content="Review of The Godfather\nBy Anonymous\n\nThis movie was super boring. 
1 out of 5 stars.", metadata={"reliable": False}), + ] + + enhanced_documents = document_transformer.transform_documents(original_documents) + """ # noqa: E501 + from langchain.chains.openai_functions import create_tagging_chain + metadata_schema = ( + metadata_schema + if isinstance(metadata_schema, dict) + else metadata_schema.schema() + ) + _tagging_chain_kwargs = tagging_chain_kwargs or {} + tagging_chain = create_tagging_chain( + metadata_schema, llm, prompt=prompt, **_tagging_chain_kwargs + ) + return OpenAIMetadataTagger(tagging_chain=tagging_chain) diff --git a/.scripts/community_split/libs/community/langchain_community/tools/amadeus/closest_airport.py b/.scripts/community_split/libs/community/langchain_community/tools/amadeus/closest_airport.py new file mode 100644 index 0000000000000..dff78e761367f --- /dev/null +++ b/.scripts/community_split/libs/community/langchain_community/tools/amadeus/closest_airport.py @@ -0,0 +1,50 @@ +from typing import Optional, Type + +from langchain_core.callbacks import CallbackManagerForToolRun +from langchain_core.pydantic_v1 import BaseModel, Field + +from langchain_community.chat_models import ChatOpenAI +from langchain_community.tools.amadeus.base import AmadeusBaseTool + + +class ClosestAirportSchema(BaseModel): + """Schema for the AmadeusClosestAirport tool.""" + + location: str = Field( + description=( + " The location for which you would like to find the nearest airport " + " along with optional details such as country, state, region, or " + " province, allowing for easy processing and identification of " + " the closest airport. Examples of the format are the following:\n" + " Cali, Colombia\n " + " Lincoln, Nebraska, United States\n" + " New York, United States\n" + " Sydney, New South Wales, Australia\n" + " Rome, Lazio, Italy\n" + " Toronto, Ontario, Canada\n" + ) + ) + + +class AmadeusClosestAirport(AmadeusBaseTool): + """Tool for finding the closest airport to a particular location.""" + + name: str = "closest_airport" + description: str = ( + "Use this tool to find the closest airport to a particular location." + ) + args_schema: Type[ClosestAirportSchema] = ClosestAirportSchema + + def _run( + self, + location: str, + run_manager: Optional[CallbackManagerForToolRun] = None, + ) -> str: + content = ( + f" What is the nearest airport to {location}? Please respond with the " + " airport's International Air Transport Association (IATA) Location " + ' Identifier in the following JSON format. JSON: "iataCode": "IATA ' + ' Location Identifier" ' + ) + + return ChatOpenAI(temperature=0).invoke(content) diff --git a/.scripts/community_split/libs/community/langchain_community/tools/clickup/tool.py b/.scripts/community_split/libs/community/langchain_community/tools/clickup/tool.py new file mode 100644 index 0000000000000..93988dd7d59f0 --- /dev/null +++ b/.scripts/community_split/libs/community/langchain_community/tools/clickup/tool.py @@ -0,0 +1,42 @@ +""" +This tool allows agents to interact with the clickup library +and operate on a Clickup instance. 
+To use this tool, you must first set as environment variables: + client_secret + client_id + code + +Below is a sample script that uses the Clickup tool: + +```python +from langchain_community.agent_toolkits.clickup.toolkit import ClickupToolkit +from langchain_community.utilities.clickup import ClickupAPIWrapper + +clickup = ClickupAPIWrapper() +toolkit = ClickupToolkit.from_clickup_api_wrapper(clickup) +``` +""" +from typing import Optional + +from langchain_core.callbacks import CallbackManagerForToolRun +from langchain_core.pydantic_v1 import Field +from langchain_core.tools import BaseTool + +from langchain_community.utilities.clickup import ClickupAPIWrapper + + +class ClickupAction(BaseTool): + """Tool that queries the Clickup API.""" + + api_wrapper: ClickupAPIWrapper = Field(default_factory=ClickupAPIWrapper) + mode: str + name: str = "" + description: str = "" + + def _run( + self, + instructions: str, + run_manager: Optional[CallbackManagerForToolRun] = None, + ) -> str: + """Use the Clickup API to run an operation.""" + return self.api_wrapper.run(self.mode, instructions) diff --git a/.scripts/community_split/libs/community/langchain_community/tools/jira/tool.py b/.scripts/community_split/libs/community/langchain_community/tools/jira/tool.py new file mode 100644 index 0000000000000..dc57b13dc205b --- /dev/null +++ b/.scripts/community_split/libs/community/langchain_community/tools/jira/tool.py @@ -0,0 +1,44 @@ +""" +This tool allows agents to interact with the atlassian-python-api library +and operate on a Jira instance. For more information on the +atlassian-python-api library, see https://atlassian-python-api.readthedocs.io/jira.html + +To use this tool, you must first set as environment variables: + JIRA_API_TOKEN + JIRA_USERNAME + JIRA_INSTANCE_URL + +Below is a sample script that uses the Jira tool: + +```python +from langchain_community.agent_toolkits.jira.toolkit import JiraToolkit +from langchain_community.utilities.jira import JiraAPIWrapper + +jira = JiraAPIWrapper() +toolkit = JiraToolkit.from_jira_api_wrapper(jira) +``` +""" +from typing import Optional + +from langchain_core.callbacks import CallbackManagerForToolRun +from langchain_core.pydantic_v1 import Field +from langchain_core.tools import BaseTool + +from langchain_community.utilities.jira import JiraAPIWrapper + + +class JiraAction(BaseTool): + """Tool that queries the Atlassian Jira API.""" + + api_wrapper: JiraAPIWrapper = Field(default_factory=JiraAPIWrapper) + mode: str + name: str = "" + description: str = "" + + def _run( + self, + instructions: str, + run_manager: Optional[CallbackManagerForToolRun] = None, + ) -> str: + """Use the Atlassian Jira API to run an operation.""" + return self.api_wrapper.run(self.mode, instructions) diff --git a/.scripts/community_split/libs/community/langchain_community/tools/powerbi/tool.py b/.scripts/community_split/libs/community/langchain_community/tools/powerbi/tool.py new file mode 100644 index 0000000000000..b780bf1d9acd3 --- /dev/null +++ b/.scripts/community_split/libs/community/langchain_community/tools/powerbi/tool.py @@ -0,0 +1,276 @@ +"""Tools for interacting with a Power BI dataset.""" +import logging +from time import perf_counter +from typing import Any, Dict, Optional, Tuple + +from langchain_core.callbacks import ( + AsyncCallbackManagerForToolRun, + CallbackManagerForToolRun, +) +from langchain_core.pydantic_v1 import Field, validator +from langchain_core.tools import BaseTool +from langchain_openai.chat_models import _import_tiktoken + +from 
langchain_community.tools.powerbi.prompt import ( + BAD_REQUEST_RESPONSE, + DEFAULT_FEWSHOT_EXAMPLES, + RETRY_RESPONSE, +) +from langchain_community.utilities.powerbi import PowerBIDataset, json_to_md + +logger = logging.getLogger(__name__) + + +class QueryPowerBITool(BaseTool): + """Tool for querying a Power BI Dataset.""" + + name: str = "query_powerbi" + description: str = """ + Input to this tool is a detailed question about the dataset, output is a result from the dataset. It will try to answer the question using the dataset, and if it cannot, it will ask for clarification. + + Example Input: "How many rows are in table1?" + """ # noqa: E501 + llm_chain: Any + powerbi: PowerBIDataset = Field(exclude=True) + examples: Optional[str] = DEFAULT_FEWSHOT_EXAMPLES + session_cache: Dict[str, Any] = Field(default_factory=dict, exclude=True) + max_iterations: int = 5 + output_token_limit: int = 4000 + tiktoken_model_name: Optional[str] = None # "cl100k_base" + + class Config: + """Configuration for this pydantic object.""" + + arbitrary_types_allowed = True + + @validator("llm_chain") + def validate_llm_chain_input_variables( # pylint: disable=E0213 + cls, llm_chain: Any + ) -> Any: + """Make sure the LLM chain has the correct input variables.""" + for var in llm_chain.prompt.input_variables: + if var not in ["tool_input", "tables", "schemas", "examples"]: + raise ValueError( + "LLM chain for QueryPowerBITool must have input variables ['tool_input', 'tables', 'schemas', 'examples'], found %s", # noqa: C0301 E501 # pylint: disable=C0301 + llm_chain.prompt.input_variables, + ) + return llm_chain + + def _check_cache(self, tool_input: str) -> Optional[str]: + """Check if the input is present in the cache. + + If the value is a bad request, overwrite with the escalated version, + if not present return None.""" + if tool_input not in self.session_cache: + return None + return self.session_cache[tool_input] + + def _run( + self, + tool_input: str, + run_manager: Optional[CallbackManagerForToolRun] = None, + **kwargs: Any, + ) -> str: + """Execute the query, return the results or an error message.""" + if cache := self._check_cache(tool_input): + logger.debug("Found cached result for %s: %s", tool_input, cache) + return cache + + try: + logger.info("Running PBI Query Tool with input: %s", tool_input) + query = self.llm_chain.predict( + tool_input=tool_input, + tables=self.powerbi.get_table_names(), + schemas=self.powerbi.get_schemas(), + examples=self.examples, + callbacks=run_manager.get_child() if run_manager else None, + ) + except Exception as exc: # pylint: disable=broad-except + self.session_cache[tool_input] = f"Error on call to LLM: {exc}" + return self.session_cache[tool_input] + if query == "I cannot answer this": + self.session_cache[tool_input] = query + return self.session_cache[tool_input] + logger.info("PBI Query:\n%s", query) + start_time = perf_counter() + pbi_result = self.powerbi.run(command=query) + end_time = perf_counter() + logger.debug("PBI Result: %s", pbi_result) + logger.debug(f"PBI Query duration: {end_time - start_time:0.6f}") + result, error = self._parse_output(pbi_result) + if error is not None and "TokenExpired" in error: + self.session_cache[ + tool_input + ] = "Authentication token expired or invalid, please try reauthenticate." 
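+            # Descriptive note (not part of the original): authentication errors are
+            # not retried by the retry block below; the cached message is returned
+            # immediately so the user can reauthenticate first.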
+ return self.session_cache[tool_input] + + iterations = kwargs.get("iterations", 0) + if error and iterations < self.max_iterations: + return self._run( + tool_input=RETRY_RESPONSE.format( + tool_input=tool_input, query=query, error=error + ), + run_manager=run_manager, + iterations=iterations + 1, + ) + + self.session_cache[tool_input] = ( + result if result else BAD_REQUEST_RESPONSE.format(error=error) + ) + return self.session_cache[tool_input] + + async def _arun( + self, + tool_input: str, + run_manager: Optional[AsyncCallbackManagerForToolRun] = None, + **kwargs: Any, + ) -> str: + """Execute the query, return the results or an error message.""" + if cache := self._check_cache(tool_input): + logger.debug("Found cached result for %s: %s", tool_input, cache) + return f"{cache}, from cache, you have already asked this question." + try: + logger.info("Running PBI Query Tool with input: %s", tool_input) + query = await self.llm_chain.apredict( + tool_input=tool_input, + tables=self.powerbi.get_table_names(), + schemas=self.powerbi.get_schemas(), + examples=self.examples, + callbacks=run_manager.get_child() if run_manager else None, + ) + except Exception as exc: # pylint: disable=broad-except + self.session_cache[tool_input] = f"Error on call to LLM: {exc}" + return self.session_cache[tool_input] + + if query == "I cannot answer this": + self.session_cache[tool_input] = query + return self.session_cache[tool_input] + logger.info("PBI Query: %s", query) + start_time = perf_counter() + pbi_result = await self.powerbi.arun(command=query) + end_time = perf_counter() + logger.debug("PBI Result: %s", pbi_result) + logger.debug(f"PBI Query duration: {end_time - start_time:0.6f}") + result, error = self._parse_output(pbi_result) + if error is not None and ("TokenExpired" in error or "TokenError" in error): + self.session_cache[ + tool_input + ] = "Authentication token expired or invalid, please try to reauthenticate or check the scope of the credential." # noqa: E501 + return self.session_cache[tool_input] + + iterations = kwargs.get("iterations", 0) + if error and iterations < self.max_iterations: + return await self._arun( + tool_input=RETRY_RESPONSE.format( + tool_input=tool_input, query=query, error=error + ), + run_manager=run_manager, + iterations=iterations + 1, + ) + + self.session_cache[tool_input] = ( + result if result else BAD_REQUEST_RESPONSE.format(error=error) + ) + return self.session_cache[tool_input] + + def _parse_output( + self, pbi_result: Dict[str, Any] + ) -> Tuple[Optional[str], Optional[Any]]: + """Parse the output of the query to a markdown table.""" + if "results" in pbi_result: + rows = pbi_result["results"][0]["tables"][0]["rows"] + if len(rows) == 0: + logger.info("0 records in result, query was valid.") + return ( + None, + "0 rows returned, this might be correct, but please validate if all filter values were correct?", # noqa: E501 + ) + result = json_to_md(rows) + too_long, length = self._result_too_large(result) + if too_long: + return ( + f"Result too large, please try to be more specific or use the `TOPN` function. 
The result is {length} tokens long, the limit is {self.output_token_limit} tokens.", # noqa: E501 + None, + ) + return result, None + + if "error" in pbi_result: + if ( + "pbi.error" in pbi_result["error"] + and "details" in pbi_result["error"]["pbi.error"] + ): + return None, pbi_result["error"]["pbi.error"]["details"][0]["detail"] + return None, pbi_result["error"] + return None, pbi_result + + def _result_too_large(self, result: str) -> Tuple[bool, int]: + """Tokenize the output of the query.""" + if self.tiktoken_model_name: + tiktoken_ = _import_tiktoken() + encoding = tiktoken_.encoding_for_model(self.tiktoken_model_name) + length = len(encoding.encode(result)) + logger.info("Result length: %s", length) + return length > self.output_token_limit, length + return False, 0 + + +class InfoPowerBITool(BaseTool): + """Tool for getting metadata about a PowerBI Dataset.""" + + name: str = "schema_powerbi" + description: str = """ + Input to this tool is a comma-separated list of tables, output is the schema and sample rows for those tables. + Be sure that the tables actually exist by calling list_tables_powerbi first! + + Example Input: "table1, table2, table3" + """ # noqa: E501 + powerbi: PowerBIDataset = Field(exclude=True) + + class Config: + """Configuration for this pydantic object.""" + + arbitrary_types_allowed = True + + def _run( + self, + tool_input: str, + run_manager: Optional[CallbackManagerForToolRun] = None, + ) -> str: + """Get the schema for tables in a comma-separated list.""" + return self.powerbi.get_table_info(tool_input.split(", ")) + + async def _arun( + self, + tool_input: str, + run_manager: Optional[AsyncCallbackManagerForToolRun] = None, + ) -> str: + return await self.powerbi.aget_table_info(tool_input.split(", ")) + + +class ListPowerBITool(BaseTool): + """Tool for getting tables names.""" + + name: str = "list_tables_powerbi" + description: str = "Input is an empty string, output is a comma separated list of tables in the database." 
# noqa: E501 # pylint: disable=C0301 + powerbi: PowerBIDataset = Field(exclude=True) + + class Config: + """Configuration for this pydantic object.""" + + arbitrary_types_allowed = True + + def _run( + self, + tool_input: Optional[str] = None, + run_manager: Optional[CallbackManagerForToolRun] = None, + ) -> str: + """Get the names of the tables.""" + return ", ".join(self.powerbi.get_table_names()) + + async def _arun( + self, + tool_input: Optional[str] = None, + run_manager: Optional[AsyncCallbackManagerForToolRun] = None, + ) -> str: + """Get the names of the tables.""" + return ", ".join(self.powerbi.get_table_names()) diff --git a/.scripts/community_split/libs/community/langchain_community/tools/spark_sql/tool.py b/.scripts/community_split/libs/community/langchain_community/tools/spark_sql/tool.py new file mode 100644 index 0000000000000..4a07000249d33 --- /dev/null +++ b/.scripts/community_split/libs/community/langchain_community/tools/spark_sql/tool.py @@ -0,0 +1,130 @@ +# flake8: noqa +"""Tools for interacting with Spark SQL.""" +from typing import Any, Dict, Optional + +from langchain_core.pydantic_v1 import BaseModel, Field, root_validator + +from langchain_core.language_models import BaseLanguageModel +from langchain_core.callbacks import ( + AsyncCallbackManagerForToolRun, + CallbackManagerForToolRun, +) +from langchain_core.prompts import PromptTemplate +from langchain_community.utilities.spark_sql import SparkSQL +from langchain_core.tools import BaseTool +from langchain_community.tools.spark_sql.prompt import QUERY_CHECKER + + +class BaseSparkSQLTool(BaseModel): + """Base tool for interacting with Spark SQL.""" + + db: SparkSQL = Field(exclude=True) + + class Config(BaseTool.Config): + pass + + +class QuerySparkSQLTool(BaseSparkSQLTool, BaseTool): + """Tool for querying a Spark SQL.""" + + name: str = "query_sql_db" + description: str = """ + Input to this tool is a detailed and correct SQL query, output is a result from the Spark SQL. + If the query is not correct, an error message will be returned. + If an error is returned, rewrite the query, check the query, and try again. + """ + + def _run( + self, + query: str, + run_manager: Optional[CallbackManagerForToolRun] = None, + ) -> str: + """Execute the query, return the results or an error message.""" + return self.db.run_no_throw(query) + + +class InfoSparkSQLTool(BaseSparkSQLTool, BaseTool): + """Tool for getting metadata about a Spark SQL.""" + + name: str = "schema_sql_db" + description: str = """ + Input to this tool is a comma-separated list of tables, output is the schema and sample rows for those tables. + Be sure that the tables actually exist by calling list_tables_sql_db first! + + Example Input: "table1, table2, table3" + """ + + def _run( + self, + table_names: str, + run_manager: Optional[CallbackManagerForToolRun] = None, + ) -> str: + """Get the schema for tables in a comma-separated list.""" + return self.db.get_table_info_no_throw(table_names.split(", ")) + + +class ListSparkSQLTool(BaseSparkSQLTool, BaseTool): + """Tool for getting tables names.""" + + name: str = "list_tables_sql_db" + description: str = "Input is an empty string, output is a comma separated list of tables in the Spark SQL." 
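+    # Descriptive note (not part of the original): the tool input is ignored;
+    # _run always returns the full list of usable table names.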
+ + def _run( + self, + tool_input: str = "", + run_manager: Optional[CallbackManagerForToolRun] = None, + ) -> str: + """Get the schema for a specific table.""" + return ", ".join(self.db.get_usable_table_names()) + + +class QueryCheckerTool(BaseSparkSQLTool, BaseTool): + """Use an LLM to check if a query is correct. + Adapted from https://www.patterns.app/blog/2023/01/18/crunchbot-sql-analyst-gpt/""" + + template: str = QUERY_CHECKER + llm: BaseLanguageModel + llm_chain: Any = Field(init=False) + name: str = "query_checker_sql_db" + description: str = """ + Use this tool to double check if your query is correct before executing it. + Always use this tool before executing a query with query_sql_db! + """ + + @root_validator(pre=True) + def initialize_llm_chain(cls, values: Dict[str, Any]) -> Dict[str, Any]: + if "llm_chain" not in values: + from langchain.chains.llm import LLMChain + values["llm_chain"] = LLMChain( + llm=values.get("llm"), + prompt=PromptTemplate( + template=QUERY_CHECKER, input_variables=["query"] + ), + ) + + if values["llm_chain"].prompt.input_variables != ["query"]: + raise ValueError( + "LLM chain for QueryCheckerTool need to use ['query'] as input_variables " + "for the embedded prompt" + ) + + return values + + def _run( + self, + query: str, + run_manager: Optional[CallbackManagerForToolRun] = None, + ) -> str: + """Use the LLM to check the query.""" + return self.llm_chain.predict( + query=query, callbacks=run_manager.get_child() if run_manager else None + ) + + async def _arun( + self, + query: str, + run_manager: Optional[AsyncCallbackManagerForToolRun] = None, + ) -> str: + return await self.llm_chain.apredict( + query=query, callbacks=run_manager.get_child() if run_manager else None + ) diff --git a/.scripts/community_split/libs/community/langchain_community/tools/sql_database/tool.py b/.scripts/community_split/libs/community/langchain_community/tools/sql_database/tool.py new file mode 100644 index 0000000000000..3e0d6509b995a --- /dev/null +++ b/.scripts/community_split/libs/community/langchain_community/tools/sql_database/tool.py @@ -0,0 +1,134 @@ +# flake8: noqa +"""Tools for interacting with a SQL database.""" +from typing import Any, Dict, Optional + +from langchain_core.pydantic_v1 import BaseModel, Extra, Field, root_validator + +from langchain_core.language_models import BaseLanguageModel +from langchain_core.callbacks import ( + AsyncCallbackManagerForToolRun, + CallbackManagerForToolRun, +) +from langchain_core.prompts import PromptTemplate +from langchain_community.utilities.sql_database import SQLDatabase +from langchain_core.tools import BaseTool +from langchain_community.tools.sql_database.prompt import QUERY_CHECKER + + +class BaseSQLDatabaseTool(BaseModel): + """Base tool for interacting with a SQL database.""" + + db: SQLDatabase = Field(exclude=True) + + class Config(BaseTool.Config): + pass + + +class QuerySQLDataBaseTool(BaseSQLDatabaseTool, BaseTool): + """Tool for querying a SQL database.""" + + name: str = "sql_db_query" + description: str = """ + Input to this tool is a detailed and correct SQL query, output is a result from the database. + If the query is not correct, an error message will be returned. + If an error is returned, rewrite the query, check the query, and try again. 
+ """ + + def _run( + self, + query: str, + run_manager: Optional[CallbackManagerForToolRun] = None, + ) -> str: + """Execute the query, return the results or an error message.""" + return self.db.run_no_throw(query) + + +class InfoSQLDatabaseTool(BaseSQLDatabaseTool, BaseTool): + """Tool for getting metadata about a SQL database.""" + + name: str = "sql_db_schema" + description: str = """ + Input to this tool is a comma-separated list of tables, output is the schema and sample rows for those tables. + + Example Input: "table1, table2, table3" + """ + + def _run( + self, + table_names: str, + run_manager: Optional[CallbackManagerForToolRun] = None, + ) -> str: + """Get the schema for tables in a comma-separated list.""" + return self.db.get_table_info_no_throw( + [t.strip() for t in table_names.split(",")] + ) + + +class ListSQLDatabaseTool(BaseSQLDatabaseTool, BaseTool): + """Tool for getting tables names.""" + + name: str = "sql_db_list_tables" + description: str = "Input is an empty string, output is a comma separated list of tables in the database." + + def _run( + self, + tool_input: str = "", + run_manager: Optional[CallbackManagerForToolRun] = None, + ) -> str: + """Get the schema for a specific table.""" + return ", ".join(self.db.get_usable_table_names()) + + +class QuerySQLCheckerTool(BaseSQLDatabaseTool, BaseTool): + """Use an LLM to check if a query is correct. + Adapted from https://www.patterns.app/blog/2023/01/18/crunchbot-sql-analyst-gpt/""" + + template: str = QUERY_CHECKER + llm: BaseLanguageModel + llm_chain: Any = Field(init=False) + name: str = "sql_db_query_checker" + description: str = """ + Use this tool to double check if your query is correct before executing it. + Always use this tool before executing a query with sql_db_query! + """ + + @root_validator(pre=True) + def initialize_llm_chain(cls, values: Dict[str, Any]) -> Dict[str, Any]: + if "llm_chain" not in values: + from langchain.chains.llm import LLMChain + values["llm_chain"] = LLMChain( + llm=values.get("llm"), + prompt=PromptTemplate( + template=QUERY_CHECKER, input_variables=["dialect", "query"] + ), + ) + + if values["llm_chain"].prompt.input_variables != ["dialect", "query"]: + raise ValueError( + "LLM chain for QueryCheckerTool must have input variables ['query', 'dialect']" + ) + + return values + + def _run( + self, + query: str, + run_manager: Optional[CallbackManagerForToolRun] = None, + ) -> str: + """Use the LLM to check the query.""" + return self.llm_chain.predict( + query=query, + dialect=self.db.dialect, + callbacks=run_manager.get_child() if run_manager else None, + ) + + async def _arun( + self, + query: str, + run_manager: Optional[AsyncCallbackManagerForToolRun] = None, + ) -> str: + return await self.llm_chain.apredict( + query=query, + dialect=self.db.dialect, + callbacks=run_manager.get_child() if run_manager else None, + ) diff --git a/.scripts/community_split/libs/community/langchain_community/tools/zapier/tool.py b/.scripts/community_split/libs/community/langchain_community/tools/zapier/tool.py new file mode 100644 index 0000000000000..3d5f395554618 --- /dev/null +++ b/.scripts/community_split/libs/community/langchain_community/tools/zapier/tool.py @@ -0,0 +1,215 @@ +"""[DEPRECATED] + +## Zapier Natural Language Actions API +\ +Full docs here: https://nla.zapier.com/start/ + +**Zapier Natural Language Actions** gives you access to the 5k+ apps, 20k+ actions +on Zapier's platform through a natural language API interface. 
+ +NLA supports apps like Gmail, Salesforce, Trello, Slack, Asana, HubSpot, Google Sheets, +Microsoft Teams, and thousands more apps: https://zapier.com/apps + +Zapier NLA handles ALL the underlying API auth and translation from +natural language --> underlying API call --> return simplified output for LLMs +The key idea is you, or your users, expose a set of actions via an oauth-like setup +window, which you can then query and execute via a REST API. + +NLA offers both API Key and OAuth for signing NLA API requests. + +1. Server-side (API Key): for quickly getting started, testing, and production scenarios + where LangChain will only use actions exposed in the developer's Zapier account + (and will use the developer's connected accounts on Zapier.com) + +2. User-facing (Oauth): for production scenarios where you are deploying an end-user + facing application and LangChain needs access to end-user's exposed actions and + connected accounts on Zapier.com + +This quick start will focus on the server-side use case for brevity. +Review [full docs](https://nla.zapier.com/start/) for user-facing oauth developer +support. + +Typically, you'd use SequentialChain, here's a basic example: + + 1. Use NLA to find an email in Gmail + 2. Use LLMChain to generate a draft reply to (1) + 3. Use NLA to send the draft reply (2) to someone in Slack via direct message + +In code, below: + +```python + +import os + +# get from https://platform.openai.com/ +os.environ["OPENAI_API_KEY"] = os.environ.get("OPENAI_API_KEY", "") + +# get from https://nla.zapier.com/docs/authentication/ +os.environ["ZAPIER_NLA_API_KEY"] = os.environ.get("ZAPIER_NLA_API_KEY", "") + +from langchain_community.agent_toolkits import ZapierToolkit +from langchain_community.utilities.zapier import ZapierNLAWrapper + +## step 0. expose gmail 'find email' and slack 'send channel message' actions + +# first go here, log in, expose (enable) the two actions: +# https://nla.zapier.com/demo/start +# -- for this example, can leave all fields "Have AI guess" +# in an oauth scenario, you'd get your own id (instead of 'demo') +# which you route your users through first + +zapier = ZapierNLAWrapper() +## To leverage OAuth you may pass the value `nla_oauth_access_token` to +## the ZapierNLAWrapper. If you do this there is no need to initialize +## the ZAPIER_NLA_API_KEY env variable +# zapier = ZapierNLAWrapper(zapier_nla_oauth_access_token="TOKEN_HERE") +toolkit = ZapierToolkit.from_zapier_nla_wrapper(zapier) +``` + +""" +from typing import Any, Dict, Optional + +from langchain_core._api import warn_deprecated +from langchain_core.callbacks import ( + AsyncCallbackManagerForToolRun, + CallbackManagerForToolRun, +) +from langchain_core.pydantic_v1 import Field, root_validator +from langchain_core.tools import BaseTool + +from langchain_community.tools.zapier.prompt import BASE_ZAPIER_TOOL_PROMPT +from langchain_community.utilities.zapier import ZapierNLAWrapper + + +class ZapierNLARunAction(BaseTool): + """ + Args: + action_id: a specific action ID (from list actions) of the action to execute + (the set api_key must be associated with the action owner) + instructions: a natural language instruction string for using the action + (eg. "get the latest email from Mike Knoop" for "Gmail: find email" action) + params: a dict, optional. 
Any params provided will *override* AI guesses + from `instructions` (see "understanding the AI guessing flow" here: + https://nla.zapier.com/docs/using-the-api#ai-guessing) + + """ + + api_wrapper: ZapierNLAWrapper = Field(default_factory=ZapierNLAWrapper) + action_id: str + params: Optional[dict] = None + base_prompt: str = BASE_ZAPIER_TOOL_PROMPT + zapier_description: str + params_schema: Dict[str, str] = Field(default_factory=dict) + name: str = "" + description: str = "" + + @root_validator + def set_name_description(cls, values: Dict[str, Any]) -> Dict[str, Any]: + zapier_description = values["zapier_description"] + params_schema = values["params_schema"] + if "instructions" in params_schema: + del params_schema["instructions"] + + # Ensure base prompt (if overridden) contains necessary input fields + necessary_fields = {"{zapier_description}", "{params}"} + if not all(field in values["base_prompt"] for field in necessary_fields): + raise ValueError( + "Your custom base Zapier prompt must contain input fields for " + "{zapier_description} and {params}." + ) + + values["name"] = zapier_description + values["description"] = values["base_prompt"].format( + zapier_description=zapier_description, + params=str(list(params_schema.keys())), + ) + return values + + def _run( + self, instructions: str, run_manager: Optional[CallbackManagerForToolRun] = None + ) -> str: + """Use the Zapier NLA tool to return a list of all exposed user actions.""" + warn_deprecated( + since="0.0.319", + message=( + "This tool will be deprecated on 2023-11-17. See " + "https://nla.zapier.com/sunset/ for details" + ), + ) + return self.api_wrapper.run_as_str(self.action_id, instructions, self.params) + + async def _arun( + self, + instructions: str, + run_manager: Optional[AsyncCallbackManagerForToolRun] = None, + ) -> str: + """Use the Zapier NLA tool to return a list of all exposed user actions.""" + warn_deprecated( + since="0.0.319", + message=( + "This tool will be deprecated on 2023-11-17. See " + "https://nla.zapier.com/sunset/ for details" + ), + ) + return await self.api_wrapper.arun_as_str( + self.action_id, + instructions, + self.params, + ) + + +ZapierNLARunAction.__doc__ = ( + ZapierNLAWrapper.run.__doc__ + ZapierNLARunAction.__doc__ # type: ignore +) + + +# other useful actions + + +class ZapierNLAListActions(BaseTool): + """ + Args: + None + + """ + + name: str = "ZapierNLA_list_actions" + description: str = BASE_ZAPIER_TOOL_PROMPT + ( + "This tool returns a list of the user's exposed actions." + ) + api_wrapper: ZapierNLAWrapper = Field(default_factory=ZapierNLAWrapper) + + def _run( + self, + _: str = "", + run_manager: Optional[CallbackManagerForToolRun] = None, + ) -> str: + """Use the Zapier NLA tool to return a list of all exposed user actions.""" + warn_deprecated( + since="0.0.319", + message=( + "This tool will be deprecated on 2023-11-17. See " + "https://nla.zapier.com/sunset/ for details" + ), + ) + return self.api_wrapper.list_as_str() + + async def _arun( + self, + _: str = "", + run_manager: Optional[AsyncCallbackManagerForToolRun] = None, + ) -> str: + """Use the Zapier NLA tool to return a list of all exposed user actions.""" + warn_deprecated( + since="0.0.319", + message=( + "This tool will be deprecated on 2023-11-17. 
See " + "https://nla.zapier.com/sunset/ for details" + ), + ) + return await self.api_wrapper.alist_as_str() + + +ZapierNLAListActions.__doc__ = ( + ZapierNLAWrapper.list.__doc__ + ZapierNLAListActions.__doc__ # type: ignore +) diff --git a/.scripts/community_split/libs/community/tests/integration_tests/callbacks/test_langchain_tracer.py b/.scripts/community_split/libs/community/tests/integration_tests/callbacks/test_langchain_tracer.py new file mode 100644 index 0000000000000..1228619b2f283 --- /dev/null +++ b/.scripts/community_split/libs/community/tests/integration_tests/callbacks/test_langchain_tracer.py @@ -0,0 +1,284 @@ +"""Integration tests for the langchain tracer module.""" +import asyncio +import os + +from aiohttp import ClientSession +from langchain_core.callbacks import atrace_as_chain_group, trace_as_chain_group +from langchain_core.tracers.context import tracing_v2_enabled +from langchain_core.prompts import PromptTemplate + +from langchain_community.callbacks import tracing_enabled +from langchain_community.chat_models import ChatOpenAI +from langchain_community.llms import OpenAI + +questions = [ + ( + "Who won the US Open men's final in 2019? " + "What is his age raised to the 0.334 power?" + ), + ( + "Who is Olivia Wilde's boyfriend? " + "What is his current age raised to the 0.23 power?" + ), + ( + "Who won the most recent formula 1 grand prix? " + "What is their age raised to the 0.23 power?" + ), + ( + "Who won the US Open women's final in 2019? " + "What is her age raised to the 0.34 power?" + ), + ("Who is Beyonce's husband? " "What is his age raised to the 0.19 power?"), +] + + +def test_tracing_sequential() -> None: + from langchain.agents import AgentType, initialize_agent, load_tools + os.environ["LANGCHAIN_TRACING"] = "true" + + for q in questions[:3]: + llm = OpenAI(temperature=0) + tools = load_tools(["llm-math", "serpapi"], llm=llm) + agent = initialize_agent( + tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True + ) + agent.run(q) + + +def test_tracing_session_env_var() -> None: + from langchain.agents import AgentType, initialize_agent, load_tools + os.environ["LANGCHAIN_TRACING"] = "true" + os.environ["LANGCHAIN_SESSION"] = "my_session" + + llm = OpenAI(temperature=0) + tools = load_tools(["llm-math", "serpapi"], llm=llm) + agent = initialize_agent( + tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True + ) + agent.run(questions[0]) + if "LANGCHAIN_SESSION" in os.environ: + del os.environ["LANGCHAIN_SESSION"] + + +async def test_tracing_concurrent() -> None: + from langchain.agents import AgentType, initialize_agent, load_tools + os.environ["LANGCHAIN_TRACING"] = "true" + aiosession = ClientSession() + llm = OpenAI(temperature=0) + async_tools = load_tools(["llm-math", "serpapi"], llm=llm, aiosession=aiosession) + agent = initialize_agent( + async_tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True + ) + tasks = [agent.arun(q) for q in questions[:3]] + await asyncio.gather(*tasks) + await aiosession.close() + + +async def test_tracing_concurrent_bw_compat_environ() -> None: + from langchain.agents import AgentType, initialize_agent, load_tools + os.environ["LANGCHAIN_HANDLER"] = "langchain" + if "LANGCHAIN_TRACING" in os.environ: + del os.environ["LANGCHAIN_TRACING"] + aiosession = ClientSession() + llm = OpenAI(temperature=0) + async_tools = load_tools(["llm-math", "serpapi"], llm=llm, aiosession=aiosession) + agent = initialize_agent( + async_tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, 
verbose=True + ) + tasks = [agent.arun(q) for q in questions[:3]] + await asyncio.gather(*tasks) + await aiosession.close() + if "LANGCHAIN_HANDLER" in os.environ: + del os.environ["LANGCHAIN_HANDLER"] + + +def test_tracing_context_manager() -> None: + from langchain.agents import AgentType, initialize_agent, load_tools + llm = OpenAI(temperature=0) + tools = load_tools(["llm-math", "serpapi"], llm=llm) + agent = initialize_agent( + tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True + ) + if "LANGCHAIN_TRACING" in os.environ: + del os.environ["LANGCHAIN_TRACING"] + with tracing_enabled() as session: + assert session + agent.run(questions[0]) # this should be traced + + agent.run(questions[0]) # this should not be traced + + +async def test_tracing_context_manager_async() -> None: + from langchain.agents import AgentType, initialize_agent, load_tools + llm = OpenAI(temperature=0) + async_tools = load_tools(["llm-math", "serpapi"], llm=llm) + agent = initialize_agent( + async_tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True + ) + if "LANGCHAIN_TRACING" in os.environ: + del os.environ["LANGCHAIN_TRACING"] + + # start a background task + task = asyncio.create_task(agent.arun(questions[0])) # this should not be traced + with tracing_enabled() as session: + assert session + tasks = [agent.arun(q) for q in questions[1:4]] # these should be traced + await asyncio.gather(*tasks) + + await task + + +async def test_tracing_v2_environment_variable() -> None: + from langchain.agents import AgentType, initialize_agent, load_tools + os.environ["LANGCHAIN_TRACING_V2"] = "true" + + aiosession = ClientSession() + llm = OpenAI(temperature=0) + async_tools = load_tools(["llm-math", "serpapi"], llm=llm, aiosession=aiosession) + agent = initialize_agent( + async_tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True + ) + tasks = [agent.arun(q) for q in questions[:3]] + await asyncio.gather(*tasks) + await aiosession.close() + + +def test_tracing_v2_context_manager() -> None: + from langchain.agents import AgentType, initialize_agent, load_tools + llm = ChatOpenAI(temperature=0) + tools = load_tools(["llm-math", "serpapi"], llm=llm) + agent = initialize_agent( + tools, llm, agent=AgentType.CHAT_ZERO_SHOT_REACT_DESCRIPTION, verbose=True + ) + if "LANGCHAIN_TRACING_V2" in os.environ: + del os.environ["LANGCHAIN_TRACING_V2"] + with tracing_v2_enabled(): + agent.run(questions[0]) # this should be traced + + agent.run(questions[0]) # this should not be traced + + +def test_tracing_v2_chain_with_tags() -> None: + from langchain.chains.llm import LLMChain + from langchain.chains.constitutional_ai.base import ConstitutionalChain + from langchain.chains.constitutional_ai.models import ConstitutionalPrinciple + llm = OpenAI(temperature=0) + chain = ConstitutionalChain.from_llm( + llm, + chain=LLMChain.from_string(llm, "Q: {question} A:"), + tags=["only-root"], + constitutional_principles=[ + ConstitutionalPrinciple( + critique_request="Tell if this answer is good.", + revision_request="Give a better answer.", + ) + ], + ) + if "LANGCHAIN_TRACING_V2" in os.environ: + del os.environ["LANGCHAIN_TRACING_V2"] + with tracing_v2_enabled(): + chain.run("what is the meaning of life", tags=["a-tag"]) + + +def test_tracing_v2_agent_with_metadata() -> None: + from langchain.agents import AgentType, initialize_agent, load_tools + os.environ["LANGCHAIN_TRACING_V2"] = "true" + llm = OpenAI(temperature=0) + chat = ChatOpenAI(temperature=0) + tools = load_tools(["llm-math", 
"serpapi"], llm=llm) + agent = initialize_agent( + tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True + ) + chat_agent = initialize_agent( + tools, chat, agent=AgentType.CHAT_ZERO_SHOT_REACT_DESCRIPTION, verbose=True + ) + agent.run(questions[0], tags=["a-tag"], metadata={"a": "b", "c": "d"}) + chat_agent.run(questions[0], tags=["a-tag"], metadata={"a": "b", "c": "d"}) + + +async def test_tracing_v2_async_agent_with_metadata() -> None: + from langchain.agents import AgentType, initialize_agent, load_tools + os.environ["LANGCHAIN_TRACING_V2"] = "true" + llm = OpenAI(temperature=0, metadata={"f": "g", "h": "i"}) + chat = ChatOpenAI(temperature=0, metadata={"f": "g", "h": "i"}) + async_tools = load_tools(["llm-math", "serpapi"], llm=llm) + agent = initialize_agent( + async_tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True + ) + chat_agent = initialize_agent( + async_tools, + chat, + agent=AgentType.CHAT_ZERO_SHOT_REACT_DESCRIPTION, + verbose=True, + ) + await agent.arun(questions[0], tags=["a-tag"], metadata={"a": "b", "c": "d"}) + await chat_agent.arun(questions[0], tags=["a-tag"], metadata={"a": "b", "c": "d"}) + + +def test_trace_as_group() -> None: + from langchain.chains.llm import LLMChain + llm = OpenAI(temperature=0.9) + prompt = PromptTemplate( + input_variables=["product"], + template="What is a good name for a company that makes {product}?", + ) + chain = LLMChain(llm=llm, prompt=prompt) + with trace_as_chain_group("my_group", inputs={"input": "cars"}) as group_manager: + chain.run(product="cars", callbacks=group_manager) + chain.run(product="computers", callbacks=group_manager) + final_res = chain.run(product="toys", callbacks=group_manager) + group_manager.on_chain_end({"output": final_res}) + + with trace_as_chain_group("my_group_2", inputs={"input": "toys"}) as group_manager: + final_res = chain.run(product="toys", callbacks=group_manager) + group_manager.on_chain_end({"output": final_res}) + + +def test_trace_as_group_with_env_set() -> None: + from langchain.chains.llm import LLMChain + os.environ["LANGCHAIN_TRACING_V2"] = "true" + llm = OpenAI(temperature=0.9) + prompt = PromptTemplate( + input_variables=["product"], + template="What is a good name for a company that makes {product}?", + ) + chain = LLMChain(llm=llm, prompt=prompt) + with trace_as_chain_group( + "my_group_env_set", inputs={"input": "cars"} + ) as group_manager: + chain.run(product="cars", callbacks=group_manager) + chain.run(product="computers", callbacks=group_manager) + final_res = chain.run(product="toys", callbacks=group_manager) + group_manager.on_chain_end({"output": final_res}) + + with trace_as_chain_group( + "my_group_2_env_set", inputs={"input": "toys"} + ) as group_manager: + final_res = chain.run(product="toys", callbacks=group_manager) + group_manager.on_chain_end({"output": final_res}) + + +async def test_trace_as_group_async() -> None: + from langchain.chains.llm import LLMChain + llm = OpenAI(temperature=0.9) + prompt = PromptTemplate( + input_variables=["product"], + template="What is a good name for a company that makes {product}?", + ) + chain = LLMChain(llm=llm, prompt=prompt) + async with atrace_as_chain_group("my_async_group") as group_manager: + await chain.arun(product="cars", callbacks=group_manager) + await chain.arun(product="computers", callbacks=group_manager) + await chain.arun(product="toys", callbacks=group_manager) + + async with atrace_as_chain_group( + "my_async_group_2", inputs={"input": "toys"} + ) as group_manager: + res = await 
asyncio.gather( + *[ + chain.arun(product="toys", callbacks=group_manager), + chain.arun(product="computers", callbacks=group_manager), + chain.arun(product="cars", callbacks=group_manager), + ] + ) + await group_manager.on_chain_end({"output": res}) diff --git a/.scripts/community_split/libs/community/tests/integration_tests/callbacks/test_openai_callback.py b/.scripts/community_split/libs/community/tests/integration_tests/callbacks/test_openai_callback.py new file mode 100644 index 0000000000000..f0908bbf8d052 --- /dev/null +++ b/.scripts/community_split/libs/community/tests/integration_tests/callbacks/test_openai_callback.py @@ -0,0 +1,68 @@ +"""Integration tests for the langchain tracer module.""" +import asyncio + + +from langchain_community.callbacks import get_openai_callback +from langchain_community.llms import OpenAI + + +async def test_openai_callback() -> None: + llm = OpenAI(temperature=0) + with get_openai_callback() as cb: + llm("What is the square root of 4?") + + total_tokens = cb.total_tokens + assert total_tokens > 0 + + with get_openai_callback() as cb: + llm("What is the square root of 4?") + llm("What is the square root of 4?") + + assert cb.total_tokens == total_tokens * 2 + + with get_openai_callback() as cb: + await asyncio.gather( + *[llm.agenerate(["What is the square root of 4?"]) for _ in range(3)] + ) + + assert cb.total_tokens == total_tokens * 3 + + task = asyncio.create_task(llm.agenerate(["What is the square root of 4?"])) + with get_openai_callback() as cb: + await llm.agenerate(["What is the square root of 4?"]) + + await task + assert cb.total_tokens == total_tokens + + +def test_openai_callback_batch_llm() -> None: + llm = OpenAI(temperature=0) + with get_openai_callback() as cb: + llm.generate(["What is the square root of 4?", "What is the square root of 4?"]) + + assert cb.total_tokens > 0 + total_tokens = cb.total_tokens + + with get_openai_callback() as cb: + llm("What is the square root of 4?") + llm("What is the square root of 4?") + + assert cb.total_tokens == total_tokens + + +def test_openai_callback_agent() -> None: + from langchain.agents import AgentType, initialize_agent, load_tools + llm = OpenAI(temperature=0) + tools = load_tools(["serpapi", "llm-math"], llm=llm) + agent = initialize_agent( + tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True + ) + with get_openai_callback() as cb: + agent.run( + "Who is Olivia Wilde's boyfriend? " + "What is his current age raised to the 0.23 power?" + ) + print(f"Total Tokens: {cb.total_tokens}") + print(f"Prompt Tokens: {cb.prompt_tokens}") + print(f"Completion Tokens: {cb.completion_tokens}") + print(f"Total Cost (USD): ${cb.total_cost}") diff --git a/.scripts/community_split/libs/community/tests/integration_tests/callbacks/test_streamlit_callback.py b/.scripts/community_split/libs/community/tests/integration_tests/callbacks/test_streamlit_callback.py new file mode 100644 index 0000000000000..1ffe61dbdcf07 --- /dev/null +++ b/.scripts/community_split/libs/community/tests/integration_tests/callbacks/test_streamlit_callback.py @@ -0,0 +1,30 @@ +"""Integration tests for the StreamlitCallbackHandler module.""" + +import pytest + +# Import the internal StreamlitCallbackHandler from its module - and not from +# the `langchain_community.callbacks.streamlit` package - so that we don't end up using +# Streamlit's externally-provided callback handler. 
+from langchain_community.callbacks.streamlit.streamlit_callback_handler import ( + StreamlitCallbackHandler, +) +from langchain_community.llms import OpenAI + + +@pytest.mark.requires("streamlit") +def test_streamlit_callback_agent() -> None: + from langchain.agents import AgentType, initialize_agent, load_tools + import streamlit as st + + streamlit_callback = StreamlitCallbackHandler(st.container()) + + llm = OpenAI(temperature=0) + tools = load_tools(["serpapi", "llm-math"], llm=llm) + agent = initialize_agent( + tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True + ) + agent.run( + "Who is Olivia Wilde's boyfriend? " + "What is his current age raised to the 0.23 power?", + callbacks=[streamlit_callback], + ) diff --git a/.scripts/community_split/libs/community/tests/integration_tests/callbacks/test_wandb_tracer.py b/.scripts/community_split/libs/community/tests/integration_tests/callbacks/test_wandb_tracer.py new file mode 100644 index 0000000000000..02f022c62ad7f --- /dev/null +++ b/.scripts/community_split/libs/community/tests/integration_tests/callbacks/test_wandb_tracer.py @@ -0,0 +1,118 @@ +"""Integration tests for the langchain tracer module.""" +import asyncio +import os + +from aiohttp import ClientSession +from langchain_community.callbacks import wandb_tracing_enabled + +from langchain_community.llms import OpenAI + +questions = [ + ( + "Who won the US Open men's final in 2019? " + "What is his age raised to the 0.334 power?" + ), + ( + "Who is Olivia Wilde's boyfriend? " + "What is his current age raised to the 0.23 power?" + ), + ( + "Who won the most recent formula 1 grand prix? " + "What is their age raised to the 0.23 power?" + ), + ( + "Who won the US Open women's final in 2019? " + "What is her age raised to the 0.34 power?" + ), + ("Who is Beyonce's husband? 
" "What is his age raised to the 0.19 power?"), +] + + +def test_tracing_sequential() -> None: + from langchain.agents import AgentType, initialize_agent, load_tools + os.environ["LANGCHAIN_WANDB_TRACING"] = "true" + os.environ["WANDB_PROJECT"] = "langchain-tracing" + + for q in questions[:3]: + llm = OpenAI(temperature=0) + tools = load_tools( + ["llm-math", "serpapi"], + llm=llm, + ) + agent = initialize_agent( + tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True + ) + agent.run(q) + + +def test_tracing_session_env_var() -> None: + from langchain.agents import AgentType, initialize_agent, load_tools + os.environ["LANGCHAIN_WANDB_TRACING"] = "true" + + llm = OpenAI(temperature=0) + tools = load_tools( + ["llm-math", "serpapi"], + llm=llm, + ) + agent = initialize_agent( + tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True + ) + agent.run(questions[0]) + + +async def test_tracing_concurrent() -> None: + from langchain.agents import AgentType, initialize_agent, load_tools + os.environ["LANGCHAIN_WANDB_TRACING"] = "true" + aiosession = ClientSession() + llm = OpenAI(temperature=0) + async_tools = load_tools( + ["llm-math", "serpapi"], + llm=llm, + aiosession=aiosession, + ) + agent = initialize_agent( + async_tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True + ) + tasks = [agent.arun(q) for q in questions[:3]] + await asyncio.gather(*tasks) + await aiosession.close() + + +def test_tracing_context_manager() -> None: + from langchain.agents import AgentType, initialize_agent, load_tools + llm = OpenAI(temperature=0) + tools = load_tools( + ["llm-math", "serpapi"], + llm=llm, + ) + agent = initialize_agent( + tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True + ) + if "LANGCHAIN_WANDB_TRACING" in os.environ: + del os.environ["LANGCHAIN_WANDB_TRACING"] + with wandb_tracing_enabled(): + agent.run(questions[0]) # this should be traced + + agent.run(questions[0]) # this should not be traced + + +async def test_tracing_context_manager_async() -> None: + from langchain.agents import AgentType, initialize_agent, load_tools + llm = OpenAI(temperature=0) + async_tools = load_tools( + ["llm-math", "serpapi"], + llm=llm, + ) + agent = initialize_agent( + async_tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True + ) + if "LANGCHAIN_WANDB_TRACING" in os.environ: + del os.environ["LANGCHAIN_TRACING"] + + # start a background task + task = asyncio.create_task(agent.arun(questions[0])) # this should not be traced + with wandb_tracing_enabled(): + tasks = [agent.arun(q) for q in questions[1:4]] # these should be traced + await asyncio.gather(*tasks) + + await task diff --git a/.scripts/community_split/libs/community/tests/integration_tests/chat_models/test_openai.py b/.scripts/community_split/libs/community/tests/integration_tests/chat_models/test_openai.py new file mode 100644 index 0000000000000..46b346eb9a549 --- /dev/null +++ b/.scripts/community_split/libs/community/tests/integration_tests/chat_models/test_openai.py @@ -0,0 +1,341 @@ +"""Test ChatOpenAI wrapper.""" +from typing import Any, Optional + +import pytest +from langchain_core.callbacks import CallbackManager +from langchain_core.messages import BaseMessage, HumanMessage, SystemMessage +from langchain_core.outputs import ( + ChatGeneration, + ChatResult, + LLMResult, +) +from langchain_core.prompts import ChatPromptTemplate +from langchain_core.pydantic_v1 import BaseModel, Field +from langchain_openai.chat_models import ChatOpenAI + +from 
langchain_community.output_parsers.openai_functions import ( + JsonOutputFunctionsParser, +) +from tests.unit_tests.callbacks.fake_callback_handler import FakeCallbackHandler + + +@pytest.mark.scheduled +def test_chat_openai() -> None: + """Test ChatOpenAI wrapper.""" + chat = ChatOpenAI( + temperature=0.7, + base_url=None, + organization=None, + openai_proxy=None, + timeout=10.0, + max_retries=3, + http_client=None, + n=1, + max_tokens=10, + default_headers=None, + default_query=None, + ) + message = HumanMessage(content="Hello") + response = chat([message]) + assert isinstance(response, BaseMessage) + assert isinstance(response.content, str) + + +def test_chat_openai_model() -> None: + """Test ChatOpenAI wrapper handles model_name.""" + chat = ChatOpenAI(model="foo") + assert chat.model_name == "foo" + chat = ChatOpenAI(model_name="bar") + assert chat.model_name == "bar" + + +def test_chat_openai_system_message() -> None: + """Test ChatOpenAI wrapper with system message.""" + chat = ChatOpenAI(max_tokens=10) + system_message = SystemMessage(content="You are to chat with the user.") + human_message = HumanMessage(content="Hello") + response = chat([system_message, human_message]) + assert isinstance(response, BaseMessage) + assert isinstance(response.content, str) + + +@pytest.mark.scheduled +def test_chat_openai_generate() -> None: + """Test ChatOpenAI wrapper with generate.""" + chat = ChatOpenAI(max_tokens=10, n=2) + message = HumanMessage(content="Hello") + response = chat.generate([[message], [message]]) + assert isinstance(response, LLMResult) + assert len(response.generations) == 2 + assert response.llm_output + assert "system_fingerprint" in response.llm_output + for generations in response.generations: + assert len(generations) == 2 + for generation in generations: + assert isinstance(generation, ChatGeneration) + assert isinstance(generation.text, str) + assert generation.text == generation.message.content + + +@pytest.mark.scheduled +def test_chat_openai_multiple_completions() -> None: + """Test ChatOpenAI wrapper with multiple completions.""" + chat = ChatOpenAI(max_tokens=10, n=5) + message = HumanMessage(content="Hello") + response = chat._generate([message]) + assert isinstance(response, ChatResult) + assert len(response.generations) == 5 + for generation in response.generations: + assert isinstance(generation.message, BaseMessage) + assert isinstance(generation.message.content, str) + + +@pytest.mark.scheduled +def test_chat_openai_streaming() -> None: + """Test that streaming correctly invokes on_llm_new_token callback.""" + callback_handler = FakeCallbackHandler() + callback_manager = CallbackManager([callback_handler]) + chat = ChatOpenAI( + max_tokens=10, + streaming=True, + temperature=0, + callback_manager=callback_manager, + verbose=True, + ) + message = HumanMessage(content="Hello") + response = chat([message]) + assert callback_handler.llm_streams > 0 + assert isinstance(response, BaseMessage) + + +@pytest.mark.scheduled +def test_chat_openai_streaming_generation_info() -> None: + """Test that generation info is preserved when streaming.""" + + class _FakeCallback(FakeCallbackHandler): + saved_things: dict = {} + + def on_llm_end( + self, + *args: Any, + **kwargs: Any, + ) -> Any: + # Save the generation + self.saved_things["generation"] = args[0] + + callback = _FakeCallback() + callback_manager = CallbackManager([callback]) + chat = ChatOpenAI( + max_tokens=2, + temperature=0, + callback_manager=callback_manager, + ) + list(chat.stream("hi")) + generation = 
callback.saved_things["generation"] + # `Hello!` is two tokens, assert that that is what is returned + assert generation.generations[0][0].text == "Hello!" + + +def test_chat_openai_llm_output_contains_model_name() -> None: + """Test llm_output contains model_name.""" + chat = ChatOpenAI(max_tokens=10) + message = HumanMessage(content="Hello") + llm_result = chat.generate([[message]]) + assert llm_result.llm_output is not None + assert llm_result.llm_output["model_name"] == chat.model_name + + +def test_chat_openai_streaming_llm_output_contains_model_name() -> None: + """Test llm_output contains model_name.""" + chat = ChatOpenAI(max_tokens=10, streaming=True) + message = HumanMessage(content="Hello") + llm_result = chat.generate([[message]]) + assert llm_result.llm_output is not None + assert llm_result.llm_output["model_name"] == chat.model_name + + +def test_chat_openai_invalid_streaming_params() -> None: + """Test that streaming correctly invokes on_llm_new_token callback.""" + with pytest.raises(ValueError): + ChatOpenAI( + max_tokens=10, + streaming=True, + temperature=0, + n=5, + ) + + +@pytest.mark.scheduled +async def test_async_chat_openai() -> None: + """Test async generation.""" + chat = ChatOpenAI(max_tokens=10, n=2) + message = HumanMessage(content="Hello") + response = await chat.agenerate([[message], [message]]) + assert isinstance(response, LLMResult) + assert len(response.generations) == 2 + assert response.llm_output + assert "system_fingerprint" in response.llm_output + for generations in response.generations: + assert len(generations) == 2 + for generation in generations: + assert isinstance(generation, ChatGeneration) + assert isinstance(generation.text, str) + assert generation.text == generation.message.content + + +@pytest.mark.scheduled +async def test_async_chat_openai_streaming() -> None: + """Test that streaming correctly invokes on_llm_new_token callback.""" + callback_handler = FakeCallbackHandler() + callback_manager = CallbackManager([callback_handler]) + chat = ChatOpenAI( + max_tokens=10, + streaming=True, + temperature=0, + callback_manager=callback_manager, + verbose=True, + ) + message = HumanMessage(content="Hello") + response = await chat.agenerate([[message], [message]]) + assert callback_handler.llm_streams > 0 + assert isinstance(response, LLMResult) + assert len(response.generations) == 2 + for generations in response.generations: + assert len(generations) == 1 + for generation in generations: + assert isinstance(generation, ChatGeneration) + assert isinstance(generation.text, str) + assert generation.text == generation.message.content + + + + +@pytest.mark.scheduled +async def test_async_chat_openai_bind_functions() -> None: + """Test ChatOpenAI wrapper with multiple completions.""" + + class Person(BaseModel): + """Identifying information about a person.""" + + name: str = Field(..., title="Name", description="The person's name") + age: int = Field(..., title="Age", description="The person's age") + fav_food: Optional[str] = Field( + default=None, title="Fav Food", description="The person's favorite food" + ) + + chat = ChatOpenAI( + max_tokens=30, + n=1, + streaming=True, + ).bind_functions(functions=[Person], function_call="Person") + + prompt = ChatPromptTemplate.from_messages( + [ + ("system", "Use the provided Person function"), + ("user", "{input}"), + ] + ) + + chain = prompt | chat | JsonOutputFunctionsParser(args_only=True) + + message = HumanMessage(content="Sally is 13 years old") + response = await chain.abatch([{"input": message}]) 
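+ # Illustrative note: with args_only=True, JsonOutputFunctionsParser returns only the
+ # parsed function-call arguments, so each batch entry should be a plain dict roughly
+ # like {"name": "Sally", "age": 13} (example values; the exact output depends on the
+ # model's response), which is what the assertions below verify.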
+ + assert isinstance(response, list) + assert len(response) == 1 + for generation in response: + assert isinstance(generation, dict) + assert "name" in generation + assert "age" in generation + + +def test_chat_openai_extra_kwargs() -> None: + """Test extra kwargs to chat openai.""" + # Check that foo is saved in extra_kwargs. + llm = ChatOpenAI(foo=3, max_tokens=10) + assert llm.max_tokens == 10 + assert llm.model_kwargs == {"foo": 3} + + # Test that if extra_kwargs are provided, they are added to it. + llm = ChatOpenAI(foo=3, model_kwargs={"bar": 2}) + assert llm.model_kwargs == {"foo": 3, "bar": 2} + + # Test that if provided twice it errors + with pytest.raises(ValueError): + ChatOpenAI(foo=3, model_kwargs={"foo": 2}) + + # Test that if explicit param is specified in kwargs it errors + with pytest.raises(ValueError): + ChatOpenAI(model_kwargs={"temperature": 0.2}) + + # Test that "model" cannot be specified in kwargs + with pytest.raises(ValueError): + ChatOpenAI(model_kwargs={"model": "text-davinci-003"}) + + +@pytest.mark.scheduled +def test_openai_streaming() -> None: + """Test streaming tokens from OpenAI.""" + llm = ChatOpenAI(max_tokens=10) + + for token in llm.stream("I'm Pickle Rick"): + assert isinstance(token.content, str) + + +@pytest.mark.scheduled +async def test_openai_astream() -> None: + """Test streaming tokens from OpenAI.""" + llm = ChatOpenAI(max_tokens=10) + + async for token in llm.astream("I'm Pickle Rick"): + assert isinstance(token.content, str) + + +@pytest.mark.scheduled +async def test_openai_abatch() -> None: + """Test streaming tokens from ChatOpenAI.""" + llm = ChatOpenAI(max_tokens=10) + + result = await llm.abatch(["I'm Pickle Rick", "I'm not Pickle Rick"]) + for token in result: + assert isinstance(token.content, str) + + +@pytest.mark.scheduled +async def test_openai_abatch_tags() -> None: + """Test batch tokens from ChatOpenAI.""" + llm = ChatOpenAI(max_tokens=10) + + result = await llm.abatch( + ["I'm Pickle Rick", "I'm not Pickle Rick"], config={"tags": ["foo"]} + ) + for token in result: + assert isinstance(token.content, str) + + +@pytest.mark.scheduled +def test_openai_batch() -> None: + """Test batch tokens from ChatOpenAI.""" + llm = ChatOpenAI(max_tokens=10) + + result = llm.batch(["I'm Pickle Rick", "I'm not Pickle Rick"]) + for token in result: + assert isinstance(token.content, str) + + +@pytest.mark.scheduled +async def test_openai_ainvoke() -> None: + """Test invoke tokens from ChatOpenAI.""" + llm = ChatOpenAI(max_tokens=10) + + result = await llm.ainvoke("I'm Pickle Rick", config={"tags": ["foo"]}) + assert isinstance(result.content, str) + + +@pytest.mark.scheduled +def test_openai_invoke() -> None: + """Test invoke tokens from ChatOpenAI.""" + llm = ChatOpenAI(max_tokens=10) + + result = llm.invoke("I'm Pickle Rick", config=dict(tags=["foo"])) + assert isinstance(result.content, str) diff --git a/.scripts/community_split/libs/community/tests/integration_tests/chat_models/test_qianfan_endpoint.py b/.scripts/community_split/libs/community/tests/integration_tests/chat_models/test_qianfan_endpoint.py new file mode 100644 index 0000000000000..88bfc66a38264 --- /dev/null +++ b/.scripts/community_split/libs/community/tests/integration_tests/chat_models/test_qianfan_endpoint.py @@ -0,0 +1,219 @@ +"""Test Baidu Qianfan Chat Endpoint.""" + +from typing import Any + +from langchain_core.callbacks import CallbackManager +from langchain_core.messages import ( + AIMessage, + BaseMessage, + FunctionMessage, + HumanMessage, +) +from 
langchain_core.outputs import ChatGeneration, LLMResult +from langchain_core.prompts import ChatPromptTemplate, HumanMessagePromptTemplate + +from langchain_community.chat_models.baidu_qianfan_endpoint import QianfanChatEndpoint +from tests.unit_tests.callbacks.fake_callback_handler import FakeCallbackHandler + +_FUNCTIONS: Any = [ + { + "name": "format_person_info", + "description": ( + "Output formatter. Should always be used to format your response to the" + " user." + ), + "parameters": { + "title": "Person", + "description": "Identifying information about a person.", + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "The person's name", + "type": "string", + }, + "age": { + "title": "Age", + "description": "The person's age", + "type": "integer", + }, + "fav_food": { + "title": "Fav Food", + "description": "The person's favorite food", + "type": "string", + }, + }, + "required": ["name", "age"], + }, + }, + { + "name": "get_current_temperature", + "description": ("Used to get the location's temperature."), + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "city name", + }, + "unit": { + "type": "string", + "enum": ["centigrade", "Fahrenheit"], + }, + }, + "required": ["location", "unit"], + }, + "responses": { + "type": "object", + "properties": { + "temperature": { + "type": "integer", + "description": "city temperature", + }, + "unit": { + "type": "string", + "enum": ["centigrade", "Fahrenheit"], + }, + }, + }, + }, +] + + +def test_default_call() -> None: + """Test default model(`ERNIE-Bot`) call.""" + chat = QianfanChatEndpoint() + response = chat(messages=[HumanMessage(content="Hello")]) + assert isinstance(response, BaseMessage) + assert isinstance(response.content, str) + + +def test_model() -> None: + """Test model kwarg works.""" + chat = QianfanChatEndpoint(model="BLOOMZ-7B") + response = chat(messages=[HumanMessage(content="Hello")]) + assert isinstance(response, BaseMessage) + assert isinstance(response.content, str) + + +def test_model_param() -> None: + """Test model params works.""" + chat = QianfanChatEndpoint() + response = chat(model="BLOOMZ-7B", messages=[HumanMessage(content="Hello")]) + assert isinstance(response, BaseMessage) + assert isinstance(response.content, str) + + +def test_endpoint() -> None: + """Test user custom model deployments like some open source models.""" + chat = QianfanChatEndpoint(endpoint="qianfan_bloomz_7b_compressed") + response = chat(messages=[HumanMessage(content="Hello")]) + assert isinstance(response, BaseMessage) + assert isinstance(response.content, str) + + +def test_endpoint_param() -> None: + """Test user custom model deployments like some open source models.""" + chat = QianfanChatEndpoint() + response = chat( + messages=[ + HumanMessage(endpoint="qianfan_bloomz_7b_compressed", content="Hello") + ] + ) + assert isinstance(response, BaseMessage) + assert isinstance(response.content, str) + + +def test_multiple_history() -> None: + """Tests multiple history works.""" + chat = QianfanChatEndpoint() + + response = chat( + messages=[ + HumanMessage(content="Hello."), + AIMessage(content="Hello!"), + HumanMessage(content="How are you doing?"), + ] + ) + assert isinstance(response, BaseMessage) + assert isinstance(response.content, str) + + +def test_stream() -> None: + """Test that stream works.""" + chat = QianfanChatEndpoint(streaming=True) + callback_handler = FakeCallbackHandler() + callback_manager = CallbackManager([callback_handler]) 
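+ # Illustrative note: with streaming=True, each streamed chunk is expected to trigger
+ # on_llm_new_token on the fake handler; the llm_streams assertion below relies on that.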
+ response = chat( + messages=[ + HumanMessage(content="Hello."), + AIMessage(content="Hello!"), + HumanMessage(content="Who are you?"), + ], + stream=True, + callbacks=callback_manager, + ) + assert callback_handler.llm_streams > 0 + assert isinstance(response.content, str) + + +def test_multiple_messages() -> None: + """Tests multiple messages works.""" + chat = QianfanChatEndpoint() + message = HumanMessage(content="Hi, how are you.") + response = chat.generate([[message], [message]]) + + assert isinstance(response, LLMResult) + assert len(response.generations) == 2 + for generations in response.generations: + assert len(generations) == 1 + for generation in generations: + assert isinstance(generation, ChatGeneration) + assert isinstance(generation.text, str) + assert generation.text == generation.message.content + + +def test_functions_call_thoughts() -> None: + chat = QianfanChatEndpoint(model="ERNIE-Bot") + + prompt_tmpl = "Use the given functions to answer following question: {input}" + prompt_msgs = [ + HumanMessagePromptTemplate.from_template(prompt_tmpl), + ] + prompt = ChatPromptTemplate(messages=prompt_msgs) + + chain = prompt | chat.bind(functions=_FUNCTIONS) + + message = HumanMessage(content="What's the temperature in Shanghai today?") + response = chain.batch([{"input": message}]) + assert isinstance(response[0], AIMessage) + assert "function_call" in response[0].additional_kwargs + + +def test_functions_call() -> None: + chat = QianfanChatEndpoint(model="ERNIE-Bot") + + prompt = ChatPromptTemplate( + messages=[ + HumanMessage(content="What's the temperature in Shanghai today?"), + AIMessage( + content="", + additional_kwargs={ + "function_call": { + "name": "get_current_temperature", + "thoughts": "i will use get_current_temperature " + "to resolve the questions", + "arguments": '{"location":"Shanghai","unit":"centigrade"}', + } + }, + ), + FunctionMessage( + name="get_current_weather", + content='{"temperature": "25", \ + "unit": "摄氏度", "description": "晴朗"}', + ), + ] + ) + chain = prompt | chat.bind(functions=_FUNCTIONS) + resp = chain.invoke({}) + assert isinstance(resp, AIMessage) diff --git a/.scripts/community_split/libs/community/tests/integration_tests/document_loaders/parsers/test_language.py b/.scripts/community_split/libs/community/tests/integration_tests/document_loaders/parsers/test_language.py new file mode 100644 index 0000000000000..c28789c7cd38e --- /dev/null +++ b/.scripts/community_split/libs/community/tests/integration_tests/document_loaders/parsers/test_language.py @@ -0,0 +1,182 @@ +from pathlib import Path + +import pytest + +from langchain_community.document_loaders.concurrent import ConcurrentLoader +from langchain_community.document_loaders.generic import GenericLoader +from langchain_community.document_loaders.parsers import LanguageParser + + +def test_language_loader_for_python() -> None: + """Test Python loader with parser enabled.""" + file_path = Path(__file__).parent.parent.parent / "examples" + loader = GenericLoader.from_filesystem( + file_path, glob="hello_world.py", parser=LanguageParser(parser_threshold=5) + ) + docs = loader.load() + + assert len(docs) == 2 + + metadata = docs[0].metadata + assert metadata["source"] == str(file_path / "hello_world.py") + assert metadata["content_type"] == "functions_classes" + assert metadata["language"] == "python" + metadata = docs[1].metadata + assert metadata["source"] == str(file_path / "hello_world.py") + assert metadata["content_type"] == "simplified_code" + assert metadata["language"] == 
"python" + + assert ( + docs[0].page_content + == """def main(): + print("Hello World!") + + return 0""" + ) + assert ( + docs[1].page_content + == """#!/usr/bin/env python3 + +import sys + + +# Code for: def main(): + + +if __name__ == "__main__": + sys.exit(main())""" + ) + + +def test_language_loader_for_python_with_parser_threshold() -> None: + """Test Python loader with parser enabled and below threshold.""" + file_path = Path(__file__).parent.parent.parent / "examples" + loader = GenericLoader.from_filesystem( + file_path, + glob="hello_world.py", + parser=LanguageParser(language="python", parser_threshold=1000), + ) + docs = loader.load() + + assert len(docs) == 1 + + +def esprima_installed() -> bool: + try: + import esprima # noqa: F401 + + return True + except Exception as e: + print(f"esprima not installed, skipping test {e}") + return False + + +@pytest.mark.skipif(not esprima_installed(), reason="requires esprima package") +def test_language_loader_for_javascript() -> None: + """Test JavaScript loader with parser enabled.""" + file_path = Path(__file__).parent.parent.parent / "examples" + loader = GenericLoader.from_filesystem( + file_path, glob="hello_world.js", parser=LanguageParser(parser_threshold=5) + ) + docs = loader.load() + + assert len(docs) == 3 + + metadata = docs[0].metadata + assert metadata["source"] == str(file_path / "hello_world.js") + assert metadata["content_type"] == "functions_classes" + assert metadata["language"] == "js" + metadata = docs[1].metadata + assert metadata["source"] == str(file_path / "hello_world.js") + assert metadata["content_type"] == "functions_classes" + assert metadata["language"] == "js" + metadata = docs[2].metadata + assert metadata["source"] == str(file_path / "hello_world.js") + assert metadata["content_type"] == "simplified_code" + assert metadata["language"] == "js" + + assert ( + docs[0].page_content + == """class HelloWorld { + sayHello() { + console.log("Hello World!"); + } +}""" + ) + assert ( + docs[1].page_content + == """function main() { + const hello = new HelloWorld(); + hello.sayHello(); +}""" + ) + assert ( + docs[2].page_content + == """// Code for: class HelloWorld { + +// Code for: function main() { + +main();""" + ) + + +def test_language_loader_for_javascript_with_parser_threshold() -> None: + """Test JavaScript loader with parser enabled and below threshold.""" + file_path = Path(__file__).parent.parent.parent / "examples" + loader = GenericLoader.from_filesystem( + file_path, + glob="hello_world.js", + parser=LanguageParser(language="js", parser_threshold=1000), + ) + docs = loader.load() + + assert len(docs) == 1 + + +def test_concurrent_language_loader_for_javascript_with_parser_threshold() -> None: + """Test JavaScript ConcurrentLoader with parser enabled and below threshold.""" + file_path = Path(__file__).parent.parent.parent / "examples" + loader = ConcurrentLoader.from_filesystem( + file_path, + glob="hello_world.js", + parser=LanguageParser(language="js", parser_threshold=1000), + ) + docs = loader.load() + + assert len(docs) == 1 + + +def test_concurrent_language_loader_for_python_with_parser_threshold() -> None: + """Test Python ConcurrentLoader with parser enabled and below threshold.""" + file_path = Path(__file__).parent.parent.parent / "examples" + loader = ConcurrentLoader.from_filesystem( + file_path, + glob="hello_world.py", + parser=LanguageParser(language="python", parser_threshold=1000), + ) + docs = loader.load() + + assert len(docs) == 1 + + +@pytest.mark.skipif(not esprima_installed(), 
reason="requires esprima package") +def test_concurrent_language_loader_for_javascript() -> None: + """Test JavaScript ConcurrentLoader with parser enabled.""" + file_path = Path(__file__).parent.parent.parent / "examples" + loader = ConcurrentLoader.from_filesystem( + file_path, glob="hello_world.js", parser=LanguageParser(parser_threshold=5) + ) + docs = loader.load() + + assert len(docs) == 3 + + +def test_concurrent_language_loader_for_python() -> None: + """Test Python ConcurrentLoader with parser enabled.""" + file_path = Path(__file__).parent.parent.parent / "examples" + loader = ConcurrentLoader.from_filesystem( + file_path, glob="hello_world.py", parser=LanguageParser(parser_threshold=5) + ) + docs = loader.load() + + assert len(docs) == 2 diff --git a/.scripts/community_split/libs/community/tests/integration_tests/llms/test_fireworks.py b/.scripts/community_split/libs/community/tests/integration_tests/llms/test_fireworks.py new file mode 100644 index 0000000000000..665a1f84332bc --- /dev/null +++ b/.scripts/community_split/libs/community/tests/integration_tests/llms/test_fireworks.py @@ -0,0 +1,147 @@ +"""Test Fireworks AI API Wrapper.""" +import sys +from typing import Generator + +import pytest +from langchain_core.outputs import LLMResult + +from langchain_community.llms.fireworks import Fireworks + +if sys.version_info < (3, 9): + pytest.skip("fireworks-ai requires Python > 3.8", allow_module_level=True) + + +@pytest.fixture +def llm() -> Fireworks: + return Fireworks(model_kwargs={"temperature": 0, "max_tokens": 512}) + + +@pytest.mark.scheduled +def test_fireworks_call(llm: Fireworks) -> None: + """Test valid call to fireworks.""" + output = llm("How is the weather in New York today?") + assert isinstance(output, str) + + +@pytest.mark.scheduled +def test_fireworks_model_param() -> None: + """Tests model parameters for Fireworks""" + llm = Fireworks(model="foo") + assert llm.model == "foo" + + +@pytest.mark.scheduled +def test_fireworks_invoke(llm: Fireworks) -> None: + """Tests completion with invoke""" + output = llm.invoke("How is the weather in New York today?", stop=[","]) + assert isinstance(output, str) + assert output[-1] == "," + + +@pytest.mark.scheduled +async def test_fireworks_ainvoke(llm: Fireworks) -> None: + """Tests completion with invoke""" + output = await llm.ainvoke("How is the weather in New York today?", stop=[","]) + assert isinstance(output, str) + assert output[-1] == "," + + +@pytest.mark.scheduled +def test_fireworks_batch(llm: Fireworks) -> None: + """Tests completion with invoke""" + llm = Fireworks() + output = llm.batch( + [ + "How is the weather in New York today?", + "How is the weather in New York today?", + "How is the weather in New York today?", + "How is the weather in New York today?", + "How is the weather in New York today?", + ], + stop=[","], + ) + for token in output: + assert isinstance(token, str) + assert token[-1] == "," + + +@pytest.mark.scheduled +async def test_fireworks_abatch(llm: Fireworks) -> None: + """Tests completion with invoke""" + output = await llm.abatch( + [ + "How is the weather in New York today?", + "How is the weather in New York today?", + "How is the weather in New York today?", + "How is the weather in New York today?", + "How is the weather in New York today?", + ], + stop=[","], + ) + for token in output: + assert isinstance(token, str) + assert token[-1] == "," + + +@pytest.mark.scheduled +def test_fireworks_multiple_prompts( + llm: Fireworks, +) -> None: + """Test completion with multiple 
prompts.""" + output = llm.generate(["How is the weather in New York today?", "I'm pickle rick"]) + assert isinstance(output, LLMResult) + assert isinstance(output.generations, list) + assert len(output.generations) == 2 + + +@pytest.mark.scheduled +def test_fireworks_streaming(llm: Fireworks) -> None: + """Test stream completion.""" + generator = llm.stream("Who's the best quarterback in the NFL?") + assert isinstance(generator, Generator) + + for token in generator: + assert isinstance(token, str) + + +@pytest.mark.scheduled +def test_fireworks_streaming_stop_words(llm: Fireworks) -> None: + """Test stream completion with stop words.""" + generator = llm.stream("Who's the best quarterback in the NFL?", stop=[","]) + assert isinstance(generator, Generator) + + last_token = "" + for token in generator: + last_token = token + assert isinstance(token, str) + assert last_token[-1] == "," + + +@pytest.mark.scheduled +async def test_fireworks_streaming_async(llm: Fireworks) -> None: + """Test stream completion.""" + + last_token = "" + async for token in llm.astream( + "Who's the best quarterback in the NFL?", stop=[","] + ): + last_token = token + assert isinstance(token, str) + assert last_token[-1] == "," + + +@pytest.mark.scheduled +async def test_fireworks_async_agenerate(llm: Fireworks) -> None: + """Test async.""" + output = await llm.agenerate(["What is the best city to live in California?"]) + assert isinstance(output, LLMResult) + + +@pytest.mark.scheduled +async def test_fireworks_multiple_prompts_async_agenerate(llm: Fireworks) -> None: + output = await llm.agenerate( + ["How is the weather in New York today?", "I'm pickle rick"] + ) + assert isinstance(output, LLMResult) + assert isinstance(output.generations, list) + assert len(output.generations) == 2 diff --git a/.scripts/community_split/libs/community/tests/integration_tests/llms/test_opaqueprompts.py b/.scripts/community_split/libs/community/tests/integration_tests/llms/test_opaqueprompts.py new file mode 100644 index 0000000000000..69d851765dba3 --- /dev/null +++ b/.scripts/community_split/libs/community/tests/integration_tests/llms/test_opaqueprompts.py @@ -0,0 +1,77 @@ +import langchain_community.utilities.opaqueprompts as op +from langchain_core.output_parsers import StrOutputParser +from langchain_core.prompts import PromptTemplate +from langchain_core.runnables import RunnableParallel + +from langchain_community.llms import OpenAI +from langchain_community.llms.opaqueprompts import OpaquePrompts + +prompt_template = """ +As an AI assistant, you will answer questions according to given context. + +Sensitive personal information in the question is masked for privacy. +For instance, if the original text says "Giana is good," it will be changed +to "PERSON_998 is good." + +Here's how to handle these changes: +* Consider these masked phrases just as placeholders, but still refer to +them in a relevant way when answering. +* It's possible that different masked terms might mean the same thing. +Stick with the given term and don't modify it. +* All masked terms follow the "TYPE_ID" pattern. +* Please don't invent new masked terms. For instance, if you see "PERSON_998," +don't come up with "PERSON_997" or "PERSON_999" unless they're already in the question. + +Conversation History: ```{history}``` +Context : ```During our recent meeting on February 23, 2023, at 10:30 AM, +John Doe provided me with his personal details. His email is johndoe@example.com +and his contact number is 650-456-7890. 
He lives in New York City, USA, and +belongs to the American nationality with Christian beliefs and a leaning towards +the Democratic party. He mentioned that he recently made a transaction using his +credit card 4111 1111 1111 1111 and transferred bitcoins to the wallet address +1A1zP1eP5QGefi2DMPTfTL5SLmv7DivfNa. While discussing his European travels, he +noted down his IBAN as GB29 NWBK 6016 1331 9268 19. Additionally, he provided +his website as https://johndoeportfolio.com. John also discussed +some of his US-specific details. He said his bank account number is +1234567890123456 and his drivers license is Y12345678. His ITIN is 987-65-4321, +and he recently renewed his passport, +the number for which is 123456789. He emphasized not to share his SSN, which is +669-45-6789. Furthermore, he mentioned that he accesses his work files remotely +through the IP 192.168.1.1 and has a medical license number MED-123456. ``` +Question: ```{question}``` +""" + + +def test_opaqueprompts() -> None: + chain = PromptTemplate.from_template(prompt_template) | OpaquePrompts(llm=OpenAI()) + output = chain.invoke( + { + "question": "Write a text message to remind John to do password reset \ + for his website through his email to stay secure.", + "history": "", + } + ) + assert isinstance(output, str) + + +def test_opaqueprompts_functions() -> None: + prompt = PromptTemplate.from_template(prompt_template) + llm = OpenAI() + pg_chain = ( + op.sanitize + | RunnableParallel( + secure_context=lambda x: x["secure_context"], # type: ignore + response=(lambda x: x["sanitized_input"]) # type: ignore + | prompt + | llm + | StrOutputParser(), + ) + | (lambda x: op.desanitize(x["response"], x["secure_context"])) + ) + + pg_chain.invoke( + { + "question": "Write a text message to remind John to do password reset\ + for his website through his email to stay secure.", + "history": "", + } + ) diff --git a/.scripts/community_split/libs/community/tests/integration_tests/llms/test_symblai_nebula.py b/.scripts/community_split/libs/community/tests/integration_tests/llms/test_symblai_nebula.py new file mode 100644 index 0000000000000..b1b2eb3b5352c --- /dev/null +++ b/.scripts/community_split/libs/community/tests/integration_tests/llms/test_symblai_nebula.py @@ -0,0 +1,42 @@ +"""Test Nebula API wrapper.""" +from langchain_community.llms.symblai_nebula import Nebula + + +def test_symblai_nebula_call() -> None: + """Test valid call to Nebula.""" + conversation = """Sam: Good morning, team! Let's keep this standup concise. + We'll go in the usual order: what you did yesterday, + what you plan to do today, and any blockers. Alex, kick us off. +Alex: Morning! Yesterday, I wrapped up the UI for the user dashboard. +The new charts and widgets are now responsive. +I also had a sync with the design team to ensure the final touchups are in +line with the brand guidelines. Today, I'll start integrating the frontend with +the new API endpoints Rhea was working on. +The only blocker is waiting for some final API documentation, +but I guess Rhea can update on that. +Rhea: Hey, all! Yep, about the API documentation - I completed the majority of + the backend work for user data retrieval yesterday. + The endpoints are mostly set up, but I need to do a bit more testing today. + I'll finalize the API documentation by noon, so that should unblock Alex. + After that, I’ll be working on optimizing the database queries + for faster data fetching. No other blockers on my end. +Sam: Great, thanks Rhea.
Do reach out if you need any testing assistance + or if there are any hitches with the database. + Now, my update: Yesterday, I coordinated with the client to get clarity + on some feature requirements. Today, I'll be updating our project roadmap + and timelines based on their feedback. Additionally, I'll be sitting with + the QA team in the afternoon for preliminary testing. + Blocker: I might need both of you to be available for a quick call + in case the client wants to discuss the changes live. +Alex: Sounds good, Sam. Just let us know a little in advance for the call. +Rhea: Agreed. We can make time for that. +Sam: Perfect! Let's keep the momentum going. Reach out if there are any +sudden issues or support needed. Have a productive day! +Alex: You too. +Rhea: Thanks, bye!""" + llm = Nebula(nebula_api_key="") + + instruction = """Identify the main objectives mentioned in this +conversation.""" + output = llm.invoke(f"{instruction}\n{conversation}") + assert isinstance(output, str) diff --git a/.scripts/community_split/libs/community/tests/integration_tests/llms/test_vertexai.py b/.scripts/community_split/libs/community/tests/integration_tests/llms/test_vertexai.py new file mode 100644 index 0000000000000..ae5f776b4bad6 --- /dev/null +++ b/.scripts/community_split/libs/community/tests/integration_tests/llms/test_vertexai.py @@ -0,0 +1,151 @@ +"""Test Vertex AI API wrapper. +In order to run this test, you need to install VertexAI SDK: +pip install google-cloud-aiplatform>=1.36.0 + +Your end-user credentials would be used to make the calls (make sure you've run +`gcloud auth login` first). +""" +import os +from typing import Optional + +import pytest +from langchain_core.outputs import LLMResult + +from langchain_community.llms import VertexAI, VertexAIModelGarden + + +def test_vertex_initialization() -> None: + llm = VertexAI() + assert llm._llm_type == "vertexai" + assert llm.model_name == llm.client._model_id + + +def test_vertex_call() -> None: + llm = VertexAI(temperature=0) + output = llm("Say foo:") + assert isinstance(output, str) + + +@pytest.mark.scheduled +def test_vertex_generate() -> None: + llm = VertexAI(temperature=0.3, n=2, model_name="text-bison@001") + output = llm.generate(["Say foo:"]) + assert isinstance(output, LLMResult) + assert len(output.generations) == 1 + assert len(output.generations[0]) == 2 + + +@pytest.mark.scheduled +def test_vertex_generate_code() -> None: + llm = VertexAI(temperature=0.3, n=2, model_name="code-bison@001") + output = llm.generate(["generate a python method that says foo:"]) + assert isinstance(output, LLMResult) + assert len(output.generations) == 1 + assert len(output.generations[0]) == 2 + + +@pytest.mark.scheduled +async def test_vertex_agenerate() -> None: + llm = VertexAI(temperature=0) + output = await llm.agenerate(["Please say foo:"]) + assert isinstance(output, LLMResult) + + +@pytest.mark.scheduled +def test_vertex_stream() -> None: + llm = VertexAI(temperature=0) + outputs = list(llm.stream("Please say foo:")) + assert isinstance(outputs[0], str) + + +async def test_vertex_consistency() -> None: + llm = VertexAI(temperature=0) + output = llm.generate(["Please say foo:"]) + streaming_output = llm.generate(["Please say foo:"], stream=True) + async_output = await llm.agenerate(["Please say foo:"]) + assert output.generations[0][0].text == streaming_output.generations[0][0].text + assert output.generations[0][0].text == async_output.generations[0][0].text + + +@pytest.mark.parametrize( + "endpoint_os_variable_name,result_arg", + 
[("FALCON_ENDPOINT_ID", "generated_text"), ("LLAMA_ENDPOINT_ID", None)], +) +def test_model_garden( + endpoint_os_variable_name: str, result_arg: Optional[str] +) -> None: + """In order to run this test, you should provide endpoint names. + + Example: + export FALCON_ENDPOINT_ID=... + export LLAMA_ENDPOINT_ID=... + export PROJECT=... + """ + endpoint_id = os.environ[endpoint_os_variable_name] + project = os.environ["PROJECT"] + location = "europe-west4" + llm = VertexAIModelGarden( + endpoint_id=endpoint_id, + project=project, + result_arg=result_arg, + location=location, + ) + output = llm("What is the meaning of life?") + assert isinstance(output, str) + assert llm._llm_type == "vertexai_model_garden" + + +@pytest.mark.parametrize( + "endpoint_os_variable_name,result_arg", + [("FALCON_ENDPOINT_ID", "generated_text"), ("LLAMA_ENDPOINT_ID", None)], +) +def test_model_garden_generate( + endpoint_os_variable_name: str, result_arg: Optional[str] +) -> None: + """In order to run this test, you should provide endpoint names. + + Example: + export FALCON_ENDPOINT_ID=... + export LLAMA_ENDPOINT_ID=... + export PROJECT=... + """ + endpoint_id = os.environ[endpoint_os_variable_name] + project = os.environ["PROJECT"] + location = "europe-west4" + llm = VertexAIModelGarden( + endpoint_id=endpoint_id, + project=project, + result_arg=result_arg, + location=location, + ) + output = llm.generate(["What is the meaning of life?", "How much is 2+2"]) + assert isinstance(output, LLMResult) + assert len(output.generations) == 2 + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + "endpoint_os_variable_name,result_arg", + [("FALCON_ENDPOINT_ID", "generated_text"), ("LLAMA_ENDPOINT_ID", None)], +) +async def test_model_garden_agenerate( + endpoint_os_variable_name: str, result_arg: Optional[str] +) -> None: + endpoint_id = os.environ[endpoint_os_variable_name] + project = os.environ["PROJECT"] + location = "europe-west4" + llm = VertexAIModelGarden( + endpoint_id=endpoint_id, + project=project, + result_arg=result_arg, + location=location, + ) + output = await llm.agenerate(["What is the meaning of life?", "How much is 2+2"]) + assert isinstance(output, LLMResult) + assert len(output.generations) == 2 + + +def test_vertex_call_count_tokens() -> None: + llm = VertexAI() + output = llm.get_num_tokens("How are you?") + assert output == 4 diff --git a/.scripts/community_split/libs/community/tests/integration_tests/utilities/test_arxiv.py b/.scripts/community_split/libs/community/tests/integration_tests/utilities/test_arxiv.py new file mode 100644 index 0000000000000..536ba323bfb97 --- /dev/null +++ b/.scripts/community_split/libs/community/tests/integration_tests/utilities/test_arxiv.py @@ -0,0 +1,171 @@ +"""Integration test for Arxiv API Wrapper.""" +from typing import Any, List + +import pytest +from langchain_core.documents import Document +from langchain_core.tools import BaseTool + +from langchain_community.tools import ArxivQueryRun +from langchain_community.utilities import ArxivAPIWrapper + + +@pytest.fixture +def api_client() -> ArxivAPIWrapper: + return ArxivAPIWrapper() + + +def test_run_success_paper_name(api_client: ArxivAPIWrapper) -> None: + """Test a query of paper name that returns the correct answer""" + + output = api_client.run("Heat-bath random walks with Markov bases") + assert "Probability distributions for Markov chains based quantum walks" in output + assert ( + "Transformations of random walks on groups via Markov stopping times" in output + ) + assert ( + "Recurrence of Multidimensional 
Persistent Random Walks. Fourier and Series " + "Criteria" in output + ) + + +def test_run_success_arxiv_identifier(api_client: ArxivAPIWrapper) -> None: + """Test a query of an arxiv identifier returns the correct answer""" + + output = api_client.run("1605.08386v1") + assert "Heat-bath random walks with Markov bases" in output + + +def test_run_success_multiple_arxiv_identifiers(api_client: ArxivAPIWrapper) -> None: + """Test a query of multiple arxiv identifiers that returns the correct answer""" + + output = api_client.run("1605.08386v1 2212.00794v2 2308.07912") + assert "Heat-bath random walks with Markov bases" in output + assert "Scaling Language-Image Pre-training via Masking" in output + assert ( + "Ultra-low mass PBHs in the early universe can explain the PTA signal" in output + ) + + +def test_run_returns_several_docs(api_client: ArxivAPIWrapper) -> None: + """Test that returns several docs""" + + output = api_client.run("Caprice Stanley") + assert "On Mixing Behavior of a Family of Random Walks" in output + + +def test_run_returns_no_result(api_client: ArxivAPIWrapper) -> None: + """Test that gives no result.""" + + output = api_client.run("1605.08386WWW") + assert "No good Arxiv Result was found" == output + + +def assert_docs(docs: List[Document]) -> None: + for doc in docs: + assert doc.page_content + assert doc.metadata + assert set(doc.metadata) == {"Published", "Title", "Authors", "Summary"} + + +def test_load_success_paper_name(api_client: ArxivAPIWrapper) -> None: + """Test a query of paper name that returns one document""" + + docs = api_client.load("Heat-bath random walks with Markov bases") + assert len(docs) == 3 + assert_docs(docs) + + +def test_load_success_arxiv_identifier(api_client: ArxivAPIWrapper) -> None: + """Test a query of an arxiv identifier that returns one document""" + + docs = api_client.load("1605.08386v1") + assert len(docs) == 1 + assert_docs(docs) + + +def test_load_success_multiple_arxiv_identifiers(api_client: ArxivAPIWrapper) -> None: + """Test a query of arxiv identifiers that returns the correct answer""" + + docs = api_client.load("1605.08386v1 2212.00794v2 2308.07912") + assert len(docs) == 3 + assert_docs(docs) + + +def test_load_returns_no_result(api_client: ArxivAPIWrapper) -> None: + """Test that returns no docs""" + + docs = api_client.load("1605.08386WWW") + assert len(docs) == 0 + + +def test_load_returns_limited_docs() -> None: + """Test that returns several docs""" + expected_docs = 2 + api_client = ArxivAPIWrapper(load_max_docs=expected_docs) + docs = api_client.load("ChatGPT") + assert len(docs) == expected_docs + assert_docs(docs) + + +def test_load_returns_limited_doc_content_chars() -> None: + """Test that returns limited doc_content_chars_max""" + + doc_content_chars_max = 100 + api_client = ArxivAPIWrapper(doc_content_chars_max=doc_content_chars_max) + docs = api_client.load("1605.08386") + assert len(docs[0].page_content) == doc_content_chars_max + + +def test_load_returns_unlimited_doc_content_chars() -> None: + """Test that returns unlimited doc_content_chars_max""" + + doc_content_chars_max = None + api_client = ArxivAPIWrapper(doc_content_chars_max=doc_content_chars_max) + docs = api_client.load("1605.08386") + assert len(docs[0].page_content) == pytest.approx(54338, rel=1e-2) + + +def test_load_returns_full_set_of_metadata() -> None: + """Test that returns several docs""" + api_client = ArxivAPIWrapper(load_max_docs=1, load_all_available_meta=True) + docs = api_client.load("ChatGPT") + assert len(docs) == 1 + for doc 
in docs: + assert doc.page_content + assert doc.metadata + assert set(doc.metadata).issuperset( + {"Published", "Title", "Authors", "Summary"} + ) + print(doc.metadata) + assert len(set(doc.metadata)) > 4 + + +def _load_arxiv_from_universal_entry(**kwargs: Any) -> BaseTool: + from langchain.agents.load_tools import load_tools + tools = load_tools(["arxiv"], **kwargs) + assert len(tools) == 1, "loaded more than 1 tool" + return tools[0] + + +def test_load_arxiv_from_universal_entry() -> None: + arxiv_tool = _load_arxiv_from_universal_entry() + output = arxiv_tool("Caprice Stanley") + assert ( + "On Mixing Behavior of a Family of Random Walks" in output + ), "failed to fetch a valid result" + + +def test_load_arxiv_from_universal_entry_with_params() -> None: + params = { + "top_k_results": 1, + "load_max_docs": 10, + "load_all_available_meta": True, + } + arxiv_tool = _load_arxiv_from_universal_entry(**params) + assert isinstance(arxiv_tool, ArxivQueryRun) + wp = arxiv_tool.api_wrapper + assert wp.top_k_results == 1, "failed to assert top_k_results" + assert wp.load_max_docs == 10, "failed to assert load_max_docs" + assert ( + wp.load_all_available_meta is True + ), "failed to assert load_all_available_meta" diff --git a/.scripts/community_split/libs/community/tests/integration_tests/utilities/test_pubmed.py b/.scripts/community_split/libs/community/tests/integration_tests/utilities/test_pubmed.py new file mode 100644 index 0000000000000..bed2681e68139 --- /dev/null +++ b/.scripts/community_split/libs/community/tests/integration_tests/utilities/test_pubmed.py @@ -0,0 +1,164 @@ +"""Integration test for PubMed API Wrapper.""" +from typing import Any, List + +import pytest +from langchain_core.documents import Document +from langchain_core.tools import BaseTool + +from langchain_community.tools import PubmedQueryRun +from langchain_community.utilities import PubMedAPIWrapper + +xmltodict = pytest.importorskip("xmltodict") + + +@pytest.fixture +def api_client() -> PubMedAPIWrapper: + return PubMedAPIWrapper() + + +def test_run_success(api_client: PubMedAPIWrapper) -> None: + """Test that returns the correct answer""" + + search_string = ( + "Examining the Validity of ChatGPT in Identifying " + "Relevant Nephrology Literature" + ) + output = api_client.run(search_string) + test_string = ( + "Examining the Validity of ChatGPT in Identifying " + "Relevant Nephrology Literature: Findings and Implications" + ) + assert test_string in output + assert len(output) == api_client.doc_content_chars_max + + +def test_run_returns_no_result(api_client: PubMedAPIWrapper) -> None: + """Test that gives no result.""" + + output = api_client.run("1605.08386WWW") + assert "No good PubMed Result was found" == output + + +def test_retrieve_article_returns_book_abstract(api_client: PubMedAPIWrapper) -> None: + """Test that returns the excerpt of a book.""" + + output_nolabel = api_client.retrieve_article("25905357", "") + output_withlabel = api_client.retrieve_article("29262144", "") + test_string_nolabel = ( + "Osteoporosis is a multifactorial disorder associated with low bone mass and " + "enhanced skeletal fragility. Although" + ) + assert test_string_nolabel in output_nolabel["Summary"] + assert ( + "Wallenberg syndrome was first described in 1808 by Gaspard Vieusseux. 
However," + in output_withlabel["Summary"] + ) + + +def test_retrieve_article_returns_article_abstract( + api_client: PubMedAPIWrapper, +) -> None: + """Test that returns the abstract of an article.""" + + output_nolabel = api_client.retrieve_article("37666905", "") + output_withlabel = api_client.retrieve_article("37666551", "") + test_string_nolabel = ( + "This work aims to: (1) Provide maximal hand force data on six different " + "grasp types for healthy subjects; (2) detect grasp types with maximal " + "force significantly affected by hand osteoarthritis (HOA) in women; (3) " + "look for predictors to detect HOA from the maximal forces using discriminant " + "analyses." + ) + assert test_string_nolabel in output_nolabel["Summary"] + test_string_withlabel = ( + "OBJECTIVES: To assess across seven hospitals from six different countries " + "the extent to which the COVID-19 pandemic affected the volumes of orthopaedic " + "hospital admissions and patient outcomes for non-COVID-19 patients admitted " + "for orthopaedic care." + ) + assert test_string_withlabel in output_withlabel["Summary"] + + +def test_retrieve_article_no_abstract_available(api_client: PubMedAPIWrapper) -> None: + """Test that returns 'No abstract available'.""" + + output = api_client.retrieve_article("10766884", "") + assert "No abstract available" == output["Summary"] + + +def assert_docs(docs: List[Document]) -> None: + for doc in docs: + assert doc.metadata + assert set(doc.metadata) == { + "Copyright Information", + "uid", + "Title", + "Published", + } + + +def test_load_success(api_client: PubMedAPIWrapper) -> None: + """Test that returns one document""" + + docs = api_client.load_docs("chatgpt") + assert len(docs) == api_client.top_k_results == 3 + assert_docs(docs) + + +def test_load_returns_no_result(api_client: PubMedAPIWrapper) -> None: + """Test that returns no docs""" + + docs = api_client.load_docs("1605.08386WWW") + assert len(docs) == 0 + + +def test_load_returns_limited_docs() -> None: + """Test that returns several docs""" + expected_docs = 2 + api_client = PubMedAPIWrapper(top_k_results=expected_docs) + docs = api_client.load_docs("ChatGPT") + assert len(docs) == expected_docs + assert_docs(docs) + + +def test_load_returns_full_set_of_metadata() -> None: + """Test that returns several docs""" + api_client = PubMedAPIWrapper(load_max_docs=1, load_all_available_meta=True) + docs = api_client.load_docs("ChatGPT") + assert len(docs) == 3 + for doc in docs: + assert doc.metadata + assert set(doc.metadata).issuperset( + {"Copyright Information", "Published", "Title", "uid"} + ) + + +def _load_pubmed_from_universal_entry(**kwargs: Any) -> BaseTool: + from langchain.agents.load_tools import load_tools + tools = load_tools(["pubmed"], **kwargs) + assert len(tools) == 1, "loaded more than 1 tool" + return tools[0] + + +def test_load_pupmed_from_universal_entry() -> None: + pubmed_tool = _load_pubmed_from_universal_entry() + search_string = ( + "Examining the Validity of ChatGPT in Identifying " + "Relevant Nephrology Literature" + ) + output = pubmed_tool(search_string) + test_string = ( + "Examining the Validity of ChatGPT in Identifying " + "Relevant Nephrology Literature: Findings and Implications" + ) + assert test_string in output + + +def test_load_pupmed_from_universal_entry_with_params() -> None: + params = { + "top_k_results": 1, + } + pubmed_tool = _load_pubmed_from_universal_entry(**params) + assert isinstance(pubmed_tool, PubmedQueryRun) + wp = pubmed_tool.api_wrapper + assert wp.top_k_results == 1, 
"failed to assert top_k_results" diff --git a/.scripts/community_split/libs/community/tests/integration_tests/vectorstores/conftest.py b/.scripts/community_split/libs/community/tests/integration_tests/vectorstores/conftest.py new file mode 100644 index 0000000000000..a2fc9053128b5 --- /dev/null +++ b/.scripts/community_split/libs/community/tests/integration_tests/vectorstores/conftest.py @@ -0,0 +1,44 @@ +import os +from typing import Union + +import pytest +from vcr.request import Request + +# Those environment variables turn on Deep Lake pytest mode. +# It significantly makes tests run much faster. +# Need to run before `import deeplake` +os.environ["BUGGER_OFF"] = "true" +os.environ["DEEPLAKE_DOWNLOAD_PATH"] = "./testing/local_storage" +os.environ["DEEPLAKE_PYTEST_ENABLED"] = "true" + + +# This fixture returns a dictionary containing filter_headers options +# for replacing certain headers with dummy values during cassette playback +# Specifically, it replaces the authorization header with a dummy value to +# prevent sensitive data from being recorded in the cassette. +# It also filters request to certain hosts (specified in the `ignored_hosts` list) +# to prevent data from being recorded in the cassette. +@pytest.fixture(scope="module") +def vcr_config() -> dict: + skipped_host = ["pinecone.io"] + + def before_record_response(response: dict) -> Union[dict, None]: + return response + + def before_record_request(request: Request) -> Union[Request, None]: + for host in skipped_host: + if request.host.startswith(host) or request.host.endswith(host): + return None + return request + + return { + "before_record_request": before_record_request, + "before_record_response": before_record_response, + "filter_headers": [ + ("authorization", "authorization-DUMMY"), + ("X-OpenAI-Client-User-Agent", "X-OpenAI-Client-User-Agent-DUMMY"), + ("Api-Key", "Api-Key-DUMMY"), + ("User-Agent", "User-Agent-DUMMY"), + ], + "ignore_localhost": True, + } diff --git a/.scripts/community_split/libs/core/langchain_core/utils/env.py b/.scripts/community_split/libs/core/langchain_core/utils/env.py new file mode 100644 index 0000000000000..b1579e07b7d48 --- /dev/null +++ b/.scripts/community_split/libs/core/langchain_core/utils/env.py @@ -0,0 +1,45 @@ +from __future__ import annotations + +import os +from typing import Any, Dict, Optional + + +def env_var_is_set(env_var: str) -> bool: + """Check if an environment variable is set. + + Args: + env_var (str): The name of the environment variable. + + Returns: + bool: True if the environment variable is set, False otherwise. + """ + return env_var in os.environ and os.environ[env_var] not in ( + "", + "0", + "false", + "False", + ) + + +def get_from_dict_or_env( + data: Dict[str, Any], key: str, env_key: str, default: Optional[str] = None +) -> str: + """Get a value from a dictionary or an environment variable.""" + if key in data and data[key]: + return data[key] + else: + return get_from_env(key, env_key, default=default) + + +def get_from_env(key: str, env_key: str, default: Optional[str] = None) -> str: + """Get a value from a dictionary or an environment variable.""" + if env_key in os.environ and os.environ[env_key]: + return os.environ[env_key] + elif default is not None: + return default + else: + raise ValueError( + f"Did not find {key}, please add an environment variable" + f" `{env_key}` which contains it, or pass" + f" `{key}` as a named parameter." + )