Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

langchain[minor]: openai tools structured_output_chain #17296

Merged
merged 8 commits into from
Feb 22, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
179 changes: 162 additions & 17 deletions libs/langchain/langchain/chains/structured_output/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,16 @@
from langchain_core.prompts import BasePromptTemplate
from langchain_core.pydantic_v1 import BaseModel
from langchain_core.runnables import Runnable
from langchain_core.utils.function_calling import convert_to_openai_function
from langchain_core.utils.function_calling import (
convert_to_openai_function,
convert_to_openai_tool,
)

from langchain.output_parsers import PydanticOutputParser
from langchain.output_parsers import (
JsonOutputKeyToolsParser,
PydanticOutputParser,
PydanticToolsParser,
)
from langchain.output_parsers.openai_functions import (
JsonOutputFunctionsParser,
PydanticAttrOutputFunctionsParser,
Expand Down Expand Up @@ -106,15 +113,17 @@ class RecordDog(BaseModel):
return prompt | llm.bind(**llm_kwargs) | output_parser


# TODO: implement mode='openai-tools'.
def create_structured_output_runnable(
output_schema: Union[Dict[str, Any], Type[BaseModel]],
llm: Runnable,
prompt: BasePromptTemplate,
*,
output_parser: Optional[Union[BaseOutputParser, BaseGenerationOutputParser]] = None,
mode: Literal["openai-functions", "openai-json"] = "openai-functions",
enforce_single_function_usage: bool = True,
enforce_function_usage: bool = True,
return_single: bool = True,
eyurtsev marked this conversation as resolved.
Show resolved Hide resolved
mode: Literal[
"openai-functions", "openai-tools", "openai-json"
] = "openai-functions",
**kwargs: Any,
) -> Runnable:
"""Create a runnable for extracting structured outputs.
Expand All @@ -135,19 +144,101 @@ def create_structured_output_runnable(
in, then the OutputParser will try to parse outputs using the pydantic
class. Otherwise model outputs will be parsed as JSON.
mode: How structured outputs are extracted from the model. If 'openai-functions'
then OpenAI function calling is used. If 'openai-json' then OpenAI model
then OpenAI function calling is used with the deprecated 'functions',
'function_call' schema. If 'openai-tools' then OpenAI function
calling with the latest 'tools', 'tool_choice' schema is used. This is
recommended over 'openai-functions'. If 'openai-json' then OpenAI model
with response_format set to JSON is used.
enforce_single_function_usage: Only used if mode is 'openai-functions'. Only
used if a single function is passed in. If
True, then the model will be forced to use the given function. If False,
then the model will be given the option to use the given function or not.
enforce_function_usage: Only applies when mode is 'openai-tools' or
'openai-functions'. If True, then the model will be forced to use the given
output schema. If False, then the model can elect whether to use the output
schema.
return_single: Only applies when mode is 'openai-tools'. Whether to return a
    single structured output or a list of them. If True and the model does not
    return any structured outputs then chain output is None. If False and the
    model does not return any structured outputs then chain output is an empty
    list.
**kwargs: Additional named arguments.

Returns:
A runnable sequence that will return a structured output matching the given
A runnable sequence that will return a structured output(s) matching the given
output_schema.

OpenAI tools example with Pydantic schema (mode='openai-tools'):
.. code-block:: python

from typing import Optional

from langchain.chains import create_structured_output_runnable
from langchain_openai import ChatOpenAI
from langchain_core.pydantic_v1 import BaseModel, Field


class RecordDog(BaseModel):
'''Record some identifying information about a dog.'''

OpenAI functions example:
name: str = Field(..., description="The dog's name")
color: str = Field(..., description="The dog's color")
fav_food: Optional[str] = Field(None, description="The dog's favorite food")

llm = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0)
structured_llm = create_structured_output_runnable(
RecordDog,
llm,
mode="openai-tools",
enforce_function_usage=True,
return_single=True
)
structured_llm.invoke("Harry was a chubby brown beagle who loved chicken")
# -> RecordDog(name="Harry", color="brown", fav_food="chicken")

OpenAI tools example with dict schema (mode="openai-tools"):
.. code-block:: python

from typing import Optional

from langchain.chains import create_structured_output_runnable
from langchain_openai import ChatOpenAI


dog_schema = {
"type": "function",
"function": {
"name": "record_dog",
"description": "Record some identifying information about a dog.",
"parameters": {
"type": "object",
"properties": {
"name": {
"description": "The dog's name",
"type": "string"
},
"color": {
"description": "The dog's color",
"type": "string"
},
"fav_food": {
"description": "The dog's favorite food",
"type": "string"
}
},
"required": ["name", "color"]
}
}
}


llm = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0)
structured_llm = create_structured_output_runnable(
dog_schema,
llm,
mode="openai-tools",
enforce_function_usage=True,
return_single=True
)
structured_llm.invoke("Harry was a chubby brown beagle who loved chicken")
# -> {'name': 'Harry', 'color': 'brown', 'fav_food': 'chicken'}

OpenAI functions example (mode="openai-functions"):
.. code-block:: python

from typing import Optional
Expand Down Expand Up @@ -176,7 +267,7 @@ class Dog(BaseModel):
chain.invoke({"input": "Harry was a chubby brown beagle who loved chicken"})
# -> Dog(name="Harry", color="brown", fav_food="chicken")

OpenAI json response format example:
OpenAI json response format example (mode="openai-json"):
.. code-block:: python

from typing import Optional
Expand Down Expand Up @@ -208,7 +299,22 @@ class Dog(BaseModel):
chain = create_structured_output_runnable(Dog, llm, prompt, mode="openai-json")
chain.invoke({"input": "Harry was a chubby brown beagle who loved chicken"})
""" # noqa: E501
if mode == "openai-functions":
if mode == "openai-tools":
return _create_openai_tools_runnable(
output_schema,
llm,
prompt=prompt,
output_parser=output_parser,
enforce_tool_usage=enforce_function_usage,
return_single=return_single,
)

elif mode == "openai-functions":
# for backwards compatibility
enforce_single_function_usage = kwargs.get(
"enforce_single_function_usage", enforce_function_usage
)

return _create_openai_functions_structured_output_runnable(
output_schema,
llm,
Expand All @@ -223,11 +329,51 @@ class Dog(BaseModel):
)
else:
raise ValueError(
f"Invalid mode {mode}. Expected one of 'openai-functions', "
f"Invalid mode {mode}. Expected one of 'openai-tools', 'openai-functions', "
f"'openai-json'."
)


def _create_openai_tools_runnable(
    tool: Union[Dict[str, Any], Type[BaseModel], Callable],
    llm: Runnable,
    *,
    prompt: Optional[BasePromptTemplate],
    output_parser: Optional[Union[BaseOutputParser, BaseGenerationOutputParser]],
    enforce_tool_usage: bool,
    return_single: bool,
) -> Runnable:
    """Build a runnable that extracts structured output via OpenAI tool-calling.

    Args:
        tool: Output schema as a dict, Pydantic class, or callable; converted
            to an OpenAI tool spec.
        llm: Chat model runnable that the tool spec is bound onto.
        prompt: Optional prompt to prepend to the chain.
        output_parser: Optional parser for the model's tool calls; when not
            supplied, a default is chosen based on the schema type.
        enforce_tool_usage: If True, force the model to call this tool via
            ``tool_choice``.
        return_single: Whether the default parser returns one output instead
            of a list.

    Returns:
        A runnable of ``[prompt |] llm-with-tools | parser``.
    """
    tool_spec = convert_to_openai_tool(tool)
    bind_kwargs: Dict[str, Any] = {"tools": [tool_spec]}
    if enforce_tool_usage:
        # Pin tool_choice to this tool so the model cannot skip it.
        bind_kwargs["tool_choice"] = {
            "type": "function",
            "function": {"name": tool_spec["function"]["name"]},
        }
    parser = output_parser or _get_openai_tool_output_parser(
        tool, return_single=return_single
    )
    bound_llm = llm.bind(**bind_kwargs)
    if not prompt:
        return bound_llm | parser
    return prompt | bound_llm | parser


def _get_openai_tool_output_parser(
    tool: Union[Dict[str, Any], Type[BaseModel], Callable], return_single: bool = False
) -> Union[BaseOutputParser, BaseGenerationOutputParser]:
    """Choose the default output parser for a single OpenAI tool schema.

    Args:
        tool: Output schema as a dict, Pydantic class, or callable.
        return_single: Forwarded to the parser; whether it returns one output
            instead of a list.

    Returns:
        ``PydanticToolsParser`` for a Pydantic class (parses tool args into
        model instances), otherwise ``JsonOutputKeyToolsParser`` keyed by the
        tool's name (returns raw args dicts).
    """
    if isinstance(tool, type) and issubclass(tool, BaseModel):
        # Pydantic schema: validate and materialize tool args as instances.
        return PydanticToolsParser(tools=[tool], return_single=return_single)
    # Dict / callable schema: extract the raw args keyed by the tool's name.
    tool_name = convert_to_openai_tool(tool)["function"]["name"]
    return JsonOutputKeyToolsParser(return_single=return_single, key_name=tool_name)


def get_openai_output_parser(
functions: Sequence[Union[Dict[str, Any], Type[BaseModel], Callable]],
) -> Union[BaseOutputParser, BaseGenerationOutputParser]:
Expand All @@ -244,11 +390,10 @@ def get_openai_output_parser(
not a Pydantic class, then the output parser will automatically extract
only the function arguments and not the function name.
"""
function_names = [convert_to_openai_function(f)["name"] for f in functions]
if isinstance(functions[0], type) and issubclass(functions[0], BaseModel):
if len(functions) > 1:
pydantic_schema: Union[Dict, Type[BaseModel]] = {
name: fn for name, fn in zip(function_names, functions)
convert_to_openai_function(fn)["name"]: fn for fn in functions
}
else:
pydantic_schema = functions[0]
Expand Down
38 changes: 28 additions & 10 deletions libs/langchain/langchain/output_parsers/openai_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ class JsonOutputToolsParser(BaseGenerationOutputParser[Any]):
"""
return_id: bool = False
"""Whether to return the tool call id."""
return_single: bool = False
eyurtsev marked this conversation as resolved.
Show resolved Hide resolved
"""Whether to return only the first tool call."""

def parse_result(self, result: List[Generation], *, partial: bool = False) -> Any:
generation = result[0]
Expand Down Expand Up @@ -65,6 +67,8 @@ def parse_result(self, result: List[Generation], *, partial: bool = False) -> An
final_tools.append(parsed)
if exceptions:
raise OutputParserException("\n\n".join(exceptions))
if self.return_single:
return final_tools[0] if final_tools else None
return final_tools


Expand All @@ -73,21 +77,29 @@ class JsonOutputKeyToolsParser(JsonOutputToolsParser):

key_name: str
"""The type of tools to return."""
return_single: bool = False
"""Whether to return only the first tool call."""

def __init__(self, key_name: str, **kwargs: Any) -> None:
    """Allow init with positional args.

    Args:
        key_name: Name of the tool whose calls this parser extracts; forwarded
            to the ``key_name`` field on the parser.
        **kwargs: Remaining parser fields (e.g. ``return_id``), passed through
            to the base class initializer unchanged.
    """
    super().__init__(key_name=key_name, **kwargs)

def parse_result(self, result: List[Generation], *, partial: bool = False) -> Any:
results = super().parse_result(result, partial=partial)
results = [res for res in results if res["type"] == self.key_name]
if not self.return_id:
results = [res["args"] for res in results]
parsed_result = super().parse_result(result, partial=partial)
if self.return_single:
return results[0] if results else None
return results
single_result = (
parsed_result
if parsed_result and parsed_result["type"] == self.key_name
else None
)
if self.return_id:
return single_result
elif single_result:
return single_result["args"]
else:
return None
parsed_result = [res for res in parsed_result if res["type"] == self.key_name]
if not self.return_id:
parsed_result = [res["args"] for res in parsed_result]
return parsed_result


class PydanticToolsParser(JsonOutputToolsParser):
Expand All @@ -96,6 +108,12 @@ class PydanticToolsParser(JsonOutputToolsParser):
tools: List[Type[BaseModel]]

def parse_result(self, result: List[Generation], *, partial: bool = False) -> Any:
results = super().parse_result(result, partial=partial)
parsed_result = super().parse_result(result, partial=partial)
name_dict = {tool.__name__: tool for tool in self.tools}
return [name_dict[res["type"]](**res["args"]) for res in results]
if self.return_single:
return (
name_dict[parsed_result["type"]](**parsed_result["args"])
if parsed_result
else None
)
return [name_dict[res["type"]](**res["args"]) for res in parsed_result]
Loading