From 3cf1df74f601f7fcb2ea08b00dc6e30f83f12784 Mon Sep 17 00:00:00 2001
From: NotBioWaste905
Date: Thu, 28 Nov 2024 13:38:44 +0300
Subject: [PATCH] Tutorials and structured output update

---
 chatsky/llm/llm_api.py               |   8 ++-
 tutorials/llm/1_basics.py            |   8 ++-
 tutorials/llm/2_prompt_usage.py      |   9 ++-
 tutorials/llm/3_filtering_history.py |   9 ++-
 tutorials/llm/4_structured_output.py | 103 +++++++++++++--------
 5 files changed, 74 insertions(+), 63 deletions(-)

diff --git a/chatsky/llm/llm_api.py b/chatsky/llm/llm_api.py
index 580f89464..5c995ba04 100644
--- a/chatsky/llm/llm_api.py
+++ b/chatsky/llm/llm_api.py
@@ -6,7 +6,7 @@
 
 from typing import Union, Type, Optional
 from pydantic import BaseModel
-
+import logging
 from chatsky.core.message import Message
 from chatsky.core.context import Context
 from chatsky.llm.methods import BaseMethod
@@ -45,8 +45,10 @@ async def respond(
             return Message(text=result)
         elif issubclass(message_schema, Message):
             # Case if the message_schema describes Message structure
-            structured_model = self.model.with_structured_output(message_schema)
-            return Message.model_validate(await structured_model.ainvoke(history))
+            structured_model = self.model.with_structured_output(message_schema, method="json_mode")
+            model_result = await structured_model.ainvoke(history)
+            logging.debug(f"Generated response: {model_result}")
+            return Message.model_validate(model_result)
         elif issubclass(message_schema, BaseModel):
             # Case if the message_schema describes Message.text structure
             structured_model = self.model.with_structured_output(message_schema)
diff --git a/tutorials/llm/1_basics.py b/tutorials/llm/1_basics.py
index 91386de3d..7ccbaa548 100644
--- a/tutorials/llm/1_basics.py
+++ b/tutorials/llm/1_basics.py
@@ -10,7 +10,7 @@
 # %pip install chatsky[llm] langchain-openai
 
 # %%
-from langchain_ollama import ChatOllama
+from langchain_openai import ChatOpenAI
 from chatsky.core.message import Message
 from chatsky import (
     TRANSITIONS,
@@ -27,7 +27,11 @@
 from chatsky.responses.llm import LLMResponse
 from chatsky.conditions.llm import LLMCondition
 from chatsky.llm.methods import Contains
+from dotenv import load_dotenv
+import os
 
+load_dotenv()
+openai_api_key = os.getenv("OPENAI_API_KEY")
 
 # %% [markdown]
 """
@@ -45,7 +49,7 @@
 
 # %%
 model = LLM_API(
-    ChatOllama(model="phi3:instruct", temperature=0),
+    ChatOpenAI(model="gpt-4o-mini", api_key=openai_api_key),
     system_prompt="You are an experienced barista in a local coffeshop. "
     "Answer your customer's questions about coffee and barista work.",
 )
diff --git a/tutorials/llm/2_prompt_usage.py b/tutorials/llm/2_prompt_usage.py
index 7a212307d..87205fda7 100644
--- a/tutorials/llm/2_prompt_usage.py
+++ b/tutorials/llm/2_prompt_usage.py
@@ -25,7 +25,6 @@
 """
 
 # %pip install chatsky[llm] langchain-openai
-
 
 
 # %%
@@ -43,13 +42,17 @@
     conditions as cnd,
     destinations as dst,
 )
-from langchain_ollama import ChatOllama
+from langchain_openai import ChatOpenAI
 from chatsky.core.message import Message
 from chatsky.utils.testing import is_interactive_mode
 
 from chatsky.llm import LLM_API
 from chatsky.responses.llm import LLMResponse
+from dotenv import load_dotenv
+import os
 
+load_dotenv()
+openai_api_key = os.getenv("OPENAI_API_KEY")
 
 # %% [markdown]
 """
@@ -67,7 +70,7 @@
 # this `system_prompt` will be always on the top of the history
 # during models response
 model = LLM_API(
-    ChatOllama(model="phi3:instruct", temperature=0),
+    ChatOpenAI(model="gpt-4o-mini", api_key=openai_api_key),
     system_prompt="You will represent different bank workers. "
     "Answer users' questions according to your role.",
 )
diff --git a/tutorials/llm/3_filtering_history.py b/tutorials/llm/3_filtering_history.py
index c39c6e0bd..5b5a03a10 100644
--- a/tutorials/llm/3_filtering_history.py
+++ b/tutorials/llm/3_filtering_history.py
@@ -9,7 +9,6 @@
 """
 
 # %pip install chatsky[llm] langchain-openai
-
 
 # %%
 from chatsky import (
@@ -19,18 +18,22 @@
     conditions as cnd,
     destinations as dst,
 )
-from langchain_ollama import ChatOllama
+from langchain_openai import ChatOpenAI
 from chatsky.core.message import Message
 from chatsky.utils.testing import is_interactive_mode
 from chatsky.llm import LLM_API
 from chatsky.responses.llm import LLMResponse
 from chatsky.llm.filters import BaseHistoryFilter
 from chatsky.core.context import Context
+from dotenv import load_dotenv
+import os
 
+load_dotenv()
+openai_api_key = os.getenv("OPENAI_API_KEY")
 
 # %%
 model = LLM_API(
-    ChatOllama(model="phi3:instruct", temperature=0),
+    ChatOpenAI(model="gpt-4o-mini", api_key=openai_api_key),
     system_prompt="You are a database assistant and must help your user to "
     "recover the demanded data from your memory. Act as a note keeper.",
 )
diff --git a/tutorials/llm/4_structured_output.py b/tutorials/llm/4_structured_output.py
index b05445bf4..93154b13c 100644
--- a/tutorials/llm/4_structured_output.py
+++ b/tutorials/llm/4_structured_output.py
@@ -2,9 +2,11 @@
 """
 # LLM: 4. Structured Output
 
-Sometimes, we want to output structured data, such as a valid JSON object or
-want to automatically fill particular fields in the output Message.
-In Chatsky we can do that using Structured Output.
+Chatsky provides two ways to get structured output from LLMs:
+1. Using a `BaseModel` subclass to get structured text content (like JSON)
+2. Using a `Message` subclass to add metadata to messages
+
+This tutorial demonstrates both approaches with practical examples.
 """
 
 # %pip install chatsky[llm] langchain-openai langchain-anthropic
@@ -17,53 +19,46 @@
     Pipeline,
     Transition as Tr,
     conditions as cnd,
-    destinations as dst,
 )
-from langchain_ollama import ChatOllama
+from langchain_openai import ChatOpenAI
+from langchain_anthropic import ChatAnthropic
 from chatsky.core.message import Message
 from chatsky.utils.testing import is_interactive_mode
 from chatsky.llm import LLM_API
 from chatsky.responses.llm import LLMResponse
+from dotenv import load_dotenv
+from pydantic import BaseModel, Field
+import os
 
-from langchain_core.pydantic_v1 import BaseModel, Field
+load_dotenv()
+openai_api_key = os.getenv("OPENAI_API_KEY")
+anthropic_api_key = os.getenv("ANTHROPIC_API_KEY")
 
-# %% [markdown]
-"""
-In this tutorial we will define two models.
-"""
-# %%
-assistant_model = LLM_API(ChatOllama(model="llama3.2:1b", temperature=0))
+# Initialize our models
 movie_model = LLM_API(
-    ChatOllama(model="kuqoi/qwen2-tools:latest", temperature=0)
+    ChatAnthropic(model="claude-3-5-sonnet-latest",
+                  api_key=anthropic_api_key, temperature=0),
+)
+review_model = LLM_API(
+    ChatOpenAI(model="gpt-4o-mini", api_key=openai_api_key, temperature=0),
 )
 
-# %% [markdown]
-"""
-For the structured output we will use two classes to show two possible ways of
-using `message_schema` in responses.
-The `Movie`, inherited from the `BaseModel` will act as a schema for the
-response _text_, that will contain valid JSON containing desribed information.
-The `ImportantMessage`, inherited from the `Message` class, will otherwise
-define the fields of the output `Message`. In this example we will use this
-to mark the message as important.
-"""
-
-
-# %%
+# Define structured output schemas
 class Movie(BaseModel):
     name: str = Field(description="Name of the movie")
     genre: str = Field(description="Genre of the movie")
     plot: str = Field(description="Plot of the movie in chapters")
     cast: list = Field(description="List of the actors")
 
-
-class ImportantMessage(Message):
-    text: str = Field(description="Text of the note")
+class MovieReview(Message):
+    """Schema for movie reviews (uses Message.misc for metadata)"""
+    text: str = Field(description="The actual review text")
     misc: dict = Field(
-        description="A dictionary with 'important' "
-        "key and true/false value in it"
+        description="A dictionary with the following keys and values: "
+        "k: rating, v [int]: a number between 0 and 5; "
+        "k: spoiler_alert, v [bool]: whether the review contains spoilers"
     )
 
 
 
 
@@ -72,41 +66,42 @@
 script = {
     GLOBAL: {
         TRANSITIONS: [
-            Tr(
-                dst=("greeting_flow", "start_node"),
-                cnd=cnd.ExactMatch("/start"),
-            ),
-            Tr(dst=("movie_flow", "main_node"), cnd=cnd.ExactMatch("/movie")),
-            Tr(dst=("note_flow", "main_node"), cnd=cnd.ExactMatch("/note")),
+            Tr(dst=("greeting_flow", "start_node"), cnd=cnd.ExactMatch("/start")),
+            Tr(dst=("movie_flow", "create"), cnd=cnd.ExactMatch("/create")),
+            Tr(dst=("movie_flow", "review"), cnd=cnd.Regexp(r"/review \w*")),
         ]
     },
     "greeting_flow": {
         "start_node": {
-            RESPONSE: Message(),
+            RESPONSE: Message(
+                "Welcome to MovieBot! Try:\n"
+                "/create - Create a movie idea\n"
+                "/review <movie name> - Write a movie review"
+            ),
         },
         "fallback_node": {
-            RESPONSE: Message("I did not quite understand you..."),
+            RESPONSE: Message("I didn't understand. Try /create or /review"),
             TRANSITIONS: [Tr(dst="start_node")],
         },
     },
     "movie_flow": {
-        "main_node": {
+        "create": {
             RESPONSE: LLMResponse(
-                "movie_model",
-                prompt="Ask user to request you for movie ideas.",
+                model_name="movie_model",
+                prompt="Create a movie idea for the user.",
                 message_schema=Movie,
             ),
-            TRANSITIONS: [Tr(dst=dst.Current())],
-        }
-    },
-    "note_flow": {
-        "main_node": {
+            TRANSITIONS: [Tr(dst=("greeting_flow", "start_node"))],
+        },
+        "review": {
             RESPONSE: LLMResponse(
-                "note_model",
-                prompt="Help user take notes and mark the important ones.",
-                message_schema=ImportantMessage,
+                model_name="review_model",
+                prompt="Generate a movie review based on the user's input. "
+                "Include a rating and mark whether it contains spoilers. "
+                "Use JSON with the `text` and `misc` fields to produce the output.",
+                message_schema=MovieReview,
             ),
-            TRANSITIONS: [Tr(dst=dst.Current())],
+            TRANSITIONS: [Tr(dst=("greeting_flow", "start_node"))],
         }
     },
 }
@@ -116,7 +111,10 @@ pipeline = Pipeline(
     script=script,
     start_label=("greeting_flow", "start_node"),
     fallback_label=("greeting_flow", "fallback_node"),
-    models={"movie_model": movie_model, "note_model": assistant_model},
+    models={
+        "movie_model": movie_model,
+        "review_model": review_model,
+    },
 )
 
 if __name__ == "__main__":