From 4ef1672cc4b08b766ec4b4ecbad40a1903650b1a Mon Sep 17 00:00:00 2001 From: Yassine Souissi Date: Sun, 31 Mar 2024 22:27:13 +0200 Subject: [PATCH 01/15] Add Image support to our llm --- app/domain/__init__.py | 2 ++ app/domain/iris_message.py | 7 ++++ app/domain/pyris_image.py | 25 +++++++++++++ app/llm/external/model.py | 30 ++++++++++++++-- app/llm/external/ollama.py | 23 +++++++++--- app/llm/external/openai_chat.py | 24 ++++++++++--- app/llm/external/openai_dalle.py | 60 ++++++++++++++++++++++++++++++++ 7 files changed, 158 insertions(+), 13 deletions(-) create mode 100644 app/domain/pyris_image.py create mode 100644 app/llm/external/openai_dalle.py diff --git a/app/domain/__init__.py b/app/domain/__init__.py index 2b67a350..90dad6a2 100644 --- a/app/domain/__init__.py +++ b/app/domain/__init__.py @@ -4,4 +4,6 @@ from ..domain.tutor_chat.tutor_chat_pipeline_execution_dto import ( TutorChatPipelineExecutionDTO, ) +from .pyris_image import PyrisImage from .iris_message import IrisMessage, IrisMessageRole + diff --git a/app/domain/iris_message.py b/app/domain/iris_message.py index 94969c96..b229237c 100644 --- a/app/domain/iris_message.py +++ b/app/domain/iris_message.py @@ -1,6 +1,7 @@ from enum import Enum from pydantic import BaseModel +from .pyris_image import PyrisImage class IrisMessageRole(str, Enum): @@ -12,6 +13,12 @@ class IrisMessageRole(str, Enum): class IrisMessage(BaseModel): text: str = "" role: IrisMessageRole + images: list[PyrisImage] | None + def __init__( + self, role: IrisMessageRole, text: str, images: list[PyrisImage] | None = None + ): + super().__init__(role=role, text=text) + self.images = images def __str__(self): return f"{self.role.lower()}: {self.text}" diff --git a/app/domain/pyris_image.py b/app/domain/pyris_image.py new file mode 100644 index 00000000..0a2ac773 --- /dev/null +++ b/app/domain/pyris_image.py @@ -0,0 +1,25 @@ +from datetime import datetime + + +class PyrisImage: + """ + Represents an image from the Pyris dataset + """ + prompt: str + base64: str + timestamp: datetime + mime_type: str = "jpeg", + raw_data: any = None, + def __init__( + self, + prompt: str, + base64: str, + timestamp: datetime, + mime_type: str = "jpeg", + raw_data: any = None, + ): + self.prompt = prompt + self.base64 = base64 + self.timestamp = timestamp + self.raw_data = raw_data + self.mime_type = mime_type diff --git a/app/llm/external/model.py b/app/llm/external/model.py index 04520e81..72fba37b 100644 --- a/app/llm/external/model.py +++ b/app/llm/external/model.py @@ -1,7 +1,7 @@ from abc import ABCMeta, abstractmethod from pydantic import BaseModel -from ...domain import IrisMessage +from ...domain import IrisMessage, PyrisImage from ...llm import CompletionArguments from ...llm.capability import CapabilityList @@ -23,7 +23,7 @@ def __subclasshook__(cls, subclass) -> bool: return hasattr(subclass, "complete") and callable(subclass.complete) @abstractmethod - def complete(self, prompt: str, arguments: CompletionArguments) -> str: + def complete(self, prompt: str, arguments: CompletionArguments, images: [PyrisImage] = None) -> str: """Create a completion from the prompt""" raise NotImplementedError( f"The LLM {self.__str__()} does not support completion" @@ -39,7 +39,7 @@ def __subclasshook__(cls, subclass) -> bool: @abstractmethod def chat( - self, messages: list[IrisMessage], arguments: CompletionArguments + self, messages: list[IrisMessage], arguments: CompletionArguments ) -> IrisMessage: """Create a completion from the chat messages""" raise NotImplementedError( @@ -60,3 +60,27 @@ def embed(self, text: str) -> list[float]: raise NotImplementedError( f"The LLM {self.__str__()} does not support embeddings" ) + + +class ImageGenerationModel(LanguageModel, metaclass=ABCMeta): + """Abstract class for the llm image generation wrappers""" + + @classmethod + def __subclasshook__(cls, subclass): + return hasattr(subclass, "generate_images") and callable( + subclass.generate_images + ) + + @abstractmethod + def generate_images( + self, + prompt: str, + n: int = 1, + size: str = "256x256", + quality: str = "standard", + **kwargs, + ) -> list: + """Create an image from the prompt""" + raise NotImplementedError( + f"The LLM {self.__str__()} does not support image generation" + ) diff --git a/app/llm/external/ollama.py b/app/llm/external/ollama.py index 03a832a2..c06dd2db 100644 --- a/app/llm/external/ollama.py +++ b/app/llm/external/ollama.py @@ -1,18 +1,27 @@ +import base64 from typing import Literal, Any from ollama import Client, Message -from ...domain import IrisMessage, IrisMessageRole +from ...domain import IrisMessage, IrisMessageRole, PyrisImage from ...llm import CompletionArguments from ...llm.external.model import ChatModel, CompletionModel, EmbeddingModel +def convert_to_ollama_images(images: list[PyrisImage]) -> list[bytes] | None: + if not images: + return None + return [base64.b64decode(image.base64) for image in images] def convert_to_ollama_messages(messages: list[IrisMessage]) -> list[Message]: return [ - Message(role=message.role.value, content=message.text) for message in messages + Message( + role=message.role.value, + content=message.text, + images=convert_to_ollama_images(message.images), + ) + for message in messages ] - def convert_to_iris_message(message: Message) -> IrisMessage: return IrisMessage(role=IrisMessageRole(message["role"]), text=message["content"]) @@ -30,8 +39,12 @@ class OllamaModel( def model_post_init(self, __context: Any) -> None: self._client = Client(host=self.host) # TODO: Add authentication (httpx auth?) - def complete(self, prompt: str, arguments: CompletionArguments) -> str: - response = self._client.generate(model=self.model, prompt=prompt) + def complete( + self, prompt: str, arguments: CompletionArguments, images: [PyrisImage] = None + ) -> str: + response = self._client.generate( + model=self.model, prompt=prompt, images=convert_to_ollama_images(images) + ) return response["response"] def chat( diff --git a/app/llm/external/openai_chat.py b/app/llm/external/openai_chat.py index 9e035810..8a82c1b6 100644 --- a/app/llm/external/openai_chat.py +++ b/app/llm/external/openai_chat.py @@ -1,4 +1,4 @@ -from typing import Literal, Any +from typing import Literal, Any, List, Dict from openai import OpenAI from openai.lib.azure import AzureOpenAI @@ -11,10 +11,24 @@ def convert_to_open_ai_messages( messages: list[IrisMessage], -) -> list[ChatCompletionMessageParam]: - return [ - {"role": message.role.value, "content": message.text} for message in messages - ] +) -> list[dict[str, Any]]: + openai_messages = [] + for message in messages: + if message.images: + content = [{"type": "text", "content": message.text}] + for image in message.images: + content.append( + { + "type": "image_url", + "image_url": f"data:image/{image.type};base64,{image.base64}", + "detail": "high", + } + ) + else: + content = message.text + openai_message = {"role": message.role.value, "content": content} + openai_messages.append(openai_message) + return openai_messages def convert_to_iris_message(message: ChatCompletionMessage) -> IrisMessage: diff --git a/app/llm/external/openai_dalle.py b/app/llm/external/openai_dalle.py new file mode 100644 index 00000000..9cd8dd6d --- /dev/null +++ b/app/llm/external/openai_dalle.py @@ -0,0 +1,60 @@ +import base64 +from datetime import datetime +from typing import Literal, Any + +import requests +from openai import OpenAI + +from ...domain.pyris_image import PyrisImage +from ...llm.external.model import ImageGenerationModel + + +class OpenAIDalleWrapper(ImageGenerationModel): + type: Literal["openai_dalle"] + model: str + _client: OpenAI + + def model_post_init(self, __context: Any) -> None: + self._client = OpenAI(api_key=self.api_key) + + def generate_images( + self, + prompt: str, + n: int = 1, + size: Literal[ + "256x256", "512x512", "1024x1024", "1792x1024", "1024x1792" + ] = "256x256", + quality: Literal["standard", "hd"] = "standard", + **kwargs + ) -> [PyrisImage]: + response = self._client.images.generate( + model=self.model, + prompt=prompt, + size=size, + quality=quality, + n=n, + response_format="url", + **kwargs + ) + + images = response.data + iris_images = [] + for image in images: + if image.revised_prompt is None: + image.revised_prompt = prompt + if image.b64_json is None: + image_response = requests.get(image.url) + image.b64_json = base64.b64encode(image_response.content).decode( + "utf-8" + ) + + iris_images.append( + PyrisImage( + prompt=image.revised_prompt, + base64=image.b64_json, + timestamp=datetime.fromtimestamp(response.created), + raw_data=image, + ) + ) + + return iris_images \ No newline at end of file From d15c6e61f430028d6107d65e52c49f3abac2a509 Mon Sep 17 00:00:00 2001 From: Yassine Souissi Date: Sun, 31 Mar 2024 22:29:54 +0200 Subject: [PATCH 02/15] flake8 --- app/domain/__init__.py | 1 - app/domain/iris_message.py | 3 ++- app/domain/pyris_image.py | 6 ++++-- app/llm/external/model.py | 6 ++++-- app/llm/external/ollama.py | 5 ++++- app/llm/external/openai_chat.py | 6 +++--- app/llm/external/openai_dalle.py | 2 +- 7 files changed, 18 insertions(+), 11 deletions(-) diff --git a/app/domain/__init__.py b/app/domain/__init__.py index 90dad6a2..5919de29 100644 --- a/app/domain/__init__.py +++ b/app/domain/__init__.py @@ -6,4 +6,3 @@ ) from .pyris_image import PyrisImage from .iris_message import IrisMessage, IrisMessageRole - diff --git a/app/domain/iris_message.py b/app/domain/iris_message.py index b229237c..a7468f7a 100644 --- a/app/domain/iris_message.py +++ b/app/domain/iris_message.py @@ -16,9 +16,10 @@ class IrisMessage(BaseModel): images: list[PyrisImage] | None def __init__( - self, role: IrisMessageRole, text: str, images: list[PyrisImage] | None = None + self, role: IrisMessageRole, text: str, images: list[PyrisImage] | None = None ): super().__init__(role=role, text=text) self.images = images + def __str__(self): return f"{self.role.lower()}: {self.text}" diff --git a/app/domain/pyris_image.py b/app/domain/pyris_image.py index 0a2ac773..ecbfdbbb 100644 --- a/app/domain/pyris_image.py +++ b/app/domain/pyris_image.py @@ -5,11 +5,13 @@ class PyrisImage: """ Represents an image from the Pyris dataset """ + prompt: str base64: str timestamp: datetime - mime_type: str = "jpeg", - raw_data: any = None, + mime_type: str = ("jpeg",) + raw_data: any = (None,) + def __init__( self, prompt: str, diff --git a/app/llm/external/model.py b/app/llm/external/model.py index 72fba37b..5808f876 100644 --- a/app/llm/external/model.py +++ b/app/llm/external/model.py @@ -23,7 +23,9 @@ def __subclasshook__(cls, subclass) -> bool: return hasattr(subclass, "complete") and callable(subclass.complete) @abstractmethod - def complete(self, prompt: str, arguments: CompletionArguments, images: [PyrisImage] = None) -> str: + def complete( + self, prompt: str, arguments: CompletionArguments, images: [PyrisImage] = None + ) -> str: """Create a completion from the prompt""" raise NotImplementedError( f"The LLM {self.__str__()} does not support completion" @@ -39,7 +41,7 @@ def __subclasshook__(cls, subclass) -> bool: @abstractmethod def chat( - self, messages: list[IrisMessage], arguments: CompletionArguments + self, messages: list[IrisMessage], arguments: CompletionArguments ) -> IrisMessage: """Create a completion from the chat messages""" raise NotImplementedError( diff --git a/app/llm/external/ollama.py b/app/llm/external/ollama.py index c06dd2db..2581bb04 100644 --- a/app/llm/external/ollama.py +++ b/app/llm/external/ollama.py @@ -7,11 +7,13 @@ from ...llm import CompletionArguments from ...llm.external.model import ChatModel, CompletionModel, EmbeddingModel + def convert_to_ollama_images(images: list[PyrisImage]) -> list[bytes] | None: if not images: return None return [base64.b64decode(image.base64) for image in images] + def convert_to_ollama_messages(messages: list[IrisMessage]) -> list[Message]: return [ Message( @@ -22,6 +24,7 @@ def convert_to_ollama_messages(messages: list[IrisMessage]) -> list[Message]: for message in messages ] + def convert_to_iris_message(message: Message) -> IrisMessage: return IrisMessage(role=IrisMessageRole(message["role"]), text=message["content"]) @@ -40,7 +43,7 @@ def model_post_init(self, __context: Any) -> None: self._client = Client(host=self.host) # TODO: Add authentication (httpx auth?) def complete( - self, prompt: str, arguments: CompletionArguments, images: [PyrisImage] = None + self, prompt: str, arguments: CompletionArguments, images: [PyrisImage] = None ) -> str: response = self._client.generate( model=self.model, prompt=prompt, images=convert_to_ollama_images(images) diff --git a/app/llm/external/openai_chat.py b/app/llm/external/openai_chat.py index 8a82c1b6..351caf72 100644 --- a/app/llm/external/openai_chat.py +++ b/app/llm/external/openai_chat.py @@ -1,8 +1,8 @@ -from typing import Literal, Any, List, Dict +from typing import Literal, Any from openai import OpenAI from openai.lib.azure import AzureOpenAI -from openai.types.chat import ChatCompletionMessageParam, ChatCompletionMessage +from openai.types.chat import ChatCompletionMessage from ...domain import IrisMessage, IrisMessageRole from ...llm import CompletionArguments @@ -20,7 +20,7 @@ def convert_to_open_ai_messages( content.append( { "type": "image_url", - "image_url": f"data:image/{image.type};base64,{image.base64}", + "image_url": f"data:image/{image.mime_type};base64,{image.base64}", "detail": "high", } ) diff --git a/app/llm/external/openai_dalle.py b/app/llm/external/openai_dalle.py index 9cd8dd6d..df863ffe 100644 --- a/app/llm/external/openai_dalle.py +++ b/app/llm/external/openai_dalle.py @@ -57,4 +57,4 @@ def generate_images( ) ) - return iris_images \ No newline at end of file + return iris_images From bcc54c2dbb3786a5ffe993f97845e965d6cf6ee3 Mon Sep 17 00:00:00 2001 From: Yassine Souissi Date: Sun, 31 Mar 2024 22:38:09 +0200 Subject: [PATCH 03/15] black --- app/domain/pyris_image.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app/domain/pyris_image.py b/app/domain/pyris_image.py index ecbfdbbb..7f92226c 100644 --- a/app/domain/pyris_image.py +++ b/app/domain/pyris_image.py @@ -9,8 +9,8 @@ class PyrisImage: prompt: str base64: str timestamp: datetime - mime_type: str = ("jpeg",) - raw_data: any = (None,) + mime_type: str = "jpeg" + raw_data: any = None def __init__( self, From fa4e7056e2375d6209bdc2efa70d77439f83773f Mon Sep 17 00:00:00 2001 From: Yassine Souissi Date: Mon, 8 Apr 2024 00:22:20 +0200 Subject: [PATCH 04/15] there is no image support in completion --- app/llm/external/model.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/app/llm/external/model.py b/app/llm/external/model.py index 5808f876..ad75ea66 100644 --- a/app/llm/external/model.py +++ b/app/llm/external/model.py @@ -1,7 +1,7 @@ from abc import ABCMeta, abstractmethod from pydantic import BaseModel -from ...domain import IrisMessage, PyrisImage +from ...domain import IrisMessage from ...llm import CompletionArguments from ...llm.capability import CapabilityList @@ -23,9 +23,7 @@ def __subclasshook__(cls, subclass) -> bool: return hasattr(subclass, "complete") and callable(subclass.complete) @abstractmethod - def complete( - self, prompt: str, arguments: CompletionArguments, images: [PyrisImage] = None - ) -> str: + def complete(self, prompt: str, arguments: CompletionArguments) -> str: """Create a completion from the prompt""" raise NotImplementedError( f"The LLM {self.__str__()} does not support completion" From 412d5a749bb275ca382e067e99d1b4d8e51cbefb Mon Sep 17 00:00:00 2001 From: Yassine Souissi Date: Thu, 25 Apr 2024 17:36:38 +0200 Subject: [PATCH 05/15] Update Image Support --- app/domain/iris_message.py | 10 ++------ app/domain/pyris_image.py | 36 ++++++++++----------------- app/llm/external/model.py | 6 +++-- app/llm/external/openai_chat.py | 14 +++++++---- app/llm/external/openai_completion.py | 5 +++- 5 files changed, 32 insertions(+), 39 deletions(-) diff --git a/app/domain/iris_message.py b/app/domain/iris_message.py index a7468f7a..82d02621 100644 --- a/app/domain/iris_message.py +++ b/app/domain/iris_message.py @@ -1,6 +1,6 @@ from enum import Enum - from pydantic import BaseModel +from typing import List, Optional from .pyris_image import PyrisImage @@ -13,13 +13,7 @@ class IrisMessageRole(str, Enum): class IrisMessage(BaseModel): text: str = "" role: IrisMessageRole - images: list[PyrisImage] | None - - def __init__( - self, role: IrisMessageRole, text: str, images: list[PyrisImage] | None = None - ): - super().__init__(role=role, text=text) - self.images = images + images: Optional[List[PyrisImage]] = None def __str__(self): return f"{self.role.lower()}: {self.text}" diff --git a/app/domain/pyris_image.py b/app/domain/pyris_image.py index 7f92226c..9e3f41f0 100644 --- a/app/domain/pyris_image.py +++ b/app/domain/pyris_image.py @@ -1,27 +1,17 @@ -from datetime import datetime +from pydantic import BaseModel +from typing import Optional -class PyrisImage: - """ - Represents an image from the Pyris dataset - """ - - prompt: str +class PyrisImage(BaseModel): base64: str - timestamp: datetime - mime_type: str = "jpeg" - raw_data: any = None + prompt: Optional[str] = None + mime_type: Optional[str] = "jpeg" - def __init__( - self, - prompt: str, - base64: str, - timestamp: datetime, - mime_type: str = "jpeg", - raw_data: any = None, - ): - self.prompt = prompt - self.base64 = base64 - self.timestamp = timestamp - self.raw_data = raw_data - self.mime_type = mime_type + class Config: + schema_extra = { + "example": { + "prompt": "Example prompt", + "base64": "base64EncodedString==", + "mime_type": "jpeg", + } + } diff --git a/app/llm/external/model.py b/app/llm/external/model.py index ad75ea66..5808f876 100644 --- a/app/llm/external/model.py +++ b/app/llm/external/model.py @@ -1,7 +1,7 @@ from abc import ABCMeta, abstractmethod from pydantic import BaseModel -from ...domain import IrisMessage +from ...domain import IrisMessage, PyrisImage from ...llm import CompletionArguments from ...llm.capability import CapabilityList @@ -23,7 +23,9 @@ def __subclasshook__(cls, subclass) -> bool: return hasattr(subclass, "complete") and callable(subclass.complete) @abstractmethod - def complete(self, prompt: str, arguments: CompletionArguments) -> str: + def complete( + self, prompt: str, arguments: CompletionArguments, images: [PyrisImage] = None + ) -> str: """Create a completion from the prompt""" raise NotImplementedError( f"The LLM {self.__str__()} does not support completion" diff --git a/app/llm/external/openai_chat.py b/app/llm/external/openai_chat.py index 351caf72..c0085140 100644 --- a/app/llm/external/openai_chat.py +++ b/app/llm/external/openai_chat.py @@ -12,20 +12,25 @@ def convert_to_open_ai_messages( messages: list[IrisMessage], ) -> list[dict[str, Any]]: + """ + Convert IrisMessages to OpenAI messages + """ openai_messages = [] for message in messages: if message.images: - content = [{"type": "text", "content": message.text}] + content = [{"type": "text", "text": message.text}] for image in message.images: content.append( { "type": "image_url", - "image_url": f"data:image/{image.mime_type};base64,{image.base64}", - "detail": "high", + "image_url": { + "url": f"data:image/{image.mime_type};base64,{image.base64}", + "detail": "high", + }, } ) else: - content = message.text + content = [{"type": "text", "text": message.text}] openai_message = {"role": message.role.value, "content": content} openai_messages.append(openai_message) return openai_messages @@ -50,7 +55,6 @@ def chat( messages=convert_to_open_ai_messages(messages), temperature=arguments.temperature, max_tokens=arguments.max_tokens, - stop=arguments.stop, ) return convert_to_iris_message(response.choices[0].message) diff --git a/app/llm/external/openai_completion.py b/app/llm/external/openai_completion.py index 97d6252f..0a61ef97 100644 --- a/app/llm/external/openai_completion.py +++ b/app/llm/external/openai_completion.py @@ -2,6 +2,7 @@ from openai import OpenAI from openai.lib.azure import AzureOpenAI +from ...domain import PyrisImage from ...llm import CompletionArguments from ...llm.external.model import CompletionModel @@ -11,7 +12,9 @@ class OpenAICompletionModel(CompletionModel): api_key: str _client: OpenAI - def complete(self, prompt: str, arguments: CompletionArguments) -> any: + def complete( + self, prompt: str, arguments: CompletionArguments, images: [PyrisImage] = None + ) -> any: response = self._client.completions.create( model=self.model, prompt=prompt, From e05206c9d650ee8e10479a8b955d776343cfc001 Mon Sep 17 00:00:00 2001 From: Yassine Souissi Date: Thu, 25 Apr 2024 23:33:39 +0200 Subject: [PATCH 06/15] Merge With Latest version of main --- app/domain/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/app/domain/__init__.py b/app/domain/__init__.py index 149df609..b1327c90 100644 --- a/app/domain/__init__.py +++ b/app/domain/__init__.py @@ -5,3 +5,4 @@ TutorChatPipelineExecutionDTO, ) from .pyris_message import PyrisMessage, IrisMessageRole +from .pyris_image import PyrisImage From 5d29c9364b96ac753aee661366407a4412d0ddae Mon Sep 17 00:00:00 2001 From: Yassine Souissi Date: Thu, 25 Apr 2024 23:50:35 +0200 Subject: [PATCH 07/15] Fix Warning --- app/domain/pyris_image.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/domain/pyris_image.py b/app/domain/pyris_image.py index 9e3f41f0..92ae7d50 100644 --- a/app/domain/pyris_image.py +++ b/app/domain/pyris_image.py @@ -8,7 +8,7 @@ class PyrisImage(BaseModel): mime_type: Optional[str] = "jpeg" class Config: - schema_extra = { + json_schema_extra = { "example": { "prompt": "Example prompt", "base64": "base64EncodedString==", From 2e0969246bc611b14a442b576f07fb00e9a257f0 Mon Sep 17 00:00:00 2001 From: Yassine Souissi Date: Fri, 26 Apr 2024 19:48:13 +0200 Subject: [PATCH 08/15] Readjusted the image generation and recognition PR --- app/common/message_converters.py | 3 +- app/domain/__init__.py | 2 +- app/domain/data/image_message_content_dto.py | 14 ++- app/domain/iris_message.py | 19 ---- app/domain/pyris_image.py | 17 ---- app/llm/external/model.py | 24 +++++ app/llm/external/ollama.py | 66 +++++++++++--- app/llm/external/openai_chat.py | 47 ++++++++-- app/llm/external/openai_completion.py | 5 +- app/llm/external/openai_dalle.py | 89 +++++++++---------- .../request_handler/basic_request_handler.py | 12 ++- .../request_handler_interface.py | 9 +- 12 files changed, 196 insertions(+), 111 deletions(-) delete mode 100644 app/domain/iris_message.py delete mode 100644 app/domain/pyris_image.py diff --git a/app/common/message_converters.py b/app/common/message_converters.py index 3059a57b..4ca1dd80 100644 --- a/app/common/message_converters.py +++ b/app/common/message_converters.py @@ -1,4 +1,5 @@ from datetime import datetime +from typing import Literal from langchain_core.messages import BaseMessage @@ -47,7 +48,7 @@ def convert_langchain_message_to_iris_message( ) -def map_role_to_str(role: IrisMessageRole) -> str: +def map_role_to_str(role: IrisMessageRole) -> Literal["user", "assistant", "system"]: match role: case IrisMessageRole.USER: return "user" diff --git a/app/domain/__init__.py b/app/domain/__init__.py index b1327c90..c2f4199e 100644 --- a/app/domain/__init__.py +++ b/app/domain/__init__.py @@ -5,4 +5,4 @@ TutorChatPipelineExecutionDTO, ) from .pyris_message import PyrisMessage, IrisMessageRole -from .pyris_image import PyrisImage +from app.domain.data import image_message_content_dto diff --git a/app/domain/data/image_message_content_dto.py b/app/domain/data/image_message_content_dto.py index d48fd717..43360b7c 100644 --- a/app/domain/data/image_message_content_dto.py +++ b/app/domain/data/image_message_content_dto.py @@ -1,7 +1,15 @@ -from typing import Optional - from pydantic import BaseModel, Field +from typing import List, Optional class ImageMessageContentDTO(BaseModel): - image_data: Optional[str] = Field(alias="imageData", default=None) + base64: List[str] = Field(..., alias="base64") # List of base64-encoded strings + prompt: Optional[str] = Field(default=None, alias="prompt") + + class Config: + json_schema_extra = { + "example": { + "prompt": "Example prompt", + "base64": ["base64EncodedString==", "anotherBase64EncodedString=="], + } + } diff --git a/app/domain/iris_message.py b/app/domain/iris_message.py deleted file mode 100644 index 82d02621..00000000 --- a/app/domain/iris_message.py +++ /dev/null @@ -1,19 +0,0 @@ -from enum import Enum -from pydantic import BaseModel -from typing import List, Optional -from .pyris_image import PyrisImage - - -class IrisMessageRole(str, Enum): - USER = "user" - ASSISTANT = "assistant" - SYSTEM = "system" - - -class IrisMessage(BaseModel): - text: str = "" - role: IrisMessageRole - images: Optional[List[PyrisImage]] = None - - def __str__(self): - return f"{self.role.lower()}: {self.text}" diff --git a/app/domain/pyris_image.py b/app/domain/pyris_image.py deleted file mode 100644 index 92ae7d50..00000000 --- a/app/domain/pyris_image.py +++ /dev/null @@ -1,17 +0,0 @@ -from pydantic import BaseModel -from typing import Optional - - -class PyrisImage(BaseModel): - base64: str - prompt: Optional[str] = None - mime_type: Optional[str] = "jpeg" - - class Config: - json_schema_extra = { - "example": { - "prompt": "Example prompt", - "base64": "base64EncodedString==", - "mime_type": "jpeg", - } - } diff --git a/app/llm/external/model.py b/app/llm/external/model.py index 4d42745b..47b90962 100644 --- a/app/llm/external/model.py +++ b/app/llm/external/model.py @@ -60,3 +60,27 @@ def embed(self, text: str) -> list[float]: raise NotImplementedError( f"The LLM {self.__str__()} does not support embeddings" ) + + +class ImageGenerationModel(LanguageModel, metaclass=ABCMeta): + """Abstract class for the llm image generation wrappers""" + + @classmethod + def __subclasshook__(cls, subclass): + return hasattr(subclass, "generate_images") and callable( + subclass.generate_images + ) + + @abstractmethod + def generate_images( + self, + prompt: str, + n: int = 1, + size: str = "256x256", + quality: str = "standard", + **kwargs, + ) -> list: + """Create an image from the prompt""" + raise NotImplementedError( + f"The LLM {self.__str__()} does not support image generation" + ) diff --git a/app/llm/external/ollama.py b/app/llm/external/ollama.py index 72dbb04e..bb19d9c6 100644 --- a/app/llm/external/ollama.py +++ b/app/llm/external/ollama.py @@ -1,26 +1,65 @@ +import base64 from datetime import datetime -from typing import Literal, Any +from typing import Literal, Any, Optional from ollama import Client, Message from ...common.message_converters import map_role_to_str, map_str_to_role +from ...domain.data.json_message_content_dto import JsonMessageContentDTO from ...domain.data.text_message_content_dto import TextMessageContentDTO +from ...domain.data.image_message_content_dto import ImageMessageContentDTO from ...domain import PyrisMessage from ...llm import CompletionArguments from ...llm.external.model import ChatModel, CompletionModel, EmbeddingModel +def convert_to_ollama_images(base64_images: list[str]) -> list[bytes] | None: + """ + Convert a list of base64 images to a list of bytes + """ + if not base64_images: + return None + return [base64.b64decode(base64_image) for base64_image in base64_images] + + def convert_to_ollama_messages(messages: list[PyrisMessage]) -> list[Message]: - return [ - Message( - role=map_role_to_str(message.sender), - content=message.contents[0].text_content, - ) - for message in messages - ] + """ + Convert a list of PyrisMessage to a list of Message + """ + messages_to_return = [] + for message in messages: + match message.contents[0]: + case ImageMessageContentDTO(): + messages_to_return.append( + Message( + role=map_role_to_str(message.sender), + content=message.contents[0].text_content, + images=message.contents[0].base64, + ) + ) + case TextMessageContentDTO(): + messages_to_return.append( + Message( + role=map_role_to_str(message.sender), + content=message.contents[0].text_content, + ) + ) + case JsonMessageContentDTO(): + messages_to_return.append( + Message( + role=map_role_to_str(message.sender), + content=message.contents[0].text_content, + ) + ) + case _: + continue + return messages_to_return def convert_to_iris_message(message: Message) -> PyrisMessage: + """ + Convert a Message to a PyrisMessage + """ contents = [TextMessageContentDTO(text_content=message["content"])] return PyrisMessage( sender=map_str_to_role(message["role"]), @@ -42,8 +81,15 @@ class OllamaModel( def model_post_init(self, __context: Any) -> None: self._client = Client(host=self.host) # TODO: Add authentication (httpx auth?) - def complete(self, prompt: str, arguments: CompletionArguments) -> str: - response = self._client.generate(model=self.model, prompt=prompt) + def complete( + self, + prompt: str, + arguments: CompletionArguments, + image: Optional[ImageMessageContentDTO] = None, + ) -> str: + response = self._client.generate( + model=self.model, prompt=prompt, images=image.base64 if image else None + ) return response["response"] def chat( diff --git a/app/llm/external/openai_chat.py b/app/llm/external/openai_chat.py index 450efdd7..022478d9 100644 --- a/app/llm/external/openai_chat.py +++ b/app/llm/external/openai_chat.py @@ -5,9 +5,11 @@ from openai.lib.azure import AzureOpenAI from openai.types.chat import ChatCompletionMessageParam, ChatCompletionMessage -from ...common.message_converters import map_role_to_str, map_str_to_role +from ...common.message_converters import map_str_to_role from app.domain.data.text_message_content_dto import TextMessageContentDTO from ...domain import PyrisMessage +from ...domain.data.image_message_content_dto import ImageMessageContentDTO +from ...domain.data.json_message_content_dto import JsonMessageContentDTO from ...llm import CompletionArguments from ...llm.external.model import ChatModel @@ -15,16 +17,45 @@ def convert_to_open_ai_messages( messages: list[PyrisMessage], ) -> list[ChatCompletionMessageParam]: - return [ - { - "role": map_role_to_str(message.sender), - "content": message.contents[0].text_content, - } - for message in messages - ] + """ + Convert a list of PyrisMessage to a list of ChatCompletionMessageParam + """ + openai_messages = [] + for message in messages: + match message.contents[0]: + case ImageMessageContentDTO(): + content = [{"type": "text", "text": message.contents[0].prompt}] + for image_base64 in message.contents[0].base64: + content.append( + { + "type": "image_url", + "image_url": { + "url": f"data:image/jpeg;base64,{image_base64}", + "detail": "high", + }, + } + ) + case TextMessageContentDTO(): + content = [{"type": "text", "text": message.contents[0].text_content}] + case JsonMessageContentDTO(): + content = [ + { + "type": "json_object", + "json_object": message.contents[0].json_content, + } + ] + case _: + content = [{"type": "text", "text": ""}] + + openai_message = {"role": message.sender.value, "content": content} + openai_messages.append(openai_message) + return openai_messages def convert_to_iris_message(message: ChatCompletionMessage) -> PyrisMessage: + """ + Convert a ChatCompletionMessage to a PyrisMessage + """ return PyrisMessage( sender=map_str_to_role(message.role), contents=[TextMessageContentDTO(textContent=message.content)], diff --git a/app/llm/external/openai_completion.py b/app/llm/external/openai_completion.py index 0a61ef97..97d6252f 100644 --- a/app/llm/external/openai_completion.py +++ b/app/llm/external/openai_completion.py @@ -2,7 +2,6 @@ from openai import OpenAI from openai.lib.azure import AzureOpenAI -from ...domain import PyrisImage from ...llm import CompletionArguments from ...llm.external.model import CompletionModel @@ -12,9 +11,7 @@ class OpenAICompletionModel(CompletionModel): api_key: str _client: OpenAI - def complete( - self, prompt: str, arguments: CompletionArguments, images: [PyrisImage] = None - ) -> any: + def complete(self, prompt: str, arguments: CompletionArguments) -> any: response = self._client.completions.create( model=self.model, prompt=prompt, diff --git a/app/llm/external/openai_dalle.py b/app/llm/external/openai_dalle.py index df863ffe..e8f9817c 100644 --- a/app/llm/external/openai_dalle.py +++ b/app/llm/external/openai_dalle.py @@ -1,32 +1,25 @@ import base64 -from datetime import datetime -from typing import Literal, Any +from typing import List, Literal import requests -from openai import OpenAI -from ...domain.pyris_image import PyrisImage -from ...llm.external.model import ImageGenerationModel - - -class OpenAIDalleWrapper(ImageGenerationModel): - type: Literal["openai_dalle"] - model: str - _client: OpenAI - - def model_post_init(self, __context: Any) -> None: - self._client = OpenAI(api_key=self.api_key) - - def generate_images( - self, - prompt: str, - n: int = 1, - size: Literal[ - "256x256", "512x512", "1024x1024", "1792x1024", "1024x1792" - ] = "256x256", - quality: Literal["standard", "hd"] = "standard", - **kwargs - ) -> [PyrisImage]: +from app.domain.data.image_message_content_dto import ImageMessageContentDTO + + +def generate_images( + self, + prompt: str, + n: int = 1, + size: Literal[ + "256x256", "512x512", "1024x1024", "1792x1024", "1024x1792" + ] = "256x256", + quality: Literal["standard", "hd"] = "standard", + **kwargs, +) -> List[ImageMessageContentDTO]: + """ + Generate images from the prompt. + """ + try: response = self._client.images.generate( model=self.model, prompt=prompt, @@ -34,27 +27,33 @@ def generate_images( quality=quality, n=n, response_format="url", - **kwargs + **kwargs, ) - - images = response.data - iris_images = [] - for image in images: - if image.revised_prompt is None: - image.revised_prompt = prompt - if image.b64_json is None: + except Exception as e: + print(f"Failed to generate images: {e}") + return [] + + images = response.data + iris_images = [] + for image in images: + revised_prompt = ( + prompt if image.revised_prompt is None else image.revised_prompt + ) + base64_data = image.b64_json + if base64_data is None: + try: image_response = requests.get(image.url) - image.b64_json = base64.b64encode(image_response.content).decode( - "utf-8" - ) - - iris_images.append( - PyrisImage( - prompt=image.revised_prompt, - base64=image.b64_json, - timestamp=datetime.fromtimestamp(response.created), - raw_data=image, - ) + image_response.raise_for_status() + base64_data = base64.b64encode(image_response.content).decode("utf-8") + except requests.RequestException as e: + print(f"Failed to download or encode image: {e}") + continue + + iris_images.append( + ImageMessageContentDTO( + prompt=revised_prompt, + base64=base64_data, ) + ) - return iris_images + return iris_images diff --git a/app/llm/request_handler/basic_request_handler.py b/app/llm/request_handler/basic_request_handler.py index dc07d545..5756346f 100644 --- a/app/llm/request_handler/basic_request_handler.py +++ b/app/llm/request_handler/basic_request_handler.py @@ -1,4 +1,7 @@ +from typing import Optional + from app.domain import PyrisMessage +from app.domain.data.image_message_content_dto import ImageMessageContentDTO from app.llm.request_handler import RequestHandler from app.llm.completion_arguments import CompletionArguments from app.llm.llm_manager import LlmManager @@ -12,9 +15,14 @@ def __init__(self, model_id: str): self.model_id = model_id self.llm_manager = LlmManager() - def complete(self, prompt: str, arguments: CompletionArguments) -> str: + def complete( + self, + prompt: str, + arguments: CompletionArguments, + image: Optional[ImageMessageContentDTO] = None, + ) -> str: llm = self.llm_manager.get_llm_by_id(self.model_id) - return llm.complete(prompt, arguments) + return llm.complete(prompt, arguments, image) def chat( self, messages: list[PyrisMessage], arguments: CompletionArguments diff --git a/app/llm/request_handler/request_handler_interface.py b/app/llm/request_handler/request_handler_interface.py index 4acdbe6d..390a4cbc 100644 --- a/app/llm/request_handler/request_handler_interface.py +++ b/app/llm/request_handler/request_handler_interface.py @@ -1,6 +1,8 @@ from abc import ABCMeta, abstractmethod +from typing import Optional from ...domain import PyrisMessage +from ...domain.data.image_message_content_dto import ImageMessageContentDTO from ...llm import CompletionArguments @@ -19,7 +21,12 @@ def __subclasshook__(cls, subclass) -> bool: ) @abstractmethod - def complete(self, prompt: str, arguments: CompletionArguments) -> str: + def complete( + self, + prompt: str, + arguments: CompletionArguments, + image: Optional[ImageMessageContentDTO] = None, + ) -> str: """Create a completion from the prompt""" raise NotImplementedError From fe76c805de8e215641242fea78edecdf6b53c1b0 Mon Sep 17 00:00:00 2001 From: Yassine Souissi Date: Fri, 26 Apr 2024 20:37:59 +0200 Subject: [PATCH 09/15] Image interpretation tested works fine --- app/llm/external/openai_chat.py | 5 ++--- app/pipeline/chat/tutor_chat_pipeline.py | 8 +++++--- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/app/llm/external/openai_chat.py b/app/llm/external/openai_chat.py index 022478d9..d8c0af67 100644 --- a/app/llm/external/openai_chat.py +++ b/app/llm/external/openai_chat.py @@ -5,7 +5,7 @@ from openai.lib.azure import AzureOpenAI from openai.types.chat import ChatCompletionMessageParam, ChatCompletionMessage -from ...common.message_converters import map_str_to_role +from ...common.message_converters import map_str_to_role, map_role_to_str from app.domain.data.text_message_content_dto import TextMessageContentDTO from ...domain import PyrisMessage from ...domain.data.image_message_content_dto import ImageMessageContentDTO @@ -47,7 +47,7 @@ def convert_to_open_ai_messages( case _: content = [{"type": "text", "text": ""}] - openai_message = {"role": message.sender.value, "content": content} + openai_message = {"role": map_role_to_str(message.sender), "content": content} openai_messages.append(openai_message) return openai_messages @@ -76,7 +76,6 @@ def chat( messages=convert_to_open_ai_messages(messages), temperature=arguments.temperature, max_tokens=arguments.max_tokens, - stop=arguments.stop, ) return convert_to_iris_message(response.choices[0].message) diff --git a/app/pipeline/chat/tutor_chat_pipeline.py b/app/pipeline/chat/tutor_chat_pipeline.py index ed3e9347..51122770 100644 --- a/app/pipeline/chat/tutor_chat_pipeline.py +++ b/app/pipeline/chat/tutor_chat_pipeline.py @@ -1,3 +1,4 @@ +import base64 import logging from typing import List, Dict @@ -9,10 +10,11 @@ AIMessagePromptTemplate, ) from langchain_core.runnables import Runnable +from ...domain.data.image_message_content_dto import ImageMessageContentDTO from ...common import convert_iris_message_to_langchain_message -from ...domain import PyrisMessage -from ...llm import CapabilityRequestHandler, RequirementList +from ...domain import PyrisMessage, IrisMessageRole +from ...llm import CapabilityRequestHandler, RequirementList, BasicRequestHandler from ...domain.data.build_log_entry import BuildLogEntryDTO from ...domain.data.feedback_dto import FeedbackDTO from ..prompts.iris_tutor_chat_prompts import ( @@ -32,7 +34,6 @@ logger = logging.getLogger(__name__) - class TutorChatPipeline(Pipeline): """Tutor chat pipeline that answers exercises related questions from students.""" @@ -74,6 +75,7 @@ def __call__(self, dto: TutorChatPipelineExecutionDTO, **kwargs): :param dto: The pipeline execution data transfer object :param kwargs: The keyword arguments """ + # Set up the initial prompt self.prompt = ChatPromptTemplate.from_messages( [ From ec964c374f88636beb94ebadf1f14de073bf4a7b Mon Sep 17 00:00:00 2001 From: Yassine Souissi Date: Fri, 26 Apr 2024 20:39:39 +0200 Subject: [PATCH 10/15] Black --- app/pipeline/chat/tutor_chat_pipeline.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/app/pipeline/chat/tutor_chat_pipeline.py b/app/pipeline/chat/tutor_chat_pipeline.py index 51122770..5f36b1b8 100644 --- a/app/pipeline/chat/tutor_chat_pipeline.py +++ b/app/pipeline/chat/tutor_chat_pipeline.py @@ -1,4 +1,3 @@ -import base64 import logging from typing import List, Dict @@ -10,11 +9,10 @@ AIMessagePromptTemplate, ) from langchain_core.runnables import Runnable -from ...domain.data.image_message_content_dto import ImageMessageContentDTO from ...common import convert_iris_message_to_langchain_message -from ...domain import PyrisMessage, IrisMessageRole -from ...llm import CapabilityRequestHandler, RequirementList, BasicRequestHandler +from ...domain import PyrisMessage +from ...llm import CapabilityRequestHandler, RequirementList from ...domain.data.build_log_entry import BuildLogEntryDTO from ...domain.data.feedback_dto import FeedbackDTO from ..prompts.iris_tutor_chat_prompts import ( @@ -34,6 +32,7 @@ logger = logging.getLogger(__name__) + class TutorChatPipeline(Pipeline): """Tutor chat pipeline that answers exercises related questions from students.""" From 001b99d118c4fcb244c2c0a64f53a179de088166 Mon Sep 17 00:00:00 2001 From: Timor Morrien Date: Sat, 27 Apr 2024 03:52:32 +0200 Subject: [PATCH 11/15] Improve & fix image recognition --- app/domain/data/image_message_content_dto.py | 1 + app/domain/data/json_message_content_dto.py | 8 +-- app/domain/data/text_message_content_dto.py | 8 +-- app/domain/pyris_message.py | 4 +- app/llm/external/ollama.py | 53 ++++++++++---------- app/llm/external/openai_chat.py | 52 ++++++++++--------- 6 files changed, 69 insertions(+), 57 deletions(-) diff --git a/app/domain/data/image_message_content_dto.py b/app/domain/data/image_message_content_dto.py index 43360b7c..82893733 100644 --- a/app/domain/data/image_message_content_dto.py +++ b/app/domain/data/image_message_content_dto.py @@ -13,3 +13,4 @@ class Config: "base64": ["base64EncodedString==", "anotherBase64EncodedString=="], } } + populate_by_name = True diff --git a/app/domain/data/json_message_content_dto.py b/app/domain/data/json_message_content_dto.py index 73a0d7cb..cd4ccfcb 100644 --- a/app/domain/data/json_message_content_dto.py +++ b/app/domain/data/json_message_content_dto.py @@ -1,6 +1,8 @@ -from pydantic import BaseModel, Field, Json -from typing import Any, Optional +from pydantic import BaseModel, ConfigDict, Field, Json +from typing import Any class JsonMessageContentDTO(BaseModel): - json_content: Optional[Json[Any]] = Field(alias="jsonContent", default=None) + model_config = ConfigDict(populate_by_name=True) + + json_content: Json[Any] = Field(alias="jsonContent") diff --git a/app/domain/data/text_message_content_dto.py b/app/domain/data/text_message_content_dto.py index b7ece8f9..9442dbd3 100644 --- a/app/domain/data/text_message_content_dto.py +++ b/app/domain/data/text_message_content_dto.py @@ -1,7 +1,7 @@ -from typing import Optional - -from pydantic import BaseModel, Field +from pydantic import BaseModel, ConfigDict, Field class TextMessageContentDTO(BaseModel): - text_content: Optional[str] = Field(alias="textContent", default=None) + model_config = ConfigDict(populate_by_name=True) + + text_content: str = Field(alias="textContent") diff --git a/app/domain/pyris_message.py b/app/domain/pyris_message.py index 5f44cd9d..056f77ef 100644 --- a/app/domain/pyris_message.py +++ b/app/domain/pyris_message.py @@ -2,7 +2,7 @@ from enum import Enum from typing import List -from pydantic import BaseModel, Field +from pydantic import BaseModel, ConfigDict, Field from app.domain.data.message_content_dto import MessageContentDTO @@ -14,6 +14,8 @@ class IrisMessageRole(str, Enum): class PyrisMessage(BaseModel): + model_config = ConfigDict(populate_by_name=True) + sent_at: datetime | None = Field(alias="sentAt", default=None) sender: IrisMessageRole contents: List[MessageContentDTO] = [] diff --git a/app/llm/external/ollama.py b/app/llm/external/ollama.py index bb19d9c6..91d32746 100644 --- a/app/llm/external/ollama.py +++ b/app/llm/external/ollama.py @@ -24,35 +24,36 @@ def convert_to_ollama_images(base64_images: list[str]) -> list[bytes] | None: def convert_to_ollama_messages(messages: list[PyrisMessage]) -> list[Message]: """ - Convert a list of PyrisMessage to a list of Message + Convert a list of PyrisMessages to a list of Ollama Messages """ messages_to_return = [] for message in messages: - match message.contents[0]: - case ImageMessageContentDTO(): - messages_to_return.append( - Message( - role=map_role_to_str(message.sender), - content=message.contents[0].text_content, - images=message.contents[0].base64, - ) - ) - case TextMessageContentDTO(): - messages_to_return.append( - Message( - role=map_role_to_str(message.sender), - content=message.contents[0].text_content, - ) - ) - case JsonMessageContentDTO(): - messages_to_return.append( - Message( - role=map_role_to_str(message.sender), - content=message.contents[0].text_content, - ) - ) - case _: - continue + if len(message.contents) == 0: + continue + text_content = "" + images = [] + for content in message.contents: + match content: + case ImageMessageContentDTO(): + for image in content.base64: + images.append(image) + case TextMessageContentDTO(): + if len(text_content) > 0: + text_content += "\n" + text_content += content.text_content + case JsonMessageContentDTO(): + if len(text_content) > 0: + text_content += "\n" + text_content += content.json_content + case _: + continue + messages_to_return.append( + Message( + role=map_role_to_str(message.sender), + content=text_content, + images=convert_to_ollama_images(images), + ) + ) return messages_to_return diff --git a/app/llm/external/openai_chat.py b/app/llm/external/openai_chat.py index d8c0af67..bd5b06bb 100644 --- a/app/llm/external/openai_chat.py +++ b/app/llm/external/openai_chat.py @@ -22,32 +22,38 @@ def convert_to_open_ai_messages( """ openai_messages = [] for message in messages: - match message.contents[0]: - case ImageMessageContentDTO(): - content = [{"type": "text", "text": message.contents[0].prompt}] - for image_base64 in message.contents[0].base64: - content.append( + openai_content = [] + for content in message.contents: + match content: + case ImageMessageContentDTO(): + for image_base64 in content.base64: + openai_content.append( + { + "type": "image_url", + "image_url": { + "url": f"data:image/jpeg;base64,{image_base64}", + "detail": "high", + }, + } + ) + case TextMessageContentDTO(): + openai_content.append( + {"type": "text", "text": content.text_content} + ) + case JsonMessageContentDTO(): + openai_content.append( { - "type": "image_url", - "image_url": { - "url": f"data:image/jpeg;base64,{image_base64}", - "detail": "high", - }, + "type": "json_object", + "json_object": content.json_content, } ) - case TextMessageContentDTO(): - content = [{"type": "text", "text": message.contents[0].text_content}] - case JsonMessageContentDTO(): - content = [ - { - "type": "json_object", - "json_object": message.contents[0].json_content, - } - ] - case _: - content = [{"type": "text", "text": ""}] - - openai_message = {"role": map_role_to_str(message.sender), "content": content} + case _: + pass + + openai_message = { + "role": map_role_to_str(message.sender), + "content": openai_content, + } openai_messages.append(openai_message) return openai_messages From 6cf2f4ec9ac54cdd3823502cc14ce027d1c6e7d2 Mon Sep 17 00:00:00 2001 From: Yassine Souissi <74144843+yassinsws@users.noreply.github.com> Date: Sat, 27 Apr 2024 17:24:58 +0200 Subject: [PATCH 12/15] Update image_message_content_dto.py --- app/domain/data/image_message_content_dto.py | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/app/domain/data/image_message_content_dto.py b/app/domain/data/image_message_content_dto.py index 82893733..9415de04 100644 --- a/app/domain/data/image_message_content_dto.py +++ b/app/domain/data/image_message_content_dto.py @@ -3,14 +3,5 @@ class ImageMessageContentDTO(BaseModel): - base64: List[str] = Field(..., alias="base64") # List of base64-encoded strings - prompt: Optional[str] = Field(default=None, alias="prompt") - - class Config: - json_schema_extra = { - "example": { - "prompt": "Example prompt", - "base64": ["base64EncodedString==", "anotherBase64EncodedString=="], - } - } - populate_by_name = True + base64: List[str] + prompt: Optional[str] From 6fd7d264993f02c8860ca0df6aa8df01daa03ac7 Mon Sep 17 00:00:00 2001 From: Yassine Souissi Date: Sat, 27 Apr 2024 22:11:19 +0200 Subject: [PATCH 13/15] Black --- app/domain/data/image_message_content_dto.py | 6 +++--- app/llm/external/openai_dalle.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/app/domain/data/image_message_content_dto.py b/app/domain/data/image_message_content_dto.py index 9415de04..e3662a29 100644 --- a/app/domain/data/image_message_content_dto.py +++ b/app/domain/data/image_message_content_dto.py @@ -1,7 +1,7 @@ -from pydantic import BaseModel, Field +from pydantic import BaseModel from typing import List, Optional class ImageMessageContentDTO(BaseModel): - base64: List[str] - prompt: Optional[str] + base64: List[str] + prompt: Optional[str] diff --git a/app/llm/external/openai_dalle.py b/app/llm/external/openai_dalle.py index e8f9817c..5ea74a10 100644 --- a/app/llm/external/openai_dalle.py +++ b/app/llm/external/openai_dalle.py @@ -52,7 +52,7 @@ def generate_images( iris_images.append( ImageMessageContentDTO( prompt=revised_prompt, - base64=base64_data, + base64=[base64_data], ) ) From 2c4e2ea459d54327b726a8de56daece7cf6d6f26 Mon Sep 17 00:00:00 2001 From: Yassine Souissi Date: Sun, 28 Apr 2024 20:14:41 +0200 Subject: [PATCH 14/15] Changed [base64] to base64 in imageContentDTO --- app/domain/data/image_message_content_dto.py | 4 +-- app/llm/external/ollama.py | 5 ++- app/llm/external/openai_chat.py | 34 +++++++++++++------- app/llm/external/openai_dalle.py | 2 +- 4 files changed, 27 insertions(+), 18 deletions(-) diff --git a/app/domain/data/image_message_content_dto.py b/app/domain/data/image_message_content_dto.py index e3662a29..a73e2654 100644 --- a/app/domain/data/image_message_content_dto.py +++ b/app/domain/data/image_message_content_dto.py @@ -1,7 +1,7 @@ from pydantic import BaseModel -from typing import List, Optional +from typing import Optional class ImageMessageContentDTO(BaseModel): - base64: List[str] + base64: str prompt: Optional[str] diff --git a/app/llm/external/ollama.py b/app/llm/external/ollama.py index 91d32746..f2363b23 100644 --- a/app/llm/external/ollama.py +++ b/app/llm/external/ollama.py @@ -35,8 +35,7 @@ def convert_to_ollama_messages(messages: list[PyrisMessage]) -> list[Message]: for content in message.contents: match content: case ImageMessageContentDTO(): - for image in content.base64: - images.append(image) + images.append(content.base64) case TextMessageContentDTO(): if len(text_content) > 0: text_content += "\n" @@ -89,7 +88,7 @@ def complete( image: Optional[ImageMessageContentDTO] = None, ) -> str: response = self._client.generate( - model=self.model, prompt=prompt, images=image.base64 if image else None + model=self.model, prompt=prompt, images=[image.base64] if image else None ) return response["response"] diff --git a/app/llm/external/openai_chat.py b/app/llm/external/openai_chat.py index bd5b06bb..ab386516 100644 --- a/app/llm/external/openai_chat.py +++ b/app/llm/external/openai_chat.py @@ -3,7 +3,8 @@ from openai import OpenAI from openai.lib.azure import AzureOpenAI -from openai.types.chat import ChatCompletionMessageParam, ChatCompletionMessage +from openai.types.chat import ChatCompletionMessage +from pydantic import Json from ...common.message_converters import map_str_to_role, map_role_to_str from app.domain.data.text_message_content_dto import TextMessageContentDTO @@ -16,7 +17,17 @@ def convert_to_open_ai_messages( messages: list[PyrisMessage], -) -> list[ChatCompletionMessageParam]: +) -> list[ + dict[ + str, + Literal["user", "assistant", "system"] + | list[ + dict[str, str | dict[str, str]] + | dict[str, str] + | dict[str, str | Json | Any] + ], + ] +]: """ Convert a list of PyrisMessage to a list of ChatCompletionMessageParam """ @@ -26,16 +37,15 @@ def convert_to_open_ai_messages( for content in message.contents: match content: case ImageMessageContentDTO(): - for image_base64 in content.base64: - openai_content.append( - { - "type": "image_url", - "image_url": { - "url": f"data:image/jpeg;base64,{image_base64}", - "detail": "high", - }, - } - ) + openai_content.append( + { + "type": "image_url", + "image_url": { + "url": f"data:image/jpeg;base64,{content.base64}", + "detail": "high", + }, + } + ) case TextMessageContentDTO(): openai_content.append( {"type": "text", "text": content.text_content} diff --git a/app/llm/external/openai_dalle.py b/app/llm/external/openai_dalle.py index 5ea74a10..e8f9817c 100644 --- a/app/llm/external/openai_dalle.py +++ b/app/llm/external/openai_dalle.py @@ -52,7 +52,7 @@ def generate_images( iris_images.append( ImageMessageContentDTO( prompt=revised_prompt, - base64=[base64_data], + base64=base64_data, ) ) From 272b01c0d227037028d779dd6eb24d959880ab9d Mon Sep 17 00:00:00 2001 From: Timor Morrien Date: Mon, 29 Apr 2024 12:53:42 +0200 Subject: [PATCH 15/15] Improve type definition --- app/llm/external/openai_chat.py | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/app/llm/external/openai_chat.py b/app/llm/external/openai_chat.py index ab386516..894b3b18 100644 --- a/app/llm/external/openai_chat.py +++ b/app/llm/external/openai_chat.py @@ -3,8 +3,7 @@ from openai import OpenAI from openai.lib.azure import AzureOpenAI -from openai.types.chat import ChatCompletionMessage -from pydantic import Json +from openai.types.chat import ChatCompletionMessage, ChatCompletionMessageParam from ...common.message_converters import map_str_to_role, map_role_to_str from app.domain.data.text_message_content_dto import TextMessageContentDTO @@ -17,17 +16,7 @@ def convert_to_open_ai_messages( messages: list[PyrisMessage], -) -> list[ - dict[ - str, - Literal["user", "assistant", "system"] - | list[ - dict[str, str | dict[str, str]] - | dict[str, str] - | dict[str, str | Json | Any] - ], - ] -]: +) -> list[ChatCompletionMessageParam]: """ Convert a list of PyrisMessage to a list of ChatCompletionMessageParam """