diff --git a/CHANGELOG.md b/CHANGELOG.md
index f96a80275..3b2d777d9 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -13,6 +13,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Fixed
 
+- Output images whose size does not match the output camera are now resized to the camera size instead of being dropped.
+
 - Fixed an issue in `DailyTransport` that would not allow the processor to
   shutdown if no participant ever joined the room.
 
diff --git a/src/pipecat/frames/frames.py b/src/pipecat/frames/frames.py
index 27f85a182..1278a7de1 100644
--- a/src/pipecat/frames/frames.py
+++ b/src/pipecat/frames/frames.py
@@ -55,7 +55,7 @@ class ImageRawFrame(DataFrame):
     """
     image: bytes
     size: Tuple[int, int]
-    format: str
+    format: str | None
 
     def __str__(self):
         return f"{self.name}(size: {self.size}, format: {self.format})"
diff --git a/src/pipecat/services/moondream.py b/src/pipecat/services/moondream.py
index 444dea1ed..508b8d24f 100644
--- a/src/pipecat/services/moondream.py
+++ b/src/pipecat/services/moondream.py
@@ -78,7 +78,7 @@ async def run_vision(self, frame: VisionImageRawFrame) -> AsyncGenerator[Frame,
         logger.debug(f"Analyzing image: {frame}")
 
         def get_image_description(frame: VisionImageRawFrame):
-            image = Image.frombytes(frame.format, (frame.size[0], frame.size[1]), frame.image)
+            image = Image.frombytes(frame.format, frame.size, frame.image)
             image_embeds = self._model.encode_image(image)
             description = self._model.answer_question(
                 image_embeds=image_embeds,
diff --git a/src/pipecat/transports/base_output.py b/src/pipecat/transports/base_output.py
index c9e3678b1..1a3246fa6 100644
--- a/src/pipecat/transports/base_output.py
+++ b/src/pipecat/transports/base_output.py
@@ -7,9 +7,10 @@
 
 import asyncio
 import itertools
 import queue
 import time
 
+from PIL import Image
 from typing import List
 
 from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
@@ -149,15 +150,29 @@ def _sink_thread_handler(self):
     async def send_image(self, frame: ImageRawFrame | SpriteFrame):
         await self.process_frame(frame, FrameDirection.DOWNSTREAM)
 
-    def _draw_image(self, image: ImageRawFrame):
+    def _draw_image(self, frame: ImageRawFrame):
+        """Write an image frame to the output camera, resizing it if needed.
+
+        A frame whose size differs from the configured camera output size is
+        resized to that size (and a warning is logged) before being written.
+
+        Args:
+            frame: Raw image frame. `frame.format` is handed to PIL as the
+                pixel mode when the frame has to be resized.
+        """
         desired_size = (self._params.camera_out_width, self._params.camera_out_height)
 
-        if image.size != desired_size:
+        if frame.size != desired_size:
+            image = Image.frombytes(frame.format, frame.size, frame.image)
+            resized_image = image.resize(desired_size)
             logger.warning(
-                f"{image} does not have the expected size {desired_size}, ignoring")
-            return
+                f"{frame} does not have the expected size {desired_size}, resizing")
+            # PIL only sets `Image.format` on images read from a file, so it is
+            # always None on the resized copy. Carry over the incoming frame's
+            # format instead; `resize` keeps the pixel mode it was built with.
+            frame = ImageRawFrame(resized_image.tobytes(), resized_image.size, frame.format)
 
-        self.write_frame_to_camera(image)
+        self.write_frame_to_camera(frame)
 
     def _set_camera_image(self, image: ImageRawFrame):
         if self._params.camera_out_is_live: