transports: resize output image if it doesn't match camera

pipecat-ai · May 15, 2024 · 3d6783d · 3d6783d
1 parent 349fc52
commit 3d6783d
Show file tree

Hide file tree

Showing 4 changed files with 13 additions and 7 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -13,6 +13,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Fixed
 
+- Output images whose size does not match the output camera are now resized to it instead of being ignored.
+
 - Fixed an issue in `DailyTransport` that would not allow the processor to
   shutdown if no participant ever joined the room.
 

diff --git a/src/pipecat/frames/frames.py b/src/pipecat/frames/frames.py
@@ -55,7 +55,7 @@ class ImageRawFrame(DataFrame):
     """
     image: bytes
     size: Tuple[int, int]
-    format: str
+    format: str | None
 
     def __str__(self):
         return f"{self.name}(size: {self.size}, format: {self.format})"

diff --git a/src/pipecat/services/moondream.py b/src/pipecat/services/moondream.py
@@ -78,7 +78,7 @@ async def run_vision(self, frame: VisionImageRawFrame) -> AsyncGenerator[Frame,
         logger.debug(f"Analyzing image: {frame}")
 
         def get_image_description(frame: VisionImageRawFrame):
-            image = Image.frombytes(frame.format, (frame.size[0], frame.size[1]), frame.image)
+            image = Image.frombytes(frame.format, frame.size, frame.image)
             image_embeds = self._model.encode_image(image)
             description = self._model.answer_question(
                 image_embeds=image_embeds,

diff --git a/src/pipecat/transports/base_output.py b/src/pipecat/transports/base_output.py
@@ -7,9 +7,11 @@
 
 import asyncio
 import itertools
+from multiprocessing.context import _force_start_method
 import queue
 import time
 
+from PIL import Image
 from typing import List
 
 from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
@@ -149,15 +151,17 @@ def _sink_thread_handler(self):
     async def send_image(self, frame: ImageRawFrame | SpriteFrame):
         await self.process_frame(frame, FrameDirection.DOWNSTREAM)
 
-    def _draw_image(self, image: ImageRawFrame):
+    def _draw_image(self, frame: ImageRawFrame):
         desired_size = (self._params.camera_out_width, self._params.camera_out_height)
 
-        if image.size != desired_size:
+        if frame.size != desired_size:
+            image = Image.frombytes(frame.format, frame.size, frame.image)
+            resized_image = image.resize(desired_size)
             logger.warning(
-                f"{image} does not have the expected size {desired_size}, ignoring")
-            return
+                f"{frame} does not have the expected size {desired_size}, resizing")
+            frame = ImageRawFrame(resized_image.tobytes(), resized_image.size, resized_image.format)
 
-        self.write_frame_to_camera(image)
+        self.write_frame_to_camera(frame)
 
     def _set_camera_image(self, image: ImageRawFrame):
         if self._params.camera_out_is_live: