Skip to content

Commit

Permalink
transports: resize output image if it doesn't match camera
Browse files Browse the repository at this point in the history
  • Loading branch information
aconchillo committed May 15, 2024
1 parent 349fc52 commit 3d6783d
Show file tree
Hide file tree
Showing 4 changed files with 13 additions and 7 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Fixed

- Output images that don't match the size of the output camera are now resized to it instead of being ignored.

- Fixed an issue in `DailyTransport` that would not allow the processor to
shut down if no participant ever joined the room.

Expand Down
2 changes: 1 addition & 1 deletion src/pipecat/frames/frames.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ class ImageRawFrame(DataFrame):
"""
image: bytes
size: Tuple[int, int]
format: str
format: str | None

def __str__(self):
return f"{self.name}(size: {self.size}, format: {self.format})"
Expand Down
2 changes: 1 addition & 1 deletion src/pipecat/services/moondream.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ async def run_vision(self, frame: VisionImageRawFrame) -> AsyncGenerator[Frame,
logger.debug(f"Analyzing image: {frame}")

def get_image_description(frame: VisionImageRawFrame):
image = Image.frombytes(frame.format, (frame.size[0], frame.size[1]), frame.image)
image = Image.frombytes(frame.format, frame.size, frame.image)
image_embeds = self._model.encode_image(image)
description = self._model.answer_question(
image_embeds=image_embeds,
Expand Down
14 changes: 9 additions & 5 deletions src/pipecat/transports/base_output.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,11 @@

import asyncio
import itertools
from multiprocessing.context import _force_start_method
import queue
import time

from PIL import Image
from typing import List

from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
Expand Down Expand Up @@ -149,15 +151,17 @@ def _sink_thread_handler(self):
async def send_image(self, frame: ImageRawFrame | SpriteFrame):
await self.process_frame(frame, FrameDirection.DOWNSTREAM)

def _draw_image(self, image: ImageRawFrame):
def _draw_image(self, frame: ImageRawFrame):
desired_size = (self._params.camera_out_width, self._params.camera_out_height)

if image.size != desired_size:
if frame.size != desired_size:
image = Image.frombytes(frame.format, frame.size, frame.image)
resized_image = image.resize(desired_size)
logger.warning(
f"{image} does not have the expected size {desired_size}, ignoring")
return
f"{frame} does not have the expected size {desired_size}, resizing")
frame = ImageRawFrame(resized_image.tobytes(), resized_image.size, resized_image.format)

self.write_frame_to_camera(image)
self.write_frame_to_camera(frame)

def _set_camera_image(self, image: ImageRawFrame):
if self._params.camera_out_is_live:
Expand Down

0 comments on commit 3d6783d

Please sign in to comment.