Skip to content

Commit

Permalink
Working vision example
Browse files Browse the repository at this point in the history
  • Loading branch information
chadbailey59 committed Mar 19, 2024
1 parent 6c9425d commit 0b4b63d
Show file tree
Hide file tree
Showing 7 changed files with 34 additions and 21 deletions.
8 changes: 4 additions & 4 deletions src/dailyai/pipeline/frames.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,14 +187,14 @@ class VideoImageFrame(Frame):
participantId: str
image: bytes

def __str__(self):
return f"{self.__class__.__name__}, participantId: {self.participantId}, image size: {len(self.image)} B"
# def __str__(self):
# return f"{self.__class__.__name__}, participantId: {self.participantId}, image size: {len(self.image)} B"


@dataclass()
class VisionFrame(Frame):
prompt: str
image: bytes

def __str__(self):
return f"{self.__class__.__name__}, prompt: {self.prompt}, image size: {len(self.image)} B"
# def __str__(self):
# return f"{self.__class__.__name__}, prompt: {self.prompt}, image size: {len(self.image)} B"
2 changes: 2 additions & 0 deletions src/dailyai/services/ai_services.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,8 @@ async def process_frame(self, frame: Frame) -> AsyncGenerator[Frame, None]:
if isinstance(frame, VisionFrame):
async for frame in self.run_vision(frame.prompt, frame.image):
yield frame
else:
yield frame


class FrameLogger(AIService):
Expand Down
8 changes: 3 additions & 5 deletions src/dailyai/services/daily_transport_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,14 +230,12 @@ def _post_run(self):
self.client.release()

def _handle_video_frame(self, participant_id, video_frame):
# TODO-CB: What about multiple participants?
if (not participant_id in self._participant_frame_times) or (time.time() > self._participant_frame_times[participant_id] + 1.0/self._receive_video_fps):
print(f"### sending frame now")
self._participant_frame_times[participant_id] = time.time()
asyncio.run_coroutine_threadsafe(

future = asyncio.run_coroutine_threadsafe(
self.receive_queue.put(
VideoImageFrame(participant_id, video_frame)), self._loop
)
VideoImageFrame(participant_id, video_frame)), self._loop)

def on_first_other_participant_joined(self):
pass
Expand Down
1 change: 1 addition & 0 deletions src/dailyai/services/elevenlabs_ai_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ async def run_tts(self, sentence) -> AsyncGenerator[bytes, None]:
"xi-api-key": self._api_key,
"Content-Type": "application/json",
}

async with self._aiohttp_session.post(
url, json=payload, headers=headers, params=querystring
) as r:
Expand Down
25 changes: 22 additions & 3 deletions src/dailyai/services/open_ai_services.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,24 @@ def __init__(
self._client = AsyncOpenAI(api_key=api_key)

async def run_vision(self, prompt: str, image: bytes):
base64_image = base64.b64encode(image).decode('utf-8')
IMAGE_WIDTH = image.width
IMAGE_HEIGHT = image.height
COLOR_FORMAT = image.color_format
a_image = Image.frombytes(
'RGBA', (IMAGE_WIDTH, IMAGE_HEIGHT), image.buffer)
new_image = a_image.convert('RGB')

# Uncomment these lines to write the converted frame to image.jpg in the current working directory.
# current_path = os.getcwd()
# image_path = os.path.join(current_path, "image.jpg")
# new_image.save(image_path, format="JPEG")

jpeg_buffer = io.BytesIO()

new_image.save(jpeg_buffer, format='JPEG')

jpeg_bytes = jpeg_buffer.getvalue()
base64_image = base64.b64encode(jpeg_bytes).decode('utf-8')
messages = [
{
"role": "user",
Expand All @@ -94,5 +111,7 @@ async def run_vision(self, prompt: str, image: bytes):
)
)
async for chunk in chunks:
print(f"!!! chunk: {chunk}")
yield TextFrame(chunk)
if len(chunk.choices) == 0:
continue
if chunk.choices[0].delta.content:
yield TextFrame(chunk.choices[0].delta.content)
10 changes: 2 additions & 8 deletions src/examples/foundational/12-describe-video.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from dailyai.services.daily_transport_service import DailyTransportService
from dailyai.services.elevenlabs_ai_service import ElevenLabsTTSService
from dailyai.services.open_ai_services import OpenAILLMService, OpenAIVisionService
from dailyai.services.deepgram_ai_services import DeepgramTTSService
from dailyai.services.ai_services import FrameLogger
from dailyai.pipeline.aggregators import (
LLMAssistantContextAggregator,
Expand Down Expand Up @@ -59,10 +60,7 @@ async def main(room_url: str, token):
llm = OpenAILLMService(
api_key=os.getenv("OPENAI_CHATGPT_API_KEY"),
model="gpt-4-turbo-preview")
fl = FrameLogger("!!! before VIFP")
fl2 = FrameLogger("Outer")
fl3 = FrameLogger("### Before VS")
fl4 = FrameLogger("$$$ After VS")

messages = [
{
"role": "system",
Expand All @@ -80,13 +78,9 @@ async def main(room_url: str, token):
vifp = VideoImageFrameProcessor()
pipeline = Pipeline(
processors=[
fl,
vifp,
fl3,
vs,
fl4,
llm,
fl2,
tts,
tma_out,
],
Expand Down
1 change: 0 additions & 1 deletion src/examples/starter-apps/chatbot.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,6 @@ async def main(room_url: str, token):

@transport.event_handler("on_first_other_participant_joined")
async def on_first_other_participant_joined(transport):
print(f"!!! in here, pipeline.source is {pipeline.source}")
await pipeline.queue_frames([LLMMessagesQueueFrame(messages)])

async def run_conversation():
Expand Down

0 comments on commit 0b4b63d

Please sign in to comment.