Skip to content

Commit

Permalink
Merge pull request #115 from daily-co/add-vision-and-moondream-service
Browse files Browse the repository at this point in the history
add vision and moondream service
  • Loading branch information
aconchillo authored Apr 10, 2024
2 parents 0219230 + 3c20f91 commit 26d401e
Show file tree
Hide file tree
Showing 13 changed files with 322 additions and 24 deletions.
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ Currently implemented services:
- Transport
- Daily
- Local (in progress, intended as a quick start example service)
- Vision
- Moondream

If you'd like to [implement a service](https://github.com/daily-co/daily-ai-sdk/tree/main/src/dailyai/services), we welcome PRs! Our goal is to support lots of services in all of the above categories, plus new categories (like real-time video) as they emerge.

Expand All @@ -63,7 +65,7 @@ pip install "dailyai[option,...]"

Your project may or may not need these, so they're made available as optional requirements. Here is a list:

- **AI services**: `anthropic`, `azure`, `fal`, `openai`, `playht`, `silero`, `whisper`
- **AI services**: `anthropic`, `azure`, `fal`, `moondream`, `openai`, `playht`, `silero`, `whisper`
- **Transports**: `daily`, `local`, `websocket`

## Code examples
Expand Down
84 changes: 84 additions & 0 deletions examples/foundational/12-describe-video.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
import asyncio
import aiohttp
import logging
import os

from typing import AsyncGenerator

from dailyai.pipeline.aggregators import FrameProcessor, UserResponseAggregator, VisionImageFrameAggregator

from dailyai.pipeline.frames import Frame, TextFrame, UserImageRequestFrame
from dailyai.pipeline.pipeline import Pipeline
from dailyai.services.elevenlabs_ai_service import ElevenLabsTTSService
from dailyai.services.moondream_ai_service import MoondreamService
from dailyai.transports.daily_transport import DailyTransport

from runner import configure

from dotenv import load_dotenv
# Load settings (e.g. the ELEVENLABS_* keys read in main()) from a local .env
# file. override=True is passed so .env values are expected to win over
# variables already present in the environment -- see python-dotenv docs.
load_dotenv(override=True)

# The %(...)s fields are expanded by the logging module itself, so this must
# be a plain string literal; an f-string prefix here would be misleading.
logging.basicConfig(format="%(levelno)s %(asctime)s %(message)s")
logger = logging.getLogger("dailyai")
logger.setLevel(logging.DEBUG)


class UserImageRequester(FrameProcessor):
    """Request a participant's image whenever a text frame passes through.

    For each ``TextFrame`` received while a participant id is set, a
    ``UserImageRequestFrame`` for that participant is yielded first, followed
    by the original frame. Every other frame -- and every frame received
    before a participant id has been set -- is yielded unchanged.
    """

    # Default to None: a bare annotation creates no attribute, so without a
    # default process_frame() would raise AttributeError for any frame that
    # arrives before set_participant_id() has been called.
    participant_id: "str | None" = None

    def set_participant_id(self, participant_id: str):
        """Set the id of the participant whose image should be requested."""
        self.participant_id = participant_id

    async def process_frame(self, frame: Frame) -> AsyncGenerator[Frame, None]:
        """Yield an image request ahead of text frames, then the frame itself."""
        if self.participant_id and isinstance(frame, TextFrame):
            yield UserImageRequestFrame(self.participant_id)
        yield frame


async def main(room_url: str, token):
    """Run a bot that describes a participant's video on request.

    Builds a Daily transport plus a pipeline of
    user-response aggregation -> image request -> vision aggregation ->
    Moondream -> ElevenLabs TTS, then runs the transport with that pipeline.

    Args:
        room_url: URL of the Daily room to join.
        token: Token passed to ``DailyTransport`` together with the room URL.
    """
    async with aiohttp.ClientSession() as session:
        transport = DailyTransport(
            room_url,
            token,
            "Describe participant video",
            duration_minutes=5,
            mic_enabled=True,
            mic_sample_rate=16000,
            vad_enabled=True,
            start_transcription=True,
            video_rendering_enabled=True,
        )

        user_response = UserResponseAggregator()

        image_requester = UserImageRequester()

        vision_aggregator = VisionImageFrameAggregator()

        moondream = MoondreamService()

        # Built exactly once; it is shared by the greeting in the join handler
        # below and by the last stage of the pipeline.
        tts = ElevenLabsTTSService(
            aiohttp_session=session,
            api_key=os.getenv("ELEVENLABS_API_KEY"),
            voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
        )

        @transport.event_handler("on_first_other_participant_joined")
        async def on_first_other_participant_joined(transport, participant):
            await transport.say("Hi there! Feel free to ask me what I see.", tts)
            # NOTE(review): framerate=0 presumably means frames are only
            # delivered when explicitly requested -- confirm against
            # DailyTransport.render_participant_video.
            transport.render_participant_video(participant["id"], framerate=0)
            # From now on, text frames trigger an image request for this participant.
            image_requester.set_participant_id(participant["id"])

        pipeline = Pipeline([user_response, image_requester, vision_aggregator, moondream, tts])

        await transport.run(pipeline)

if __name__ == "__main__":
    # configure() comes from the examples' runner module and returns the
    # (room URL, token) pair that main() expects.
    url, token = configure()
    asyncio.run(main(url, token))
1 change: 0 additions & 1 deletion examples/foundational/14-render-remote-participant.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import asyncio
import io
import logging

from typing import AsyncGenerator
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import asyncio
import io
import logging
import tkinter as tk

Expand Down
32 changes: 30 additions & 2 deletions linux-py3.10-requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ cryptography==42.0.5
# via pyjwt
ctranslate2==4.1.0
# via faster-whisper
daily-python==0.7.2
daily-python==0.7.3
# via dailyai (pyproject.toml)
deprecated==1.2.14
# via opentelemetry-api
Expand All @@ -62,6 +62,8 @@ distro==1.9.0
# via
# anthropic
# openai
einops==0.7.0
# via dailyai (pyproject.toml)
exceptiongroup==1.2.0
# via anyio
fal==0.12.7
Expand All @@ -70,11 +72,12 @@ fastapi==0.99.1
# via fal
faster-whisper==1.0.1
# via dailyai (pyproject.toml)
filelock==3.13.3
filelock==3.13.4
# via
# huggingface-hub
# pyht
# torch
# transformers
# triton
# virtualenv
flask==3.0.3
Expand Down Expand Up @@ -114,7 +117,9 @@ httpx==0.27.0
huggingface-hub==0.22.2
# via
# faster-whisper
# timm
# tokenizers
# transformers
humanfriendly==10.0
# via coloredlogs
idna==3.6
Expand Down Expand Up @@ -160,6 +165,8 @@ numpy==1.26.4
# ctranslate2
# dailyai (pyproject.toml)
# onnxruntime
# torchvision
# transformers
nvidia-cublas-cu12==12.1.3.1
# via
# nvidia-cudnn-cu12
Expand Down Expand Up @@ -208,12 +215,14 @@ packaging==24.0
# fal
# huggingface-hub
# onnxruntime
# transformers
pathspec==0.11.2
# via fal
pillow==10.2.0
# via
# dailyai (pyproject.toml)
# fal
# torchvision
platformdirs==4.2.0
# via
# isolate
Expand Down Expand Up @@ -251,16 +260,25 @@ pyyaml==6.0.1
# ctranslate2
# huggingface-hub
# isolate
# timm
# transformers
regex==2023.12.25
# via transformers
requests==2.31.0
# via
# huggingface-hub
# pyht
# transformers
rich==13.7.1
# via
# fal
# rich-click
rich-click==1.7.4
# via fal
safetensors==0.4.2
# via
# timm
# transformers
six==1.16.0
# via python-dateutil
sniffio==1.3.1
Expand All @@ -279,20 +297,30 @@ sympy==1.12
# torch
tblib==3.0.0
# via isolate
timm==0.9.16
# via dailyai (pyproject.toml)
tokenizers==0.15.2
# via
# anthropic
# faster-whisper
# transformers
torch==2.2.2
# via
# dailyai (pyproject.toml)
# timm
# torchaudio
# torchvision
torchaudio==2.2.2
# via dailyai (pyproject.toml)
torchvision==0.17.2
# via timm
tqdm==4.66.2
# via
# huggingface-hub
# openai
# transformers
transformers==4.39.3
# via dailyai (pyproject.toml)
triton==2.2.0
# via torch
types-python-dateutil==2.9.0.20240316
Expand Down
32 changes: 30 additions & 2 deletions macos-py3.10-requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ cryptography==42.0.5
# via pyjwt
ctranslate2==4.1.0
# via faster-whisper
daily-python==0.7.2
daily-python==0.7.3
# via dailyai (pyproject.toml)
deprecated==1.2.14
# via opentelemetry-api
Expand All @@ -62,6 +62,8 @@ distro==1.9.0
# via
# anthropic
# openai
einops==0.7.0
# via dailyai (pyproject.toml)
exceptiongroup==1.2.0
# via anyio
fal==0.12.7
Expand All @@ -70,11 +72,12 @@ fastapi==0.99.1
# via fal
faster-whisper==1.0.1
# via dailyai (pyproject.toml)
filelock==3.13.3
filelock==3.13.4
# via
# huggingface-hub
# pyht
# torch
# transformers
# virtualenv
flask==3.0.3
# via
Expand Down Expand Up @@ -113,7 +116,9 @@ httpx==0.27.0
huggingface-hub==0.22.2
# via
# faster-whisper
# timm
# tokenizers
# transformers
humanfriendly==10.0
# via coloredlogs
idna==3.6
Expand Down Expand Up @@ -159,6 +164,8 @@ numpy==1.26.4
# ctranslate2
# dailyai (pyproject.toml)
# onnxruntime
# torchvision
# transformers
onnxruntime==1.17.1
# via faster-whisper
openai==1.14.3
Expand All @@ -176,12 +183,14 @@ packaging==24.0
# fal
# huggingface-hub
# onnxruntime
# transformers
pathspec==0.11.2
# via fal
pillow==10.2.0
# via
# dailyai (pyproject.toml)
# fal
# torchvision
platformdirs==4.2.0
# via
# isolate
Expand Down Expand Up @@ -219,16 +228,25 @@ pyyaml==6.0.1
# ctranslate2
# huggingface-hub
# isolate
# timm
# transformers
regex==2023.12.25
# via transformers
requests==2.31.0
# via
# huggingface-hub
# pyht
# transformers
rich==13.7.1
# via
# fal
# rich-click
rich-click==1.7.4
# via fal
safetensors==0.4.2
# via
# timm
# transformers
six==1.16.0
# via python-dateutil
sniffio==1.3.1
Expand All @@ -247,20 +265,30 @@ sympy==1.12
# torch
tblib==3.0.0
# via isolate
timm==0.9.16
# via dailyai (pyproject.toml)
tokenizers==0.15.2
# via
# anthropic
# faster-whisper
# transformers
torch==2.2.2
# via
# dailyai (pyproject.toml)
# timm
# torchaudio
# torchvision
torchaudio==2.2.2
# via dailyai (pyproject.toml)
torchvision==0.17.2
# via timm
tqdm==4.66.2
# via
# huggingface-hub
# openai
# transformers
transformers==4.39.3
# via dailyai (pyproject.toml)
types-python-dateutil==2.9.0.20240316
# via fal
typing-extensions==4.10.0
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ daily = [ "daily-python~=0.7.0" ]
examples = [ "python-dotenv~=1.0.0", "flask~=3.0.0", "flask_cors~=4.0.0" ]
fal = [ "fal~=0.12.0" ]
local = [ "pyaudio~=0.2.0" ]
moondream = [ "einops~=0.7.0", "timm~=0.9.0", "transformers~=4.39.0" ]
openai = [ "openai~=1.14.0" ]
playht = [ "pyht~=0.0.26" ]
silero = [ "torch~=2.2.0", "torchaudio~=2.2.0" ]
Expand Down
Loading

0 comments on commit 26d401e

Please sign in to comment.