From 7b49c9ade37189b1aba3daa45416d1f67e2ce916 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleix=20Conchillo=20Flaqu=C3=A9?= Date: Thu, 11 Apr 2024 14:09:01 -0700 Subject: [PATCH 1/4] services: FalImageGenService now uses fal-client library --- dot-env.template | 3 +- examples/foundational/03-still-frame.py | 3 +- examples/foundational/03a-image-local.py | 3 +- .../foundational/05-sync-speech-and-image.py | 3 +- .../05a-local-sync-speech-and-text.py | 3 +- examples/foundational/08-bots-arguing.py | 3 +- examples/starter-apps/storybot.py | 3 +- pyproject.toml | 2 +- src/dailyai/services/fal_ai_services.py | 40 ++++++------------- 9 files changed, 21 insertions(+), 42 deletions(-) diff --git a/dot-env.template b/dot-env.template index 06b8a36eb..b50d48b16 100644 --- a/dot-env.template +++ b/dot-env.template @@ -22,8 +22,7 @@ ELEVENLABS_API_KEY=... ELEVENLABS_VOICE_ID=... # Fal -FAL_KEY_ID=... -FAL_KEY_SECRET=... +FAL_KEY=... # PlayHT PLAY_HT_USER_ID=... diff --git a/examples/foundational/03-still-frame.py b/examples/foundational/03-still-frame.py index 3e371da44..51ef47de8 100644 --- a/examples/foundational/03-still-frame.py +++ b/examples/foundational/03-still-frame.py @@ -35,8 +35,7 @@ async def main(room_url): image_size="square_hd" ), aiohttp_session=session, - key_id=os.getenv("FAL_KEY_ID"), - key_secret=os.getenv("FAL_KEY_SECRET"), + key=os.getenv("FAL_KEY"), ) pipeline = Pipeline([imagegen]) diff --git a/examples/foundational/03a-image-local.py b/examples/foundational/03a-image-local.py index 1263ebd5f..f213f505b 100644 --- a/examples/foundational/03a-image-local.py +++ b/examples/foundational/03a-image-local.py @@ -39,8 +39,7 @@ async def main(): image_size="square_hd" ), aiohttp_session=session, - key_id=os.getenv("FAL_KEY_ID"), - key_secret=os.getenv("FAL_KEY_SECRET"), + key=os.getenv("FAL_KEY"), ) pipeline = Pipeline([imagegen]) diff --git a/examples/foundational/05-sync-speech-and-image.py b/examples/foundational/05-sync-speech-and-image.py index c036f06f6..377e8579b 100644 --- a/examples/foundational/05-sync-speech-and-image.py +++ b/examples/foundational/05-sync-speech-and-image.py @@ -89,8 +89,7 @@ async def main(room_url): image_size="square_hd" ), aiohttp_session=session, - key_id=os.getenv("FAL_KEY_ID"), - key_secret=os.getenv("FAL_KEY_SECRET"), + key=os.getenv("FAL_KEY"), ) gated_aggregator = GatedAggregator( diff --git a/examples/foundational/05a-local-sync-speech-and-text.py b/examples/foundational/05a-local-sync-speech-and-text.py index 9410f0601..7c4cf0186 100644 --- a/examples/foundational/05a-local-sync-speech-and-text.py +++ b/examples/foundational/05a-local-sync-speech-and-text.py @@ -49,8 +49,7 @@ async def main(): image_size="1024x1024" ), aiohttp_session=session, - key_id=os.getenv("FAL_KEY_ID"), - key_secret=os.getenv("FAL_KEY_SECRET"), + key=os.getenv("FAL_KEY"), ) # Get a complete audio chunk from the given text. Splitting this into its own diff --git a/examples/foundational/08-bots-arguing.py b/examples/foundational/08-bots-arguing.py index 82b7c0f81..ea6208827 100644 --- a/examples/foundational/08-bots-arguing.py +++ b/examples/foundational/08-bots-arguing.py @@ -55,8 +55,7 @@ async def main(room_url: str): image_size="1024x1024" ), aiohttp_session=session, - key_id=os.getenv("FAL_KEY_ID"), - key_secret=os.getenv("FAL_KEY_SECRET"), + key=os.getenv("FAL_KEY"), ) bot1_messages = [ diff --git a/examples/starter-apps/storybot.py b/examples/starter-apps/storybot.py index 3d54c4dd1..69be94095 100644 --- a/examples/starter-apps/storybot.py +++ b/examples/starter-apps/storybot.py @@ -208,8 +208,7 @@ async def main(room_url: str, token): image_size = "1024x1024", }, aiohttp_session=session, - key_id=os.getenv("FAL_KEY_ID"), - key_secret=os.getenv("FAL_KEY_SECRET"), + key=os.getenv("FAL_KEY"), ) lra = LLMAssistantResponseAggregator(messages) ura = LLMUserResponseAggregator(messages) diff --git a/pyproject.toml b/pyproject.toml index c42452d4d..920c9aa8c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,7 +35,7 @@ anthropic = [ "anthropic~=0.20.0" ] azure = [ "azure-cognitiveservices-speech~=1.36.0" ] daily = [ "daily-python~=0.7.0" ] examples = [ "python-dotenv~=1.0.0", "flask~=3.0.0", "flask_cors~=4.0.0" ] -fal = [ "fal~=0.12.0" ] +fal = [ "fal-client~=0.2.0" ] local = [ "pyaudio~=0.2.0" ] moondream = [ "einops~=0.7.0", "timm~=0.9.0", "transformers~=4.39.0" ] openai = [ "openai~=1.14.0" ] diff --git a/src/dailyai/services/fal_ai_services.py b/src/dailyai/services/fal_ai_services.py index 5c1d15151..a924607d2 100644 --- a/src/dailyai/services/fal_ai_services.py +++ b/src/dailyai/services/fal_ai_services.py @@ -6,14 +6,15 @@ from pydantic import BaseModel from typing import Optional, Union, Dict + from dailyai.services.ai_services import ImageGenService try: - import fal + import fal_client except ModuleNotFoundError as e: print(f"Exception: {e}") print( - "In order to use Fal, you need to `pip install dailyai[fal]`. Also, set `FAL_KEY_ID` and `FAL_KEY_SECRET` environment variables.") + "In order to use Fal, you need to `pip install dailyai[fal]`. Also, set `FAL_KEY` environment variable.") raise Exception(f"Missing module: {e}") @@ -33,40 +34,25 @@ def __init__( aiohttp_session: aiohttp.ClientSession, params: InputParams, model="fal-ai/fast-sdxl", - key_id=None, - key_secret=None + key=None, ): super().__init__() self._model = model self._params = params self._aiohttp_session = aiohttp_session - if key_id: - os.environ["FAL_KEY_ID"] = key_id - if key_secret: - os.environ["FAL_KEY_SECRET"] = key_secret + if key: + os.environ["FAL_KEY"] = key async def run_image_gen(self, prompt: str) -> tuple[str, bytes, tuple[int, int]]: - def get_image_url(prompt): - handler = fal.apps.submit( # type: ignore - self._model, - arguments={ - "prompt": prompt, - **self._params.dict(), - }, - ) - for event in handler.iter_events(): - if isinstance(event, fal.apps.InProgress): # type: ignore - pass - - result = handler.get() - - image_url = result["images"][0]["url"] if result else None - if not image_url: - raise Exception("Image generation failed") + response = await fal_client.run_async( + self._model, + arguments={"prompt": prompt, **self._params.dict()} + ) - return image_url + image_url = response["images"][0]["url"] if response else None - image_url = await asyncio.to_thread(get_image_url, prompt) + if not image_url: + raise Exception("Image generation failed") # Load the image from the url async with self._aiohttp_session.get(image_url) as response: From 103092dbb2648e7711e5c2b5e9c86f401a9de5a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleix=20Conchillo=20Flaqu=C3=A9?= Date: Thu, 11 Apr 2024 14:13:59 -0700 Subject: [PATCH 2/4] update linux-py3.10 requirements --- linux-py3.10-requirements.txt | 126 +++++----------------------------- 1 file changed, 16 insertions(+), 110 deletions(-) diff --git a/linux-py3.10-requirements.txt b/linux-py3.10-requirements.txt index 80f010a9c..af59e3ef6 100644 --- a/linux-py3.10-requirements.txt +++ b/linux-py3.10-requirements.txt @@ -4,10 +4,12 @@ # # pip-compile --all-extras pyproject.toml # -aiohttp==3.9.3 +aiohttp==3.9.4 # via dailyai (pyproject.toml) aiosignal==1.3.1 # via aiohttp +annotated-types==0.6.0 + # via pydantic anthropic==0.20.0 # via dailyai (pyproject.toml) anyio==4.3.0 @@ -15,13 +17,10 @@ anyio==4.3.0 # anthropic # httpx # openai - # starlette async-timeout==4.0.3 # via aiohttp attrs==23.2.0 - # via - # aiohttp - # fal + # via aiohttp av==11.0.0 # via faster-whisper azure-cognitiveservices-speech==1.36.0 @@ -33,31 +32,16 @@ certifi==2024.2.2 # httpcore # httpx # requests -cffi==1.16.0 - # via cryptography charset-normalizer==3.3.2 # via requests click==8.1.7 - # via - # fal - # flask - # rich-click -colorama==0.4.6 - # via fal + # via flask coloredlogs==15.0.1 # via onnxruntime -cryptography==42.0.5 - # via pyjwt ctranslate2==4.1.0 # via faster-whisper daily-python==0.7.3 # via dailyai (pyproject.toml) -deprecated==1.2.14 - # via opentelemetry-api -dill==0.3.7 - # via fal -distlib==0.3.8 - # via virtualenv distro==1.9.0 # via # anthropic @@ -66,10 +50,8 @@ einops==0.7.0 # via dailyai (pyproject.toml) exceptiongroup==1.2.0 # via anyio -fal==0.12.7 +fal-client==0.2.2 # via dailyai (pyproject.toml) -fastapi==0.99.1 - # via fal faster-whisper==1.0.1 # via dailyai (pyproject.toml) filelock==3.13.4 @@ -79,7 +61,6 @@ filelock==3.13.4 # torch # transformers # triton - # virtualenv flask==3.0.3 # via # dailyai (pyproject.toml) @@ -96,15 +77,8 @@ fsspec==2024.3.1 # via # huggingface-hub # torch -grpc-interceptor==0.15.4 - # via fal grpcio==1.62.1 - # via - # fal - # grpc-interceptor - # isolate - # isolate-proto - # pyht + # via pyht h11==0.14.0 # via httpcore httpcore==1.0.5 @@ -112,8 +86,10 @@ httpcore==1.0.5 httpx==0.27.0 # via # anthropic - # fal + # fal-client # openai +httpx-sse==0.4.0 + # via fal-client huggingface-hub==0.22.2 # via # faster-whisper @@ -122,38 +98,24 @@ huggingface-hub==0.22.2 # transformers humanfriendly==10.0 # via coloredlogs -idna==3.6 +idna==3.7 # via # anyio # httpx # requests # yarl -importlib-metadata==7.0.0 - # via opentelemetry-api -isolate[build]==0.12.7 - # via - # fal - # isolate-proto -isolate-proto==0.3.4 - # via fal itsdangerous==2.1.2 # via flask jinja2==3.1.3 # via # flask # torch -markdown-it-py==3.0.0 - # via rich markupsafe==2.1.5 # via # jinja2 # werkzeug -mdurl==0.1.2 - # via markdown-it-py mpmath==1.3.0 # via sympy -msgpack==1.0.8 - # via fal multidict==6.0.5 # via # aiohttp @@ -202,64 +164,35 @@ onnxruntime==1.17.1 # via faster-whisper openai==1.14.3 # via dailyai (pyproject.toml) -opentelemetry-api==1.24.0 - # via - # fal - # opentelemetry-sdk -opentelemetry-sdk==1.24.0 - # via fal -opentelemetry-semantic-conventions==0.45b0 - # via opentelemetry-sdk packaging==24.0 # via - # fal # huggingface-hub # onnxruntime # transformers -pathspec==0.11.2 - # via fal pillow==10.2.0 # via # dailyai (pyproject.toml) - # fal # torchvision -platformdirs==4.2.0 - # via - # isolate - # virtualenv -portalocker==2.8.2 - # via fal protobuf==4.25.3 # via - # isolate - # isolate-proto # onnxruntime # pyht pyaudio==0.2.14 # via dailyai (pyproject.toml) -pycparser==2.22 - # via cffi -pydantic==1.10.15 +pydantic==2.7.0 # via # anthropic - # fal - # fastapi # openai -pygments==2.17.2 - # via rich +pydantic-core==2.18.1 + # via pydantic pyht==0.0.27 # via dailyai (pyproject.toml) -pyjwt[crypto]==2.8.0 - # via fal -python-dateutil==2.9.0.post0 - # via fal python-dotenv==1.0.1 # via dailyai (pyproject.toml) pyyaml==6.0.1 # via # ctranslate2 # huggingface-hub - # isolate # timm # transformers regex==2023.12.25 @@ -269,34 +202,20 @@ requests==2.31.0 # huggingface-hub # pyht # transformers -rich==13.7.1 - # via - # fal - # rich-click -rich-click==1.7.4 - # via fal safetensors==0.4.2 # via # timm # transformers -six==1.16.0 - # via python-dateutil sniffio==1.3.1 # via # anthropic # anyio # httpx # openai -starlette==0.27.0 - # via fastapi -structlog==22.3.0 - # via fal sympy==1.12 # via # onnxruntime # torch -tblib==3.0.0 - # via isolate timm==0.9.16 # via dailyai (pyproject.toml) tokenizers==0.15.2 @@ -323,37 +242,24 @@ transformers==4.39.3 # via dailyai (pyproject.toml) triton==2.2.0 # via torch -types-python-dateutil==2.9.0.20240316 - # via fal typing-extensions==4.10.0 # via # anthropic # anyio # dailyai (pyproject.toml) - # fal - # fastapi # huggingface-hub # openai - # opentelemetry-sdk # pydantic - # rich-click + # pydantic-core # torch urllib3==2.2.1 # via requests -virtualenv==20.25.1 - # via isolate websockets==12.0 - # via - # dailyai (pyproject.toml) - # fal + # via dailyai (pyproject.toml) werkzeug==3.0.2 # via flask -wrapt==1.16.0 - # via deprecated yarl==1.9.4 # via aiohttp -zipp==3.18.1 - # via importlib-metadata # The following packages are considered to be unsafe in a requirements file: # setuptools From b0faafc184b11478d2b17a0ba4c20eb91e283faf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleix=20Conchillo=20Flaqu=C3=A9?= Date: Thu, 11 Apr 2024 14:16:19 -0700 Subject: [PATCH 3/4] update macos-py3.10 requirements --- macos-py3.10-requirements.txt | 128 +++++----------------------------- 1 file changed, 17 insertions(+), 111 deletions(-) diff --git a/macos-py3.10-requirements.txt b/macos-py3.10-requirements.txt index ae0a24e0a..d97541db8 100644 --- a/macos-py3.10-requirements.txt +++ b/macos-py3.10-requirements.txt @@ -4,10 +4,12 @@ # # pip-compile --all-extras pyproject.toml # -aiohttp==3.9.3 +aiohttp==3.9.4 # via dailyai (pyproject.toml) aiosignal==1.3.1 # via aiohttp +annotated-types==0.6.0 + # via pydantic anthropic==0.20.0 # via dailyai (pyproject.toml) anyio==4.3.0 @@ -15,13 +17,10 @@ anyio==4.3.0 # anthropic # httpx # openai - # starlette async-timeout==4.0.3 # via aiohttp attrs==23.2.0 - # via - # aiohttp - # fal + # via aiohttp av==11.0.0 # via faster-whisper azure-cognitiveservices-speech==1.36.0 @@ -33,31 +32,16 @@ certifi==2024.2.2 # httpcore # httpx # requests -cffi==1.16.0 - # via cryptography charset-normalizer==3.3.2 # via requests click==8.1.7 - # via - # fal - # flask - # rich-click -colorama==0.4.6 - # via fal + # via flask coloredlogs==15.0.1 # via onnxruntime -cryptography==42.0.5 - # via pyjwt -ctranslate2==4.1.0 +ctranslate2==4.2.0 # via faster-whisper daily-python==0.7.3 # via dailyai (pyproject.toml) -deprecated==1.2.14 - # via opentelemetry-api -dill==0.3.7 - # via fal -distlib==0.3.8 - # via virtualenv distro==1.9.0 # via # anthropic @@ -66,10 +50,8 @@ einops==0.7.0 # via dailyai (pyproject.toml) exceptiongroup==1.2.0 # via anyio -fal==0.12.7 +fal-client==0.2.2 # via dailyai (pyproject.toml) -fastapi==0.99.1 - # via fal faster-whisper==1.0.1 # via dailyai (pyproject.toml) filelock==3.13.4 @@ -78,7 +60,6 @@ filelock==3.13.4 # pyht # torch # transformers - # virtualenv flask==3.0.3 # via # dailyai (pyproject.toml) @@ -95,15 +76,8 @@ fsspec==2024.3.1 # via # huggingface-hub # torch -grpc-interceptor==0.15.4 - # via fal grpcio==1.62.1 - # via - # fal - # grpc-interceptor - # isolate - # isolate-proto - # pyht + # via pyht h11==0.14.0 # via httpcore httpcore==1.0.5 @@ -111,8 +85,10 @@ httpcore==1.0.5 httpx==0.27.0 # via # anthropic - # fal + # fal-client # openai +httpx-sse==0.4.0 + # via fal-client huggingface-hub==0.22.2 # via # faster-whisper @@ -121,38 +97,24 @@ huggingface-hub==0.22.2 # transformers humanfriendly==10.0 # via coloredlogs -idna==3.6 +idna==3.7 # via # anyio # httpx # requests # yarl -importlib-metadata==7.0.0 - # via opentelemetry-api -isolate[build]==0.12.7 - # via - # fal - # isolate-proto -isolate-proto==0.3.4 - # via fal itsdangerous==2.1.2 # via flask jinja2==3.1.3 # via # flask # torch -markdown-it-py==3.0.0 - # via rich markupsafe==2.1.5 # via # jinja2 # werkzeug -mdurl==0.1.2 - # via markdown-it-py mpmath==1.3.0 # via sympy -msgpack==1.0.8 - # via fal multidict==6.0.5 # via # aiohttp @@ -170,64 +132,35 @@ onnxruntime==1.17.1 # via faster-whisper openai==1.14.3 # via dailyai (pyproject.toml) -opentelemetry-api==1.24.0 - # via - # fal - # opentelemetry-sdk -opentelemetry-sdk==1.24.0 - # via fal -opentelemetry-semantic-conventions==0.45b0 - # via opentelemetry-sdk packaging==24.0 # via - # fal # huggingface-hub # onnxruntime # transformers -pathspec==0.11.2 - # via fal pillow==10.2.0 # via # dailyai (pyproject.toml) - # fal # torchvision -platformdirs==4.2.0 - # via - # isolate - # virtualenv -portalocker==2.8.2 - # via fal protobuf==4.25.3 # via - # isolate - # isolate-proto # onnxruntime # pyht pyaudio==0.2.14 # via dailyai (pyproject.toml) -pycparser==2.22 - # via cffi -pydantic==1.10.15 +pydantic==2.7.0 # via # anthropic - # fal - # fastapi # openai -pygments==2.17.2 - # via rich +pydantic-core==2.18.1 + # via pydantic pyht==0.0.27 # via dailyai (pyproject.toml) -pyjwt[crypto]==2.8.0 - # via fal -python-dateutil==2.9.0.post0 - # via fal python-dotenv==1.0.1 # via dailyai (pyproject.toml) pyyaml==6.0.1 # via # ctranslate2 # huggingface-hub - # isolate # timm # transformers regex==2023.12.25 @@ -237,34 +170,20 @@ requests==2.31.0 # huggingface-hub # pyht # transformers -rich==13.7.1 - # via - # fal - # rich-click -rich-click==1.7.4 - # via fal safetensors==0.4.2 # via # timm # transformers -six==1.16.0 - # via python-dateutil sniffio==1.3.1 # via # anthropic # anyio # httpx # openai -starlette==0.27.0 - # via fastapi -structlog==22.3.0 - # via fal sympy==1.12 # via # onnxruntime # torch -tblib==3.0.0 - # via isolate timm==0.9.16 # via dailyai (pyproject.toml) tokenizers==0.15.2 @@ -289,37 +208,24 @@ tqdm==4.66.2 # transformers transformers==4.39.3 # via dailyai (pyproject.toml) -types-python-dateutil==2.9.0.20240316 - # via fal typing-extensions==4.10.0 # via # anthropic # anyio # dailyai (pyproject.toml) - # fal - # fastapi # huggingface-hub # openai - # opentelemetry-sdk # pydantic - # rich-click + # pydantic-core # torch urllib3==2.2.1 # via requests -virtualenv==20.25.1 - # via isolate websockets==12.0 - # via - # dailyai (pyproject.toml) - # fal + # via dailyai (pyproject.toml) werkzeug==3.0.2 # via flask -wrapt==1.16.0 - # via deprecated yarl==1.9.4 # via aiohttp -zipp==3.18.1 - # via importlib-metadata # The following packages are considered to be unsafe in a requirements file: # setuptools From cbc51babbe5255bc1378d6d5ec434f7b62866249 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleix=20Conchillo=20Flaqu=C3=A9?= Date: Thu, 11 Apr 2024 14:22:44 -0700 Subject: [PATCH 4/4] services: use asyncio to_thread in moondreamservice --- src/dailyai/services/moondream_ai_service.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/src/dailyai/services/moondream_ai_service.py b/src/dailyai/services/moondream_ai_service.py index 2b9835404..704d4c51b 100644 --- a/src/dailyai/services/moondream_ai_service.py +++ b/src/dailyai/services/moondream_ai_service.py @@ -1,3 +1,5 @@ +import asyncio + from dailyai.pipeline.frames import ImageFrame, VisionImageFrame from dailyai.services.ai_services import VisionService @@ -43,10 +45,15 @@ def __init__( self._model.eval() async def run_vision(self, frame: VisionImageFrame) -> str: - image = Image.frombytes("RGB", (frame.size[0], frame.size[1]), frame.image) - image_embeds = self._model.encode_image(image) - description = self._model.answer_question( - image_embeds=image_embeds, - question=frame.text, - tokenizer=self._tokenizer) + def get_image_description(frame: VisionImageFrame): + image = Image.frombytes("RGB", (frame.size[0], frame.size[1]), frame.image) + image_embeds = self._model.encode_image(image) + description = self._model.answer_question( + image_embeds=image_embeds, + question=frame.text, + tokenizer=self._tokenizer) + return description + + description = await asyncio.to_thread(get_image_description, frame) + return description