Skip to content

Commit

Permalink
Modularize tricky dependencies (#95)
Browse files Browse the repository at this point in the history
* removed pyaudio from threaded transport

* modularized torch and torchaudio

* modularized local transport

* Working Dockerfile as well

* docker updates for fly.io
  • Loading branch information
chadbailey59 authored Apr 3, 2024
1 parent c210148 commit 2f59e38
Show file tree
Hide file tree
Showing 8 changed files with 69 additions and 37 deletions.
30 changes: 30 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# flyctl launch added from .gitignore
**/.vscode
**/env
**/__pycache__
**/*~
**/venv
#*#

# Distribution / packaging
**/.Python
**/build
**/develop-eggs
**/dist
**/downloads
**/eggs
**/.eggs
**/lib
**/lib64
**/parts
**/sdist
**/var
**/wheels
**/share/python-wheels
**/*.egg-info
**/.installed.cfg
**/*.egg
**/MANIFEST
**/.DS_Store
**/.env
fly.toml
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,4 @@ share/python-wheels/
MANIFEST
.DS_Store
.env
fly.toml
5 changes: 3 additions & 2 deletions examples/server/Dockerfile → Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,14 @@ COPY *.py /app
COPY pyproject.toml /app

COPY src/ /app/src/
COPY examples/ /app/examples/

WORKDIR /app
RUN ls --recursive /app/
RUN pip3 install --upgrade -r requirements.txt
RUN python -m build .
RUN pip3 install .

RUN pip3 install gunicorn
# If running on Ubuntu, Azure TTS requires some extra config
# https://learn.microsoft.com/en-us/azure/ai-services/speech-service/quickstarts/setup-platform?pivots=programming-language-python&tabs=linux%2Cubuntu%2Cdotnetcli%2Cdotnet%2Cjre%2Cmaven%2Cnodejs%2Cmac%2Cpypi

Expand All @@ -36,4 +37,4 @@ WORKDIR /app

EXPOSE 8000
# run
CMD ["gunicorn", "--workers=2", "--log-level", "debug", "--capture-output", "daily-bot-manager:app", "--bind=0.0.0.0:8000"]
CMD ["gunicorn", "--workers=2", "--log-level", "debug", "--chdir", "examples/server", "--capture-output", "daily-bot-manager:app", "--bind=0.0.0.0:8000"]
10 changes: 5 additions & 5 deletions examples/server/daily-bot-manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,10 @@
CORS(app)

APPS = {
"chatbot": "examples/starter-apps/chatbot.py",
"patient-intake": "examples/starter-apps/patient-intake.py",
"storybot": "examples/starter-apps/storybot.py",
"translator": "examples/starter-apps/translator.py"
"chatbot": "../starter-apps/chatbot.py",
"patient-intake": "../starter-apps/patient-intake.py",
"storybot": "../starter-apps/storybot.py",
"translator": "../starter-apps/translator.py"
}

daily_api_key = os.getenv("DAILY_API_KEY")
Expand Down Expand Up @@ -157,7 +157,7 @@ def start(botname):
else:
return jsonify({"room_url": room_url, "token": token})
except BaseException as e:
return "There was a problem starting the bot: {e}", 500
return f"There was a problem starting the bot: {e}", 500


@app.route("/healthz")
Expand Down
3 changes: 1 addition & 2 deletions examples/starter-apps/translator.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,6 @@ async def main(room_url: str, token):
mic_enabled=True,
mic_sample_rate=16000,
camera_enabled=False,
vad_enabled=True,
)
tts = AzureTTSService(
api_key=os.getenv("AZURE_SPEECH_API_KEY"),
Expand All @@ -98,7 +97,7 @@ async def main(room_url: str, token):
tp = TranslationProcessor("Spanish")
lfra = LLMFullResponseAggregator()
ts = TranslationSubtitles("spanish")
pipeline = Pipeline([sa, tp, llm, lfra, ts])
pipeline = Pipeline([sa, tp, llm, lfra, ts, tts])

transport.transcription_settings["extra"]["endpointing"] = True
transport.transcription_settings["extra"]["punctuate"] = True
Expand Down
5 changes: 2 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,15 +26,14 @@ dependencies = [
"daily-python",
"fal",
"faster_whisper",
"flask",
"flask_cors",
"google-cloud-texttospeech",
"numpy",
"openai",
"Pillow",
"pyht",
"python-dotenv",
"torch",
"torchaudio",
"pyaudio",
"typing-extensions",
"websockets"
]
Expand Down
8 changes: 7 additions & 1 deletion src/dailyai/transports/local_transport.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,20 @@
import asyncio
import numpy as np
import tkinter as tk
import pyaudio

from dailyai.transports.threaded_transport import ThreadedTransport


class LocalTransport(ThreadedTransport):
def __init__(self, **kwargs):
super().__init__(**kwargs)
try:
global pyaudio
import pyaudio
except ModuleNotFoundError as e:
print(f"Exception: {e}")
print("In order to use the local transport, you'll need to `pip install pyaudio`. On MacOS, you'll also need to `brew install portaudio`.")
raise Exception(f"Missing module: {e}")
self._sample_width = kwargs.get("sample_width") or 2
self._n_channels = kwargs.get("n_channels") or 1
self._tk_root = kwargs.get("tk_root") or None
Expand Down
44 changes: 20 additions & 24 deletions src/dailyai/transports/threaded_transport.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,7 @@
import itertools
import logging
import numpy as np
import pyaudio
import torch

import queue
import threading
import time
Expand All @@ -29,22 +28,6 @@
from dailyai.services.ai_services import TTSService
from dailyai.transports.abstract_transport import AbstractTransport

torch.set_num_threads(1)

model, utils = torch.hub.load(
repo_or_dir="snakers4/silero-vad", model="silero_vad", force_reload=False
)

(get_speech_timestamps, save_audio, read_audio, VADIterator, collect_chunks) = utils

# Taken from utils_vad.py


def validate(model, inputs: torch.Tensor):
with torch.no_grad():
outs = model(inputs)
return outs


# Provided by Alexander Veysov

Expand All @@ -58,12 +41,7 @@ def int2float(sound):
return sound


FORMAT = pyaudio.paInt16
CHANNELS = 1
SAMPLE_RATE = 16000
CHUNK = int(SAMPLE_RATE / 10)

audio = pyaudio.PyAudio()


class VADState(Enum):
Expand All @@ -90,6 +68,24 @@ def __init__(
"Sorry, you can't use speaker_enabled and vad_enabled at the same time. Please set one to False."
)

if self._vad_enabled:
try:
global torch, torchaudio
import torch
# torchaudio is not used directly here, but Silero depends on it, so import it
# up front: a missing install then surfaces as the clear error raised below.
import torchaudio
torch.set_num_threads(1)

(self.model, self.utils) = torch.hub.load(
repo_or_dir="snakers4/silero-vad", model="silero_vad", force_reload=False
)

except ModuleNotFoundError as e:
print(f"Exception: {e}")
print("In order to use VAD, you'll need to install the `torch` and `torchaudio` modules.")
raise Exception(f"Missing module(s): {e}")

self._vad_samples = 1536
vad_frame_s = self._vad_samples / SAMPLE_RATE
self._vad_start_frames = round(self._vad_start_s / vad_frame_s)
Expand Down Expand Up @@ -276,7 +272,7 @@ def _vad(self):
audio_chunk = self.read_audio_frames(self._vad_samples)
audio_int16 = np.frombuffer(audio_chunk, np.int16)
audio_float32 = int2float(audio_int16)
new_confidence = model(
new_confidence = self.model(
torch.from_numpy(audio_float32), 16000).item()
speaking = new_confidence > 0.5

Expand Down

0 comments on commit 2f59e38

Please sign in to comment.