# docker-compose.yml
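# Usage (assuming this file sits at the repository root, since the build
# contexts below are relative to it):
#   docker compose up --build
# The GPU-backed services reserve NVIDIA devices through the compose
# deploy.resources syntax, so the host needs the NVIDIA Container Toolkit.
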
version: "3.8"

services:
  web:
    build:
      dockerfile: ./services/rtc/Dockerfile
    ports:
      - "8088:8088"
  rtc-peer:
    restart: "on-failure"
    build:
      dockerfile: ./services/rtc-peer/Dockerfile
    depends_on:
      - web
      - asr-faster-whisper
      - asr-seamlessm4t
      - chat-llama-cpp-python
    environment:
      BRIDGE_WEBRTC_URL: web:8088
      BRIDGE_WEBRTC_ROOM: test
      BRIDGE_TRANSCRIPTION: http://asr-faster-whisper:8000/v1/transcribe
      # BRIDGE_TRANSLATOR_audio_en: http://asr-faster-whisper:8000/v1/transcribe
      BRIDGE_TRANSLATOR_text_eng_en: http://asr-seamlessm4t:8000/v1/transcribe
      # TRANSCRIPTION_SERVICE: http://asr-whisperx:8000/transcribe
      # TRANSLATOR_SERVICE: http://asr-seamlessm4t:8000/translate
      BRIDGE_ASSISTANT_Bridge: http://chat-llama-cpp-python:8000/v1
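
  # llama.cpp chat backend built from llama-cpp-python's CUDA image. It serves
  # an OpenAI-compatible API on port 8000 (published to the host as 8089) and
  # loads the Airoboros L2 13B GGUF model onto the GPUs listed in
  # CUDA_VISIBLE_DEVICES.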
  chat-llama-cpp-python:
    build:
      dockerfile: docker/cuda_simple/Dockerfile
      context: ./services/chat-llama-cpp-python
      args:
        MODEL_ACCOUNT: TheBloke
        MODEL_TAG: llama
    ports:
      - "8089:8000"
    command:
      - "--hf_model=TheBloke/Airoboros-L2-13B-2.2-GGUF/airoboros-l2-13b-2.2.Q5_K_M.gguf"
      - "--n_gpu_layers=43"
    environment:
      USE_MLOCK: 0
      TORCH_HOME: /cache/torch
      CUDA_DEVICE_ORDER: PCI_BUS_ID
      CUDA_VISIBLE_DEVICES: 1,0
      HF_HOME: /cache/huggingface
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    volumes:
      - torch-cache:/cache/torch
      - huggingface-cache:/cache/huggingface
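
  # Transcription backend: faster-whisper with the large-v2 model in float16,
  # pinned to GPU 0 via CUDA_VISIBLE_DEVICES.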
  asr-faster-whisper:
    restart: "on-failure"
    build:
      dockerfile: ./services/asr-faster-whisper/Dockerfile
    environment:
      MODEL_SIZE: large-v2
      MODEL_DEVICE: cuda
      MODEL_COMPUTE_TYPE: float16
      TORCH_HOME: /cache/torch
      CUDA_DEVICE_ORDER: PCI_BUS_ID
      CUDA_VISIBLE_DEVICES: 0
      HF_HOME: /cache/huggingface
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    volumes:
      - torch-cache:/cache/torch
      - huggingface-cache:/cache/huggingface
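
  # Translation backend: SeamlessM4T large in float32. No CUDA_VISIBLE_DEVICES
  # pin here, so it can see every GPU the runtime exposes.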
  asr-seamlessm4t:
    restart: "on-failure"
    build:
      dockerfile: ./services/asr-seamlessm4t/Dockerfile
    environment:
      MODEL_SIZE: seamlessM4T_large
      MODEL_DEVICE: cuda
      MODEL_COMPUTE_TYPE: float32
      TORCH_HOME: /cache/torch
      HF_HOME: /cache/huggingface
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    volumes:
      - torch-cache:/cache/torch
      - huggingface-cache:/cache/huggingface
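
# Named volumes shared by the model services so Torch and Hugging Face
# downloads are cached across container rebuilds.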
volumes:
  torch-cache: {}
  huggingface-cache: {}
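
# Quick smoke test from the host once the stack is up (a sketch; the
# llama-cpp-python server exposes OpenAI-style routes such as /v1/models):
#   curl http://localhost:8089/v1/models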