docker-compose.yml
version: '3.8'

services:
  controller_service:
    build:
      context: ./controller            # Path to the agent service
    ports:
      - "8000:8000"
    container_name: controller_service
    networks:
      - app-network                    # Attach the service to the shared network

  llamafile_service:
    build:
      context: ./llamafile             # Path to the LLM backend service
      args:
        MODEL_URL: ${MODEL_URL}                       # Model URL from the .env file
        MODEL_NAME: ${MODEL_NAME}                     # Model name from the .env file
        MODEL_PORT: ${MODEL_PORT}                     # Model port from the .env file
        MODEL_THREADS: ${MODEL_THREADS}               # Thread count from the .env file
        MODEL_BATCH: ${MODEL_BATCH}                   # Batch size from the .env file
        MODEL_CONTEXT: ${MODEL_CONTEXT}               # Context size from the .env file
        MODEL_NUM_PROCESSORS: ${MODEL_NUM_PROCESSORS} # Processor count from the .env file
    ports:
      - "${MODEL_PORT}:${MODEL_PORT}"  # Use MODEL_PORT from .env for both host and container port
    runtime: nvidia                    # Use the NVIDIA runtime for GPU access
    environment:
      - NVIDIA_VISIBLE_DEVICES=all     # Expose all GPUs to the container
      - MODEL_URL=${MODEL_URL}         # Pass the model URL into the container
      - MODEL_NAME=${MODEL_NAME}       # Pass the model name into the container
      - MODEL_PORT=${MODEL_PORT}       # Pass the port into the container
      - MODEL_THREADS=${MODEL_THREADS} # Pass the thread count into the container
      - MODEL_BATCH=${MODEL_BATCH}     # Pass the batch size into the container
      - MODEL_CONTEXT=${MODEL_CONTEXT} # Pass the context size into the container
      - MODEL_NUM_PROCESSORS=${MODEL_NUM_PROCESSORS} # Pass the processor count into the container
    networks:
      - app-network                    # Attach the service to the shared network

networks:
  app-network:
    driver: bridge                     # Default bridge network driver
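
The compose file expects a .env file alongside it that defines the MODEL_* variables used in the build args, ports, and environment sections. A minimal sketch follows; every value here is a placeholder assumption (the URL, filename, and tuning numbers are illustrative, not the project's actual settings), so adjust them for your model and hardware:

    # .env — placeholder values, not the project's actual configuration
    MODEL_URL=https://example.com/models/model.llamafile   # hypothetical download URL
    MODEL_NAME=model.llamafile                             # hypothetical model filename
    MODEL_PORT=8080                                        # host/container port for the LLM backend
    MODEL_THREADS=8                                        # inference thread count
    MODEL_BATCH=512                                        # batch size
    MODEL_CONTEXT=2048                                     # context window size
    MODEL_NUM_PROCESSORS=4                                 # processor count

With the .env file in place, both services can be built and started with:

    docker compose up --build

Note that `runtime: nvidia` requires the NVIDIA Container Toolkit to be installed and registered with the Docker daemon on the host.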