Enhance health check in docker-compose
Fix service launch issue

1. Update the Gaudi TGI image from 2.0.6 to 2.3.1.
2. Change the hpu-gaudi TGI health check condition.

The previous health check simply waited a fixed amount of time for the service to start. It was a workaround, because the health-check interface did not work reliably.

Now the check queries the TGI service's health endpoint directly:
https://huggingface.github.io/text-generation-inference/#/Text%20Generation%20Inference/health

Signed-off-by: Wang, Xigui <[email protected]>
xiguiw committed Jan 20, 2025
1 parent 239995d commit dd5cb74
Showing 18 changed files with 70 additions and 29 deletions.
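For reference, the endpoint these new checks poll is TGI's /health route (linked above); a minimal manual probe from the host, assuming the 3006 port mapping used in the AudioQnA files below, might look like this:

    # curl -f turns any non-2xx response into a non-zero exit code,
    # which is exactly the signal the compose healthchecks rely on.
    host_ip=$(hostname -I | awk '{print $1}')   # assumed way to pick the host address
    if curl -sf "http://${host_ip}:3006/health"; then
      echo "TGI is ready"
    else
      echo "TGI is still starting or unhealthy"
    fi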
5 changes: 5 additions & 0 deletions AudioQnA/docker_compose/intel/cpu/xeon/compose.yaml
@@ -37,6 +37,11 @@ services:
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
       HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+    healthcheck:
+      test: ["CMD-SHELL", "curl -f http://$host_ip:3006/health || exit 1"]
+      interval: 10s
+      timeout: 10s
+      retries: 100
     command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0
   audioqna-xeon-backend-server:
     image: ${REGISTRY:-opea}/audioqna:${TAG:-latest}
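With these values the probe runs every 10 s and gives up after 100 failures, so a slow model load has roughly 100 × 10 s ≈ 17 minutes to come up before the container is marked unhealthy. One way to watch the transition (the container name here is an assumption, not taken from this diff):

    # Health state moves from "starting" to "healthy" once the curl
    # probe first succeeds.
    watch -n 5 "docker inspect --format '{{.State.Health.Status}}' tgi-service"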
7 changes: 6 additions & 1 deletion AudioQnA/docker_compose/intel/hpu/gaudi/compose.yaml
@@ -35,7 +35,7 @@ services:
       - SYS_NICE
     restart: unless-stopped
   tgi-service:
-    image: ghcr.io/huggingface/tgi-gaudi:2.0.6
+    image: ghcr.io/huggingface/tgi-gaudi:2.3.1
     container_name: tgi-gaudi-server
     ports:
       - "3006:80"
@@ -54,6 +54,11 @@ services:
       LIMIT_HPU_GRAPH: true
       USE_FLASH_ATTENTION: true
       FLASH_ATTENTION_RECOMPUTE: true
+    healthcheck:
+      test: ["CMD-SHELL", "curl -f http://$host_ip:3006/health || exit 1"]
+      interval: 10s
+      timeout: 10s
+      retries: 100
     runtime: habana
     cap_add:
       - SYS_NICE
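Because the container now reports genuine health status, launch scripts can block on it instead of sleeping; a sketch, assuming Docker Compose v2:

    # --wait returns only after services that define a healthcheck
    # report healthy, replacing ad-hoc sleeps in launch scripts.
    docker compose -f compose.yaml up -d --wait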
1 change: 1 addition & 0 deletions AudioQnA/tests/test_compose_on_gaudi.sh
@@ -41,6 +41,7 @@ function start_services() {
     export LLM_SERVER_PORT=3006

     export BACKEND_SERVICE_ENDPOINT=http://${ip_address}:3008/v1/audioqna
+    export host_ip=${ip_address}
     # sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env

     # Start Docker Containers
1 change: 1 addition & 0 deletions AudioQnA/tests/test_compose_on_xeon.sh
@@ -41,6 +41,7 @@ function start_services() {
     export LLM_SERVER_PORT=3006

     export BACKEND_SERVICE_ENDPOINT=http://${ip_address}:3008/v1/audioqna
+    export host_ip=${ip_address}

     # sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env

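The new export matters because Compose interpolates $host_ip in the healthcheck string when the file is read; if it is unset, the probe curls an empty host and the check can never pass. A minimal sketch of the required ordering:

    # host_ip must be in the environment before `docker compose up`,
    # since the healthcheck URL is built from it at config time.
    export host_ip=${ip_address}
    docker compose -f compose.yaml up -d > ${LOG_PATH}/start_services_with_compose.log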
5 changes: 5 additions & 0 deletions AvatarChatbot/docker_compose/intel/cpu/xeon/compose.yaml
@@ -38,6 +38,11 @@ services:
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
       HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+    healthcheck:
+      test: ["CMD-SHELL", "curl -f http://${host_ip}:3006/health || exit 1"]
+      interval: 10s
+      timeout: 10s
+      retries: 100
     command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0
   wav2lip-service:
     image: ${REGISTRY:-opea}/wav2lip:${TAG:-latest}
5 changes: 5 additions & 0 deletions ChatQnA/docker_compose/intel/cpu/xeon/compose.yaml
@@ -89,6 +89,11 @@ services:
       HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
       LLM_MODEL_ID: ${LLM_MODEL_ID}
       VLLM_TORCH_PROFILER_DIR: "/mnt"
+    healthcheck:
+      test: ["CMD-SHELL", "curl -f http://$host_ip:9009/health || exit 1"]
+      interval: 10s
+      timeout: 10s
+      retries: 100
     command: --model $LLM_MODEL_ID --host 0.0.0.0 --port 80
   chatqna-xeon-backend-server:
     image: ${REGISTRY:-opea}/chatqna:${TAG:-latest}
5 changes: 5 additions & 0 deletions ChatQnA/docker_compose/intel/hpu/gaudi/compose.yaml
@@ -93,6 +93,11 @@ services:
       OMPI_MCA_btl_vader_single_copy_mechanism: none
       LLM_MODEL_ID: ${LLM_MODEL_ID}
       VLLM_TORCH_PROFILER_DIR: "/mnt"
+    healthcheck:
+      test: ["CMD-SHELL", "curl -f http://$host_ip:8007/health || exit 1"]
+      interval: 10s
+      timeout: 10s
+      retries: 100
     runtime: habana
     cap_add:
       - SYS_NICE
1 change: 1 addition & 0 deletions ChatQnA/tests/test_compose_on_gaudi.sh
@@ -35,6 +35,7 @@ function start_services() {
     export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
     export INDEX_NAME="rag-redis"
     export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+    export host_ip=${ip_address}

     # Start Docker Containers
     docker compose -f compose.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
1 change: 1 addition & 0 deletions ChatQnA/tests/test_compose_on_xeon.sh
@@ -36,6 +36,7 @@ function start_services() {
     export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
     export INDEX_NAME="rag-redis"
     export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+    export host_ip=${ip_address}

     # Start Docker Containers
     docker compose -f compose.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
10 changes: 5 additions & 5 deletions CodeGen/docker_compose/intel/hpu/gaudi/compose.yaml
@@ -3,7 +3,7 @@

 services:
   tgi-service:
-    image: ghcr.io/huggingface/tgi-gaudi:2.0.6
+    image: ghcr.io/huggingface/tgi-gaudi:2.3.1
     container_name: tgi-gaudi-server
     ports:
       - "8028:80"
@@ -21,10 +21,10 @@ services:
       USE_FLASH_ATTENTION: true
       FLASH_ATTENTION_RECOMPUTE: true
     healthcheck:
-      test: ["CMD-SHELL", "sleep 500 && exit 0"]
-      interval: 1s
-      timeout: 505s
-      retries: 1
+      test: ["CMD-SHELL", "curl -f http://$host_ip:8028/health || exit 1"]
+      interval: 10s
+      timeout: 10s
+      retries: 100
     runtime: habana
     cap_add:
       - SYS_NICE
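For contrast, the removed check never contacted the service at all: its test command slept 500 seconds and exited 0, so the container was declared healthy after a fixed delay regardless of TGI's real state. Back-of-envelope budgets for the two approaches:

    # Old check: one attempt, fixed ~500 s, always "passes".
    # New check: up to retries * interval in the worst case, but
    # succeeds as soon as /health responds.
    old_fixed_wait=500
    new_worst_case=$((100 * 10))
    echo "old: ${old_fixed_wait}s fixed; new: up to ${new_worst_case}s"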
10 changes: 5 additions & 5 deletions CodeTrans/docker_compose/intel/hpu/gaudi/compose.yaml
@@ -3,7 +3,7 @@

 services:
   tgi-service:
-    image: ghcr.io/huggingface/tgi-gaudi:2.0.6
+    image: ghcr.io/huggingface/tgi-gaudi:2.3.1
     container_name: codetrans-tgi-service
     ports:
       - "8008:80"
@@ -21,10 +21,10 @@ services:
       USE_FLASH_ATTENTION: true
       FLASH_ATTENTION_RECOMPUTE: true
     healthcheck:
-      test: ["CMD-SHELL", "sleep 500 && exit 0"]
-      interval: 1s
-      timeout: 505s
-      retries: 1
+      test: ["CMD-SHELL", "curl -f http://$host_ip:8008/health || exit 1"]
+      interval: 10s
+      timeout: 10s
+      retries: 100
     runtime: habana
     cap_add:
       - SYS_NICE
10 changes: 5 additions & 5 deletions FaqGen/docker_compose/intel/hpu/gaudi/compose.yaml
@@ -3,7 +3,7 @@

 services:
   tgi-service:
-    image: ghcr.io/huggingface/tgi-gaudi:2.0.6
+    image: ghcr.io/huggingface/tgi-gaudi:2.3.1
     container_name: tgi-gaudi-server
     ports:
       - ${LLM_ENDPOINT_PORT:-8008}:80
@@ -31,10 +31,10 @@ services:
       - SYS_NICE
     ipc: host
     healthcheck:
-      test: ["CMD-SHELL", "sleep 500 && exit 0"]
-      interval: 1s
-      timeout: 505s
-      retries: 1
+      test: ["CMD-SHELL", "curl -f http://${host_ip}:${LLM_ENDPOINT_PORT}/health || exit 1"]
+      interval: 10s
+      timeout: 10s
+      retries: 100
     command: --model-id ${LLM_MODEL_ID} --max-input-length 1024 --max-total-tokens 2048 --max-batch-total-tokens 65536 --max-batch-prefill-tokens 4096
   llm_faqgen:
     image: ${REGISTRY:-opea}/llm-faqgen:${TAG:-latest}
7 changes: 6 additions & 1 deletion MultimodalQnA/docker_compose/intel/hpu/gaudi/compose.yaml
@@ -91,7 +91,7 @@ services:
       RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_REDIS"
     restart: unless-stopped
   tgi-gaudi:
-    image: ghcr.io/huggingface/tgi-gaudi:2.0.6
+    image: ghcr.io/huggingface/tgi-gaudi:2.3.1
     container_name: tgi-llava-gaudi-server
     ports:
      - "8399:80"
@@ -110,6 +110,11 @@ services:
       LIMIT_HPU_GRAPH: true
       USE_FLASH_ATTENTION: true
       FLASH_ATTENTION_RECOMPUTE: true
+    healthcheck:
+      test: ["CMD-SHELL", "curl -f http://${host_ip}:8399/health || exit 1"]
+      interval: 10s
+      timeout: 10s
+      retries: 100
     runtime: habana
     cap_add:
       - SYS_NICE
10 changes: 5 additions & 5 deletions SearchQnA/docker_compose/intel/hpu/gaudi/compose.yaml
@@ -98,7 +98,7 @@ services:
       LOGFLAG: ${LOGFLAG}
     restart: unless-stopped
   tgi-service:
-    image: ghcr.io/huggingface/tgi-gaudi:2.0.6
+    image: ghcr.io/huggingface/tgi-gaudi:2.3.1
     container_name: tgi-gaudi-server
     ports:
       - "3006:80"
@@ -118,10 +118,10 @@ services:
       USE_FLASH_ATTENTION: true
       FLASH_ATTENTION_RECOMPUTE: true
     healthcheck:
-      test: ["CMD-SHELL", "sleep 800 && exit 0"]
-      interval: 1s
-      timeout: 805s
-      retries: 1
+      test: ["CMD-SHELL", "curl -f http://$host_ip:3006/health || exit 1"]
+      interval: 10s
+      timeout: 10s
+      retries: 100
     runtime: habana
     cap_add:
       - SYS_NICE
10 changes: 5 additions & 5 deletions Translation/docker_compose/intel/hpu/gaudi/compose.yaml
@@ -3,7 +3,7 @@

 services:
   tgi-service:
-    image: ghcr.io/huggingface/tgi-gaudi:2.0.6
+    image: ghcr.io/huggingface/tgi-gaudi:2.3.1
     container_name: tgi-gaudi-server
     ports:
       - "8008:80"
@@ -21,10 +21,10 @@ services:
       USE_FLASH_ATTENTION: true
       FLASH_ATTENTION_RECOMPUTE: true
     healthcheck:
-      test: ["CMD-SHELL", "sleep 500 && exit 0"]
-      interval: 1s
-      timeout: 505s
-      retries: 1
+      test: ["CMD-SHELL", "curl -f http://$host_ip:8008/health || exit 1"]
+      interval: 10s
+      timeout: 10s
+      retries: 100
     runtime: habana
     cap_add:
       - SYS_NICE
3 changes: 2 additions & 1 deletion Translation/tests/test_compose_on_gaudi.sh
@@ -22,7 +22,7 @@ function build_docker_images() {
     service_list="translation translation-ui llm-textgen nginx"
     docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

-    docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6
+    docker pull ghcr.io/huggingface/tgi-gaudi:2.3.1
     docker images && sleep 1s
 }

@@ -41,6 +41,7 @@ function start_services() {
     export BACKEND_SERVICE_NAME=translation
     export BACKEND_SERVICE_IP=${ip_address}
     export BACKEND_SERVICE_PORT=8888
+    export host_ip=${ip_address}

     sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env

7 changes: 6 additions & 1 deletion VisualQnA/docker_compose/intel/hpu/gaudi/compose.yaml
@@ -3,7 +3,7 @@

 services:
   llava-tgi-service:
-    image: ghcr.io/huggingface/tgi-gaudi:2.0.6
+    image: ghcr.io/huggingface/tgi-gaudi:2.3.1
     container_name: tgi-llava-gaudi-server
     ports:
       - "8399:80"
@@ -22,6 +22,11 @@ services:
       LIMIT_HPU_GRAPH: true
       USE_FLASH_ATTENTION: true
       FLASH_ATTENTION_RECOMPUTE: true
+    healthcheck:
+      test: ["CMD-SHELL", "curl -f http://$host_ip:8399/health || exit 1"]
+      interval: 10s
+      timeout: 10s
+      retries: 60
     runtime: habana
     cap_add:
       - SYS_NICE
1 change: 1 addition & 0 deletions VisualQnA/tests/test_compose_on_gaudi.sh
@@ -41,6 +41,7 @@ function start_services() {
     export BACKEND_SERVICE_IP=${ip_address}
     export BACKEND_SERVICE_PORT=8888
     export NGINX_PORT=80
+    export host_ip=${ip_address}

     sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env

