Refactor docsum (opea-project#1336)
Signed-off-by: Xinyao Wang <[email protected]>
XinyaoWa authored Jan 13, 2025
1 parent ca15fe9 commit ff1310b
Showing 16 changed files with 94 additions and 75 deletions.
5 changes: 3 additions & 2 deletions DocSum/docker_compose/amd/gpu/rocm/README.md
@@ -11,7 +11,7 @@ First of all, you need to build Docker Images locally and install the python pac
```bash
git clone https://github.com/opea-project/GenAIComps.git
cd GenAIComps
docker build -t opea/llm-docsum-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/summarization/tgi/langchain/Dockerfile .
docker build -t opea/llm-docsum-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/src/doc-summarization/Dockerfile .
```

Then run the command `docker images`; you will have the following four Docker Images:
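As a quick sanity check, the freshly built images can be listed. This is an illustrative filter only; the exact repository names and tags depend on how the images were built:

```bash
# Hypothetical check: list the DocSum-related images built above.
# Image name patterns are assumptions based on the default build commands.
docker images | grep -E "docsum|llm-docsum|whisper"
```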
@@ -81,6 +81,7 @@ export DOCSUM_HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
export DOCSUM_LLM_SERVER_PORT="8008"
export DOCSUM_BACKEND_SERVER_PORT="8888"
export DOCSUM_FRONTEND_PORT="5173"
export DocSum_COMPONENT_NAME="OPEADocSum_TGI"
```

Note: Please replace `host_ip` with your external IP address; do not use localhost.
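A minimal sketch for setting it, assuming the first address reported by `hostname -I` is the externally reachable one:

```bash
# Assumption: the first address from hostname -I is the machine's external IP.
export host_ip=$(hostname -I | awk '{print $1}')
```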
@@ -126,7 +127,7 @@ docker compose up -d
2. LLM Microservice

```bash
curl http://${host_ip}:9000/v1/chat/docsum \
curl http://${host_ip}:9000/v1/docsum \
-X POST \
-d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' \
-H 'Content-Type: application/json'
16 changes: 13 additions & 3 deletions DocSum/docker_compose/amd/gpu/rocm/compose.yaml
@@ -13,6 +13,8 @@ services:
https_proxy: ${https_proxy}
TGI_LLM_ENDPOINT: "http://${HOST_IP}:${DOCSUM_TGI_SERVICE_PORT}"
HUGGINGFACEHUB_API_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN}
host_ip: ${host_ip}
DOCSUM_TGI_SERVICE_PORT: ${DOCSUM_TGI_SERVICE_PORT}
volumes:
- "/var/opea/docsum-service/data:/data"
shm_size: 1g
@@ -27,13 +29,19 @@ services:
security_opt:
- seccomp:unconfined
ipc: host
healthcheck:
test: ["CMD-SHELL", "curl -f http://${host_ip}:${DOCSUM_TGI_SERVICE_PORT}/health || exit 1"]
interval: 10s
timeout: 10s
retries: 100
command: --model-id ${DOCSUM_LLM_MODEL_ID} --max-input-length ${MAX_INPUT_TOKENS} --max-total-tokens ${MAX_TOTAL_TOKENS}

docsum-llm-server:
image: ${REGISTRY:-opea}/llm-docsum-tgi:${TAG:-latest}
image: ${REGISTRY:-opea}/llm-docsum:${TAG:-latest}
container_name: docsum-llm-server
depends_on:
- docsum-tgi-service
docsum-tgi-service:
condition: service_healthy
ports:
- "${DOCSUM_LLM_SERVER_PORT}:9000"
ipc: host
@@ -51,11 +59,13 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TGI_LLM_ENDPOINT: "http://${HOST_IP}:${DOCSUM_TGI_SERVICE_PORT}"
LLM_ENDPOINT: "http://${HOST_IP}:${DOCSUM_TGI_SERVICE_PORT}"
HUGGINGFACEHUB_API_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN}
MAX_INPUT_TOKENS: ${MAX_INPUT_TOKENS}
MAX_TOTAL_TOKENS: ${MAX_TOTAL_TOKENS}
LLM_MODEL_ID: ${DOCSUM_LLM_MODEL_ID}
DocSum_COMPONENT_NAME: ${DocSum_COMPONENT_NAME}
LOGFLAG: ${LOGFLAG:-False}
restart: unless-stopped

whisper:
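The healthcheck added above gates `docsum-llm-server` on TGI readiness. The same probe can be run by hand; this sketch assumes `host_ip` and `DOCSUM_TGI_SERVICE_PORT` are exported in the current shell, matching the compose environment:

```bash
# Manual readiness probe mirroring the compose healthcheck.
# Assumes host_ip and DOCSUM_TGI_SERVICE_PORT are set as in compose.yaml.
curl -f "http://${host_ip}:${DOCSUM_TGI_SERVICE_PORT}/health" && echo "TGI is ready"
```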
4 changes: 2 additions & 2 deletions DocSum/docker_compose/intel/cpu/xeon/README.md
@@ -123,7 +123,7 @@ You will have the following Docker Images:

1. `opea/docsum-ui:latest`
2. `opea/docsum:latest`
3. `opea/llm-docsum-tgi:latest`
3. `opea/llm-docsum:latest`
4. `opea/whisper:latest`

### Validate Microservices
@@ -140,7 +140,7 @@ You will have the following Docker Images:
2. LLM Microservice

```bash
curl http://${host_ip}:9000/v1/chat/docsum \
curl http://${host_ip}:9000/v1/docsum \
-X POST \
-d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' \
-H 'Content-Type: application/json'
21 changes: 15 additions & 6 deletions DocSum/docker_compose/intel/cpu/xeon/compose.yaml
@@ -6,36 +6,45 @@ services:
image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
container_name: tgi-server
ports:
- "8008:80"
- ${LLM_ENDPOINT_PORT:-8008}:80
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
host_ip: ${host_ip}
LLM_ENDPOINT_PORT: ${LLM_ENDPOINT_PORT}
healthcheck:
test: ["CMD-SHELL", "curl -f http://${host_ip}:${LLM_ENDPOINT_PORT}/health || exit 1"]
interval: 10s
timeout: 10s
retries: 100
volumes:
- "./data:/data"
shm_size: 1g
command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0 --max-input-length ${MAX_INPUT_TOKENS} --max-total-tokens ${MAX_TOTAL_TOKENS}

llm-docsum-tgi:
image: ${REGISTRY:-opea}/llm-docsum-tgi:${TAG:-latest}
image: ${REGISTRY:-opea}/llm-docsum:${TAG:-latest}
container_name: llm-docsum-server
depends_on:
- tgi-server
tgi-server:
condition: service_healthy
ports:
- "9000:9000"
- ${DOCSUM_PORT:-9000}:9000
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
LLM_ENDPOINT: ${LLM_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
MAX_INPUT_TOKENS: ${MAX_INPUT_TOKENS}
MAX_TOTAL_TOKENS: ${MAX_TOTAL_TOKENS}
LLM_MODEL_ID: ${LLM_MODEL_ID}
LOGFLAG: True
DocSum_COMPONENT_NAME: ${DocSum_COMPONENT_NAME}
LOGFLAG: ${LOGFLAG:-False}
restart: unless-stopped

whisper:
4 changes: 2 additions & 2 deletions DocSum/docker_compose/intel/hpu/gaudi/README.md
@@ -115,7 +115,7 @@ You will have the following Docker Images:

1. `opea/docsum-ui:latest`
2. `opea/docsum:latest`
3. `opea/llm-docsum-tgi:latest`
3. `opea/llm-docsum:latest`
4. `opea/whisper:latest`

### Validate Microservices
@@ -132,7 +132,7 @@ You will have the following Docker Images:
2. LLM Microservice

```bash
curl http://${host_ip}:9000/v1/chat/docsum \
curl http://${host_ip}:9000/v1/docsum \
-X POST \
-d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' \
-H 'Content-Type: application/json'
36 changes: 24 additions & 12 deletions DocSum/docker_compose/intel/hpu/gaudi/compose.yaml
@@ -2,47 +2,59 @@
# SPDX-License-Identifier: Apache-2.0

services:
tgi-server:
image: ghcr.io/huggingface/tgi-gaudi:2.0.6
tgi-gaudi-server:
image: ghcr.io/huggingface/tgi-gaudi:2.3.1
container_name: tgi-gaudi-server
ports:
- "8008:80"
- ${LLM_ENDPOINT_PORT:-8008}:80
volumes:
- "./data:/data"
environment:
HABANA_VISIBLE_DEVICES: all
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
ENABLE_HPU_GRAPH: true
LIMIT_HPU_GRAPH: true
USE_FLASH_ATTENTION: true
FLASH_ATTENTION_RECOMPUTE: true
volumes:
- "./data:/data"
host_ip: ${host_ip}
LLM_ENDPOINT_PORT: ${LLM_ENDPOINT_PORT}
runtime: habana
cap_add:
- SYS_NICE
ipc: host
healthcheck:
test: ["CMD-SHELL", "curl -f http://${host_ip}:${LLM_ENDPOINT_PORT}/health || exit 1"]
interval: 10s
timeout: 10s
retries: 100
command: --model-id ${LLM_MODEL_ID} --max-input-length ${MAX_INPUT_TOKENS} --max-total-tokens ${MAX_TOTAL_TOKENS}

llm-docsum-tgi:
image: ${REGISTRY:-opea}/llm-docsum-tgi:${TAG:-latest}
image: ${REGISTRY:-opea}/llm-docsum:${TAG:-latest}
container_name: llm-docsum-gaudi-server
depends_on:
- tgi-server
tgi-gaudi-server:
condition: service_healthy
ports:
- "9000:9000"
- ${DOCSUM_PORT:-9000}:9000
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
MAX_INPUT_TOKENS: ${MAX_INPUT_TOKENS}
MAX_TOTAL_TOKENS: ${MAX_TOTAL_TOKENS}
LLM_ENDPOINT: ${LLM_ENDPOINT}
LLM_MODEL_ID: ${LLM_MODEL_ID}
LOGFLAG: True
DocSum_COMPONENT_NAME: ${DocSum_COMPONENT_NAME}
LOGFLAG: ${LOGFLAG:-False}
restart: unless-stopped

whisper:
@@ -66,7 +78,7 @@ services:
image: ${REGISTRY:-opea}/docsum:${TAG:-latest}
container_name: docsum-gaudi-backend-server
depends_on:
- tgi-server
- tgi-gaudi-server
- llm-docsum-tgi
ports:
- "8888:8888"
6 changes: 5 additions & 1 deletion DocSum/docker_compose/set_env.sh
@@ -10,10 +10,14 @@ export MAX_INPUT_TOKENS=1024
export MAX_TOTAL_TOKENS=2048

export no_proxy="${no_proxy},${host_ip}"
export TGI_LLM_ENDPOINT="http://${host_ip}:8008"
export MEGA_SERVICE_HOST_IP=${host_ip}
export LLM_SERVICE_HOST_IP=${host_ip}
export ASR_SERVICE_HOST_IP=${host_ip}
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"

export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/docsum"

export LLM_ENDPOINT_PORT=8008
export DOCSUM_PORT=9000
export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}"
export DocSum_COMPONENT_NAME="OPEADocSum_TGI"
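A typical usage sketch: export the prerequisites, source the script so the variables land in the current shell, then bring a deployment up. The checkout layout and prerequisite variables below are assumptions for illustration:

```bash
# Assumed layout and prerequisites; adjust to the actual checkout.
export host_ip=<your-external-ip>                 # referenced by set_env.sh
export HUGGINGFACEHUB_API_TOKEN=<your-hf-token>   # assumed to be required by the services
cd GenAIExamples/DocSum/docker_compose
source ./set_env.sh
cd intel/cpu/xeon
docker compose up -d
```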
6 changes: 3 additions & 3 deletions DocSum/docker_image_build/build.yaml
@@ -41,9 +41,9 @@ services:
dockerfile: comps/asr/src/integrations/dependency/whisper/Dockerfile
extends: docsum
image: ${REGISTRY:-opea}/whisper:${TAG:-latest}
llm-docsum-tgi:
llm-docsum:
build:
context: GenAIComps
dockerfile: comps/llms/summarization/tgi/langchain/Dockerfile
dockerfile: comps/llms/src/doc-summarization/Dockerfile
extends: docsum
image: ${REGISTRY:-opea}/llm-docsum-tgi:${TAG:-latest}
image: ${REGISTRY:-opea}/llm-docsum:${TAG:-latest}
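With the service renamed, a local image build targets the new service name. A hedged example; it assumes the GenAIExamples checkout layout and clones GenAIComps into the build context first, as the test script below does:

```bash
# Build only the renamed summarization microservice image.
# The service name matches the build.yaml entry above; registry and tag use the defaults.
cd GenAIExamples/DocSum/docker_image_build
git clone https://github.com/opea-project/GenAIComps.git
docker compose -f build.yaml build llm-docsum --no-cache
```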
2 changes: 1 addition & 1 deletion DocSum/docsum.py
@@ -146,7 +146,7 @@ def add_remote_service(self):
name="llm",
host=LLM_SERVICE_HOST_IP,
port=LLM_SERVICE_PORT,
endpoint="/v1/chat/docsum",
endpoint="/v1/docsum",
use_remote_service=True,
service_type=ServiceType.LLM,
)
2 changes: 1 addition & 1 deletion DocSum/kubernetes/gmc/docsum_gaudi.yaml
@@ -23,7 +23,7 @@ spec:
internalService:
serviceName: docsum-llm-uservice
config:
endpoint: /v1/chat/docsum
endpoint: /v1/docsum
PORT: "9009"
TGI_LLM_ENDPOINT: tgi-gaudi-svc
- name: TgiGaudi
2 changes: 1 addition & 1 deletion DocSum/kubernetes/gmc/docsum_xeon.yaml
@@ -23,7 +23,7 @@ spec:
internalService:
serviceName: docsum-llm-uservice
config:
endpoint: /v1/chat/docsum
endpoint: /v1/docsum
PORT: "9009"
TGI_LLM_ENDPOINT: tgi-svc
- name: Tgi
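For reference, a GMC manifest like the one above is applied with kubectl once the GMC controller is installed; a sketch with an assumed path:

```bash
# Assumes the GMC controller is already deployed in the cluster.
kubectl apply -f DocSum/kubernetes/gmc/docsum_xeon.yaml
```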
23 changes: 9 additions & 14 deletions DocSum/tests/test_compose_on_gaudi.sh
@@ -17,13 +17,17 @@ export TAG=${IMAGE_TAG}
export MAX_INPUT_TOKENS=2048
export MAX_TOTAL_TOKENS=4096
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
export TGI_LLM_ENDPOINT="http://${host_ip}:8008"
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export MEGA_SERVICE_HOST_IP=${host_ip}
export LLM_SERVICE_HOST_IP=${host_ip}
export ASR_SERVICE_HOST_IP=${host_ip}
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/docsum"
export no_proxy="${no_proxy},${host_ip}"
export LLM_ENDPOINT_PORT=8008
export DOCSUM_PORT=9000
export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}"
export DocSum_COMPONENT_NAME="OPEADocSum_TGI"
export LOGFLAG=True

WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"
@@ -37,10 +41,10 @@ function build_docker_images() {
git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../

echo "Build all the images with --no-cache, check docker_image_build.log for details..."
service_list="docsum docsum-gradio-ui whisper llm-docsum-tgi"
service_list="docsum docsum-gradio-ui whisper llm-docsum"
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6
docker pull ghcr.io/huggingface/tgi-gaudi:2.3.1
docker images && sleep 1s
}

@@ -49,15 +53,6 @@ function start_services() {

docker compose -f compose.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
sleep 3m

until [[ "$n" -ge 100 ]]; do
docker logs tgi-gaudi-server > ${LOG_PATH}/tgi_service_start.log
if grep -q Connected ${LOG_PATH}/tgi_service_start.log; then
break
fi
sleep 5s
n=$((n+1))
done
}

get_base64_str() {
@@ -156,13 +151,13 @@ function validate_microservices() {
validate_services_json \
"${host_ip}:8008/generate" \
"generated_text" \
"tgi-gaudi" \
"tgi-gaudi-server" \
"tgi-gaudi-server" \
'{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}'

# llm microservice
validate_services_json \
"${host_ip}:9000/v1/chat/docsum" \
"${host_ip}:9000/v1/docsum" \
"data: " \
"llm-docsum-tgi" \
"llm-docsum-gaudi-server" \