From ff1310b11a49e0a006fd89a70c6dae9f079c066e Mon Sep 17 00:00:00 2001 From: XinyaoWa Date: Mon, 13 Jan 2025 15:49:48 +0800 Subject: [PATCH] Refactor docsum (#1336) Signed-off-by: Xinyao Wang --- DocSum/docker_compose/amd/gpu/rocm/README.md | 5 +-- .../docker_compose/amd/gpu/rocm/compose.yaml | 16 +++++++-- .../docker_compose/intel/cpu/xeon/README.md | 4 +-- .../intel/cpu/xeon/compose.yaml | 21 +++++++---- .../docker_compose/intel/hpu/gaudi/README.md | 4 +-- .../intel/hpu/gaudi/compose.yaml | 36 ++++++++++++------- DocSum/docker_compose/set_env.sh | 6 +++- DocSum/docker_image_build/build.yaml | 6 ++-- DocSum/docsum.py | 2 +- DocSum/kubernetes/gmc/docsum_gaudi.yaml | 2 +- DocSum/kubernetes/gmc/docsum_xeon.yaml | 2 +- DocSum/tests/test_compose_on_gaudi.sh | 23 +++++------- DocSum/tests/test_compose_on_rocm.sh | 17 +++------ DocSum/tests/test_compose_on_xeon.sh | 21 +++++------ .../docker_compose/intel/cpu/xeon/README.md | 2 +- docker_images_list.md | 2 +- 16 files changed, 94 insertions(+), 75 deletions(-) diff --git a/DocSum/docker_compose/amd/gpu/rocm/README.md b/DocSum/docker_compose/amd/gpu/rocm/README.md index 0a40d17f3f..b45a496755 100644 --- a/DocSum/docker_compose/amd/gpu/rocm/README.md +++ b/DocSum/docker_compose/amd/gpu/rocm/README.md @@ -11,7 +11,7 @@ First of all, you need to build Docker Images locally and install the python pac ```bash git clone https://github.com/opea-project/GenAIComps.git cd GenAIComps -docker build -t opea/llm-docsum-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/summarization/tgi/langchain/Dockerfile . +docker build -t opea/llm-docsum-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/src/doc-summarization/Dockerfile . ``` Then run the command `docker images`, you will have the following four Docker Images: @@ -81,6 +81,7 @@ export DOCSUM_HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} export DOCSUM_LLM_SERVER_PORT="8008" export DOCSUM_BACKEND_SERVER_PORT="8888" export DOCSUM_FRONTEND_PORT="5173" +export DocSum_COMPONENT_NAME="OPEADocSum_TGI" ``` Note: Please replace with `host_ip` with your external IP address, do not use localhost. @@ -126,7 +127,7 @@ docker compose up -d 2. LLM Microservice ```bash - curl http://${host_ip}:9000/v1/chat/docsum \ + curl http://${host_ip}:9000/v1/docsum \ -X POST \ -d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. 
TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' \ -H 'Content-Type: application/json' diff --git a/DocSum/docker_compose/amd/gpu/rocm/compose.yaml b/DocSum/docker_compose/amd/gpu/rocm/compose.yaml index fa36310ad3..c7b7c785ac 100644 --- a/DocSum/docker_compose/amd/gpu/rocm/compose.yaml +++ b/DocSum/docker_compose/amd/gpu/rocm/compose.yaml @@ -13,6 +13,8 @@ services: https_proxy: ${https_proxy} TGI_LLM_ENDPOINT: "http://${HOST_IP}:${DOCSUM_TGI_SERVICE_PORT}" HUGGINGFACEHUB_API_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} + host_ip: ${host_ip} + DOCSUM_TGI_SERVICE_PORT: ${DOCSUM_TGI_SERVICE_PORT} volumes: - "/var/opea/docsum-service/data:/data" shm_size: 1g @@ -27,13 +29,19 @@ services: security_opt: - seccomp:unconfined ipc: host + healthcheck: + test: ["CMD-SHELL", "curl -f http://${host_ip}:${DOCSUM_TGI_SERVICE_PORT}/health || exit 1"] + interval: 10s + timeout: 10s + retries: 100 command: --model-id ${DOCSUM_LLM_MODEL_ID} --max-input-length ${MAX_INPUT_TOKENS} --max-total-tokens ${MAX_TOTAL_TOKENS} docsum-llm-server: - image: ${REGISTRY:-opea}/llm-docsum-tgi:${TAG:-latest} + image: ${REGISTRY:-opea}/llm-docsum:${TAG:-latest} container_name: docsum-llm-server depends_on: - - docsum-tgi-service + docsum-tgi-service: + condition: service_healthy ports: - "${DOCSUM_LLM_SERVER_PORT}:9000" ipc: host @@ -51,11 +59,13 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - TGI_LLM_ENDPOINT: "http://${HOST_IP}:${DOCSUM_TGI_SERVICE_PORT}" + LLM_ENDPOINT: "http://${HOST_IP}:${DOCSUM_TGI_SERVICE_PORT}" HUGGINGFACEHUB_API_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} MAX_INPUT_TOKENS: ${MAX_INPUT_TOKENS} MAX_TOTAL_TOKENS: ${MAX_TOTAL_TOKENS} LLM_MODEL_ID: ${DOCSUM_LLM_MODEL_ID} + DocSum_COMPONENT_NAME: ${DocSum_COMPONENT_NAME} + LOGFLAG: ${LOGFLAG:-False} restart: unless-stopped whisper: diff --git a/DocSum/docker_compose/intel/cpu/xeon/README.md b/DocSum/docker_compose/intel/cpu/xeon/README.md index 98aaad9181..9465c0c976 100644 --- a/DocSum/docker_compose/intel/cpu/xeon/README.md +++ b/DocSum/docker_compose/intel/cpu/xeon/README.md @@ -123,7 +123,7 @@ You will have the following Docker Images: 1. `opea/docsum-ui:latest` 2. `opea/docsum:latest` -3. `opea/llm-docsum-tgi:latest` +3. `opea/llm-docsum:latest` 4. `opea/whisper:latest` ### Validate Microservices @@ -140,7 +140,7 @@ You will have the following Docker Images: 2. LLM Microservice ```bash - curl http://${host_ip}:9000/v1/chat/docsum \ + curl http://${host_ip}:9000/v1/docsum \ -X POST \ -d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. 
TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' \ -H 'Content-Type: application/json' diff --git a/DocSum/docker_compose/intel/cpu/xeon/compose.yaml b/DocSum/docker_compose/intel/cpu/xeon/compose.yaml index 42e89ee252..2c4344cc23 100644 --- a/DocSum/docker_compose/intel/cpu/xeon/compose.yaml +++ b/DocSum/docker_compose/intel/cpu/xeon/compose.yaml @@ -6,36 +6,45 @@ services: image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu container_name: tgi-server ports: - - "8008:80" + - ${LLM_ENDPOINT_PORT:-8008}:80 environment: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT} HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + host_ip: ${host_ip} + LLM_ENDPOINT_PORT: ${LLM_ENDPOINT_PORT} + healthcheck: + test: ["CMD-SHELL", "curl -f http://${host_ip}:${LLM_ENDPOINT_PORT}/health || exit 1"] + interval: 10s + timeout: 10s + retries: 100 volumes: - "./data:/data" shm_size: 1g command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0 --max-input-length ${MAX_INPUT_TOKENS} --max-total-tokens ${MAX_TOTAL_TOKENS} llm-docsum-tgi: - image: ${REGISTRY:-opea}/llm-docsum-tgi:${TAG:-latest} + image: ${REGISTRY:-opea}/llm-docsum:${TAG:-latest} container_name: llm-docsum-server depends_on: - - tgi-server + tgi-server: + condition: service_healthy ports: - - "9000:9000" + - ${DOCSUM_PORT:-9000}:9000 ipc: host environment: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT} + LLM_ENDPOINT: ${LLM_ENDPOINT} HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} MAX_INPUT_TOKENS: ${MAX_INPUT_TOKENS} MAX_TOTAL_TOKENS: ${MAX_TOTAL_TOKENS} LLM_MODEL_ID: ${LLM_MODEL_ID} - LOGFLAG: True + DocSum_COMPONENT_NAME: ${DocSum_COMPONENT_NAME} + LOGFLAG: ${LOGFLAG:-False} restart: unless-stopped whisper: diff --git a/DocSum/docker_compose/intel/hpu/gaudi/README.md b/DocSum/docker_compose/intel/hpu/gaudi/README.md index 65a1799d35..d150b3f28e 100644 --- a/DocSum/docker_compose/intel/hpu/gaudi/README.md +++ b/DocSum/docker_compose/intel/hpu/gaudi/README.md @@ -115,7 +115,7 @@ You will have the following Docker Images: 1. `opea/docsum-ui:latest` 2. `opea/docsum:latest` -3. `opea/llm-docsum-tgi:latest` +3. `opea/llm-docsum:latest` 4. `opea/whisper:latest` ### Validate Microservices @@ -132,7 +132,7 @@ You will have the following Docker Images: 2. LLM Microservice ```bash - curl http://${host_ip}:9000/v1/chat/docsum \ + curl http://${host_ip}:9000/v1/docsum \ -X POST \ -d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. 
TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' \ -H 'Content-Type: application/json' diff --git a/DocSum/docker_compose/intel/hpu/gaudi/compose.yaml b/DocSum/docker_compose/intel/hpu/gaudi/compose.yaml index e9ab3e1634..c812b64715 100644 --- a/DocSum/docker_compose/intel/hpu/gaudi/compose.yaml +++ b/DocSum/docker_compose/intel/hpu/gaudi/compose.yaml @@ -2,47 +2,59 @@ # SPDX-License-Identifier: Apache-2.0 services: - tgi-server: - image: ghcr.io/huggingface/tgi-gaudi:2.0.6 + tgi-gaudi-server: + image: ghcr.io/huggingface/tgi-gaudi:2.3.1 container_name: tgi-gaudi-server ports: - - "8008:80" + - ${LLM_ENDPOINT_PORT:-8008}:80 + volumes: + - "./data:/data" environment: - HABANA_VISIBLE_DEVICES: all no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_HUB_DISABLE_PROGRESS_BARS: 1 + HF_HUB_ENABLE_HF_TRANSFER: 0 + HABANA_VISIBLE_DEVICES: all + OMPI_MCA_btl_vader_single_copy_mechanism: none ENABLE_HPU_GRAPH: true LIMIT_HPU_GRAPH: true USE_FLASH_ATTENTION: true FLASH_ATTENTION_RECOMPUTE: true - volumes: - - "./data:/data" + host_ip: ${host_ip} + LLM_ENDPOINT_PORT: ${LLM_ENDPOINT_PORT} runtime: habana cap_add: - SYS_NICE ipc: host + healthcheck: + test: ["CMD-SHELL", "curl -f http://${host_ip}:${LLM_ENDPOINT_PORT}/health || exit 1"] + interval: 10s + timeout: 10s + retries: 100 command: --model-id ${LLM_MODEL_ID} --max-input-length ${MAX_INPUT_TOKENS} --max-total-tokens ${MAX_TOTAL_TOKENS} llm-docsum-tgi: - image: ${REGISTRY:-opea}/llm-docsum-tgi:${TAG:-latest} + image: ${REGISTRY:-opea}/llm-docsum:${TAG:-latest} container_name: llm-docsum-gaudi-server depends_on: - - tgi-server + tgi-gaudi-server: + condition: service_healthy ports: - - "9000:9000" + - ${DOCSUM_PORT:-9000}:9000 ipc: host environment: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT} HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} MAX_INPUT_TOKENS: ${MAX_INPUT_TOKENS} MAX_TOTAL_TOKENS: ${MAX_TOTAL_TOKENS} + LLM_ENDPOINT: ${LLM_ENDPOINT} LLM_MODEL_ID: ${LLM_MODEL_ID} - LOGFLAG: True + DocSum_COMPONENT_NAME: ${DocSum_COMPONENT_NAME} + LOGFLAG: ${LOGFLAG:-False} restart: unless-stopped whisper: @@ -66,7 +78,7 @@ services: image: ${REGISTRY:-opea}/docsum:${TAG:-latest} container_name: docsum-gaudi-backend-server depends_on: - - tgi-server + - tgi-gaudi-server - llm-docsum-tgi ports: - "8888:8888" diff --git a/DocSum/docker_compose/set_env.sh b/DocSum/docker_compose/set_env.sh index ffe52a04f9..3307955cc8 100644 --- a/DocSum/docker_compose/set_env.sh +++ b/DocSum/docker_compose/set_env.sh @@ -10,10 +10,14 @@ export MAX_INPUT_TOKENS=1024 export MAX_TOTAL_TOKENS=2048 export no_proxy="${no_proxy},${host_ip}" -export TGI_LLM_ENDPOINT="http://${host_ip}:8008" export MEGA_SERVICE_HOST_IP=${host_ip} export LLM_SERVICE_HOST_IP=${host_ip} export ASR_SERVICE_HOST_IP=${host_ip} export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/docsum" + +export LLM_ENDPOINT_PORT=8008 +export DOCSUM_PORT=9000 +export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}" +export DocSum_COMPONENT_NAME="OPEADocSum_TGI" diff --git a/DocSum/docker_image_build/build.yaml b/DocSum/docker_image_build/build.yaml index 2fa2e0e0d4..095fd28c93 100644 --- a/DocSum/docker_image_build/build.yaml +++ b/DocSum/docker_image_build/build.yaml @@ -41,9 +41,9 @@ services: dockerfile: 
comps/asr/src/integrations/dependency/whisper/Dockerfile extends: docsum image: ${REGISTRY:-opea}/whisper:${TAG:-latest} - llm-docsum-tgi: + llm-docsum: build: context: GenAIComps - dockerfile: comps/llms/summarization/tgi/langchain/Dockerfile + dockerfile: comps/llms/src/doc-summarization/Dockerfile extends: docsum - image: ${REGISTRY:-opea}/llm-docsum-tgi:${TAG:-latest} + image: ${REGISTRY:-opea}/llm-docsum:${TAG:-latest} diff --git a/DocSum/docsum.py b/DocSum/docsum.py index d1689d92a0..1d71f24ad1 100644 --- a/DocSum/docsum.py +++ b/DocSum/docsum.py @@ -146,7 +146,7 @@ def add_remote_service(self): name="llm", host=LLM_SERVICE_HOST_IP, port=LLM_SERVICE_PORT, - endpoint="/v1/chat/docsum", + endpoint="/v1/docsum", use_remote_service=True, service_type=ServiceType.LLM, ) diff --git a/DocSum/kubernetes/gmc/docsum_gaudi.yaml b/DocSum/kubernetes/gmc/docsum_gaudi.yaml index 9b7a1ef30f..66c55ae92b 100644 --- a/DocSum/kubernetes/gmc/docsum_gaudi.yaml +++ b/DocSum/kubernetes/gmc/docsum_gaudi.yaml @@ -23,7 +23,7 @@ spec: internalService: serviceName: docsum-llm-uservice config: - endpoint: /v1/chat/docsum + endpoint: /v1/docsum PORT: "9009" TGI_LLM_ENDPOINT: tgi-gaudi-svc - name: TgiGaudi diff --git a/DocSum/kubernetes/gmc/docsum_xeon.yaml b/DocSum/kubernetes/gmc/docsum_xeon.yaml index 09a72e0f1f..26fe2980f2 100644 --- a/DocSum/kubernetes/gmc/docsum_xeon.yaml +++ b/DocSum/kubernetes/gmc/docsum_xeon.yaml @@ -23,7 +23,7 @@ spec: internalService: serviceName: docsum-llm-uservice config: - endpoint: /v1/chat/docsum + endpoint: /v1/docsum PORT: "9009" TGI_LLM_ENDPOINT: tgi-svc - name: Tgi diff --git a/DocSum/tests/test_compose_on_gaudi.sh b/DocSum/tests/test_compose_on_gaudi.sh index 6287ade8cf..10e4d0c9fa 100644 --- a/DocSum/tests/test_compose_on_gaudi.sh +++ b/DocSum/tests/test_compose_on_gaudi.sh @@ -17,13 +17,17 @@ export TAG=${IMAGE_TAG} export MAX_INPUT_TOKENS=2048 export MAX_TOTAL_TOKENS=4096 export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" -export TGI_LLM_ENDPOINT="http://${host_ip}:8008" export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} export MEGA_SERVICE_HOST_IP=${host_ip} export LLM_SERVICE_HOST_IP=${host_ip} export ASR_SERVICE_HOST_IP=${host_ip} export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/docsum" export no_proxy="${no_proxy},${host_ip}" +export LLM_ENDPOINT_PORT=8008 +export DOCSUM_PORT=9000 +export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}" +export DocSum_COMPONENT_NAME="OPEADocSum_TGI" +export LOGFLAG=True WORKPATH=$(dirname "$PWD") LOG_PATH="$WORKPATH/tests" @@ -37,10 +41,10 @@ function build_docker_images() { git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../ echo "Build all the images with --no-cache, check docker_image_build.log for details..." 
- service_list="docsum docsum-gradio-ui whisper llm-docsum-tgi" + service_list="docsum docsum-gradio-ui whisper llm-docsum" docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log - docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6 + docker pull ghcr.io/huggingface/tgi-gaudi:2.3.1 docker images && sleep 1s } @@ -49,15 +53,6 @@ function start_services() { docker compose -f compose.yaml up -d > ${LOG_PATH}/start_services_with_compose.log sleep 3m - - until [[ "$n" -ge 100 ]]; do - docker logs tgi-gaudi-server > ${LOG_PATH}/tgi_service_start.log - if grep -q Connected ${LOG_PATH}/tgi_service_start.log; then - break - fi - sleep 5s - n=$((n+1)) - done } get_base64_str() { @@ -156,13 +151,13 @@ function validate_microservices() { validate_services_json \ "${host_ip}:8008/generate" \ "generated_text" \ - "tgi-gaudi" \ + "tgi-gaudi-server" \ "tgi-gaudi-server" \ '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' # llm microservice validate_services_json \ - "${host_ip}:9000/v1/chat/docsum" \ + "${host_ip}:9000/v1/docsum" \ "data: " \ "llm-docsum-tgi" \ "llm-docsum-gaudi-server" \ diff --git a/DocSum/tests/test_compose_on_rocm.sh b/DocSum/tests/test_compose_on_rocm.sh index 5f3083d8fb..dc0baa26cb 100644 --- a/DocSum/tests/test_compose_on_rocm.sh +++ b/DocSum/tests/test_compose_on_rocm.sh @@ -22,7 +22,6 @@ export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" export HOST_IP=${ip_address} export host_ip=${ip_address} export DOCSUM_TGI_SERVICE_PORT="8008" -export DOCSUM_TGI_LLM_ENDPOINT="http://${host_ip}:8008" export DOCSUM_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} export DOCSUM_LLM_SERVER_PORT="9000" export DOCSUM_BACKEND_SERVER_PORT="8888" @@ -33,13 +32,15 @@ export ASR_SERVICE_HOST_IP=${host_ip} export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:8888/v1/docsum" export DOCSUM_CARD_ID="card1" export DOCSUM_RENDER_ID="renderD136" +export DocSum_COMPONENT_NAME="OPEADocSum_TGI" +export LOGFLAG=True function build_docker_images() { cd $WORKPATH/docker_image_build git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../ echo "Build all the images with --no-cache, check docker_image_build.log for details..." 
- service_list="docsum docsum-gradio-ui whisper llm-docsum-tgi" + service_list="docsum docsum-gradio-ui whisper llm-docsum" docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log docker pull ghcr.io/huggingface/text-generation-inference:1.4 @@ -52,15 +53,7 @@ function start_services() { # Start Docker Containers docker compose up -d > "${LOG_PATH}"/start_services_with_compose.log - - until [[ "$n" -ge 100 ]]; do - docker logs docsum-tgi-service > "${LOG_PATH}"/tgi_service_start.log - if grep -q Connected "${LOG_PATH}"/tgi_service_start.log; then - break - fi - sleep 5s - n=$((n+1)) - done + sleep 3m } function validate_services() { @@ -144,7 +137,7 @@ function validate_microservices() { # llm microservice validate_services \ - "${host_ip}:9000/v1/chat/docsum" \ + "${host_ip}:9000/v1/docsum" \ "data: " \ "docsum-llm-server" \ "docsum-llm-server" \ diff --git a/DocSum/tests/test_compose_on_xeon.sh b/DocSum/tests/test_compose_on_xeon.sh index 91d5ece1bd..d353fcefdb 100644 --- a/DocSum/tests/test_compose_on_xeon.sh +++ b/DocSum/tests/test_compose_on_xeon.sh @@ -17,13 +17,17 @@ export TAG=${IMAGE_TAG} export MAX_INPUT_TOKENS=2048 export MAX_TOTAL_TOKENS=4096 export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" -export TGI_LLM_ENDPOINT="http://${host_ip}:8008" export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} export MEGA_SERVICE_HOST_IP=${host_ip} export LLM_SERVICE_HOST_IP=${host_ip} export ASR_SERVICE_HOST_IP=${host_ip} export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/docsum" export no_proxy="${no_proxy},${host_ip}" +export LLM_ENDPOINT_PORT=8008 +export DOCSUM_PORT=9000 +export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}" +export DocSum_COMPONENT_NAME="OPEADocSum_TGI" +export LOGFLAG=True WORKPATH=$(dirname "$PWD") LOG_PATH="$WORKPATH/tests" @@ -36,7 +40,7 @@ function build_docker_images() { git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../ echo "Build all the images with --no-cache, check docker_image_build.log for details..." 
- service_list="docsum docsum-gradio-ui whisper llm-docsum-tgi" + service_list="docsum docsum-gradio-ui whisper llm-docsum" docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log docker pull ghcr.io/huggingface/text-generation-inference:1.4 @@ -48,15 +52,6 @@ function start_services() { docker compose -f compose.yaml up -d > ${LOG_PATH}/start_services_with_compose.log sleep 3m - - until [[ "$n" -ge 100 ]]; do - docker logs tgi-server > ${LOG_PATH}/tgi_service_start.log - if grep -q Connected ${LOG_PATH}/tgi_service_start.log; then - break - fi - sleep 5s - n=$((n+1)) - done } get_base64_str() { @@ -158,13 +153,13 @@ function validate_microservices() { validate_services_json \ "${host_ip}:8008/generate" \ "generated_text" \ - "tgi" \ + "tgi-server" \ "tgi-server" \ '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' # llm microservice validate_services_json \ - "${host_ip}:9000/v1/chat/docsum" \ + "${host_ip}:9000/v1/docsum" \ "data: " \ "llm-docsum-tgi" \ "llm-docsum-server" \ diff --git a/ProductivitySuite/docker_compose/intel/cpu/xeon/README.md b/ProductivitySuite/docker_compose/intel/cpu/xeon/README.md index da1e79688a..8faa43e3c2 100644 --- a/ProductivitySuite/docker_compose/intel/cpu/xeon/README.md +++ b/ProductivitySuite/docker_compose/intel/cpu/xeon/README.md @@ -293,7 +293,7 @@ Please refer to **[keycloak_setup_guide](keycloak_setup_guide.md)** for more det 10. DocSum LLM Microservice ```bash - curl http://${host_ip}:9003/v1/chat/docsum\ + curl http://${host_ip}:9003/v1/docsum\ -X POST \ -d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5"}' \ -H 'Content-Type: application/json' diff --git a/docker_images_list.md b/docker_images_list.md index f3fd5f6c7d..dd934ae827 100644 --- a/docker_images_list.md +++ b/docker_images_list.md @@ -68,7 +68,7 @@ Take ChatQnA for example. ChatQnA is a chatbot application service based on the | [opea/guardrails]() | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/guardrails/src/guardrails/Dockerfile) | The docker image exposed the OPEA guardrail microservice to provide content review for GenAI application use | | [opea/guardrails-toxicity-detection](https://hub.docker.com/r/opea/guardrails-toxicity-detection) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/guardrails/src/toxicity_detection/Dockerfile) | The docker image exposed the OPEA guardrail microservice to provide toxicity detection for GenAI application use | | [opea/guardrails-pii-detection](https://hub.docker.com/r/opea/guardrails-pii-detection) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/guardrails/src/pii_detection/Dockerfile) | The docker image exposed the OPEA guardrail microservice to provide PII detection for GenAI application use | -| [opea/llm-docsum-tgi](https://hub.docker.com/r/opea/llm-docsum-tgi) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/llms/summarization/tgi/langchain/Dockerfile) | This docker image is designed to build a document summarization microservice using the HuggingFace Text Generation Inference(TGI) framework. The microservice accepts document input and generates a document summary. 
| [opea/llm-docsum]() | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/llms/src/doc-summarization/Dockerfile) | This docker image is designed to build a document summarization microservice using the HuggingFace Text Generation Inference (TGI) framework. The microservice accepts document input and generates a document summary. |
| [opea/llm-faqgen]() | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/llms/src/faq-generation/Dockerfile) | This docker image is designed to build a frequently asked questions microservice using the HuggingFace Text Generation Inference (TGI) framework. The microservice accepts document input and generates a FAQ. |
| [opea/llm-textgen](https://hub.docker.com/r/opea/llm-textgen) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/llms/src/text-generation/Dockerfile) | The docker image exposed the OPEA LLM microservice upon TGI docker image for GenAI application use |
| [opea/llava-gaudi](https://hub.docker.com/r/opea/llava-hpu) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/lvms/src/integrations/dependency/llava/Dockerfile.intel_hpu) | The docker image exposed the OPEA microservice running LLaVA as a large visual model (LVM) service for GenAI application use on the Gaudi |
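
The central behavioral changes in this patch are the renamed LLM microservice route (`/v1/docsum` instead of `/v1/chat/docsum`), the `LLM_ENDPOINT`/`DocSum_COMPONENT_NAME` environment variables, and the TGI healthchecks that gate `llm-docsum` startup. The snippet below is a minimal post-deployment check, not part of the patch itself: it assumes the compose stack is already running and that `set_env.sh` has been sourced so `host_ip`, `LLM_ENDPOINT_PORT` (8008), and `DOCSUM_PORT` (9000) are set in the current shell. The requests mirror the healthcheck, the `/generate` probe in the test scripts, and the README curl examples.

```bash
#!/usr/bin/env bash
# Sketch of a post-deployment check for the refactored DocSum endpoints (not part of the patch).
# Assumes the compose stack is up and set_env.sh has been sourced (host_ip, LLM_ENDPOINT_PORT, DOCSUM_PORT).
set -euo pipefail

# 1. TGI serving endpoint health, mirroring the healthcheck added to the compose files.
curl -sf "http://${host_ip}:${LLM_ENDPOINT_PORT:-8008}/health" && echo "TGI /health OK"

# 2. Raw generation through TGI, using the same payload as the tests' validate_services_json call.
curl -s "http://${host_ip}:${LLM_ENDPOINT_PORT:-8008}/generate" \
  -X POST \
  -H 'Content-Type: application/json' \
  -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}'

# 3. LLM microservice on its renamed route (/v1/docsum replaces /v1/chat/docsum).
curl -s "http://${host_ip}:${DOCSUM_PORT:-9000}/v1/docsum" \
  -X POST \
  -H 'Content-Type: application/json' \
  -d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models."}'
```

If the healthcheck is green, the last call should return a streamed summary whose lines begin with `data: `, which is the same marker the updated test scripts grep for.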