From ff1310b11a49e0a006fd89a70c6dae9f079c066e Mon Sep 17 00:00:00 2001 From: XinyaoWa Date: Mon, 13 Jan 2025 15:49:48 +0800 Subject: [PATCH] Refactor docsum (#1336) Signed-off-by: Xinyao Wang --- DocSum/docker_compose/amd/gpu/rocm/README.md | 5 +-- .../docker_compose/amd/gpu/rocm/compose.yaml | 16 +++++++-- .../docker_compose/intel/cpu/xeon/README.md | 4 +-- .../intel/cpu/xeon/compose.yaml | 21 +++++++---- .../docker_compose/intel/hpu/gaudi/README.md | 4 +-- .../intel/hpu/gaudi/compose.yaml | 36 ++++++++++++------- DocSum/docker_compose/set_env.sh | 6 +++- DocSum/docker_image_build/build.yaml | 6 ++-- DocSum/docsum.py | 2 +- DocSum/kubernetes/gmc/docsum_gaudi.yaml | 2 +- DocSum/kubernetes/gmc/docsum_xeon.yaml | 2 +- DocSum/tests/test_compose_on_gaudi.sh | 23 +++++------- DocSum/tests/test_compose_on_rocm.sh | 17 +++------ DocSum/tests/test_compose_on_xeon.sh | 21 +++++------ .../docker_compose/intel/cpu/xeon/README.md | 2 +- docker_images_list.md | 2 +- 16 files changed, 94 insertions(+), 75 deletions(-) diff --git a/DocSum/docker_compose/amd/gpu/rocm/README.md b/DocSum/docker_compose/amd/gpu/rocm/README.md index 0a40d17f3f..b45a496755 100644 --- a/DocSum/docker_compose/amd/gpu/rocm/README.md +++ b/DocSum/docker_compose/amd/gpu/rocm/README.md @@ -11,7 +11,7 @@ First of all, you need to build Docker Images locally and install the python pac ```bash git clone https://github.com/opea-project/GenAIComps.git cd GenAIComps -docker build -t opea/llm-docsum-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/summarization/tgi/langchain/Dockerfile . +docker build -t opea/llm-docsum-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/src/doc-summarization/Dockerfile . ``` Then run the command `docker images`, you will have the following four Docker Images: @@ -81,6 +81,7 @@ export DOCSUM_HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} export DOCSUM_LLM_SERVER_PORT="8008" export DOCSUM_BACKEND_SERVER_PORT="8888" export DOCSUM_FRONTEND_PORT="5173" +export DocSum_COMPONENT_NAME="OPEADocSum_TGI" ``` Note: Please replace with `host_ip` with your external IP address, do not use localhost. @@ -126,7 +127,7 @@ docker compose up -d 2. LLM Microservice ```bash - curl http://${host_ip}:9000/v1/chat/docsum \ + curl http://${host_ip}:9000/v1/docsum \ -X POST \ -d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. 
TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' \ -H 'Content-Type: application/json' diff --git a/DocSum/docker_compose/amd/gpu/rocm/compose.yaml b/DocSum/docker_compose/amd/gpu/rocm/compose.yaml index fa36310ad3..c7b7c785ac 100644 --- a/DocSum/docker_compose/amd/gpu/rocm/compose.yaml +++ b/DocSum/docker_compose/amd/gpu/rocm/compose.yaml @@ -13,6 +13,8 @@ services: https_proxy: ${https_proxy} TGI_LLM_ENDPOINT: "http://${HOST_IP}:${DOCSUM_TGI_SERVICE_PORT}" HUGGINGFACEHUB_API_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} + host_ip: ${host_ip} + DOCSUM_TGI_SERVICE_PORT: ${DOCSUM_TGI_SERVICE_PORT} volumes: - "/var/opea/docsum-service/data:/data" shm_size: 1g @@ -27,13 +29,19 @@ services: security_opt: - seccomp:unconfined ipc: host + healthcheck: + test: ["CMD-SHELL", "curl -f http://${host_ip}:${DOCSUM_TGI_SERVICE_PORT}/health || exit 1"] + interval: 10s + timeout: 10s + retries: 100 command: --model-id ${DOCSUM_LLM_MODEL_ID} --max-input-length ${MAX_INPUT_TOKENS} --max-total-tokens ${MAX_TOTAL_TOKENS} docsum-llm-server: - image: ${REGISTRY:-opea}/llm-docsum-tgi:${TAG:-latest} + image: ${REGISTRY:-opea}/llm-docsum:${TAG:-latest} container_name: docsum-llm-server depends_on: - - docsum-tgi-service + docsum-tgi-service: + condition: service_healthy ports: - "${DOCSUM_LLM_SERVER_PORT}:9000" ipc: host @@ -51,11 +59,13 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - TGI_LLM_ENDPOINT: "http://${HOST_IP}:${DOCSUM_TGI_SERVICE_PORT}" + LLM_ENDPOINT: "http://${HOST_IP}:${DOCSUM_TGI_SERVICE_PORT}" HUGGINGFACEHUB_API_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} MAX_INPUT_TOKENS: ${MAX_INPUT_TOKENS} MAX_TOTAL_TOKENS: ${MAX_TOTAL_TOKENS} LLM_MODEL_ID: ${DOCSUM_LLM_MODEL_ID} + DocSum_COMPONENT_NAME: ${DocSum_COMPONENT_NAME} + LOGFLAG: ${LOGFLAG:-False} restart: unless-stopped whisper: diff --git a/DocSum/docker_compose/intel/cpu/xeon/README.md b/DocSum/docker_compose/intel/cpu/xeon/README.md index 98aaad9181..9465c0c976 100644 --- a/DocSum/docker_compose/intel/cpu/xeon/README.md +++ b/DocSum/docker_compose/intel/cpu/xeon/README.md @@ -123,7 +123,7 @@ You will have the following Docker Images: 1. `opea/docsum-ui:latest` 2. `opea/docsum:latest` -3. `opea/llm-docsum-tgi:latest` +3. `opea/llm-docsum:latest` 4. `opea/whisper:latest` ### Validate Microservices @@ -140,7 +140,7 @@ You will have the following Docker Images: 2. LLM Microservice ```bash - curl http://${host_ip}:9000/v1/chat/docsum \ + curl http://${host_ip}:9000/v1/docsum \ -X POST \ -d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. 
TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' \ -H 'Content-Type: application/json' diff --git a/DocSum/docker_compose/intel/cpu/xeon/compose.yaml b/DocSum/docker_compose/intel/cpu/xeon/compose.yaml index 42e89ee252..2c4344cc23 100644 --- a/DocSum/docker_compose/intel/cpu/xeon/compose.yaml +++ b/DocSum/docker_compose/intel/cpu/xeon/compose.yaml @@ -6,36 +6,45 @@ services: image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu container_name: tgi-server ports: - - "8008:80" + - ${LLM_ENDPOINT_PORT:-8008}:80 environment: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT} HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + host_ip: ${host_ip} + LLM_ENDPOINT_PORT: ${LLM_ENDPOINT_PORT} + healthcheck: + test: ["CMD-SHELL", "curl -f http://${host_ip}:${LLM_ENDPOINT_PORT}/health || exit 1"] + interval: 10s + timeout: 10s + retries: 100 volumes: - "./data:/data" shm_size: 1g command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0 --max-input-length ${MAX_INPUT_TOKENS} --max-total-tokens ${MAX_TOTAL_TOKENS} llm-docsum-tgi: - image: ${REGISTRY:-opea}/llm-docsum-tgi:${TAG:-latest} + image: ${REGISTRY:-opea}/llm-docsum:${TAG:-latest} container_name: llm-docsum-server depends_on: - - tgi-server + tgi-server: + condition: service_healthy ports: - - "9000:9000" + - ${DOCSUM_PORT:-9000}:9000 ipc: host environment: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT} + LLM_ENDPOINT: ${LLM_ENDPOINT} HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} MAX_INPUT_TOKENS: ${MAX_INPUT_TOKENS} MAX_TOTAL_TOKENS: ${MAX_TOTAL_TOKENS} LLM_MODEL_ID: ${LLM_MODEL_ID} - LOGFLAG: True + DocSum_COMPONENT_NAME: ${DocSum_COMPONENT_NAME} + LOGFLAG: ${LOGFLAG:-False} restart: unless-stopped whisper: diff --git a/DocSum/docker_compose/intel/hpu/gaudi/README.md b/DocSum/docker_compose/intel/hpu/gaudi/README.md index 65a1799d35..d150b3f28e 100644 --- a/DocSum/docker_compose/intel/hpu/gaudi/README.md +++ b/DocSum/docker_compose/intel/hpu/gaudi/README.md @@ -115,7 +115,7 @@ You will have the following Docker Images: 1. `opea/docsum-ui:latest` 2. `opea/docsum:latest` -3. `opea/llm-docsum-tgi:latest` +3. `opea/llm-docsum:latest` 4. `opea/whisper:latest` ### Validate Microservices @@ -132,7 +132,7 @@ You will have the following Docker Images: 2. LLM Microservice ```bash - curl http://${host_ip}:9000/v1/chat/docsum \ + curl http://${host_ip}:9000/v1/docsum \ -X POST \ -d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. 
TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' \ -H 'Content-Type: application/json' diff --git a/DocSum/docker_compose/intel/hpu/gaudi/compose.yaml b/DocSum/docker_compose/intel/hpu/gaudi/compose.yaml index e9ab3e1634..c812b64715 100644 --- a/DocSum/docker_compose/intel/hpu/gaudi/compose.yaml +++ b/DocSum/docker_compose/intel/hpu/gaudi/compose.yaml @@ -2,47 +2,59 @@ # SPDX-License-Identifier: Apache-2.0 services: - tgi-server: - image: ghcr.io/huggingface/tgi-gaudi:2.0.6 + tgi-gaudi-server: + image: ghcr.io/huggingface/tgi-gaudi:2.3.1 container_name: tgi-gaudi-server ports: - - "8008:80" + - ${LLM_ENDPOINT_PORT:-8008}:80 + volumes: + - "./data:/data" environment: - HABANA_VISIBLE_DEVICES: all no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_HUB_DISABLE_PROGRESS_BARS: 1 + HF_HUB_ENABLE_HF_TRANSFER: 0 + HABANA_VISIBLE_DEVICES: all + OMPI_MCA_btl_vader_single_copy_mechanism: none ENABLE_HPU_GRAPH: true LIMIT_HPU_GRAPH: true USE_FLASH_ATTENTION: true FLASH_ATTENTION_RECOMPUTE: true - volumes: - - "./data:/data" + host_ip: ${host_ip} + LLM_ENDPOINT_PORT: ${LLM_ENDPOINT_PORT} runtime: habana cap_add: - SYS_NICE ipc: host + healthcheck: + test: ["CMD-SHELL", "curl -f http://${host_ip}:${LLM_ENDPOINT_PORT}/health || exit 1"] + interval: 10s + timeout: 10s + retries: 100 command: --model-id ${LLM_MODEL_ID} --max-input-length ${MAX_INPUT_TOKENS} --max-total-tokens ${MAX_TOTAL_TOKENS} llm-docsum-tgi: - image: ${REGISTRY:-opea}/llm-docsum-tgi:${TAG:-latest} + image: ${REGISTRY:-opea}/llm-docsum:${TAG:-latest} container_name: llm-docsum-gaudi-server depends_on: - - tgi-server + tgi-gaudi-server: + condition: service_healthy ports: - - "9000:9000" + - ${DOCSUM_PORT:-9000}:9000 ipc: host environment: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT} HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} MAX_INPUT_TOKENS: ${MAX_INPUT_TOKENS} MAX_TOTAL_TOKENS: ${MAX_TOTAL_TOKENS} + LLM_ENDPOINT: ${LLM_ENDPOINT} LLM_MODEL_ID: ${LLM_MODEL_ID} - LOGFLAG: True + DocSum_COMPONENT_NAME: ${DocSum_COMPONENT_NAME} + LOGFLAG: ${LOGFLAG:-False} restart: unless-stopped whisper: @@ -66,7 +78,7 @@ services: image: ${REGISTRY:-opea}/docsum:${TAG:-latest} container_name: docsum-gaudi-backend-server depends_on: - - tgi-server + - tgi-gaudi-server - llm-docsum-tgi ports: - "8888:8888" diff --git a/DocSum/docker_compose/set_env.sh b/DocSum/docker_compose/set_env.sh index ffe52a04f9..3307955cc8 100644 --- a/DocSum/docker_compose/set_env.sh +++ b/DocSum/docker_compose/set_env.sh @@ -10,10 +10,14 @@ export MAX_INPUT_TOKENS=1024 export MAX_TOTAL_TOKENS=2048 export no_proxy="${no_proxy},${host_ip}" -export TGI_LLM_ENDPOINT="http://${host_ip}:8008" export MEGA_SERVICE_HOST_IP=${host_ip} export LLM_SERVICE_HOST_IP=${host_ip} export ASR_SERVICE_HOST_IP=${host_ip} export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/docsum" + +export LLM_ENDPOINT_PORT=8008 +export DOCSUM_PORT=9000 +export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}" +export DocSum_COMPONENT_NAME="OPEADocSum_TGI" diff --git a/DocSum/docker_image_build/build.yaml b/DocSum/docker_image_build/build.yaml index 2fa2e0e0d4..095fd28c93 100644 --- a/DocSum/docker_image_build/build.yaml +++ b/DocSum/docker_image_build/build.yaml @@ -41,9 +41,9 @@ services: dockerfile: 
comps/asr/src/integrations/dependency/whisper/Dockerfile extends: docsum image: ${REGISTRY:-opea}/whisper:${TAG:-latest} - llm-docsum-tgi: + llm-docsum: build: context: GenAIComps - dockerfile: comps/llms/summarization/tgi/langchain/Dockerfile + dockerfile: comps/llms/src/doc-summarization/Dockerfile extends: docsum - image: ${REGISTRY:-opea}/llm-docsum-tgi:${TAG:-latest} + image: ${REGISTRY:-opea}/llm-docsum:${TAG:-latest} diff --git a/DocSum/docsum.py b/DocSum/docsum.py index d1689d92a0..1d71f24ad1 100644 --- a/DocSum/docsum.py +++ b/DocSum/docsum.py @@ -146,7 +146,7 @@ def add_remote_service(self): name="llm", host=LLM_SERVICE_HOST_IP, port=LLM_SERVICE_PORT, - endpoint="/v1/chat/docsum", + endpoint="/v1/docsum", use_remote_service=True, service_type=ServiceType.LLM, ) diff --git a/DocSum/kubernetes/gmc/docsum_gaudi.yaml b/DocSum/kubernetes/gmc/docsum_gaudi.yaml index 9b7a1ef30f..66c55ae92b 100644 --- a/DocSum/kubernetes/gmc/docsum_gaudi.yaml +++ b/DocSum/kubernetes/gmc/docsum_gaudi.yaml @@ -23,7 +23,7 @@ spec: internalService: serviceName: docsum-llm-uservice config: - endpoint: /v1/chat/docsum + endpoint: /v1/docsum PORT: "9009" TGI_LLM_ENDPOINT: tgi-gaudi-svc - name: TgiGaudi diff --git a/DocSum/kubernetes/gmc/docsum_xeon.yaml b/DocSum/kubernetes/gmc/docsum_xeon.yaml index 09a72e0f1f..26fe2980f2 100644 --- a/DocSum/kubernetes/gmc/docsum_xeon.yaml +++ b/DocSum/kubernetes/gmc/docsum_xeon.yaml @@ -23,7 +23,7 @@ spec: internalService: serviceName: docsum-llm-uservice config: - endpoint: /v1/chat/docsum + endpoint: /v1/docsum PORT: "9009" TGI_LLM_ENDPOINT: tgi-svc - name: Tgi diff --git a/DocSum/tests/test_compose_on_gaudi.sh b/DocSum/tests/test_compose_on_gaudi.sh index 6287ade8cf..10e4d0c9fa 100644 --- a/DocSum/tests/test_compose_on_gaudi.sh +++ b/DocSum/tests/test_compose_on_gaudi.sh @@ -17,13 +17,17 @@ export TAG=${IMAGE_TAG} export MAX_INPUT_TOKENS=2048 export MAX_TOTAL_TOKENS=4096 export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" -export TGI_LLM_ENDPOINT="http://${host_ip}:8008" export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} export MEGA_SERVICE_HOST_IP=${host_ip} export LLM_SERVICE_HOST_IP=${host_ip} export ASR_SERVICE_HOST_IP=${host_ip} export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/docsum" export no_proxy="${no_proxy},${host_ip}" +export LLM_ENDPOINT_PORT=8008 +export DOCSUM_PORT=9000 +export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}" +export DocSum_COMPONENT_NAME="OPEADocSum_TGI" +export LOGFLAG=True WORKPATH=$(dirname "$PWD") LOG_PATH="$WORKPATH/tests" @@ -37,10 +41,10 @@ function build_docker_images() { git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../ echo "Build all the images with --no-cache, check docker_image_build.log for details..." 
- service_list="docsum docsum-gradio-ui whisper llm-docsum-tgi" + service_list="docsum docsum-gradio-ui whisper llm-docsum" docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log - docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6 + docker pull ghcr.io/huggingface/tgi-gaudi:2.3.1 docker images && sleep 1s } @@ -49,15 +53,6 @@ function start_services() { docker compose -f compose.yaml up -d > ${LOG_PATH}/start_services_with_compose.log sleep 3m - - until [[ "$n" -ge 100 ]]; do - docker logs tgi-gaudi-server > ${LOG_PATH}/tgi_service_start.log - if grep -q Connected ${LOG_PATH}/tgi_service_start.log; then - break - fi - sleep 5s - n=$((n+1)) - done } get_base64_str() { @@ -156,13 +151,13 @@ function validate_microservices() { validate_services_json \ "${host_ip}:8008/generate" \ "generated_text" \ - "tgi-gaudi" \ + "tgi-gaudi-server" \ "tgi-gaudi-server" \ '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' # llm microservice validate_services_json \ - "${host_ip}:9000/v1/chat/docsum" \ + "${host_ip}:9000/v1/docsum" \ "data: " \ "llm-docsum-tgi" \ "llm-docsum-gaudi-server" \ diff --git a/DocSum/tests/test_compose_on_rocm.sh b/DocSum/tests/test_compose_on_rocm.sh index 5f3083d8fb..dc0baa26cb 100644 --- a/DocSum/tests/test_compose_on_rocm.sh +++ b/DocSum/tests/test_compose_on_rocm.sh @@ -22,7 +22,6 @@ export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" export HOST_IP=${ip_address} export host_ip=${ip_address} export DOCSUM_TGI_SERVICE_PORT="8008" -export DOCSUM_TGI_LLM_ENDPOINT="http://${host_ip}:8008" export DOCSUM_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} export DOCSUM_LLM_SERVER_PORT="9000" export DOCSUM_BACKEND_SERVER_PORT="8888" @@ -33,13 +32,15 @@ export ASR_SERVICE_HOST_IP=${host_ip} export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:8888/v1/docsum" export DOCSUM_CARD_ID="card1" export DOCSUM_RENDER_ID="renderD136" +export DocSum_COMPONENT_NAME="OPEADocSum_TGI" +export LOGFLAG=True function build_docker_images() { cd $WORKPATH/docker_image_build git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../ echo "Build all the images with --no-cache, check docker_image_build.log for details..." 
- service_list="docsum docsum-gradio-ui whisper llm-docsum-tgi" + service_list="docsum docsum-gradio-ui whisper llm-docsum" docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log docker pull ghcr.io/huggingface/text-generation-inference:1.4 @@ -52,15 +53,7 @@ function start_services() { # Start Docker Containers docker compose up -d > "${LOG_PATH}"/start_services_with_compose.log - - until [[ "$n" -ge 100 ]]; do - docker logs docsum-tgi-service > "${LOG_PATH}"/tgi_service_start.log - if grep -q Connected "${LOG_PATH}"/tgi_service_start.log; then - break - fi - sleep 5s - n=$((n+1)) - done + sleep 3m } function validate_services() { @@ -144,7 +137,7 @@ function validate_microservices() { # llm microservice validate_services \ - "${host_ip}:9000/v1/chat/docsum" \ + "${host_ip}:9000/v1/docsum" \ "data: " \ "docsum-llm-server" \ "docsum-llm-server" \ diff --git a/DocSum/tests/test_compose_on_xeon.sh b/DocSum/tests/test_compose_on_xeon.sh index 91d5ece1bd..d353fcefdb 100644 --- a/DocSum/tests/test_compose_on_xeon.sh +++ b/DocSum/tests/test_compose_on_xeon.sh @@ -17,13 +17,17 @@ export TAG=${IMAGE_TAG} export MAX_INPUT_TOKENS=2048 export MAX_TOTAL_TOKENS=4096 export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" -export TGI_LLM_ENDPOINT="http://${host_ip}:8008" export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} export MEGA_SERVICE_HOST_IP=${host_ip} export LLM_SERVICE_HOST_IP=${host_ip} export ASR_SERVICE_HOST_IP=${host_ip} export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/docsum" export no_proxy="${no_proxy},${host_ip}" +export LLM_ENDPOINT_PORT=8008 +export DOCSUM_PORT=9000 +export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}" +export DocSum_COMPONENT_NAME="OPEADocSum_TGI" +export LOGFLAG=True WORKPATH=$(dirname "$PWD") LOG_PATH="$WORKPATH/tests" @@ -36,7 +40,7 @@ function build_docker_images() { git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../ echo "Build all the images with --no-cache, check docker_image_build.log for details..." 
- service_list="docsum docsum-gradio-ui whisper llm-docsum-tgi" + service_list="docsum docsum-gradio-ui whisper llm-docsum" docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log docker pull ghcr.io/huggingface/text-generation-inference:1.4 @@ -48,15 +52,6 @@ function start_services() { docker compose -f compose.yaml up -d > ${LOG_PATH}/start_services_with_compose.log sleep 3m - - until [[ "$n" -ge 100 ]]; do - docker logs tgi-server > ${LOG_PATH}/tgi_service_start.log - if grep -q Connected ${LOG_PATH}/tgi_service_start.log; then - break - fi - sleep 5s - n=$((n+1)) - done } get_base64_str() { @@ -158,13 +153,13 @@ function validate_microservices() { validate_services_json \ "${host_ip}:8008/generate" \ "generated_text" \ - "tgi" \ + "tgi-server" \ "tgi-server" \ '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' # llm microservice validate_services_json \ - "${host_ip}:9000/v1/chat/docsum" \ + "${host_ip}:9000/v1/docsum" \ "data: " \ "llm-docsum-tgi" \ "llm-docsum-server" \ diff --git a/ProductivitySuite/docker_compose/intel/cpu/xeon/README.md b/ProductivitySuite/docker_compose/intel/cpu/xeon/README.md index da1e79688a..8faa43e3c2 100644 --- a/ProductivitySuite/docker_compose/intel/cpu/xeon/README.md +++ b/ProductivitySuite/docker_compose/intel/cpu/xeon/README.md @@ -293,7 +293,7 @@ Please refer to **[keycloak_setup_guide](keycloak_setup_guide.md)** for more det 10. DocSum LLM Microservice ```bash - curl http://${host_ip}:9003/v1/chat/docsum\ + curl http://${host_ip}:9003/v1/docsum\ -X POST \ -d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5"}' \ -H 'Content-Type: application/json' diff --git a/docker_images_list.md b/docker_images_list.md index f3fd5f6c7d..dd934ae827 100644 --- a/docker_images_list.md +++ b/docker_images_list.md @@ -68,7 +68,7 @@ Take ChatQnA for example. ChatQnA is a chatbot application service based on the | [opea/guardrails]() | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/guardrails/src/guardrails/Dockerfile) | The docker image exposed the OPEA guardrail microservice to provide content review for GenAI application use | | [opea/guardrails-toxicity-detection](https://hub.docker.com/r/opea/guardrails-toxicity-detection) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/guardrails/src/toxicity_detection/Dockerfile) | The docker image exposed the OPEA guardrail microservice to provide toxicity detection for GenAI application use | | [opea/guardrails-pii-detection](https://hub.docker.com/r/opea/guardrails-pii-detection) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/guardrails/src/pii_detection/Dockerfile) | The docker image exposed the OPEA guardrail microservice to provide PII detection for GenAI application use | -| [opea/llm-docsum-tgi](https://hub.docker.com/r/opea/llm-docsum-tgi) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/llms/summarization/tgi/langchain/Dockerfile) | This docker image is designed to build a document summarization microservice using the HuggingFace Text Generation Inference(TGI) framework. The microservice accepts document input and generates a document summary. 
| [opea/llm-docsum]() | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/llms/src/doc-summarization/Dockerfile) | This docker image is designed to build a document summarization microservice using the HuggingFace Text Generation Inference (TGI) framework. The microservice accepts document input and generates a document summary. |
| [opea/llm-faqgen]() | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/llms/src/faq-generation/Dockerfile) | This docker image is designed to build a frequently asked questions microservice using the HuggingFace Text Generation Inference (TGI) framework. The microservice accepts document input and generates a FAQ. |
| [opea/llm-textgen](https://hub.docker.com/r/opea/llm-textgen) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/llms/src/text-generation/Dockerfile) | The docker image exposed the OPEA LLM microservice upon TGI docker image for GenAI application use |
| [opea/llava-gaudi](https://hub.docker.com/r/opea/llava-hpu) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/lvms/src/integrations/dependency/llava/Dockerfile.intel_hpu) | The docker image exposed the OPEA microservice running LLaVA as a large visual model (LVM) service for GenAI application use on the Gaudi |
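
The central behavioral changes in this patch are the renamed LLM microservice route (`/v1/docsum` instead of `/v1/chat/docsum`), the `LLM_ENDPOINT`/`DocSum_COMPONENT_NAME` environment variables, and the TGI healthchecks that gate `llm-docsum` startup. The snippet below is a minimal post-deployment check, not part of the patch itself: it assumes the compose stack is already running and that `set_env.sh` has been sourced so `host_ip`, `LLM_ENDPOINT_PORT` (8008), and `DOCSUM_PORT` (9000) are set in the current shell. The requests mirror the healthcheck, the `/generate` probe in the test scripts, and the README curl examples.

```bash
#!/usr/bin/env bash
# Sketch of a post-deployment check for the refactored DocSum endpoints (not part of the patch).
# Assumes the compose stack is up and set_env.sh has been sourced (host_ip, LLM_ENDPOINT_PORT, DOCSUM_PORT).
set -euo pipefail

# 1. TGI serving endpoint health, mirroring the healthcheck added to the compose files.
curl -sf "http://${host_ip}:${LLM_ENDPOINT_PORT:-8008}/health" && echo "TGI /health OK"

# 2. Raw generation through TGI, using the same payload as the tests' validate_services_json call.
curl -s "http://${host_ip}:${LLM_ENDPOINT_PORT:-8008}/generate" \
  -X POST \
  -H 'Content-Type: application/json' \
  -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}'

# 3. LLM microservice on its renamed route (/v1/docsum replaces /v1/chat/docsum).
curl -s "http://${host_ip}:${DOCSUM_PORT:-9000}/v1/docsum" \
  -X POST \
  -H 'Content-Type: application/json' \
  -d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models."}'
```

If the healthcheck is green, the last call should return a streamed summary whose lines begin with `data: `, which is the same marker the updated test scripts grep for.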