Align OpenAI API for FaqGen, DocSum
Align all inputs to the OpenAI API format for FaqGen and DocSum; related to GenAIComps PR opea-project/GenAIComps#1161.

Signed-off-by: Xinyao Wang <[email protected]>
XinyaoWa committed Jan 17, 2025
1 parent 71e3c57 commit 1df6f92
Showing 13 changed files with 35 additions and 24 deletions.
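
The core of this change: LLM-facing services now accept the OpenAI-style "messages" field in place of the project-specific "query" field. A minimal before/after sketch against the DocSum LLM microservice (endpoint and payload taken from the test scripts below; ${host_ip} is your deployment host):

    # Before this commit: custom "query" field
    curl -sS -X POST "http://${host_ip}:9000/v1/docsum" \
      -H "Content-Type: application/json" \
      -d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models."}'

    # After this commit: OpenAI-aligned "messages" field
    curl -sS -X POST "http://${host_ip}:9000/v1/docsum" \
      -H "Content-Type: application/json" \
      -d '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models."}'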
1 change: 1 addition & 0 deletions .github/workflows/_run-docker-compose.yml
@@ -134,6 +134,7 @@ jobs:
SERVING_TOKEN: ${{ secrets.SERVING_TOKEN }}
IMAGE_REPO: ${{ inputs.registry }}
IMAGE_TAG: ${{ inputs.tag }}
+ opea_branch: "llm_openai_api"
example: ${{ inputs.example }}
hardware: ${{ inputs.hardware }}
test_case: ${{ matrix.test_case }}
2 changes: 1 addition & 1 deletion .github/workflows/pr-docker-compose-e2e.yml
@@ -4,7 +4,7 @@
name: E2E test with docker compose

on:
- pull_request_target:
+ pull_request:
branches: ["main", "*rc"]
types: [opened, reopened, ready_for_review, synchronize] # added `ready_for_review` since draft is skipped
paths:
@@ -22,6 +22,7 @@ jobs:
run: |
cd ..
git clone https://github.com/opea-project/GenAIComps.git
+ cd GenAIComps && git checkout llm_openai_api
- name: Check for Missing Dockerfile Paths in GenAIComps
run: |
2 changes: 1 addition & 1 deletion DocSum/docker_compose/intel/hpu/gaudi/compose.yaml
@@ -8,7 +8,7 @@ services:
ports:
- ${LLM_ENDPOINT_PORT:-8008}:80
volumes:
- "./data:/data"
- "${DATA_PATH:-data}:/data"
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
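
This makes the model-cache mount configurable instead of hard-coding ./data; the same change is applied to FaqGen's compose file further down. A usage sketch (/data/cache mirrors what the Gaudi test script exports; any host directory works, and the mount falls back to "data" when DATA_PATH is unset):

    # Reuse a shared host cache for model downloads
    export DATA_PATH="/data/cache"
    docker compose -f compose.yaml up -d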
18 changes: 12 additions & 6 deletions DocSum/docsum.py
@@ -15,9 +15,9 @@
ChatCompletionResponse,
ChatCompletionResponseChoice,
ChatMessage,
+ DocSumChatCompletionRequest,
UsageInfo,
)
- from comps.cores.proto.docarray import DocSumLLMParams
from fastapi import File, Request, UploadFile
from fastapi.responses import StreamingResponse

@@ -34,14 +34,20 @@ def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **k
if self.services[cur_node].service_type == ServiceType.LLM:
for key_to_replace in ["text", "asr_result"]:
if key_to_replace in inputs:
inputs["query"] = inputs[key_to_replace]
inputs["messages"] = inputs[key_to_replace]
del inputs[key_to_replace]

docsum_parameters = kwargs.get("docsum_parameters", None)
if docsum_parameters:
docsum_parameters = docsum_parameters.model_dump()
del docsum_parameters["query"]
del docsum_parameters["messages"]
inputs.update(docsum_parameters)
if "id" in inputs:
del inputs["id"]
if "max_new_tokens" in inputs:
del inputs["max_new_tokens"]
if "input" in inputs:
del inputs["input"]
elif self.services[cur_node].service_type == ServiceType.ASR:
if "video" in inputs:
audio_base64 = video2audio(inputs["video"])
@@ -217,13 +223,13 @@ async def handle_request(self, request: Request, files: List[UploadFile] = File(
initial_inputs_data = {}
initial_inputs_data[data_type] = prompt
else:
initial_inputs_data = {"query": prompt}
initial_inputs_data = {"messages": prompt}

else:
raise ValueError(f"Unknown request type: {request.headers.get('content-type')}")

- docsum_parameters = DocSumLLMParams(
-     query="",
+ docsum_parameters = DocSumChatCompletionRequest(
+     messages="",
max_tokens=chat_request.max_tokens if chat_request.max_tokens else 1024,
top_k=chat_request.top_k if chat_request.top_k else 10,
top_p=chat_request.top_p if chat_request.top_p else 0.95,
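
Net effect in docsum.py: the gateway now builds a DocSumChatCompletionRequest keyed on "messages", and align_inputs strips fields the OpenAI schema does not carry ("id", "max_new_tokens", "input") before forwarding to the LLM service. A hedged request sketch — "max_tokens" replacing the old "max_new_tokens" follows from the request construction above; the full accepted parameter set lives in GenAIComps#1161:

    curl -sS -X POST "http://${host_ip}:9000/v1/docsum" \
      -H "Content-Type: application/json" \
      -d '{"messages":"<document text to summarize>", "max_tokens":1024}'

Note that the tests below now match a JSON "text" field in the response rather than an SSE "data: " stream.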
6 changes: 4 additions & 2 deletions DocSum/tests/test_compose_on_gaudi.sh
@@ -28,6 +28,7 @@ export DOCSUM_PORT=9000
export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}"
export DocSum_COMPONENT_NAME="OpeaDocSumTgi"
export LOGFLAG=True
+ export DATA_PATH="/data/cache"

WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"
@@ -51,6 +52,7 @@ function build_docker_images() {
function start_services() {
cd $WORKPATH/docker_compose/intel/hpu/gaudi

sed -i "s|container_name: docsum-gaudi-backend-server|container_name: docsum-gaudi-backend-server\n volumes:\n - \"${WORKPATH}\/docker_image_build\/GenAIComps:\/home\/user\/GenAIComps\"|g" compose.yaml
docker compose -f compose.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
sleep 3m
}
@@ -158,10 +160,10 @@ function validate_microservices() {
# llm microservice
validate_services_json \
"${host_ip}:9000/v1/docsum" \
"data: " \
"text" \
"llm-docsum-tgi" \
"llm-docsum-gaudi-server" \
'{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}'
'{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}'

# whisper microservice
ulimit -s 65536
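
The injected sed line bind-mounts the freshly cloned GenAIComps checkout into the backend container so tests exercise the llm_openai_api branch rather than the copy baked into the image. An equivalent sketch using a compose override file (compose.override.yaml is an illustrative name, and this assumes the compose service key matches the container name):

    cat > compose.override.yaml <<EOF
    services:
      docsum-gaudi-backend-server:
        volumes:
          - ${WORKPATH}/docker_image_build/GenAIComps:/home/user/GenAIComps
    EOF
    docker compose -f compose.yaml -f compose.override.yaml up -d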
6 changes: 3 additions & 3 deletions DocSum/tests/test_compose_on_rocm.sh
@@ -50,7 +50,7 @@ function build_docker_images() {
function start_services() {
cd "$WORKPATH"/docker_compose/amd/gpu/rocm
sed -i "s/backend_address/$ip_address/g" "$WORKPATH"/ui/svelte/.env
-
+ sed -i "s|container_name: docsum-backend-server|container_name: docsum-backend-server\n volumes:\n - \"${WORKPATH}\/docker_image_build\/GenAIComps:\/home\/user\/GenAIComps\"|g" compose.yaml
# Start Docker Containers
docker compose up -d > "${LOG_PATH}"/start_services_with_compose.log
sleep 3m
@@ -138,10 +138,10 @@ function validate_microservices() {
# llm microservice
validate_services \
"${host_ip}:9000/v1/docsum" \
"data: " \
"text" \
"docsum-llm-server" \
"docsum-llm-server" \
'{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}'
'{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}'

}

6 changes: 3 additions & 3 deletions DocSum/tests/test_compose_on_xeon.sh
@@ -49,7 +49,7 @@ function build_docker_images() {

function start_services() {
cd $WORKPATH/docker_compose/intel/cpu/xeon/
-
+ sed -i "s|container_name: docsum-xeon-backend-server|container_name: docsum-xeon-backend-server\n volumes:\n - \"${WORKPATH}\/docker_image_build\/GenAIComps:\/home\/user\/GenAIComps\"|g" compose.yaml
docker compose -f compose.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
sleep 3m
}
@@ -160,10 +160,10 @@ function validate_microservices() {
# llm microservice
validate_services_json \
"${host_ip}:9000/v1/docsum" \
"data: " \
"text" \
"llm-docsum-tgi" \
"llm-docsum-server" \
'{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}'
'{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}'

# whisper microservice
ulimit -s 65536
2 changes: 1 addition & 1 deletion FaqGen/docker_compose/intel/hpu/gaudi/compose.yaml
@@ -8,7 +8,7 @@ services:
ports:
- ${LLM_ENDPOINT_PORT:-8008}:80
volumes:
- "./data:/data"
- "${DATA_PATH:-data}:/data"
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
2 changes: 1 addition & 1 deletion FaqGen/faqgen.py
@@ -113,7 +113,7 @@ async def handle_request(self, request: Request, files: List[UploadFile] = File(
model=chat_request.model if chat_request.model else None,
)
result_dict, runtime_graph = await self.megaservice.schedule(
initial_inputs={"query": prompt}, llm_parameters=parameters
initial_inputs={"messages": prompt}, llm_parameters=parameters
)
for node, response in result_dict.items():
# Here it suppose the last microservice in the megaservice is LLM.
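
FaqGen's gateway follows the same alignment: the megaservice graph is now seeded with {"messages": prompt}. The FaqGen LLM microservice accepts the matching payload (endpoint and body from the FaqGen tests below):

    curl -sS -X POST "http://${ip_address}:9000/v1/faqgen" \
      -H "Content-Type: application/json" \
      -d '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models."}'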
5 changes: 3 additions & 2 deletions FaqGen/tests/test_compose_on_gaudi.sh
@@ -13,6 +13,7 @@ export TAG=${IMAGE_TAG}
WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"
ip_address=$(hostname -I | awk '{print $1}')
+ export DATA_PATH="/data/cache"

function build_docker_images() {
cd $WORKPATH/docker_image_build
@@ -91,10 +92,10 @@ function validate_microservices() {
# llm microservice
validate_services \
"${ip_address}:9000/v1/faqgen" \
"data: " \
"text" \
"llm" \
"llm-faqgen-server" \
'{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}'
'{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}'
}

function validate_megaservice() {
4 changes: 2 additions & 2 deletions FaqGen/tests/test_compose_on_rocm.sh
@@ -95,10 +95,10 @@ function validate_microservices() {
# llm microservice
validate_services \
"${ip_address}:9000/v1/faqgen" \
"data: " \
"text" \
"llm" \
"faqgen-llm-server" \
'{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}'
'{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}'
}

function validate_megaservice() {
4 changes: 2 additions & 2 deletions FaqGen/tests/test_compose_on_xeon.sh
@@ -91,10 +91,10 @@ function validate_microservices() {
# llm microservice
validate_services \
"${ip_address}:9000/v1/faqgen" \
"data: " \
"text" \
"llm" \
"llm-faqgen-server" \
'{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}'
'{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}'
}

function validate_megaservice() {
