diff --git a/.github/workflows/_run-docker-compose.yml b/.github/workflows/_run-docker-compose.yml
index daf87add8..a610df424 100644
--- a/.github/workflows/_run-docker-compose.yml
+++ b/.github/workflows/_run-docker-compose.yml
@@ -134,6 +134,7 @@ jobs:
       SERVING_TOKEN: ${{ secrets.SERVING_TOKEN }}
       IMAGE_REPO: ${{ inputs.registry }}
       IMAGE_TAG: ${{ inputs.tag }}
+      opea_branch: "llm_openai_api"
       example: ${{ inputs.example }}
       hardware: ${{ inputs.hardware }}
       test_case: ${{ matrix.test_case }}
diff --git a/.github/workflows/pr-docker-compose-e2e.yml b/.github/workflows/pr-docker-compose-e2e.yml
index fe052f90a..446afa925 100644
--- a/.github/workflows/pr-docker-compose-e2e.yml
+++ b/.github/workflows/pr-docker-compose-e2e.yml
@@ -4,7 +4,7 @@
 name: E2E test with docker compose
 
 on:
-  pull_request_target:
+  pull_request:
     branches: ["main", "*rc"]
     types: [opened, reopened, ready_for_review, synchronize] # added `ready_for_review` since draft is skipped
     paths:
diff --git a/.github/workflows/pr-dockerfile-path-and-build-yaml-scan.yml b/.github/workflows/pr-dockerfile-path-and-build-yaml-scan.yml
index dc62caaa3..89984e656 100644
--- a/.github/workflows/pr-dockerfile-path-and-build-yaml-scan.yml
+++ b/.github/workflows/pr-dockerfile-path-and-build-yaml-scan.yml
@@ -22,6 +22,7 @@ jobs:
         run: |
           cd ..
           git clone https://github.com/opea-project/GenAIComps.git
+          cd GenAIComps && git checkout llm_openai_api
 
       - name: Check for Missing Dockerfile Paths in GenAIComps
         run: |
diff --git a/DocSum/docker_compose/intel/hpu/gaudi/compose.yaml b/DocSum/docker_compose/intel/hpu/gaudi/compose.yaml
index c812b6471..8f3606955 100644
--- a/DocSum/docker_compose/intel/hpu/gaudi/compose.yaml
+++ b/DocSum/docker_compose/intel/hpu/gaudi/compose.yaml
@@ -8,7 +8,7 @@ services:
     ports:
       - ${LLM_ENDPOINT_PORT:-8008}:80
     volumes:
-      - "./data:/data"
+      - "${DATA_PATH:-data}:/data"
     environment:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
diff --git a/DocSum/docsum.py b/DocSum/docsum.py
index 1d71f24ad..34e58c1df 100644
--- a/DocSum/docsum.py
+++ b/DocSum/docsum.py
@@ -15,9 +15,9 @@
     ChatCompletionResponse,
     ChatCompletionResponseChoice,
     ChatMessage,
+    DocSumChatCompletionRequest,
     UsageInfo,
 )
-from comps.cores.proto.docarray import DocSumLLMParams
 from fastapi import File, Request, UploadFile
 from fastapi.responses import StreamingResponse
@@ -34,14 +34,20 @@ def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **k
     if self.services[cur_node].service_type == ServiceType.LLM:
         for key_to_replace in ["text", "asr_result"]:
             if key_to_replace in inputs:
-                inputs["query"] = inputs[key_to_replace]
+                inputs["messages"] = inputs[key_to_replace]
                 del inputs[key_to_replace]
         docsum_parameters = kwargs.get("docsum_parameters", None)
         if docsum_parameters:
             docsum_parameters = docsum_parameters.model_dump()
-            del docsum_parameters["query"]
+            del docsum_parameters["messages"]
             inputs.update(docsum_parameters)
+        if "id" in inputs:
+            del inputs["id"]
+        if "max_new_tokens" in inputs:
+            del inputs["max_new_tokens"]
+        if "input" in inputs:
+            del inputs["input"]
     elif self.services[cur_node].service_type == ServiceType.ASR:
         if "video" in inputs:
             audio_base64 = video2audio(inputs["video"])
@@ -217,13 +223,13 @@ async def handle_request(self, request: Request, files: List[UploadFile] = File(
                 initial_inputs_data = {}
                 initial_inputs_data[data_type] = prompt
             else:
-                initial_inputs_data = {"query": prompt}
+                initial_inputs_data = {"messages": prompt}
         else:
             raise ValueError(f"Unknown request type: {request.headers.get('content-type')}")
 
-        docsum_parameters = DocSumLLMParams(
-            query="",
+        docsum_parameters = DocSumChatCompletionRequest(
+            messages="",
             max_tokens=chat_request.max_tokens if chat_request.max_tokens else 1024,
             top_k=chat_request.top_k if chat_request.top_k else 10,
             top_p=chat_request.top_p if chat_request.top_p else 0.95,
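The docsum.py hunks above move the gateway off the DocArray-based `DocSumLLMParams(query=...)` onto the OpenAI-style `DocSumChatCompletionRequest(messages=...)`, and prune request fields the refactored LLM endpoint no longer accepts. A minimal standalone sketch of what `align_inputs` now does for the LLM node (illustrative only, not the actual class method):

```python
from typing import Any, Dict, Optional

def align_llm_inputs(inputs: Dict[str, Any], docsum_parameters: Optional[Dict[str, Any]]) -> Dict[str, Any]:
    # Upstream outputs arrive under "text" (documents) or "asr_result" (audio/video);
    # the OpenAI-compatible LLM endpoint expects the prompt under "messages".
    for key_to_replace in ["text", "asr_result"]:
        if key_to_replace in inputs:
            inputs["messages"] = inputs.pop(key_to_replace)
    if docsum_parameters:
        params = dict(docsum_parameters)
        params.pop("messages", None)  # the prompt itself already lives in `inputs`
        inputs.update(params)
    # Drop fields the refactored endpoint rejects.
    for stale in ("id", "max_new_tokens", "input"):
        inputs.pop(stale, None)
    return inputs

# align_llm_inputs({"text": "long document ..."}, {"messages": "", "max_tokens": 1024})
# -> {"messages": "long document ...", "max_tokens": 1024}
```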
diff --git a/DocSum/tests/test_compose_on_gaudi.sh b/DocSum/tests/test_compose_on_gaudi.sh
index db0977b04..4683c4acb 100644
--- a/DocSum/tests/test_compose_on_gaudi.sh
+++ b/DocSum/tests/test_compose_on_gaudi.sh
@@ -28,6 +28,7 @@ export DOCSUM_PORT=9000
 export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}"
 export DocSum_COMPONENT_NAME="OpeaDocSumTgi"
 export LOGFLAG=True
+export DATA_PATH="/data/cache"
 
 WORKPATH=$(dirname "$PWD")
 LOG_PATH="$WORKPATH/tests"
@@ -51,6 +52,7 @@ function build_docker_images() {
 
 function start_services() {
     cd $WORKPATH/docker_compose/intel/hpu/gaudi
+    sed -i "s|container_name: docsum-gaudi-backend-server|container_name: docsum-gaudi-backend-server\n    volumes:\n      - \"${WORKPATH}\/docker_image_build\/GenAIComps:\/home\/user\/GenAIComps\"|g" compose.yaml
     docker compose -f compose.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
     sleep 3m
 }
@@ -158,10 +160,10 @@ function validate_microservices() {
     # llm microservice
     validate_services_json \
         "${host_ip}:9000/v1/docsum" \
-        "data: " \
+        "text" \
        "llm-docsum-tgi" \
        "llm-docsum-gaudi-server" \
-        '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}'
+        '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}'
 
     # whisper microservice
     ulimit -s 65536
diff --git a/DocSum/tests/test_compose_on_rocm.sh b/DocSum/tests/test_compose_on_rocm.sh
index 54935f2b7..e4d579f16 100644
--- a/DocSum/tests/test_compose_on_rocm.sh
+++ b/DocSum/tests/test_compose_on_rocm.sh
@@ -50,7 +50,7 @@ function build_docker_images() {
 function start_services() {
     cd "$WORKPATH"/docker_compose/amd/gpu/rocm
     sed -i "s/backend_address/$ip_address/g" "$WORKPATH"/ui/svelte/.env
-
+    sed -i "s|container_name: docsum-backend-server|container_name: docsum-backend-server\n    volumes:\n      - \"${WORKPATH}\/docker_image_build\/GenAIComps:\/home\/user\/GenAIComps\"|g" compose.yaml
     # Start Docker Containers
     docker compose up -d > "${LOG_PATH}"/start_services_with_compose.log
     sleep 3m
@@ -138,10 +138,10 @@ function validate_microservices() {
     # llm microservice
     validate_services \
         "${host_ip}:9000/v1/docsum" \
-        "data: " \
+        "text" \
        "docsum-llm-server" \
        "docsum-llm-server" \
-        '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}'
+        '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}'
 }
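The `sed` lines added to `start_services` above splice a bind mount into the backend service so the container runs against the locally checked-out GenAIComps sources instead of whatever was baked into the image. A rough Python equivalent of that substitution, assuming the compose indentation shown (the helper name is hypothetical):

```python
def mount_local_genaicomps(compose_text: str, container: str, workpath: str) -> str:
    """Splice a volumes block under the service identified by `container`,
    mounting the locally checked-out GenAIComps into the container."""
    replacement = (
        f"container_name: {container}\n"
        f"    volumes:\n"
        f'      - "{workpath}/docker_image_build/GenAIComps:/home/user/GenAIComps"'
    )
    return compose_text.replace(f"container_name: {container}", replacement)

# e.g. mount_local_genaicomps(text, "docsum-gaudi-backend-server", "/path/to/DocSum")
```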
diff --git a/DocSum/tests/test_compose_on_xeon.sh b/DocSum/tests/test_compose_on_xeon.sh
index 13036fc0d..aef3b5db1 100644
--- a/DocSum/tests/test_compose_on_xeon.sh
+++ b/DocSum/tests/test_compose_on_xeon.sh
@@ -49,7 +49,7 @@ function build_docker_images() {
 
 function start_services() {
     cd $WORKPATH/docker_compose/intel/cpu/xeon/
-
+    sed -i "s|container_name: docsum-xeon-backend-server|container_name: docsum-xeon-backend-server\n    volumes:\n      - \"${WORKPATH}\/docker_image_build\/GenAIComps:\/home\/user\/GenAIComps\"|g" compose.yaml
     docker compose -f compose.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
     sleep 3m
 }
@@ -160,10 +160,10 @@ function validate_microservices() {
     # llm microservice
     validate_services_json \
         "${host_ip}:9000/v1/docsum" \
-        "data: " \
+        "text" \
        "llm-docsum-tgi" \
        "llm-docsum-server" \
-        '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}'
+        '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}'
 
     # whisper microservice
     ulimit -s 65536
diff --git a/FaqGen/docker_compose/intel/hpu/gaudi/compose.yaml b/FaqGen/docker_compose/intel/hpu/gaudi/compose.yaml
index 20c2aced1..4d2c767df 100644
--- a/FaqGen/docker_compose/intel/hpu/gaudi/compose.yaml
+++ b/FaqGen/docker_compose/intel/hpu/gaudi/compose.yaml
@@ -8,7 +8,7 @@ services:
     ports:
       - ${LLM_ENDPOINT_PORT:-8008}:80
     volumes:
-      - "./data:/data"
+      - "${DATA_PATH:-data}:/data"
     environment:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
diff --git a/FaqGen/faqgen.py b/FaqGen/faqgen.py
index f4b0a5803..01d1e4acb 100644
--- a/FaqGen/faqgen.py
+++ b/FaqGen/faqgen.py
@@ -113,7 +113,7 @@ async def handle_request(self, request: Request, files: List[UploadFile] = File(
             model=chat_request.model if chat_request.model else None,
         )
         result_dict, runtime_graph = await self.megaservice.schedule(
-            initial_inputs={"query": prompt}, llm_parameters=parameters
+            initial_inputs={"messages": prompt}, llm_parameters=parameters
         )
         for node, response in result_dict.items():
             # Here it suppose the last microservice in the megaservice is LLM.
diff --git a/FaqGen/tests/test_compose_on_gaudi.sh b/FaqGen/tests/test_compose_on_gaudi.sh
index 95ed2950a..48241a5d7 100644
--- a/FaqGen/tests/test_compose_on_gaudi.sh
+++ b/FaqGen/tests/test_compose_on_gaudi.sh
@@ -13,6 +13,7 @@ export TAG=${IMAGE_TAG}
 WORKPATH=$(dirname "$PWD")
 LOG_PATH="$WORKPATH/tests"
 ip_address=$(hostname -I | awk '{print $1}')
+export DATA_PATH="/data/cache"
 
 function build_docker_images() {
     cd $WORKPATH/docker_image_build
@@ -91,10 +92,10 @@ function validate_microservices() {
     # llm microservice
     validate_services \
         "${ip_address}:9000/v1/faqgen" \
-        "data: " \
+        "text" \
        "llm" \
        "llm-faqgen-server" \
-        '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}'
+        '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}'
 }
 
 function validate_megaservice() {
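Both compose files now read the host side of the model-cache mount from `DATA_PATH`, falling back to `data` when the variable is unset or empty (the tests export `DATA_PATH="/data/cache"` to reuse a shared cache). A one-line sketch of the `${DATA_PATH:-data}` expansion rule compose applies:

```python
import os

# ${DATA_PATH:-data}: fall back to "data" when DATA_PATH is unset OR empty,
# which is why `or` is used here rather than a plain .get() default.
host_path = os.environ.get("DATA_PATH") or "data"
print(f"model cache mounted from: {host_path}")
```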
diff --git a/FaqGen/tests/test_compose_on_rocm.sh b/FaqGen/tests/test_compose_on_rocm.sh
index 726c83461..5346f4126 100644
--- a/FaqGen/tests/test_compose_on_rocm.sh
+++ b/FaqGen/tests/test_compose_on_rocm.sh
@@ -95,10 +95,10 @@ function validate_microservices() {
     # llm microservice
     validate_services \
         "${ip_address}:9000/v1/faqgen" \
-        "data: " \
+        "text" \
        "llm" \
        "faqgen-llm-server" \
-        '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}'
+        '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}'
 }
 
 function validate_megaservice() {
diff --git a/FaqGen/tests/test_compose_on_xeon.sh b/FaqGen/tests/test_compose_on_xeon.sh
index e4409358d..eb7e49b24 100755
--- a/FaqGen/tests/test_compose_on_xeon.sh
+++ b/FaqGen/tests/test_compose_on_xeon.sh
@@ -91,10 +91,10 @@ function validate_microservices() {
     # llm microservice
     validate_services \
         "${ip_address}:9000/v1/faqgen" \
-        "data: " \
+        "text" \
        "llm" \
        "llm-faqgen-server" \
-        '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}'
+        '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}'
 }
 
 function validate_megaservice() {
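Across all of the updated test scripts the LLM microservice check now POSTs `{"messages": ...}` instead of `{"query": ...}` and greps the reply for `text` rather than the streaming `data: ` prefix. A hypothetical smoke test mirroring what the updated `validate_services` calls exercise (host and port are placeholders for the values the scripts derive from `${ip_address}`):

```python
import requests

document = (
    "Text Embeddings Inference (TEI) is a toolkit for deploying and serving "
    "open source text embeddings and sequence classification models."
)
resp = requests.post(
    "http://localhost:9000/v1/faqgen",  # or /v1/docsum for the DocSum scripts
    json={"messages": document},        # pre-refactor scripts sent {"query": ...}
    timeout=120,
)
# The endpoint now returns a JSON body containing "text" instead of an SSE stream.
assert resp.status_code == 200 and "text" in resp.text
```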