diff --git a/SearchQnA/docker_compose/amd/gpu/rocm/README.md b/SearchQnA/docker_compose/amd/gpu/rocm/README.md
new file mode 100644
index 000000000..f527f81db
--- /dev/null
+++ b/SearchQnA/docker_compose/amd/gpu/rocm/README.md
@@ -0,0 +1,179 @@
+# Build and deploy SearchQnA Application on AMD GPU (ROCm)
+
+## Build images
+
+### Build Embedding Image
+
+```bash
+git clone https://github.com/opea-project/GenAIComps.git
+cd GenAIComps
+docker build --no-cache -t opea/embedding:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/src/Dockerfile .
+```
+
+### Build Web Retriever Image
+
+```bash
+docker build --no-cache -t opea/web-retriever:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/web_retrievers/src/Dockerfile .
+```
+
+### Build Rerank Image
+
+```bash
+docker build --no-cache -t opea/reranking:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/rerankings/src/Dockerfile .
+```
+
+### Build the LLM Docker Image
+
+```bash
+docker build --no-cache -t opea/llm-textgen:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/src/text-generation/Dockerfile .
+```
+
+### Build the MegaService Docker Image
+
+```bash
+git clone https://github.com/opea-project/GenAIExamples.git
+cd GenAIExamples/SearchQnA
+docker build --no-cache -t opea/searchqna:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
+```
+
+### Build the UI Docker Image
+
+```bash
+cd GenAIExamples/SearchQnA/ui
+docker build --no-cache -t opea/searchqna-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile .
+```
+
+## Deploy SearchQnA Application
+
+### Features of Docker compose for AMD GPUs
+
+1. The TGI service container is given access to the host GPU devices through the following settings:
+
+```yaml
+shm_size: 1g
+devices:
+  - /dev/kfd:/dev/kfd
+  - /dev/dri/:/dev/dri/
+cap_add:
+  - SYS_PTRACE
+group_add:
+  - video
+security_opt:
+  - seccomp:unconfined
+```
+
+With this configuration, all GPUs are passed through to the container. To pass through only a specific GPU, use its device names cardN and renderDN.
+
+For example:
+
+```yaml
+shm_size: 1g
+devices:
+  - /dev/kfd:/dev/kfd
+  - /dev/dri/card0:/dev/dri/card0
+  - /dev/dri/renderD128:/dev/dri/renderD128
+cap_add:
+  - SYS_PTRACE
+group_add:
+  - video
+security_opt:
+  - seccomp:unconfined
+```
+
+To find out which cardN and renderDN device IDs belong to the same GPU, use the AMD GPU driver utilities.
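+
+A minimal way to check this, assuming the ROCm stack is installed on the host (the `rocm-smi` tool ships with it):
+
+```bash
+# The by-path symlinks group together the card/render nodes of one physical GPU
+ls -l /dev/dri/by-path
+# Show the PCI bus address of each GPU visible to the ROCm driver
+rocm-smi --showbus
+```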
+
+### Go to the directory with the Docker compose file
+
+```bash
+cd GenAIExamples/SearchQnA/docker_compose/amd/gpu/rocm
+```
+
+### Set environments
+
+Set the required values in the file `GenAIExamples/SearchQnA/docker_compose/amd/gpu/rocm/set_env.sh`. The purpose of each variable is described in the comment next to its assignment.
+
+```bash
+chmod +x set_env.sh
+. set_env.sh
+```
+
+### Run services
+
+```bash
+docker compose up -d
+```
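+
+The TGI service needs time to download and load the model before it can answer requests. One way to wait for readiness (the same check used by the CI test for this example) is to poll its log, as sketched below:
+
+```bash
+# Poll the TGI service log until it reports a connected shard
+until docker logs search-tgi-service 2>&1 | grep -q Connected; do
+  sleep 5s
+done
+```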
+
+## Validate the MicroServices and MegaService
+
+### Validate TEI service
+
+```bash
+curl http://${SEARCH_HOST_IP}:3001/embed \
+    -X POST \
+    -d '{"inputs":"What is Deep Learning?"}' \
+    -H 'Content-Type: application/json'
+```
+
+### Validate Embedding service
+
+```bash
+curl http://${SEARCH_HOST_IP}:3002/v1/embeddings \
+    -X POST \
+    -d '{"text":"hello"}' \
+    -H 'Content-Type: application/json'
+```
+
+### Validate Web Retriever service
+
+```bash
+export your_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)")
+curl http://${SEARCH_HOST_IP}:3003/v1/web_retrieval \
+    -X POST \
+    -d "{\"text\":\"What is the 2024 holiday schedule?\",\"embedding\":${your_embedding}}" \
+    -H 'Content-Type: application/json'
+```
+
+### Validate TEI Reranking service
+
+```bash
+curl http://${SEARCH_HOST_IP}:3004/rerank \
+    -X POST \
+    -d '{"query":"What is Deep Learning?", "texts": ["Deep Learning is not...", "Deep learning is..."]}' \
+    -H 'Content-Type: application/json'
+```
+
+### Validate Reranking service
+
+```bash
+curl http://${SEARCH_HOST_IP}:3005/v1/reranking \
+    -X POST \
+    -d '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}' \
+    -H 'Content-Type: application/json'
+```
+
+### Validate TGI service
+
+```bash
+curl http://${SEARCH_HOST_IP}:3006/generate \
+    -X POST \
+    -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \
+    -H 'Content-Type: application/json'
+```
+
+### Validate LLM service
+
+```bash
+curl http://${SEARCH_HOST_IP}:3007/v1/chat/completions \
+    -X POST \
+    -d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
+    -H 'Content-Type: application/json'
+```
+
+### Validate MegaService
+
+```bash
+curl http://${SEARCH_HOST_IP}:3008/v1/searchqna -H "Content-Type: application/json" -d '{
+     "messages": "What is the latest news? Give me also the source link.",
+     "stream": "True"
+     }'
+```
diff --git a/SearchQnA/docker_compose/amd/gpu/rocm/compose.yaml b/SearchQnA/docker_compose/amd/gpu/rocm/compose.yaml
new file mode 100644
index 000000000..f53128127
--- /dev/null
+++ b/SearchQnA/docker_compose/amd/gpu/rocm/compose.yaml
@@ -0,0 +1,173 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+services:
+  search-tei-embedding-service:
+    image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
+    container_name: search-tei-embedding-server
+    ports:
+      - "3001:80"
+    volumes:
+      - "./data:/data"
+    shm_size: 1g
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      HF_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN}
+      HUGGING_FACE_HUB_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN}
+      HUGGINGFACEHUB_API_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN}
+    command: --model-id ${SEARCH_EMBEDDING_MODEL_ID} --auto-truncate
+  search-embedding:
+    image: ${REGISTRY:-opea}/embedding:${TAG:-latest}
+    container_name: search-embedding-server
+    depends_on:
+      - search-tei-embedding-service
+    ports:
+      - "3002:6000"
+    ipc: host
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      TEI_EMBEDDING_HOST_IP: ${SEARCH_HOST_IP}
+      TEI_EMBEDDING_ENDPOINT: ${SEARCH_TEI_EMBEDDING_ENDPOINT}
+      HF_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN}
+    restart: unless-stopped
+  search-web-retriever:
+    image: ${REGISTRY:-opea}/web-retriever:${TAG:-latest}
+    container_name: search-web-retriever-server
+    ports:
+      - "3003:7077"
+    ipc: host
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      TEI_EMBEDDING_ENDPOINT: ${SEARCH_TEI_EMBEDDING_ENDPOINT}
+      GOOGLE_API_KEY: ${SEARCH_GOOGLE_API_KEY}
+      GOOGLE_CSE_ID: ${SEARCH_GOOGLE_CSE_ID}
+    restart: unless-stopped
+  search-tei-reranking-service:
+    image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
+    container_name: search-tei-reranking-server
+    ports:
+      - "3004:80"
+    volumes:
+      - "./data:/data"
+    shm_size: 1g
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+    command: --model-id ${SEARCH_RERANK_MODEL_ID} --auto-truncate
+  search-reranking:
+    image: ${REGISTRY:-opea}/reranking:${TAG:-latest}
+    container_name: search-reranking-server
+    depends_on:
+      - search-tei-reranking-service
+    ports:
+      - "3005:8000"
+    ipc: host
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      TEI_RERANKING_ENDPOINT: ${SEARCH_TEI_RERANKING_ENDPOINT}
+      HF_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN}
+      HUGGING_FACE_HUB_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN}
+      HUGGINGFACEHUB_API_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN}
+    restart: unless-stopped
+  search-tgi-service:
+    image: ghcr.io/huggingface/text-generation-inference:2.3.1-rocm
+    container_name: search-tgi-service
+    ports:
+      - "3006:80"
+    volumes:
+      - "./data:/data"
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      HUGGING_FACE_HUB_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN}
+      HUGGINGFACEHUB_API_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN}
+    shm_size: 1g
+    devices:
+      - /dev/kfd:/dev/kfd
+      - /dev/dri/:/dev/dri/
+    cap_add:
+      - SYS_PTRACE
+    group_add:
+      - video
+    security_opt:
+      - seccomp:unconfined
+    ipc: host
+    command: --model-id ${SEARCH_LLM_MODEL_ID} --max-input-length 1024 --max-total-tokens 2048
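+    # The devices list above passes through all GPUs. To dedicate a single GPU
+    # to this service, list only the nodes of that GPU instead (see the README):
+    #   devices:
+    #     - /dev/kfd:/dev/kfd
+    #     - /dev/dri/card0:/dev/dri/card0
+    #     - /dev/dri/renderD128:/dev/dri/renderD128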
+  search-llm:
+    image: ${REGISTRY:-opea}/llm-textgen:${TAG:-latest}
+    container_name: search-llm-server
+    depends_on:
+      - search-tgi-service
+    ports:
+      - "3007:9000"
+    ipc: host
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      TGI_LLM_ENDPOINT: ${SEARCH_TGI_LLM_ENDPOINT}
+      HUGGINGFACEHUB_API_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN}
+      LLM_ENDPOINT: ${SEARCH_TGI_LLM_ENDPOINT}
+      LLM_MODEL_ID: ${SEARCH_LLM_MODEL_ID}
+      LLM_MODEL: ${SEARCH_LLM_MODEL_ID}
+      HF_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN}
+      OPENAI_API_KEY: ${SEARCH_OPENAI_API_KEY}
+    restart: unless-stopped
+  search-backend-server:
+    image: ${REGISTRY:-opea}/searchqna:${TAG:-latest}
+    container_name: search-backend-server
+    depends_on:
+      - search-tei-embedding-service
+      - search-embedding
+      - search-web-retriever
+      - search-tei-reranking-service
+      - search-reranking
+      - search-tgi-service
+      - search-llm
+    ports:
+      - "${SEARCH_BACKEND_SERVICE_PORT:-3008}:8888"
+    environment:
+      - no_proxy=${no_proxy}
+      - https_proxy=${https_proxy}
+      - http_proxy=${http_proxy}
+      - MEGA_SERVICE_HOST_IP=${SEARCH_MEGA_SERVICE_HOST_IP}
+      - EMBEDDING_SERVICE_HOST_IP=${SEARCH_EMBEDDING_SERVICE_HOST_IP}
+      - WEB_RETRIEVER_SERVICE_HOST_IP=${SEARCH_WEB_RETRIEVER_SERVICE_HOST_IP}
+      - RERANK_SERVICE_HOST_IP=${SEARCH_RERANK_SERVICE_HOST_IP}
+      - LLM_SERVICE_HOST_IP=${SEARCH_LLM_SERVICE_HOST_IP}
+      - EMBEDDING_SERVICE_PORT=${SEARCH_EMBEDDING_SERVICE_PORT}
+      - WEB_RETRIEVER_SERVICE_PORT=${SEARCH_WEB_RETRIEVER_SERVICE_PORT}
+      - RERANK_SERVICE_PORT=${SEARCH_RERANK_SERVICE_PORT}
+      - LLM_SERVICE_PORT=${SEARCH_LLM_SERVICE_PORT}
+    ipc: host
+    restart: always
+  search-ui-server:
+    image: ${REGISTRY:-opea}/searchqna-ui:${TAG:-latest}
+    container_name: search-ui-server
+    depends_on:
+      - search-backend-server
+    ports:
+      - "${SEARCH_FRONTEND_SERVICE_PORT:-5173}:5173"
+    environment:
+      - no_proxy=${no_proxy}
+      - https_proxy=${https_proxy}
+      - http_proxy=${http_proxy}
+      - BACKEND_BASE_URL=${SEARCH_BACKEND_SERVICE_ENDPOINT}
+    ipc: host
+    restart: always
+
+networks:
+  default:
+    driver: bridge
diff --git a/SearchQnA/docker_compose/amd/gpu/rocm/set_env.sh b/SearchQnA/docker_compose/amd/gpu/rocm/set_env.sh
new file mode 100644
index 000000000..ca8fd3a82
--- /dev/null
+++ b/SearchQnA/docker_compose/amd/gpu/rocm/set_env.sh
@@ -0,0 +1,36 @@
+#!/usr/bin/env bash
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# IP address of the host the services are deployed on (example value, replace with your own)
+export SEARCH_HOST_IP=10.53.22.29
+# External IP address of the host, used to reach the MegaService and the UI (example value, replace with your own)
+export SEARCH_EXTERNAL_HOST_IP=68.69.180.77
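+# One way to pick up the primary local IP automatically (as the CI test for this
+# example does) is:
+#   export SEARCH_HOST_IP=$(hostname -I | awk '{print $1}')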
+
+# Model served by the TEI embedding service and its endpoint
+export SEARCH_EMBEDDING_MODEL_ID='BAAI/bge-base-en-v1.5'
+export SEARCH_TEI_EMBEDDING_ENDPOINT=http://${SEARCH_HOST_IP}:3001
+# Model served by the TEI reranking service and its endpoint
+export SEARCH_RERANK_MODEL_ID='BAAI/bge-reranker-base'
+export SEARCH_TEI_RERANKING_ENDPOINT=http://${SEARCH_HOST_IP}:3004
+# Credentials used to download models and call external APIs
+export SEARCH_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+export SEARCH_OPENAI_API_KEY=${OPENAI_API_KEY}
+
+# Model served by the TGI service and its endpoint
+export SEARCH_TGI_LLM_ENDPOINT=http://${SEARCH_HOST_IP}:3006
+export SEARCH_LLM_MODEL_ID='Intel/neural-chat-7b-v3-3'
+
+# Hosts of the individual microservices, used by the MegaService
+export SEARCH_MEGA_SERVICE_HOST_IP=${SEARCH_EXTERNAL_HOST_IP}
+export SEARCH_EMBEDDING_SERVICE_HOST_IP=${SEARCH_HOST_IP}
+export SEARCH_WEB_RETRIEVER_SERVICE_HOST_IP=${SEARCH_HOST_IP}
+export SEARCH_RERANK_SERVICE_HOST_IP=${SEARCH_HOST_IP}
+export SEARCH_LLM_SERVICE_HOST_IP=${SEARCH_HOST_IP}
+
+# Ports of the individual microservices
+export SEARCH_EMBEDDING_SERVICE_PORT=3002
+export SEARCH_WEB_RETRIEVER_SERVICE_PORT=3003
+export SEARCH_RERANK_SERVICE_PORT=3005
+export SEARCH_LLM_SERVICE_PORT=3007
+
+# Ports of the UI and MegaService, and the MegaService endpoint
+export SEARCH_FRONTEND_SERVICE_PORT=18143
+export SEARCH_BACKEND_SERVICE_PORT=18142
+export SEARCH_BACKEND_SERVICE_ENDPOINT=http://${SEARCH_EXTERNAL_HOST_IP}:${SEARCH_BACKEND_SERVICE_PORT}/v1/searchqna
+
+# Google Programmable Search credentials used by the web retriever
+export SEARCH_GOOGLE_API_KEY=${GOOGLE_API_KEY}
+export SEARCH_GOOGLE_CSE_ID=${GOOGLE_CSE_ID}
diff --git a/SearchQnA/tests/test_compose_on_rocm.sh b/SearchQnA/tests/test_compose_on_rocm.sh
new file mode 100644
index 000000000..cebe86133
--- /dev/null
+++ b/SearchQnA/tests/test_compose_on_rocm.sh
@@ -0,0 +1,137 @@
+#!/bin/bash
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+set -xe
+IMAGE_REPO=${IMAGE_REPO:-"opea"}
+IMAGE_TAG=${IMAGE_TAG:-"latest"}
+echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
+echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
+export REGISTRY=${IMAGE_REPO}
+export TAG=${IMAGE_TAG}
+
+WORKPATH=$(dirname "$PWD")
+LOG_PATH="$WORKPATH/tests"
+ip_address=$(hostname -I | awk '{print $1}')
+
+function build_docker_images() {
+    cd $WORKPATH/docker_image_build
+    git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../
+
+    echo "Build all the images with --no-cache, check docker_image_build.log for details..."
+    service_list="searchqna searchqna-ui embedding web-retriever reranking llm-textgen"
+    docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
+
+    docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
+    docker pull ghcr.io/huggingface/text-generation-inference:2.3.1-rocm
+    docker images && sleep 1s
+}
+
+function start_services() {
+    cd $WORKPATH/docker_compose/amd/gpu/rocm/
+    export SEARCH_HOST_IP=${ip_address}
+    export SEARCH_EXTERNAL_HOST_IP=${ip_address}
+    export SEARCH_EMBEDDING_MODEL_ID='BAAI/bge-base-en-v1.5'
+    export SEARCH_TEI_EMBEDDING_ENDPOINT=http://${SEARCH_HOST_IP}:3001
+    export SEARCH_RERANK_MODEL_ID='BAAI/bge-reranker-base'
+    export SEARCH_TEI_RERANKING_ENDPOINT=http://${SEARCH_HOST_IP}:3004
+    export SEARCH_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+    export SEARCH_OPENAI_API_KEY=${OPENAI_API_KEY}
+    export SEARCH_TGI_LLM_ENDPOINT=http://${SEARCH_HOST_IP}:3006
+    export SEARCH_LLM_MODEL_ID='Intel/neural-chat-7b-v3-3'
+    export SEARCH_MEGA_SERVICE_HOST_IP=${SEARCH_EXTERNAL_HOST_IP}
+    export SEARCH_EMBEDDING_SERVICE_HOST_IP=${SEARCH_HOST_IP}
+    export SEARCH_WEB_RETRIEVER_SERVICE_HOST_IP=${SEARCH_HOST_IP}
+    export SEARCH_RERANK_SERVICE_HOST_IP=${SEARCH_HOST_IP}
+    export SEARCH_LLM_SERVICE_HOST_IP=${SEARCH_HOST_IP}
+    export SEARCH_EMBEDDING_SERVICE_PORT=3002
+    export SEARCH_WEB_RETRIEVER_SERVICE_PORT=3003
+    export SEARCH_RERANK_SERVICE_PORT=3005
+    export SEARCH_LLM_SERVICE_PORT=3007
+    export SEARCH_FRONTEND_SERVICE_PORT=5173
+    export SEARCH_BACKEND_SERVICE_PORT=3008
+    export SEARCH_BACKEND_SERVICE_ENDPOINT=http://${SEARCH_HOST_IP}:${SEARCH_BACKEND_SERVICE_PORT}/v1/searchqna
+    export SEARCH_GOOGLE_API_KEY=${GOOGLE_API_KEY}
+    export SEARCH_GOOGLE_CSE_ID=${GOOGLE_CSE_ID}
+
+    sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env
+
+    # Start Docker Containers
+    docker compose up -d > ${LOG_PATH}/start_services_with_compose.log
+    # Wait up to 100 * 5 s for the TGI service to log "Connected" (model loaded and ready)
+    n=0
+    until [[ "$n" -ge 100 ]]; do
+        docker logs search-tgi-service > $LOG_PATH/search-tgi-service_start.log
+        if grep -q Connected $LOG_PATH/search-tgi-service_start.log; then
+            break
+        fi
+        sleep 5s
+        n=$((n+1))
+    done
+}
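+
+# NOTE: validate_megaservice is a smoke test: it only checks that the answer
+# contains a common English word ("the"), i.e. that the pipeline returned some
+# generated text, not that the answer is factually correct.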
+function validate_megaservice() {
+    result=$(http_proxy="" curl http://${ip_address}:3008/v1/searchqna -XPOST -d '{"messages": "What is black myth wukong?", "stream": "False"}' -H 'Content-Type: application/json')
+    echo $result
+
+    if [[ $result == *"the"* ]]; then
+        docker logs search-web-retriever-server
+        docker logs search-backend-server
+        echo "Result correct."
+    else
+        docker logs search-web-retriever-server
+        docker logs search-backend-server
+        echo "Result wrong."
+        exit 1
+    fi
+}
+
+function validate_frontend() {
+    cd $WORKPATH/ui/svelte
+    local conda_env_name="OPEA_e2e"
+    export PATH=${HOME}/miniconda3/bin/:$PATH
+    if conda info --envs | grep -q "$conda_env_name"; then
+        echo "$conda_env_name exists!"
+    else
+        conda create -n ${conda_env_name} python=3.12 -y
+    fi
+    source activate ${conda_env_name}
+
+    sed -i "s/localhost/$ip_address/g" playwright.config.ts
+
+    conda install -c conda-forge nodejs=22.6.0 -y
+    npm install && npm ci && npx playwright install --with-deps
+    node -v && npm -v && pip list
+
+    exit_status=0
+    npx playwright test || exit_status=$?
+
+    if [ $exit_status -ne 0 ]; then
+        echo "[TEST INFO]: ---------frontend test failed---------"
+        exit $exit_status
+    else
+        echo "[TEST INFO]: ---------frontend test passed---------"
+    fi
+}
+
+function stop_docker() {
+    cd $WORKPATH/docker_compose/amd/gpu/rocm/
+    docker compose stop && docker compose rm -f
+}
+
+function main() {
+
+    stop_docker
+    if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi
+    start_services
+
+    validate_megaservice
+    validate_frontend
+
+    stop_docker
+    echo y | docker system prune
+
+}
+
+main
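+
+# Typical local invocation, assuming a ROCm host with Docker installed and the
+# required credentials exported (the placeholders below are illustrative):
+#   export HUGGINGFACEHUB_API_TOKEN=<your HF token>
+#   export GOOGLE_API_KEY=<your Google API key>
+#   export GOOGLE_CSE_ID=<your Google CSE ID>
+#   bash test_compose_on_rocm.sh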