
Commit

Merge branch 'main' into melanie/mmqna_ui_port_fix
mhbuehler authored Jan 23, 2025
2 parents fa70be2 + 6600c32 commit cebd9be
Showing 45 changed files with 488 additions and 1,506 deletions.
1 change: 0 additions & 1 deletion .github/workflows/_example-workflow.yml
@@ -50,7 +50,6 @@ jobs:
# Image Build
####################################################################################################
build-images:
if: ${{ !(fromJSON(inputs.test_helmchart)) }}
runs-on: "docker-build-${{ inputs.node }}"
steps:
- name: Clean Up Working Directory
17 changes: 15 additions & 2 deletions .github/workflows/_helm-e2e.yml
@@ -29,6 +29,10 @@ on:
default: "latest"
required: false
type: string
version:
default: "0-latest"
required: false
type: string

jobs:
get-test-case:
@@ -154,6 +158,13 @@ jobs:
exit 0
fi
for img in `helm template -n $NAMESPACE $RELEASE_NAME oci://ghcr.io/opea-project/charts/${CHART_NAME} -f ${{ inputs.example }}/kubernetes/helm/${value_file} --version ${{ inputs.version }} | grep 'image:' | grep 'opea/' | awk '{print $2}' | xargs`;
do
# Increase the helm install wait timeout for the vllm-gaudi case
if [[ $img == *"vllm-gaudi"* ]]; then
ROLLOUT_TIMEOUT_SECONDS=900s
fi
done
if ! helm install \
--create-namespace \
--namespace $NAMESPACE \
@@ -163,9 +174,11 @@
--set global.modelUseHostPath=/home/sdp/.cache/huggingface/hub \
--set GOOGLE_API_KEY=${{ env.GOOGLE_API_KEY}} \
--set GOOGLE_CSE_ID=${{ env.GOOGLE_CSE_ID}} \
--set web-retriever.GOOGLE_API_KEY=${{ env.GOOGLE_API_KEY}} \
--set web-retriever.GOOGLE_CSE_ID=${{ env.GOOGLE_CSE_ID}} \
-f ${{ inputs.example }}/kubernetes/helm/${value_file} \
--version 0-latest \
--wait; then
--version ${{ inputs.version }} \
--wait --timeout "$ROLLOUT_TIMEOUT_SECONDS"; then
echo "Failed to install chart ${{ inputs.example }}"
echo "skip_validate=true" >> $GITHUB_ENV
.github/workflows/scripts/k8s-utils.sh dump_pods_status $NAMESPACE
9 changes: 7 additions & 2 deletions .github/workflows/scripts/k8s-utils.sh
@@ -12,7 +12,7 @@ function dump_pod_log() {
kubectl describe pod $pod_name -n $namespace
echo "-----------------------------------"
echo "#kubectl logs $pod_name -n $namespace"
kubectl logs $pod_name -n $namespace
kubectl logs $pod_name -n $namespace --all-containers --prefix=true
echo "-----------------------------------"
}

@@ -44,8 +44,13 @@ function dump_pods_status() {

function dump_all_pod_logs() {
namespace=$1
echo "------SUMMARY of POD STATUS in NS $namespace------"
kubectl get pods -n $namespace -o wide
echo "------SUMMARY of SVC STATUS in NS $namespace------"
kubectl get services -n $namespace -o wide
echo "------SUMMARY of endpoint STATUS in NS $namespace------"
kubectl get endpoints -n $namespace -o wide
echo "-----DUMP POD STATUS AND LOG in NS $namespace------"

pods=$(kubectl get pods -n $namespace -o jsonpath='{.items[*].metadata.name}')
for pod_name in $pods
do
101 changes: 101 additions & 0 deletions AgentQnA/docker_compose/amd/gpu/rocm/README.md
@@ -0,0 +1,101 @@
# Single node on-prem deployment with Docker Compose on AMD GPU

This example showcases a hierarchical multi-agent system for question-answering applications. We deploy the example on an AMD GPU. For LLMs, we use OpenAI models via API calls. For instructions on using open-source LLMs, please refer to the deployment guide [here](../../../../README.md).

## Deployment with Docker

1. First, clone this repo.
```
export WORKDIR=<your-work-directory>
cd $WORKDIR
git clone https://github.com/opea-project/GenAIExamples.git
```
2. Set up the environment for this example (a quick GPU device check is sketched after this list).

```
# Example: host_ip="192.168.1.1" or export host_ip="External_Public_IP"
export host_ip=$(hostname -I | awk '{print $1}')
# if you are in a proxy environment, also set the proxy-related environment variables
export http_proxy="Your_HTTP_Proxy"
export https_proxy="Your_HTTPs_Proxy"
# Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
export no_proxy="Your_No_Proxy"
export TOOLSET_PATH=$WORKDIR/GenAIExamples/AgentQnA/tools/
# OPENAI_API_KEY is needed if you want to use OpenAI models
export OPENAI_API_KEY=<your-openai-key>
# Set AMD GPU settings
export AGENTQNA_CARD_ID="card1"
export AGENTQNA_RENDER_ID="renderD136"
```

3. Deploy the retrieval tool (i.e., DocIndexRetriever mega-service)

First, launch the mega-service.

```
cd $WORKDIR/GenAIExamples/AgentQnA/retrieval_tool
bash launch_retrieval_tool.sh
```

Then, ingest data into the vector database. An example script is provided below; you can also ingest your own data.

```
bash run_ingest_data.sh
```

4. Launch Tool service

In this example, we use some of the mock APIs provided in the Meta CRAG KDD Challenge to demonstrate the benefits of gaining additional context from mock knowledge graphs (a reachability check for this service is sketched after this list).
```
docker run -d -p=8080:8000 docker.io/aicrowd/kdd-cup-24-crag-mock-api:v0
```
5. Launch `Agent` service

```
cd $WORKDIR/GenAIExamples/AgentQnA/docker_compose/amd/gpu/rocm
bash launch_agent_service_tgi_rocm.sh
```

6. [Optional] Build the `Agent` Docker image if pulling the image fails.

```
git clone https://github.com/opea-project/GenAIComps.git
cd GenAIComps
docker build -t opea/agent:latest -f comps/agent/src/Dockerfile .
```
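
Before moving on to validation, you can sanity-check the deployment. The snippet below is a minimal sketch: it assumes the ROCm driver exposes the GPU nodes under `/dev/dri`, that the `rocm-smi` tool is installed, and that the CRAG mock API container from step 4 is mapped to port 8080 as shown above.

```
# Confirm the card/render IDs from step 2 exist on this host
ls /dev/dri/
# Expected entries look like: card1  renderD136

# If the ROCm tools are installed, list the detected AMD GPUs
rocm-smi

# Check that the CRAG mock API container from step 4 is listening;
# any HTTP status code in the output (even 404) means the server is up
curl -s -o /dev/null -w "%{http_code}\n" http://${host_ip}:8080/
```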

## Validate services

First, look at the logs of the agent Docker containers:

```
# worker agent
docker logs rag-agent-endpoint
```

```
# supervisor agent
docker logs react-agent-endpoint
```

You should see something like "HTTP server setup successful" if the Docker containers started successfully.
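
To script this check, you can grep the logs for the startup message (a simple sketch; the message text is the one quoted above):

```
for agent in rag-agent-endpoint react-agent-endpoint; do
  if docker logs $agent 2>&1 | grep -q "HTTP server setup successful"; then
    echo "$agent started successfully"
  else
    echo "$agent has not reported a successful start; inspect its logs"
  fi
done
```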

Second, validate the worker agent:

```
curl http://${host_ip}:9095/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{
"query": "Most recent album by Taylor Swift"
}'
```

Third, validate the supervisor agent:

```
curl http://${host_ip}:9090/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{
"query": "Most recent album by Taylor Swift"
}'
```

## How to register your own tools with the agent

You can take a look at the tools YAML and Python files in this example. For more details, please refer to the "Provide your own tools" section in the instructions [here](https://github.com/opea-project/GenAIComps/tree/main/comps/agent/src/README.md).
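
As a rough illustration, a custom tool definition might look like the sketch below. The tool name, the `tools.py:search_web` callable, and the file name are hypothetical; follow the linked instructions for the authoritative schema.

```
# Hypothetical sketch only -- the tool name, callable, and fields are illustrative
cat > $TOOLSET_PATH/my_tools.yaml <<'EOF'
search_web:
  description: Search the web for a given query and return relevant text.
  callable_api: tools.py:search_web
  args_schema:
    query:
      type: str
      description: query
  return_output: retrieved_data
EOF
```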
38 changes: 8 additions & 30 deletions AgentQnA/kubernetes/helm/gaudi-values.yaml
@@ -4,35 +4,13 @@
# Accelerate inferencing in heaviest components to improve performance
# by overriding their subchart values

tgi:
vllm:
enabled: true
accelDevice: "gaudi"
image:
repository: ghcr.io/huggingface/tgi-gaudi
tag: "2.0.6"
resources:
limits:
habana.ai/gaudi: 4
MAX_INPUT_LENGTH: "4096"
MAX_TOTAL_TOKENS: "8192"
CUDA_GRAPHS: ""
OMPI_MCA_btl_vader_single_copy_mechanism: "none"
PT_HPU_ENABLE_LAZY_COLLECTIVES: "true"
ENABLE_HPU_GRAPH: "true"
LIMIT_HPU_GRAPH: "true"
USE_FLASH_ATTENTION: "true"
FLASH_ATTENTION_RECOMPUTE: "true"
extraCmdArgs: ["--sharded","true","--num-shard","4"]
livenessProbe:
initialDelaySeconds: 5
periodSeconds: 5
timeoutSeconds: 1
readinessProbe:
initialDelaySeconds: 5
periodSeconds: 5
timeoutSeconds: 1
startupProbe:
initialDelaySeconds: 5
periodSeconds: 5
timeoutSeconds: 1
failureThreshold: 120
repository: opea/vllm-gaudi
supervisor:
llm_endpoint_url: http://{{ .Release.Name }}-vllm
ragagent:
llm_endpoint_url: http://{{ .Release.Name }}-vllm
sqlagent:
llm_endpoint_url: http://{{ .Release.Name }}-vllm
16 changes: 7 additions & 9 deletions AgentQnA/tests/step1_build_images.sh
@@ -38,19 +38,17 @@ function build_vllm_docker_image() {
echo "Building the vllm docker image"
cd $WORKPATH
echo $WORKPATH
if [ ! -d "./vllm" ]; then
echo "clone vllm repo...."
git clone https://github.com/vllm-project/vllm.git
if [ ! -d "./vllm-fork" ]; then
git clone https://github.com/HabanaAI/vllm-fork.git
fi
cd ./vllm
echo "Checking out latest stable release of vllm"
git checkout v0.6.6
docker build --no-cache -f Dockerfile.hpu -t opea/vllm-gaudi:comps --shm-size=128g . --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy
cd ./vllm-fork
git checkout v0.6.4.post2+Gaudi-1.19.0
docker build --no-cache -f Dockerfile.hpu -t opea/vllm-gaudi:ci --shm-size=128g . --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy
if [ $? -ne 0 ]; then
echo "opea/vllm-gaudi:comps failed"
echo "opea/vllm-gaudi:ci failed"
exit 1
else
echo "opea/vllm-gaudi:comps successful"
echo "opea/vllm-gaudi:ci successful"
fi
}

8 changes: 5 additions & 3 deletions AgentQnA/tests/step4_launch_and_validate_agent_tgi.sh
@@ -8,15 +8,17 @@ WORKPATH=$(dirname "$PWD")
export WORKDIR=$WORKPATH/../../
echo "WORKDIR=${WORKDIR}"
export ip_address=$(hostname -I | awk '{print $1}')
export TOOLSET_PATH=$WORKDIR/GenAIExamples/AgentQnA/tools/
export TOOLSET_PATH=$WORKPATH/tools/
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
model="meta-llama/Meta-Llama-3.1-70B-Instruct"

export HF_CACHE_DIR=$WORKDIR/hf_cache
export HF_CACHE_DIR=/data2/huggingface
if [ ! -d "$HF_CACHE_DIR" ]; then
HF_CACHE_DIR=$WORKDIR/hf_cache
mkdir -p "$HF_CACHE_DIR"
fi
echo "HF_CACHE_DIR=$HF_CACHE_DIR"
ls $HF_CACHE_DIR

vllm_port=8086
@@ -35,7 +37,7 @@ function start_vllm_service_70B() {

echo "start vllm gaudi service"
echo "**************model is $model**************"
vllm_image=opea/vllm-gaudi:comps
vllm_image=opea/vllm-gaudi:ci
docker run -d --runtime=habana --rm --name "vllm-gaudi-server" -e HABANA_VISIBLE_DEVICES=0,1,2,3 -p $vllm_port:8000 -v $vllm_volume:/data -e HF_TOKEN=$HF_TOKEN -e HUGGING_FACE_HUB_TOKEN=$HF_TOKEN -e HF_HOME=/data -e OMPI_MCA_btl_vader_single_copy_mechanism=none -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e VLLM_SKIP_WARMUP=true --cap-add=sys_nice --ipc=host $vllm_image --model ${model} --max-seq-len-to-capture 16384 --tensor-parallel-size 4
sleep 5s
echo "Waiting vllm gaudi ready"
3 changes: 0 additions & 3 deletions AgentQnA/tests/test_compose_on_gaudi.sh
@@ -4,9 +4,6 @@

set -xe

echo "All running containers"
docker ps

WORKPATH=$(dirname "$PWD")
export WORKDIR=$WORKPATH/../../
echo "WORKDIR=${WORKDIR}"
6 changes: 5 additions & 1 deletion AudioQnA/kubernetes/helm/gaudi-values.yaml
@@ -5,7 +5,7 @@ tgi:
accelDevice: "gaudi"
image:
repository: ghcr.io/huggingface/tgi-gaudi
tag: "2.0.6"
tag: "2.3.1"
resources:
limits:
habana.ai/gaudi: 1
@@ -33,11 +33,15 @@ tgi:
failureThreshold: 120

whisper:
image:
repository: opea/whisper-gaudi
resources:
limits:
habana.ai/gaudi: 1

speecht5:
image:
repository: opea/speecht5-gaudi
resources:
limits:
habana.ai/gaudi: 1
2 changes: 1 addition & 1 deletion ChatQnA/docker_compose/intel/cpu/xeon/README.md
@@ -280,7 +280,7 @@ For details on how to verify the correctness of the response, refer to [how-to-v
1. TEI Embedding Service

```bash
curl ${host_ip}:6006/embed \
curl http://${host_ip}:6006/embed \
-X POST \
-d '{"inputs":"What is Deep Learning?"}' \
-H 'Content-Type: application/json'
