Merge remote-tracking branch 'opea-origin/main' into feature/GenAIExample_SearchQnA_deploy_on_AMD

# Conflicts: # DocSum/docker_compose/amd/gpu/rocm/compose.yaml
opea-project · Jan 15, 2025 · 4dc17e2 · 4dc17e2
2 parents 085f389 + 9812c2f
commit 4dc17e2
Show file tree

Hide file tree

Showing 201 changed files with 1,267 additions and 15,389 deletions.
diff --git a/.github/workflows/_example-workflow.yml b/.github/workflows/_example-workflow.yml
@@ -79,7 +79,6 @@ jobs:
           fi
           if [[ $(grep -c "vllm-gaudi:" ${docker_compose_path}) != 0 ]]; then
                git clone https://github.com/HabanaAI/vllm-fork.git
-               cd vllm-fork && git checkout 3c39626 && cd ../
           fi
           git clone https://github.com/opea-project/GenAIComps.git
           cd GenAIComps && git checkout ${{ inputs.opea_branch }} && git rev-parse HEAD && cd ../

diff --git a/.github/workflows/check-online-doc-build.yml b/.github/workflows/check-online-doc-build.yml
@@ -13,7 +13,7 @@ on:
 
 jobs:
   build:
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-22.04
     steps:
 
     - name: Checkout

diff --git a/.github/workflows/push-infra-issue-creation.yml b/.github/workflows/push-infra-issue-creation.yml
@@ -40,7 +40,7 @@ jobs:
       - name: Create Issue
         uses: daisy-ycguo/create-issue-action@stable
         with:
-          token: ${{ secrets.Infra_Issue_Token }}
+          token: ${{ secrets.ACTION_TOKEN }}
           owner: opea-project
           repo: GenAIInfra
           title: |

diff --git a/AgentQnA/README.md b/AgentQnA/README.md
@@ -186,6 +186,10 @@ docker build -t opea/agent:latest --build-arg https_proxy=$https_proxy --build-a
    :::
    ::::
 
+## Deploy using Helm Chart
+
+Refer to the [AgentQnA helm chart](./kubernetes/helm/README.md) for instructions on deploying AgentQnA on Kubernetes.
+
 ## Validate services
 
 First look at logs of the agent docker containers:

diff --git a/AgentQnA/kubernetes/helm/README.md b/AgentQnA/kubernetes/helm/README.md
@@ -0,0 +1,11 @@
+# Deploy AgentQnA on Kubernetes cluster
+
+- You should have Helm (version >= 3.15) installed. Refer to the [Helm Installation Guide](https://helm.sh/docs/intro/install/) for more information.
+- For more deployment options, refer to the [helm charts README](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts#readme).
+
+## Deploy on Gaudi
+
+```
+export HFTOKEN="insert-your-huggingface-token-here"
+helm install agentqna oci://ghcr.io/opea-project/charts/agentqna  --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f gaudi-values.yaml
+```
diff --git a/AgentQnA/kubernetes/helm/gaudi-values.yaml b/AgentQnA/kubernetes/helm/gaudi-values.yaml
@@ -0,0 +1,38 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# Accelerate inferencing in heaviest components to improve performance
+# by overriding their subchart values
+
+tgi:  # TGI overrides for the Gaudi deployment of AgentQnA
+  enabled: true
+  accelDevice: "gaudi"
+  image:
+    repository: ghcr.io/huggingface/tgi-gaudi
+    tag: "2.0.6"  # quoted so the version stays a string
+  resources:
+    limits:
+      habana.ai/gaudi: 4  # same count as --num-shard in extraCmdArgs below
+  MAX_INPUT_LENGTH: "4096"
+  MAX_TOTAL_TOKENS: "8192"
+  CUDA_GRAPHS: ""  # NOTE(review): explicit empty string; presumably clears a chart default - confirm
+  OMPI_MCA_btl_vader_single_copy_mechanism: "none"
+  PT_HPU_ENABLE_LAZY_COLLECTIVES: "true"
+  ENABLE_HPU_GRAPH: "true"
+  LIMIT_HPU_GRAPH: "true"
+  USE_FLASH_ATTENTION: "true"
+  FLASH_ATTENTION_RECOMPUTE: "true"
+  extraCmdArgs: ["--sharded","true","--num-shard","4"]  # shard count equals the Gaudi limit above
+  livenessProbe:
+    initialDelaySeconds: 5
+    periodSeconds: 5
+    timeoutSeconds: 1
+  readinessProbe:
+    initialDelaySeconds: 5
+    periodSeconds: 5
+    timeoutSeconds: 1
+  startupProbe:
+    initialDelaySeconds: 5
+    periodSeconds: 5
+    timeoutSeconds: 1
+    failureThreshold: 120  # 120 failures x 5s period; NOTE(review): ~10 min startup allowance - confirm
diff --git a/AudioQnA/README.md b/AudioQnA/README.md
@@ -71,6 +71,10 @@ Refer to the [Gaudi Guide](./docker_compose/intel/hpu/gaudi/README.md) for instr
 
 Refer to the [Xeon Guide](./docker_compose/intel/cpu/xeon/README.md) for instructions on deploying AudioQnA on Xeon.
 
+## Deploy using Helm Chart
+
+Refer to the [AudioQnA helm chart](./kubernetes/helm/README.md) for instructions on deploying AudioQnA on Kubernetes.
+
 ## Supported Models
 
 ### ASR

diff --git a/AudioQnA/kubernetes/intel/README_gmc.md → AudioQnA/kubernetes/gmc/README.md b/AudioQnA/kubernetes/intel/README_gmc.md → AudioQnA/kubernetes/gmc/README.md
diff --git a/...s/intel/hpu/gaudi/gmc/audioQnA_gaudi.yaml → AudioQnA/kubernetes/gmc/audioQnA_gaudi.yaml b/...s/intel/hpu/gaudi/gmc/audioQnA_gaudi.yaml → AudioQnA/kubernetes/gmc/audioQnA_gaudi.yaml
diff --git a/...tes/intel/cpu/xeon/gmc/audioQnA_xeon.yaml → AudioQnA/kubernetes/gmc/audioQnA_xeon.yaml b/...tes/intel/cpu/xeon/gmc/audioQnA_xeon.yaml → AudioQnA/kubernetes/gmc/audioQnA_xeon.yaml
diff --git a/AudioQnA/kubernetes/helm/README.md b/AudioQnA/kubernetes/helm/README.md
@@ -0,0 +1,18 @@
+# Deploy AudioQnA on Kubernetes cluster
+
+- You should have Helm (version >= 3.15) installed. Refer to the [Helm Installation Guide](https://helm.sh/docs/intro/install/) for more information.
+- For more deployment options, refer to the [helm charts README](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts#readme).
+
+## Deploy on Xeon
+
+```
+export HFTOKEN="insert-your-huggingface-token-here"
+helm install audioqna oci://ghcr.io/opea-project/charts/audioqna  --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f cpu-values.yaml
+```
+
+## Deploy on Gaudi
+
+```
+export HFTOKEN="insert-your-huggingface-token-here"
+helm install audioqna oci://ghcr.io/opea-project/charts/audioqna  --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f gaudi-values.yaml
+```
diff --git a/AudioQnA/kubernetes/helm/cpu-values.yaml b/AudioQnA/kubernetes/helm/cpu-values.yaml
@@ -0,0 +1,5 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+tgi:  # TGI override used by the Xeon install command (-f cpu-values.yaml)
+  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3  # model ID set for the CPU deployment
diff --git a/AudioQnA/kubernetes/helm/gaudi-values.yaml b/AudioQnA/kubernetes/helm/gaudi-values.yaml
@@ -0,0 +1,43 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+tgi:  # TGI settings for the Gaudi deployment of AudioQnA
+  accelDevice: "gaudi"
+  image:
+    repository: ghcr.io/huggingface/tgi-gaudi
+    tag: "2.0.6"  # quoted so the version stays a string
+  resources:
+    limits:
+      habana.ai/gaudi: 1
+  MAX_INPUT_LENGTH: "1024"
+  MAX_TOTAL_TOKENS: "2048"
+  CUDA_GRAPHS: ""  # NOTE(review): explicit empty string; presumably clears a chart default - confirm
+  HF_HUB_DISABLE_PROGRESS_BARS: "1"  # env-style values are quoted so YAML keeps them as strings
+  HF_HUB_ENABLE_HF_TRANSFER: "0"
+  ENABLE_HPU_GRAPH: "true"  # quoted to match the AgentQnA gaudi-values.yaml convention
+  LIMIT_HPU_GRAPH: "true"
+  USE_FLASH_ATTENTION: "true"
+  FLASH_ATTENTION_RECOMPUTE: "true"
+  livenessProbe:
+    initialDelaySeconds: 5
+    periodSeconds: 5
+    timeoutSeconds: 1
+  readinessProbe:
+    initialDelaySeconds: 5
+    periodSeconds: 5
+    timeoutSeconds: 1
+  startupProbe:
+    initialDelaySeconds: 5
+    periodSeconds: 5
+    timeoutSeconds: 1
+    failureThreshold: 120  # 120 failures x 5s period; NOTE(review): ~10 min startup allowance - confirm
+
+whisper:  # resource override for the whisper component
+  resources:
+    limits:
+      habana.ai/gaudi: 1  # limit of one Gaudi device
+
+speecht5:  # resource override for the speecht5 component
+  resources:
+    limits:
+      habana.ai/gaudi: 1  # limit of one Gaudi device
diff --git a/AudioQnA/kubernetes/intel/README.md b/AudioQnA/kubernetes/intel/README.md
-Original file line number
+Diff line change
@@ Expand Up / @@ -13,7 +13,7 @@ on: @@
     jobs:
       build:
-        runs-on: ubuntu-latest
+        runs-on: ubuntu-22.04
         steps:
         - name: Checkout
@@ Expand Down @@