Skip to content

Commit

Permalink
Merge remote-tracking branch 'opea-origin/main' into feature/GenAIExa…
Browse files Browse the repository at this point in the history
…mple_SearchQnA_deploy_on_AMD

# Conflicts:
#	DocSum/docker_compose/amd/gpu/rocm/compose.yaml
  • Loading branch information
Chingis Yundunov authored and Chingis Yundunov committed Jan 15, 2025
2 parents 085f389 + 9812c2f commit 4dc17e2
Show file tree
Hide file tree
Showing 201 changed files with 1,267 additions and 15,389 deletions.
1 change: 0 additions & 1 deletion .github/workflows/_example-workflow.yml
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,6 @@ jobs:
fi
if [[ $(grep -c "vllm-gaudi:" ${docker_compose_path}) != 0 ]]; then
git clone https://github.com/HabanaAI/vllm-fork.git
cd vllm-fork && git checkout 3c39626 && cd ../
fi
git clone https://github.com/opea-project/GenAIComps.git
cd GenAIComps && git checkout ${{ inputs.opea_branch }} && git rev-parse HEAD && cd ../
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/check-online-doc-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ on:

jobs:
build:
runs-on: ubuntu-latest
runs-on: ubuntu-22.04
steps:

- name: Checkout
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/push-infra-issue-creation.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ jobs:
- name: Create Issue
uses: daisy-ycguo/create-issue-action@stable
with:
token: ${{ secrets.Infra_Issue_Token }}
token: ${{ secrets.ACTION_TOKEN }}
owner: opea-project
repo: GenAIInfra
title: |
Expand Down
4 changes: 4 additions & 0 deletions AgentQnA/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,10 @@ docker build -t opea/agent:latest --build-arg https_proxy=$https_proxy --build-a
:::
::::

## Deploy using Helm Chart

Refer to the [AgentQnA helm chart](./kubernetes/helm/README.md) for instructions on deploying AgentQnA on Kubernetes.

## Validate services

First look at logs of the agent docker containers:
Expand Down
11 changes: 11 additions & 0 deletions AgentQnA/kubernetes/helm/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Deploy AgentQnA on Kubernetes cluster

- You should have Helm (version >= 3.15) installed. Refer to the [Helm Installation Guide](https://helm.sh/docs/intro/install/) for more information.
- For more deployment options, refer to the [helm charts README](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts#readme).

## Deploy on Gaudi

```
export HFTOKEN="insert-your-huggingface-token-here"
helm install agentqna oci://ghcr.io/opea-project/charts/agentqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f gaudi-values.yaml
```
38 changes: 38 additions & 0 deletions AgentQnA/kubernetes/helm/gaudi-values.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# Accelerate inferencing in heaviest components to improve performance
# by overriding their subchart values

# Overrides for the TGI subchart: run text-generation-inference on Gaudi.
tgi:
  enabled: true
  accelDevice: "gaudi"
  image:
    repository: ghcr.io/huggingface/tgi-gaudi
    tag: "2.0.6"
  resources:
    limits:
      # Number of Gaudi devices requested; kept equal to --num-shard below.
      habana.ai/gaudi: 4
  # Settings below are quoted so they stay strings rather than being
  # re-typed as integers or booleans by the YAML parser.
  MAX_INPUT_LENGTH: "4096"
  MAX_TOTAL_TOKENS: "8192"
  CUDA_GRAPHS: ""
  OMPI_MCA_btl_vader_single_copy_mechanism: "none"
  PT_HPU_ENABLE_LAZY_COLLECTIVES: "true"
  ENABLE_HPU_GRAPH: "true"
  LIMIT_HPU_GRAPH: "true"
  USE_FLASH_ATTENTION: "true"
  FLASH_ATTENTION_RECOMPUTE: "true"
  # Shard the model across the 4 devices requested in resources.limits.
  extraCmdArgs: ["--sharded", "true", "--num-shard", "4"]
  livenessProbe:
    initialDelaySeconds: 5
    periodSeconds: 5
    timeoutSeconds: 1
  readinessProbe:
    initialDelaySeconds: 5
    periodSeconds: 5
    timeoutSeconds: 1
  startupProbe:
    initialDelaySeconds: 5
    periodSeconds: 5
    timeoutSeconds: 1
    # 120 failures x 5 s period: roughly 10 minutes allowed for startup.
    failureThreshold: 120
4 changes: 4 additions & 0 deletions AudioQnA/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,10 @@ Refer to the [Gaudi Guide](./docker_compose/intel/hpu/gaudi/README.md) for instr

Refer to the [Xeon Guide](./docker_compose/intel/cpu/xeon/README.md) for instructions on deploying AudioQnA on Xeon.

## Deploy using Helm Chart

Refer to the [AudioQnA helm chart](./kubernetes/helm/README.md) for instructions on deploying AudioQnA on Kubernetes.

## Supported Models

### ASR
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
18 changes: 18 additions & 0 deletions AudioQnA/kubernetes/helm/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Deploy AudioQnA on Kubernetes cluster

- You should have Helm (version >= 3.15) installed. Refer to the [Helm Installation Guide](https://helm.sh/docs/intro/install/) for more information.
- For more deployment options, refer to the [helm charts README](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts#readme).

## Deploy on Xeon

```
export HFTOKEN="insert-your-huggingface-token-here"
helm install audioqna oci://ghcr.io/opea-project/charts/audioqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f cpu-values.yaml
```

## Deploy on Gaudi

```
export HFTOKEN="insert-your-huggingface-token-here"
helm install audioqna oci://ghcr.io/opea-project/charts/audioqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f gaudi-values.yaml
```
5 changes: 5 additions & 0 deletions AudioQnA/kubernetes/helm/cpu-values.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# Override for the TGI subchart on CPU (Xeon) deployments.
tgi:
  # Model ID passed to TGI (presumably a Hugging Face Hub ID; confirm
  # against the chart).
  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
43 changes: 43 additions & 0 deletions AudioQnA/kubernetes/helm/gaudi-values.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# Overrides for the TGI subchart: run text-generation-inference on Gaudi.
tgi:
  accelDevice: "gaudi"
  image:
    repository: ghcr.io/huggingface/tgi-gaudi
    tag: "2.0.6"
  resources:
    limits:
      # Single Gaudi device for the LLM server.
      habana.ai/gaudi: 1
  MAX_INPUT_LENGTH: "1024"
  MAX_TOTAL_TOKENS: "2048"
  CUDA_GRAPHS: ""
  # NOTE(review): the six values below are unquoted, so YAML types them as
  # integers/booleans, unlike the quoted strings above. Left unchanged
  # because quoting could alter how the chart template evaluates them
  # (e.g. 0 vs "0" in a truthiness check) - confirm against the tgi chart.
  HF_HUB_DISABLE_PROGRESS_BARS: 1
  HF_HUB_ENABLE_HF_TRANSFER: 0
  ENABLE_HPU_GRAPH: true
  LIMIT_HPU_GRAPH: true
  USE_FLASH_ATTENTION: true
  FLASH_ATTENTION_RECOMPUTE: true
  livenessProbe:
    initialDelaySeconds: 5
    periodSeconds: 5
    timeoutSeconds: 1
  readinessProbe:
    initialDelaySeconds: 5
    periodSeconds: 5
    timeoutSeconds: 1
  startupProbe:
    initialDelaySeconds: 5
    periodSeconds: 5
    timeoutSeconds: 1
    # 120 failures x 5 s period: roughly 10 minutes allowed for startup.
    failureThreshold: 120

# Whisper subchart: request one Gaudi device.
whisper:
  resources:
    limits:
      habana.ai/gaudi: 1

# SpeechT5 subchart: request one Gaudi device.
speecht5:
  resources:
    limits:
      habana.ai/gaudi: 1
32 changes: 0 additions & 32 deletions AudioQnA/kubernetes/intel/README.md

This file was deleted.

Loading

0 comments on commit 4dc17e2

Please sign in to comment.