From bb2726303e3dd6ede460b08dcef8fa441208a01f Mon Sep 17 00:00:00 2001
From: Dolpher Du
Date: Fri, 8 Nov 2024 13:41:40 +0000
Subject: [PATCH] Adjust guardrail values

Signed-off-by: Dolpher Du
---
Reviewer notes (free text placed between the "---" separator and the
diffstat; it is not part of the commit message):

  * helm-charts/chatqna/gaudi-values.yaml
      The Gaudi-specific `tei` override block is turned into comments and a
      new comment line states that tei on gaudi is disabled by default.

  * helm-charts/chatqna/guardrails-gaudi-values.yaml
      `tei`: the Gaudi override block is turned into comments
        ("tei running on CPU by default").
      `teirerank`: image changes from opea/tei-gaudi "latest" to
        ghcr.io/huggingface/tei-gaudi "1.5.0"; adds
        OMPI_MCA_btl_vader_single_copy_mechanism and
        MAX_WARMUP_SEQUENCE_LENGTH.
      `tgi`: MAX_INPUT_LENGTH goes "1024" to "2048", MAX_TOTAL_TOKENS goes
        "2048" to "4096"; adds OMPI_MCA_btl_vader_single_copy_mechanism,
        ENABLE_HPU_GRAPH, LIMIT_HPU_GRAPH, USE_FLASH_ATTENTION and
        FLASH_ATTENTION_RECOMPUTE.
      `tgi-guardrails`: adds the same five settings; its MAX_INPUT_LENGTH
        ("1024") and MAX_TOTAL_TOKENS ("2048") are unchanged context lines.

  NOTE(review): the copy that was reviewed had its line breaks and leading
  whitespace collapsed and the author e-mail addresses missing. Line breaks
  below were restored so that every hunk matches its "@@" line counts; the
  YAML indentation was inferred from key nesting -- TODO confirm against
  the target files before applying.

 helm-charts/chatqna/gaudi-values.yaml         | 33 ++++++------
 .../chatqna/guardrails-gaudi-values.yaml      | 50 ++++++++++++-------
 2 files changed, 49 insertions(+), 34 deletions(-)

diff --git a/helm-charts/chatqna/gaudi-values.yaml b/helm-charts/chatqna/gaudi-values.yaml
index 203e1afae..a3086a243 100644
--- a/helm-charts/chatqna/gaudi-values.yaml
+++ b/helm-charts/chatqna/gaudi-values.yaml
@@ -57,19 +57,20 @@ teirerank:
     timeoutSeconds: 1
 
 # Embedding: Second largest bottleneck without rerank
-tei:
-  accelDevice: "gaudi"
-  OMPI_MCA_btl_vader_single_copy_mechanism: "none"
-  MAX_WARMUP_SEQUENCE_LENGTH: "512"
-  image:
-    repository: ghcr.io/huggingface/tei-gaudi
-    tag: 1.5.0
-  resources:
-    limits:
-      habana.ai/gaudi: 1
-  securityContext:
-    readOnlyRootFilesystem: false
-  livenessProbe:
-    timeoutSeconds: 1
-  readinessProbe:
-    timeoutSeconds: 1
+# By default tei on gaudi is disabled.
+# tei:
+#   accelDevice: "gaudi"
+#   OMPI_MCA_btl_vader_single_copy_mechanism: "none"
+#   MAX_WARMUP_SEQUENCE_LENGTH: "512"
+#   image:
+#     repository: ghcr.io/huggingface/tei-gaudi
+#     tag: 1.5.0
+#   resources:
+#     limits:
+#       habana.ai/gaudi: 1
+#   securityContext:
+#     readOnlyRootFilesystem: false
+#   livenessProbe:
+#     timeoutSeconds: 1
+#   readinessProbe:
+#     timeoutSeconds: 1
diff --git a/helm-charts/chatqna/guardrails-gaudi-values.yaml b/helm-charts/chatqna/guardrails-gaudi-values.yaml
index 9e7cf091c..9ee715cdd 100644
--- a/helm-charts/chatqna/guardrails-gaudi-values.yaml
+++ b/helm-charts/chatqna/guardrails-gaudi-values.yaml
@@ -12,26 +12,29 @@ guardrails-usvc:
   SAFETY_GUARD_MODEL_ID: "meta-llama/Meta-Llama-Guard-2-8B"
 
 # gaudi related config
-tei:
-  accelDevice: "gaudi"
-  image:
-    repository: ghcr.io/huggingface/tei-gaudi
-    tag: 1.5.0
-  resources:
-    limits:
-      habana.ai/gaudi: 1
-  securityContext:
-    readOnlyRootFilesystem: false
-  livenessProbe:
-    timeoutSeconds: 1
-  readinessProbe:
-    timeoutSeconds: 1
+# tei running on CPU by default
+# tei:
+#   accelDevice: "gaudi"
+#   image:
+#     repository: ghcr.io/huggingface/tei-gaudi
+#     tag: 1.5.0
+#   resources:
+#     limits:
+#       habana.ai/gaudi: 1
+#   securityContext:
+#     readOnlyRootFilesystem: false
+#   livenessProbe:
+#     timeoutSeconds: 1
+#   readinessProbe:
+#     timeoutSeconds: 1
 
 teirerank:
   accelDevice: "gaudi"
+  OMPI_MCA_btl_vader_single_copy_mechanism: "none"
+  MAX_WARMUP_SEQUENCE_LENGTH: "512"
   image:
-    repository: opea/tei-gaudi
-    tag: "latest"
+    repository: ghcr.io/huggingface/tei-gaudi
+    tag: "1.5.0"
   resources:
     limits:
       habana.ai/gaudi: 1
@@ -50,9 +53,15 @@ tgi:
   resources:
     limits:
       habana.ai/gaudi: 1
-  MAX_INPUT_LENGTH: "1024"
-  MAX_TOTAL_TOKENS: "2048"
+  # higher limits are needed with extra input tokens added by rerank
+  MAX_INPUT_LENGTH: "2048"
+  MAX_TOTAL_TOKENS: "4096"
   CUDA_GRAPHS: ""
+  OMPI_MCA_btl_vader_single_copy_mechanism: "none"
+  ENABLE_HPU_GRAPH: "true"
+  LIMIT_HPU_GRAPH: "true"
+  USE_FLASH_ATTENTION: "true"
+  FLASH_ATTENTION_RECOMPUTE: "true"
   livenessProbe:
     initialDelaySeconds: 5
     periodSeconds: 5
@@ -79,6 +88,11 @@ tgi-guardrails:
   MAX_INPUT_LENGTH: "1024"
   MAX_TOTAL_TOKENS: "2048"
   CUDA_GRAPHS: ""
+  OMPI_MCA_btl_vader_single_copy_mechanism: "none"
+  ENABLE_HPU_GRAPH: "true"
+  LIMIT_HPU_GRAPH: "true"
+  USE_FLASH_ATTENTION: "true"
+  FLASH_ATTENTION_RECOMPUTE: "true"
   livenessProbe:
     initialDelaySeconds: 5
     periodSeconds: 5