Skip to content

Commit

Permalink
Adjust guardrail values
Browse files Browse the repository at this point in the history
Signed-off-by: Dolpher Du <[email protected]>
  • Loading branch information
yongfengdu committed Nov 8, 2024
1 parent 01bfa66 commit bb27263
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 34 deletions.
33 changes: 17 additions & 16 deletions helm-charts/chatqna/gaudi-values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -57,19 +57,20 @@ teirerank:
timeoutSeconds: 1

# Embedding: Second largest bottleneck without rerank
tei:
accelDevice: "gaudi"
OMPI_MCA_btl_vader_single_copy_mechanism: "none"
MAX_WARMUP_SEQUENCE_LENGTH: "512"
image:
repository: ghcr.io/huggingface/tei-gaudi
tag: 1.5.0
resources:
limits:
habana.ai/gaudi: 1
securityContext:
readOnlyRootFilesystem: false
livenessProbe:
timeoutSeconds: 1
readinessProbe:
timeoutSeconds: 1
# tei on Gaudi is disabled by default; uncomment the block below to enable it.
# tei:
# accelDevice: "gaudi"
# OMPI_MCA_btl_vader_single_copy_mechanism: "none"
# MAX_WARMUP_SEQUENCE_LENGTH: "512"
# image:
# repository: ghcr.io/huggingface/tei-gaudi
# tag: 1.5.0
# resources:
# limits:
# habana.ai/gaudi: 1
# securityContext:
# readOnlyRootFilesystem: false
# livenessProbe:
# timeoutSeconds: 1
# readinessProbe:
# timeoutSeconds: 1
50 changes: 32 additions & 18 deletions helm-charts/chatqna/guardrails-gaudi-values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,26 +12,29 @@ guardrails-usvc:
SAFETY_GUARD_MODEL_ID: "meta-llama/Meta-Llama-Guard-2-8B"

# gaudi related config
tei:
accelDevice: "gaudi"
image:
repository: ghcr.io/huggingface/tei-gaudi
tag: 1.5.0
resources:
limits:
habana.ai/gaudi: 1
securityContext:
readOnlyRootFilesystem: false
livenessProbe:
timeoutSeconds: 1
readinessProbe:
timeoutSeconds: 1
# tei runs on CPU by default; uncomment the block below to run it on Gaudi.
# tei:
# accelDevice: "gaudi"
# image:
# repository: ghcr.io/huggingface/tei-gaudi
# tag: 1.5.0
# resources:
# limits:
# habana.ai/gaudi: 1
# securityContext:
# readOnlyRootFilesystem: false
# livenessProbe:
# timeoutSeconds: 1
# readinessProbe:
# timeoutSeconds: 1

teirerank:
accelDevice: "gaudi"
OMPI_MCA_btl_vader_single_copy_mechanism: "none"
MAX_WARMUP_SEQUENCE_LENGTH: "512"
image:
repository: opea/tei-gaudi
tag: "latest"
repository: ghcr.io/huggingface/tei-gaudi
tag: "1.5.0"
resources:
limits:
habana.ai/gaudi: 1
Expand All @@ -50,9 +53,15 @@ tgi:
resources:
limits:
habana.ai/gaudi: 1
MAX_INPUT_LENGTH: "1024"
MAX_TOTAL_TOKENS: "2048"
# Higher limits are needed to accommodate the extra input tokens added by rerank.
MAX_INPUT_LENGTH: "2048"
MAX_TOTAL_TOKENS: "4096"
CUDA_GRAPHS: ""
OMPI_MCA_btl_vader_single_copy_mechanism: "none"
ENABLE_HPU_GRAPH: "true"
LIMIT_HPU_GRAPH: "true"
USE_FLASH_ATTENTION: "true"
FLASH_ATTENTION_RECOMPUTE: "true"
livenessProbe:
initialDelaySeconds: 5
periodSeconds: 5
Expand All @@ -79,6 +88,11 @@ tgi-guardrails:
MAX_INPUT_LENGTH: "1024"
MAX_TOTAL_TOKENS: "2048"
CUDA_GRAPHS: ""
OMPI_MCA_btl_vader_single_copy_mechanism: "none"
ENABLE_HPU_GRAPH: "true"
LIMIT_HPU_GRAPH: "true"
USE_FLASH_ATTENTION: "true"
FLASH_ATTENTION_RECOMPUTE: "true"
livenessProbe:
initialDelaySeconds: 5
periodSeconds: 5
Expand Down

0 comments on commit bb27263

Please sign in to comment.