
Commit

Clean up ChatQnA vLLM Gaudi parameters
Signed-off-by: Eero Tamminen <[email protected]>
eero-t committed Dec 17, 2024
1 parent 75f8d2a commit 43ad885
Showing 4 changed files with 15 additions and 25 deletions.
18 changes: 7 additions & 11 deletions helm-charts/chatqna/gaudi-vllm-values.yaml
@@ -30,19 +30,15 @@ vllm:
periodSeconds: 5
timeoutSeconds: 1

# TODO: these are taken from GenAIExamples HPU manifest as-is
# vLLM chart needs to adopt / apply relevant ones
HABANA_LOGS: "/tmp/habana_logs"
NUMBA_CACHE_DIR: "/tmp"
PT_HPU_ENABLE_LAZY_COLLECTIVES: "true"
OMPI_MCA_btl_vader_single_copy_mechanism: "none"
HF_HOME: "/tmp/.cache/huggingface"
GPU_MEMORY_UTILIZATION: "0.5"
DTYPE: "auto"
TENSOR_PARALLEL_SIZE: "1"
BLOCK_SIZE: "128"
MAX_NUM_SEQS: "256"
MAX_SEQ_LEN_TO_CAPTURE: "2048"

extraCmdArgs: [
"--tensor-parallel-size", "1",
"--block-size", "128",
"--max-num-seqs", "256",
"--max-seq_len-to-capture", "2048"
]


# Reranking: second largest bottleneck when reranking is in use
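The hunk above trades the env-var style tuning knobs for explicit vLLM command-line arguments. For illustration, a minimal sketch (not part of this commit; the overlay file name is hypothetical, and nesting under "vllm:" follows the hunk header above) of tweaking those arguments at deploy time:

# my-gaudi-overrides.yaml (hypothetical overlay)
vllm:
  extraCmdArgs: [
    "--tensor-parallel-size", "2",   # e.g. shard the model across two Gaudi cards
    "--block-size", "128",
    "--max-num-seqs", "128",
    "--max-seq-len-to-capture", "2048"
  ]

Applied with e.g. "helm install chatqna ./helm-charts/chatqna -f gaudi-vllm-values.yaml -f my-gaudi-overrides.yaml" (chart path assumed).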
14 changes: 0 additions & 14 deletions helm-charts/chatqna/values.yaml
@@ -71,21 +71,7 @@ tgi:
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
vllm:
enabled: false
# TODO: manifest in GenAIExamples uses "meta-llama/Meta-Llama-3-8B-Instruct" instead?
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
# TODO: these are non-redundant/non-broken options used by Agent component,
# but I think their values should be handled inside vLLM component, with
# deployment applying numbers set in configMap, based on values YAML file
# variables.
extraCmdArgs: [
"--enforce-eager",
"--tensor-parallel-size", "1",
"--dtype", "auto",
"--block-size", "128",
"--max-num-seqs", "256",
"--max-seq_len-to-capture", "2048",
"--gpu-memory-utilization", "0.5"
]

# disable guardrails-usvc by default
# See guardrails-values.yaml for guardrail related options
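Note that vLLM remains disabled by default here. A minimal sketch of switching ChatQnA from TGI to the vLLM backend in a values overlay, assuming the tgi section has a matching "enabled" toggle (not shown in this excerpt):

tgi:
  enabled: false
vllm:
  enabled: true
  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3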
3 changes: 3 additions & 0 deletions helm-charts/common/vllm/templates/configmap.yaml
@@ -25,6 +25,9 @@ data:
{{- if .Values.VLLM_CPU_KVCACHE_SPACE }}
VLLM_CPU_KVCACHE_SPACE: {{ .Values.VLLM_CPU_KVCACHE_SPACE | quote}}
{{- end }}
{{- if .Values.PT_HPU_ENABLE_LAZY_COLLECTIVES }}
PT_HPU_ENABLE_LAZY_COLLECTIVES: {{ .Values.PT_HPU_ENABLE_LAZY_COLLECTIVES | quote }}
{{- end }}
{{- if .Values.OMPI_MCA_btl_vader_single_copy_mechanism }}
OMPI_MCA_btl_vader_single_copy_mechanism: {{ .Values.OMPI_MCA_btl_vader_single_copy_mechanism | quote}}
{{- end }}
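Helm's "if" treats an empty string as false, so the new conditional emits the key only when a deployment actually sets it. With PT_HPU_ENABLE_LAZY_COLLECTIVES set to "true" (as the Gaudi overlay in the first file does), the template above renders roughly to the following, with surrounding keys omitted:

data:
  PT_HPU_ENABLE_LAZY_COLLECTIVES: "true"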
5 changes: 5 additions & 0 deletions helm-charts/common/vllm/values.yaml
@@ -103,6 +103,11 @@ tolerations: []
affinity: {}

LLM_MODEL_ID: Intel/neural-chat-7b-v3-3

# Environment variables for vLLM (set in configmap):
# https://docs.vllm.ai/en/latest/getting_started/gaudi-installation.html#environment-variables
OMPI_MCA_btl_vader_single_copy_mechanism: ""
PT_HPU_ENABLE_LAZY_COLLECTIVES: ""
VLLM_CPU_KVCACHE_SPACE: ""

global:
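Per the vLLM Gaudi documentation linked above, PT_HPU_ENABLE_LAZY_COLLECTIVES is needed for tensor-parallel inference with HPU graphs. The empty-string defaults keep all three keys out of the rendered ConfigMap until overridden; a minimal sketch of a values overlay for a direct install of this vllm chart (the values mirror the Gaudi overlay in the first file):

OMPI_MCA_btl_vader_single_copy_mechanism: "none"
PT_HPU_ENABLE_LAZY_COLLECTIVES: "true"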
