
Commit

Clean up ChatQnA vLLM Gaudi parameters
Signed-off-by: Eero Tamminen <[email protected]>
eero-t committed Dec 17, 2024
1 parent 75f8d2a commit 43ad885
Showing 4 changed files with 15 additions and 25 deletions.
18 changes: 7 additions & 11 deletions helm-charts/chatqna/gaudi-vllm-values.yaml
@@ -30,19 +30,15 @@ vllm:
periodSeconds: 5
timeoutSeconds: 1

# TODO: these are taken from GenAIExamples HPU manifest as-is
# vLLM chart needs to adopt / apply relevant ones
HABANA_LOGS: "/tmp/habana_logs"
NUMBA_CACHE_DIR: "/tmp"
PT_HPU_ENABLE_LAZY_COLLECTIVES: "true"
OMPI_MCA_btl_vader_single_copy_mechanism: "none"
HF_HOME: "/tmp/.cache/huggingface"
GPU_MEMORY_UTILIZATION: "0.5"
DTYPE: "auto"
TENSOR_PARALLEL_SIZE: "1"
BLOCK_SIZE: "128"
MAX_NUM_SEQS: "256"
MAX_SEQ_LEN_TO_CAPTURE: "2048"

extraCmdArgs: [
"--tensor-parallel-size", "1",
"--block-size", "128",
"--max-num-seqs", "256",
"--max-seq_len-to-capture", "2048"
]


# Reranking: second largest bottleneck when reranking is in use
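The hunk above trades the env-var style tuning knobs for explicit vLLM command-line arguments. For illustration, a minimal sketch (not part of this commit; the overlay file name is hypothetical, and nesting under "vllm:" follows the hunk header above) of tweaking those arguments at deploy time:

# my-gaudi-overrides.yaml (hypothetical overlay)
vllm:
  extraCmdArgs: [
    "--tensor-parallel-size", "2",   # e.g. shard the model across two Gaudi cards
    "--block-size", "128",
    "--max-num-seqs", "128",
    "--max-seq-len-to-capture", "2048"
  ]

Applied with e.g. "helm install chatqna ./helm-charts/chatqna -f gaudi-vllm-values.yaml -f my-gaudi-overrides.yaml" (chart path assumed).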
14 changes: 0 additions & 14 deletions helm-charts/chatqna/values.yaml
@@ -71,21 +71,7 @@ tgi:
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
vllm:
enabled: false
# TODO: manifest in GenAIExamples uses "meta-llama/Meta-Llama-3-8B-Instruct" instead?
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
# TODO: these are non-redundant/non-broken options used by Agent component,
# but I think their values should be handled inside vLLM component, with
# deployment applying numbers set in configMap, based on values YAML file
# variables.
extraCmdArgs: [
"--enforce-eager",
"--tensor-parallel-size", "1",
"--dtype", "auto",
"--block-size", "128",
"--max-num-seqs", "256",
"--max-seq_len-to-capture", "2048",
"--gpu-memory-utilization", "0.5"
]

# disable guardrails-usvc by default
# See guardrails-values.yaml for guardrail related options
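Note that vLLM remains disabled by default here. A minimal sketch of switching ChatQnA from TGI to the vLLM backend in a values overlay, assuming the tgi section has a matching "enabled" toggle (not shown in this excerpt):

tgi:
  enabled: false
vllm:
  enabled: true
  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3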
3 changes: 3 additions & 0 deletions helm-charts/common/vllm/templates/configmap.yaml
@@ -25,6 +25,9 @@ data:
{{- if .Values.VLLM_CPU_KVCACHE_SPACE }}
VLLM_CPU_KVCACHE_SPACE: {{ .Values.VLLM_CPU_KVCACHE_SPACE | quote}}
{{- end }}
{{- if .Values.PT_HPU_ENABLE_LAZY_COLLECTIVES }}
PT_HPU_ENABLE_LAZY_COLLECTIVES: {{ .Values.PT_HPU_ENABLE_LAZY_COLLECTIVES | quote }}
{{- end }}
{{- if .Values.OMPI_MCA_btl_vader_single_copy_mechanism }}
OMPI_MCA_btl_vader_single_copy_mechanism: {{ .Values.OMPI_MCA_btl_vader_single_copy_mechanism | quote}}
{{- end }}
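Helm's "if" treats an empty string as false, so the new conditional emits the key only when a deployment actually sets it. With PT_HPU_ENABLE_LAZY_COLLECTIVES set to "true" (as the Gaudi overlay in the first file does), the template above renders roughly to the following, with surrounding keys omitted:

data:
  PT_HPU_ENABLE_LAZY_COLLECTIVES: "true"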
5 changes: 5 additions & 0 deletions helm-charts/common/vllm/values.yaml
@@ -103,6 +103,11 @@ tolerations: []
affinity: {}

LLM_MODEL_ID: Intel/neural-chat-7b-v3-3

# Environment variables for vLLM (set in configmap):
# https://docs.vllm.ai/en/latest/getting_started/gaudi-installation.html#environment-variables
OMPI_MCA_btl_vader_single_copy_mechanism: ""
PT_HPU_ENABLE_LAZY_COLLECTIVES: ""
VLLM_CPU_KVCACHE_SPACE: ""

global:
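Per the vLLM Gaudi documentation linked above, PT_HPU_ENABLE_LAZY_COLLECTIVES is needed for tensor-parallel inference with HPU graphs. The empty-string defaults keep all three keys out of the rendered ConfigMap until overridden; a minimal sketch of a values overlay for a direct install of this vllm chart (the values mirror the Gaudi overlay in the first file):

OMPI_MCA_btl_vader_single_copy_mechanism: "none"
PT_HPU_ENABLE_LAZY_COLLECTIVES: "true"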
