diff --git a/helm-charts/chatqna/gaudi-values.yaml b/helm-charts/chatqna/gaudi-values.yaml
index 6867fbef4..203e1afae 100644
--- a/helm-charts/chatqna/gaudi-values.yaml
+++ b/helm-charts/chatqna/gaudi-values.yaml
@@ -13,6 +13,7 @@ tgi:
   resources:
     limits:
       habana.ai/gaudi: 1
+  # higher limits are needed with extra input tokens added by rerank
   MAX_INPUT_LENGTH: "2048"
   MAX_TOTAL_TOKENS: "4096"
   CUDA_GRAPHS: ""