From 69e3a56943ee21d3487b22c49d0a2a1f6872ae9c Mon Sep 17 00:00:00 2001
From: binbin Deng <108676127+plusbang@users.noreply.github.com>
Date: Wed, 6 Nov 2024 10:07:00 +0800
Subject: [PATCH] [NPU] Hot fix of load_low_bit (#12344)

---
 python/llm/src/ipex_llm/transformers/npu_model.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/llm/src/ipex_llm/transformers/npu_model.py b/python/llm/src/ipex_llm/transformers/npu_model.py
index 07faed4f1fd..c37aca8b064 100644
--- a/python/llm/src/ipex_llm/transformers/npu_model.py
+++ b/python/llm/src/ipex_llm/transformers/npu_model.py
@@ -549,7 +549,7 @@ def load_low_bit(cls, pretrained_model_name_or_path: str, *model_args, **kwargs)
 from ipex_llm.transformers.npu_models.convert_mp import optimize_llm
 optimize_llm(
 llm,
- max_output_len=max_context_len,
+ max_context_len=max_context_len,
 max_prompt_len=max_prompt_len,
 inter_pp=inter_pp,
 intra_pp=intra_pp,