diff --git a/python/llm/example/NPU/HF-Transformers-AutoModels/Multimodal/README.md b/python/llm/example/NPU/HF-Transformers-AutoModels/Multimodal/README.md index 246053c793a..53f47df7946 100644 --- a/python/llm/example/NPU/HF-Transformers-AutoModels/Multimodal/README.md +++ b/python/llm/example/NPU/HF-Transformers-AutoModels/Multimodal/README.md @@ -37,8 +37,8 @@ pip install timm torch==2.1.2 torchvision==0.16.2 pip install BCEmbedding==0.1.5 transformers==4.40.0 # [optional] for Speech_Paraformer-Large -pip install -U funasr -pip install modelscope torch==2.1.2 torchaudio==2.1.2 +pip install funasr==1.1.14 +pip install modelscope==1.20.1 torch==2.1.2 torchaudio==2.1.2 ``` ### 2. Runtime Configurations diff --git a/python/llm/src/ipex_llm/transformers/npu_model.py b/python/llm/src/ipex_llm/transformers/npu_model.py index 51dcb17ccae..850f9009ee5 100644 --- a/python/llm/src/ipex_llm/transformers/npu_model.py +++ b/python/llm/src/ipex_llm/transformers/npu_model.py @@ -162,11 +162,13 @@ def from_pretrained(cls, *args, **kwargs): model = cls.HF_Model.from_pretrained(*args, **kwargs) else: model = cls.HF_Model(*args, **kwargs) - model.config.update({"bigdl_lcmu_enabled": False}) + if hasattr(model, "config"): + model.config.update({"bigdl_lcmu_enabled": False}) logger.info(f"Converting model, it may takes up to several minutes ...") - model.config.update({"optimize_model": optimize_model}) + if hasattr(model, "config"): + model.config.update({"optimize_model": optimize_model}) if mock_device == "cpu": with torch.no_grad(): diff --git a/python/llm/src/ipex_llm/transformers/npu_models/paraformer_mp.py b/python/llm/src/ipex_llm/transformers/npu_models/paraformer_mp.py index 8d72cec50e9..a315f6a635d 100644 --- a/python/llm/src/ipex_llm/transformers/npu_models/paraformer_mp.py +++ b/python/llm/src/ipex_llm/transformers/npu_models/paraformer_mp.py @@ -294,17 +294,17 @@ def forward( torch.Tensor: result """ backend_cls = self.backend_cls_prefill - inputs = (x, - masks, - self.layer_norm_0_weight, - self.layer_norm_0_bias, - self.layer_norm_1_weight, - self.layer_norm_1_bias, - self.fsmn_weight, - self.qkv_bias, - self.out_bias, - self.w1_bias, - self.w2_bias, + inputs = (x.to(torch.float16), + masks.to(torch.float16), + self.layer_norm_0_weight.to(torch.float16), + self.layer_norm_0_bias.to(torch.float16), + self.layer_norm_1_weight.to(torch.float16), + self.layer_norm_1_bias.to(torch.float16), + self.fsmn_weight.to(torch.float16), + self.qkv_bias.to(torch.float16), + self.out_bias.to(torch.float16), + self.w1_bias.to(torch.float16), + self.w2_bias.to(torch.float16), ) outputs = run_model( @@ -431,6 +431,8 @@ def forward( args = (xs_pad, masks) self.prefill_input_queue.put(args) xs_pad, masks = self.prefill_result_queue.get() + xs_pad = xs_pad.to(torch.float32) + masks = masks.to(torch.float32) return xs_pad, masks def shutdown(self): @@ -639,7 +641,7 @@ def __init__( ): super().__init__() - self.do_print = True + self.do_print = do_print op_parameters = [] for w in parameters: