From 5be5e67b6ca0e179c889cf5a3df6ba1d5686d44e Mon Sep 17 00:00:00 2001
From: changwangss
Date: Thu, 6 Jul 2023 19:51:34 -0700
Subject: [PATCH 1/7] add starcoder past-kv shape for TSModelForCausalLM class

Signed-off-by: changwangss
---
 optimum/intel/generation/modeling.py | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/optimum/intel/generation/modeling.py b/optimum/intel/generation/modeling.py
index 1a93375bbe..45246fb660 100644
--- a/optimum/intel/generation/modeling.py
+++ b/optimum/intel/generation/modeling.py
@@ -20,11 +20,11 @@
 from typing import Optional, Tuple, Union

 import torch
-from huggingface_hub import hf_hub_download
 from transformers import AutoConfig, AutoModelForCausalLM, PretrainedConfig, PreTrainedModel
 from transformers.modeling_outputs import CausalLMOutputWithPast
 from transformers.utils import WEIGHTS_NAME

+from huggingface_hub import hf_hub_download
 from optimum.exporters import TasksManager
 from optimum.modeling_base import OptimizedModel
 from optimum.utils import NormalizedConfigManager
@@ -273,13 +273,17 @@ def forward(
         num_attention_heads = self.normalized_config.num_attention_heads
         hidden_size = self.normalized_config.hidden_size
         d_k = hidden_size // num_attention_heads
-
-        if self.config.model_type != "bloom":
+        if self.config.model_type == "gpt_bigcode":
+            new_shape = [input_ids.shape[0], 0, d_k // 2]
+            empty_tensor = torch.empty(size=new_shape)
+            if self.model_dtype is not None:
+                empty_tensor = empty_tensor.to(self.model_dtype)
+            past_key_values = tuple([empty_tensor] * num_layers)
+        elif self.config.model_type != "bloom":
             new_shape = [input_ids.shape[0], num_attention_heads, 0, d_k]
             empty_tensor = torch.empty(size=new_shape)
             if self.model_dtype is not None:
                 empty_tensor = empty_tensor.to(self.model_dtype)
-            past_key_values = tuple(tuple(empty_tensor for _ in range(nb_pkv)) for _ in range(num_layers))
             pkv = tuple(empty_tensor for _ in range(nb_pkv))
         else:
             pkv = ()
@@ -292,7 +296,8 @@
                 if self.model_dtype is not None:
                     empty_tensor = empty_tensor.to(self.model_dtype)
                 pkv = pkv + (empty_tensor,)
-        past_key_values = tuple(tuple(pkv) for _ in range(num_layers))
+        if past_key_values is None:
+            past_key_values = tuple(tuple(pkv) for _ in range(num_layers))

         inputs["past_key_values"] = past_key_values
         outputs = self.model(**inputs)

From c58b94d1924458792f244cad556b9a058f7e326f Mon Sep 17 00:00:00 2001
From: changwangss
Date: Sun, 9 Jul 2023 23:14:58 -0700
Subject: [PATCH 2/7] improve code style and past-kv shape

Signed-off-by: changwangss
---
 optimum/intel/generation/modeling.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/optimum/intel/generation/modeling.py b/optimum/intel/generation/modeling.py
index 45246fb660..cbe84d13b9 100644
--- a/optimum/intel/generation/modeling.py
+++ b/optimum/intel/generation/modeling.py
@@ -274,7 +274,7 @@
         hidden_size = self.normalized_config.hidden_size
         d_k = hidden_size // num_attention_heads
         if self.config.model_type == "gpt_bigcode":
-            new_shape = [input_ids.shape[0], 0, d_k // 2]
+            new_shape = [input_ids.shape[0], 0, d_k * 2]
             empty_tensor = torch.empty(size=new_shape)
             if self.model_dtype is not None:
                 empty_tensor = empty_tensor.to(self.model_dtype)
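Note on patches 1 and 2: gpt_bigcode (the StarCoder architecture) uses multi-query attention, so transformers caches one fused key/value tensor per layer rather than the usual per-layer (key, value) pair. Patch 1 initially sized the fused dimension as d_k // 2; patch 2 corrects it to d_k * 2, since the fused tensor holds both the key and the value of the single shared head. A minimal sketch of the two empty-cache layouts (sizes are illustrative; the real values come from the model's NormalizedConfig):

import torch

# Illustrative sizes only.
batch_size, num_layers, num_attention_heads, hidden_size = 1, 2, 4, 32
d_k = hidden_size // num_attention_heads  # per-head dimension

# Standard decoder (e.g. gpt2): one (key, value) pair per layer, each of
# shape (batch_size, num_heads, seq_len, d_k); seq_len is 0 before decoding.
standard_past = tuple(
    (
        torch.empty(batch_size, num_attention_heads, 0, d_k),
        torch.empty(batch_size, num_attention_heads, 0, d_k),
    )
    for _ in range(num_layers)
)

# gpt_bigcode: multi-query attention shares a single key/value head, cached
# as one fused tensor of shape (batch_size, seq_len, 2 * d_k) per layer.
bigcode_past = tuple(torch.empty(batch_size, 0, d_k * 2) for _ in range(num_layers))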
From 694faaf28bd22e6ce232183edb88e1a4b1e5c068 Mon Sep 17 00:00:00 2001
From: changwangss
Date: Sun, 9 Jul 2023 23:20:06 -0700
Subject: [PATCH 3/7] fix style

Signed-off-by: changwangss
---
 optimum/intel/generation/modeling.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/optimum/intel/generation/modeling.py b/optimum/intel/generation/modeling.py
index cbe84d13b9..6467455c62 100644
--- a/optimum/intel/generation/modeling.py
+++ b/optimum/intel/generation/modeling.py
@@ -20,11 +20,11 @@
 from typing import Optional, Tuple, Union

 import torch
+from huggingface_hub import hf_hub_download
 from transformers import AutoConfig, AutoModelForCausalLM, PretrainedConfig, PreTrainedModel
 from transformers.modeling_outputs import CausalLMOutputWithPast
 from transformers.utils import WEIGHTS_NAME

-from huggingface_hub import hf_hub_download
 from optimum.exporters import TasksManager
 from optimum.modeling_base import OptimizedModel
 from optimum.utils import NormalizedConfigManager

From ccb2877d0ed599f01e946a6f0fd4136069f7a4cb Mon Sep 17 00:00:00 2001
From: changwangss
Date: Mon, 25 Sep 2023 02:39:44 -0700
Subject: [PATCH 4/7] support gpt_bigcode

Signed-off-by: changwangss
---
 optimum/intel/generation/modeling.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/optimum/intel/generation/modeling.py b/optimum/intel/generation/modeling.py
index 6467455c62..7748159fbc 100644
--- a/optimum/intel/generation/modeling.py
+++ b/optimum/intel/generation/modeling.py
@@ -49,10 +49,10 @@ def prepare_jit_inputs(model: PreTrainedModel, task: str, use_cache: bool = False):
     onnx_config_class = TasksManager.get_exporter_config_constructor(model=model, exporter="onnx", task=task)
     onnx_config = onnx_config_class(model.config)
     if task == "text-generation" and use_cache:
-        onnx_config = onnx_config_class(model.config, use_past=True)
+        onnx_config = onnx_config_class(model.config, use_past=True, use_past_in_inputs=True)
     dummy_inputs = onnx_config.generate_dummy_inputs(framework="pt")
     model_inputs = {key: dummy_inputs[key] for key in signature.parameters if dummy_inputs.get(key, None) is not None}
-    if task == "text-generation" and use_cache:
+    if task == "text-generation" and use_cache and model.config.model_type != "gpt_bigcode":
         # WA jit.trace issue of model like llama in https://github.com/huggingface/transformers/blob/main/src/transformers/models/llama/modeling_llama.py#L464, or else, generation output will be incorrect
         pkv = []
         for i in range(len(model_inputs["past_key_values"])):
@@ -70,6 +70,8 @@ def prepare_jit_inputs(model: PreTrainedModel, task: str, use_cache: bool = False):

 def jit_trace(model: PreTrainedModel, task: str, use_cache: bool = False):
     model_inputs = prepare_jit_inputs(model, task, use_cache)
+    # check that the model_inputs are correct before tracing
+    model(**model_inputs)
     torch._C._jit_set_texpr_fuser_enabled(False)
     if "past_key_values" in model_inputs.keys():
         model.config.return_dict = False
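Note on patch 4: passing use_past_in_inputs=True makes the exporter config generate dummy past-kv inputs, the llama jit.trace workaround is skipped for gpt_bigcode (its cache is a flat tuple of fused tensors, not nested (key, value) pairs), and jit_trace now runs the model once on the dummy inputs so mismatched inputs fail before tracing. A usage sketch under these changes (the tiny checkpoint is the one registered in the ipex tests below; exact behavior may vary with the installed torch/transformers versions):

from transformers import AutoModelForCausalLM

from optimum.intel.generation.modeling import jit_trace

model = AutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-GPTBigCodeModel")
# prepare_jit_inputs builds the dummy inputs (including gpt_bigcode's fused
# past-kv) and jit_trace validates them with a forward pass before tracing.
traced_model = jit_trace(model, task="text-generation", use_cache=True)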
From d3e38908d3d8d1f452206d5bb9bd6ed5f39619bb Mon Sep 17 00:00:00 2001
From: changwangss
Date: Mon, 25 Sep 2023 18:35:52 -0700
Subject: [PATCH 5/7] add gptbigcode to ipex test

Signed-off-by: changwangss
---
 tests/ipex/test_inference.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/ipex/test_inference.py b/tests/ipex/test_inference.py
index d5cf571150..f1aa44468f 100644
--- a/tests/ipex/test_inference.py
+++ b/tests/ipex/test_inference.py
@@ -40,6 +40,7 @@
     "gpt2": "hf-internal-testing/tiny-random-gpt2",
     "gpt_neo": "hf-internal-testing/tiny-random-GPTNeoModel",
     "gpt_neox": "hf-internal-testing/tiny-random-GPTNeoXForCausalLM",
+    "gpt_bigcode": "hf-internal-testing/tiny-random-GPTBigCodeModel",
 }

 _CLASSIFICATION_TASK_TO_AUTOMODELS = {
@@ -59,6 +60,7 @@ class IPEXIntegrationTest(unittest.TestCase):
         "gptj",
         "gpt2",
         "gpt_neo",
+        "gpt_bigcode"
     )

     QA_SUPPORTED_ARCHITECTURES = (

From 7507dd5bc4876dc1d91394ff3945e3e01ffa3195 Mon Sep 17 00:00:00 2001
From: changwangss
Date: Mon, 25 Sep 2023 19:00:22 -0700
Subject: [PATCH 6/7] fix style

Signed-off-by: changwangss
---
 tests/ipex/test_inference.py | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/tests/ipex/test_inference.py b/tests/ipex/test_inference.py
index f1aa44468f..d21267fbdb 100644
--- a/tests/ipex/test_inference.py
+++ b/tests/ipex/test_inference.py
@@ -15,7 +15,6 @@
 import unittest

 import torch
-from parameterized import parameterized

 # TODO : add more tasks
 from transformers import (
@@ -29,6 +28,7 @@

 from optimum.intel import inference_mode as ipex_inference_mode
 from optimum.intel.generation.modeling import TSModelForCausalLM
+from parameterized import parameterized


 MODEL_NAMES = {
@@ -56,12 +56,7 @@ class IPEXIntegrationTest(unittest.TestCase):
         "roberta",
     )

-    TEXT_GENERATION_SUPPORTED_ARCHITECTURES = (
-        "gptj",
-        "gpt2",
-        "gpt_neo",
-        "gpt_bigcode"
-    )
+    TEXT_GENERATION_SUPPORTED_ARCHITECTURES = ("gptj", "gpt2", "gpt_neo", "gpt_bigcode")

     QA_SUPPORTED_ARCHITECTURES = (
         "bert",

From f62bb9d3b01b1eae9115eb99971d831c83f49ba2 Mon Sep 17 00:00:00 2001
From: changwangss
Date: Mon, 25 Sep 2023 19:09:56 -0700
Subject: [PATCH 7/7] fix style

Signed-off-by: changwangss
---
 tests/ipex/test_inference.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/ipex/test_inference.py b/tests/ipex/test_inference.py
index d21267fbdb..6889e76af2 100644
--- a/tests/ipex/test_inference.py
+++ b/tests/ipex/test_inference.py
@@ -15,6 +15,7 @@
 import unittest

 import torch
+from parameterized import parameterized

 # TODO : add more tasks
 from transformers import (
@@ -28,7 +29,6 @@

 from optimum.intel import inference_mode as ipex_inference_mode
 from optimum.intel.generation.modeling import TSModelForCausalLM
-from parameterized import parameterized


 MODEL_NAMES = {
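Note on patches 5–7: they register the tiny GPTBigCode checkpoint and add "gpt_bigcode" to TEXT_GENERATION_SUPPORTED_ARCHITECTURES; patches 6 and 7 only shuffle import order and collapse the tuple to satisfy the style checker. A condensed sketch of what the parameterized test exercises for the new architecture (requires intel_extension_for_pytorch; the generation arguments here are illustrative, not the test's exact values):

import torch
from transformers import AutoModelForCausalLM

from optimum.intel import inference_mode as ipex_inference_mode

model = AutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-GPTBigCodeModel")
input_ids = torch.randint(0, model.config.vocab_size, (1, 8))

# inference_mode wraps the model with ipex optimizations (and jit tracing via
# the patched jit_trace when jit=True); generation then runs as usual.
with ipex_inference_mode(model, dtype=torch.float32, jit=True) as ipex_model:
    output = ipex_model.generate(input_ids, max_new_tokens=4, do_sample=False)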