diff --git a/optimum/intel/neural_compressor/modeling_base.py b/optimum/intel/neural_compressor/modeling_base.py index b014f8a476..b74bd305bc 100644 --- a/optimum/intel/neural_compressor/modeling_base.py +++ b/optimum/intel/neural_compressor/modeling_base.py @@ -32,6 +32,7 @@ AutoModelForSequenceClassification, AutoModelForTokenClassification, AutoModelForVision2Seq, + GenerationConfig, GenerationMixin, PretrainedConfig, ) @@ -83,6 +84,8 @@ def __init__( self._device = getattr(self.model, "device", None) or torch.device( "cuda:0" if torch.cuda.is_available() else "cpu" ) + self.generation_config = GenerationConfig.from_model_config(config) + # Registers the INCModelForXXX classes into the transformers AutoModel classes to avoid warnings when creating # a pipeline https://github.com/huggingface/transformers/blob/cad61b68396a1a387287a8e2e2fef78a25b79383/src/transformers/pipelines/base.py#L863 AutoConfig.register(self.base_model_prefix, AutoConfig) diff --git a/setup.py b/setup.py index 5c6cf76404..d6e066b22c 100644 --- a/setup.py +++ b/setup.py @@ -28,7 +28,7 @@ INSTALL_REQUIRE = [ "torch>=1.11", - "transformers>=4.36.0,<4.39.0", + "transformers>=4.36.0,<4.40.0", "optimum @ git+https://github.com/huggingface/optimum.git#egg=optimum", "datasets>=1.4.0", "sentencepiece", @@ -61,7 +61,7 @@ "openvino": ["openvino>=2023.3", "nncf>=2.8.1"], "openvino-tokenizers": ["openvino-tokenizers[transformers]"], "nncf": ["nncf>=2.8.1"], - "ipex": ["intel-extension-for-pytorch"], + "ipex": ["intel-extension-for-pytorch", "transformers>=4.36.0,<4.39.0"], "diffusers": ["diffusers"], "quality": QUALITY_REQUIRE, "tests": TESTS_REQUIRE, diff --git a/tests/openvino/test_modeling.py b/tests/openvino/test_modeling.py index 22049474ab..d1da08f58e 100644 --- a/tests/openvino/test_modeling.py +++ b/tests/openvino/test_modeling.py @@ -501,7 +501,7 @@ class OVModelForCausalLMIntegrationTest(unittest.TestCase): "qwen", "qwen2", "stablelm", - # "starcoder2", # TODO: enable with next transformers release + "starcoder2", "phi", ) GENERATION_LENGTH = 100 @@ -525,10 +525,8 @@ def test_compare_to_transformers(self, model_arch): model_kwargs = {} if model_arch in self.REMOTE_CODE_MODELS: - model_kwargs = { - "config": AutoConfig.from_pretrained(model_id, trust_remote_code=True), - "trust_remote_code": True, - } + model_kwargs = {"trust_remote_code": True} + ov_model = OVModelForCausalLM.from_pretrained(model_id, export=True, ov_config=F32_CONFIG, **model_kwargs) self.assertIsInstance(ov_model.config, PretrainedConfig) self.assertTrue(ov_model.use_cache) @@ -572,6 +570,10 @@ def test_pipeline(self, model_arch): "trust_remote_code": True, } tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=model_arch in self.REMOTE_CODE_MODELS) + + if model_arch == "qwen": + tokenizer._convert_tokens_to_ids = lambda x: 0 + model = OVModelForCausalLM.from_pretrained( model_id, export=True, use_cache=False, compile=False, **model_kwargs )