diff --git a/optimum/commands/export/openvino.py b/optimum/commands/export/openvino.py index 695c22e985..095a8b613c 100644 --- a/optimum/commands/export/openvino.py +++ b/optimum/commands/export/openvino.py @@ -106,10 +106,10 @@ def parse_args_openvino(parser: "ArgumentParser"): ), ) optional_group.add_argument( - "--variant", + "--weights-variant", type=str, default=None, - help=("Select a variant of the model to export."), + help=("If specified load weights from variant filename."), ) optional_group.add_argument( "--ratio", @@ -475,6 +475,6 @@ def run(self): stateful=not self.args.disable_stateful, convert_tokenizer=not self.args.disable_convert_tokenizer, library_name=library_name, - model_variant=self.args.variant, + weights_variant=self.args.weights_variant, # **input_shapes, ) diff --git a/optimum/exporters/openvino/__main__.py b/optimum/exporters/openvino/__main__.py index 520a28559f..afc8718dea 100644 --- a/optimum/exporters/openvino/__main__.py +++ b/optimum/exporters/openvino/__main__.py @@ -122,7 +122,7 @@ def main_export( convert_tokenizer: bool = False, library_name: Optional[str] = None, model_loading_kwargs: Optional[Dict[str, Any]] = None, - model_variant: Optional[str] = None, + weights_variant: Optional[str] = None, **kwargs_shapes, ): """ @@ -238,8 +238,8 @@ def main_export( custom_architecture = False patch_16bit = False loading_kwargs = model_loading_kwargs or {} - if model_variant is not None: - loading_kwargs["variant"] = model_variant + if weights_variant is not None: + loading_kwargs["variant"] = weights_variant if library_name == "transformers": config = AutoConfig.from_pretrained( model_name_or_path, @@ -350,7 +350,7 @@ class StoreAttr(object): GPTQQuantizer.post_init_model = post_init_model elif library_name == "diffusers" and is_openvino_version(">=", "2024.6"): - _loading_kwargs = {} if model_variant is None else {"variant": model_variant} + _loading_kwargs = {} if weights_variant is None else {"variant": weights_variant} dtype = 
deduce_diffusers_dtype( model_name_or_path, revision=revision, diff --git a/optimum/intel/openvino/modeling_base.py b/optimum/intel/openvino/modeling_base.py index 3fd26a6e0d..25064dd044 100644 --- a/optimum/intel/openvino/modeling_base.py +++ b/optimum/intel/openvino/modeling_base.py @@ -609,7 +609,7 @@ def _from_transformers( trust_remote_code=trust_remote_code, ov_config=ov_config, library_name=cls._library_name, - model_variant=variant, + weights_variant=variant, ) return cls._from_pretrained( diff --git a/optimum/intel/openvino/modeling_decoder.py b/optimum/intel/openvino/modeling_decoder.py index b411bf07d9..b74c4dc623 100644 --- a/optimum/intel/openvino/modeling_decoder.py +++ b/optimum/intel/openvino/modeling_decoder.py @@ -327,7 +327,7 @@ def _from_transformers( stateful=stateful, model_loading_kwargs=model_loading_kwargs, library_name=cls._library_name, - model_variant=variant, + weights_variant=variant, ) if config.model_type == "phi3" and config.max_position_embeddings != getattr( diff --git a/optimum/intel/openvino/modeling_diffusion.py b/optimum/intel/openvino/modeling_diffusion.py index c2e245c5e7..2613d26c68 100644 --- a/optimum/intel/openvino/modeling_diffusion.py +++ b/optimum/intel/openvino/modeling_diffusion.py @@ -590,7 +590,7 @@ def _from_transformers( force_download=force_download, ov_config=ov_config, library_name=cls._library_name, - model_variant=variant, + weights_variant=variant, ) return cls._from_pretrained( @@ -767,7 +767,7 @@ def _reshape_text_encoder( self, model: openvino.runtime.Model, batch_size: int = -1, tokenizer_max_length: int = -1 ): if batch_size != -1: - shapes = {model.inputs[0]: [batch_size, tokenizer_max_length]} + shapes = {input_tensor: [batch_size, tokenizer_max_length] for input_tensor in model.inputs} model.reshape(shapes) return model @@ -824,9 +824,9 @@ def reshape( tokenizer_max_len = -1 else: tokenizer_max_len = ( - self.tokenizer.model_max_length + getattr(self.tokenizer, "model_max_length", -1) if 
self.tokenizer is not None - else self.tokenizer_2.model_max_length + else getattr(self.tokenizer_2, "model_max_length", -1) ) if self.unet is not None: @@ -848,21 +848,19 @@ def reshape( self.text_encoder.model = self._reshape_text_encoder( self.text_encoder.model, batch_size, - self.tokenizer.model_max_length if "Gemma" not in self.tokenizer.__class__.__name__ else -1, + getattr(self.tokenizer, "model_max_length", -1) + if "Gemma" not in self.tokenizer.__class__.__name__ + else -1, ) if self.text_encoder_2 is not None: self.text_encoder_2.model = self._reshape_text_encoder( - self.text_encoder_2.model, - batch_size, - self.tokenizer_2.model_max_length if "Gemma" not in self.tokenizer.__class__.__name__ else -1, + self.text_encoder_2.model, batch_size, getattr(self.tokenizer_2, "model_max_length", -1) ) if self.text_encoder_3 is not None: self.text_encoder_3.model = self._reshape_text_encoder( - self.text_encoder_3.model, - batch_size, - self.tokenizer_3.model_max_length if "Gemma" not in self.tokenizer.__class__.__name__ else -1, + self.text_encoder_3.model, batch_size, getattr(self.tokenizer_3, "model_max_length", -1) ) self.clear_requests() @@ -1068,9 +1066,7 @@ def forward( model_inputs = {"input_ids": input_ids} if "attention_mask" in self.input_names: - model_inputs["attention_mask"] = ( - attention_mask if attention_mask is not None else torch.ones(input_ids.shape, dtype=torch.long) - ) + model_inputs["attention_mask"] = attention_mask ov_outputs = self.request(model_inputs, share_inputs=True) main_out = ov_outputs[0] diff --git a/optimum/intel/openvino/modeling_visual_language.py b/optimum/intel/openvino/modeling_visual_language.py index c7cd7227f2..b89c238b14 100644 --- a/optimum/intel/openvino/modeling_visual_language.py +++ b/optimum/intel/openvino/modeling_visual_language.py @@ -630,7 +630,7 @@ def _from_transformers( trust_remote_code=trust_remote_code, ov_config=ov_config, stateful=stateful, - model_variant=variant, + weights_variant=variant, ) 
config = AutoConfig.from_pretrained(save_dir_path, trust_remote_code=trust_remote_code) return cls._from_pretrained( diff --git a/tests/openvino/test_diffusion.py b/tests/openvino/test_diffusion.py index bff1879340..2a3917a4c2 100644 --- a/tests/openvino/test_diffusion.py +++ b/tests/openvino/test_diffusion.py @@ -78,7 +78,7 @@ class OVPipelineForText2ImageTest(unittest.TestCase): NEGATIVE_PROMPT_SUPPORT_ARCHITECTURES = ["stable-diffusion", "stable-diffusion-xl", "latent-consistency"] if is_transformers_version(">=", "4.40.0"): SUPPORTED_ARCHITECTURES.extend(["stable-diffusion-3", "flux", "sana"]) - NEGATIVE_PROMPT_SUPPORT_ARCHITECTURES.append(["stable-diffusion-3"]) + NEGATIVE_PROMPT_SUPPORT_ARCHITECTURES.extend(["stable-diffusion-3"]) CALLBACK_SUPPORT_ARCHITECTURES = ["stable-diffusion", "stable-diffusion-xl", "latent-consistency"] OVMODEL_CLASS = OVPipelineForText2Image @@ -94,13 +94,6 @@ def generate_inputs(self, height=128, width=128, batch_size=1): return inputs - def get_auto_cls(self, model_arch): - if model_arch == "sana": - from diffusers import SanaPipeline - - return SanaPipeline - return self.AUTOMODEL_CLASS - @require_diffusers def test_load_vanilla_model_which_is_not_supported(self): with self.assertRaises(Exception) as context: @@ -145,17 +138,16 @@ def test_compare_to_diffusers_pipeline(self, model_arch: str): ov_pipeline = self.OVMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch]) diffusers_pipeline = auto_cls.from_pretrained(MODEL_NAMES[model_arch]) - with torch.no_grad(): - for output_type in ["latent", "np", "pt"]: - inputs["output_type"] = output_type - if model_arch == "sana": - # resolution binning will lead to resize output to standard resolution and back that can interpolate floating-point deviations - inputs["use_resolution_binning"] = False - atol = 1e-4 + for output_type in ["latent", "np", "pt"]: + inputs["output_type"] = output_type + if model_arch == "sana": + # resolution binning will lead to resize output to standard resolution 
and back that can interpolate floating-point deviations + inputs["use_resolution_binning"] = False + atol = 1e-4 - ov_output = ov_pipeline(**inputs, generator=get_generator("pt", SEED)).images - diffusers_output = diffusers_pipeline(**inputs, generator=get_generator("pt", SEED)).images - np.testing.assert_allclose(ov_output, diffusers_output, atol=atol, rtol=1e-2) + ov_output = ov_pipeline(**inputs, generator=get_generator("pt", SEED)).images + diffusers_output = diffusers_pipeline(**inputs, generator=get_generator("pt", SEED)).images + np.testing.assert_allclose(ov_output, diffusers_output, atol=atol, rtol=1e-2) # test on inputs nondivisible on 64 height, width, batch_size = 96, 96, 1 @@ -191,8 +183,7 @@ def __call__(self, *args, **kwargs) -> None: auto_callback = Callback() ov_pipe = self.OVMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch]) - auto_cls = self.get_auto_cls(model_arch) - auto_pipe = auto_cls.from_pretrained(MODEL_NAMES[model_arch]) + auto_pipe = DiffusionPipeline.from_pretrained(MODEL_NAMES[model_arch]) # callback_steps=1 to trigger callback every step ov_pipe(**inputs, callback=ov_callback, callback_steps=1)