From daaac33b525e6af689d3f7505998fa7575ea3d7c Mon Sep 17 00:00:00 2001
From: Aidova
Date: Thu, 14 Sep 2023 13:00:40 +0400
Subject: [PATCH] move VAE decoder to fp32 execution precision on GPU

---
Review notes (ignored by `git am`):
- Line structure of the patch restored; hunk headers and diffstat recomputed.
- `OVModelVaeDecoder.__init__` signature is now one parameter per line and
  `fp32_gpu` is annotated as `bool`.
- Removed the stray `, ` in `super().__init__(*args, **kwargs, )`; that line is
  now unchanged context.
- Wrapped the over-long `vae_decoder_fp32` assignment and added comments and a
  docstring to the new code. Runtime behaviour of the added code is unchanged.
- The post-image blob id on the `index` line below was not recomputed.

 optimum/intel/openvino/modeling_diffusion.py | 23 +++++++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)

diff --git a/optimum/intel/openvino/modeling_diffusion.py b/optimum/intel/openvino/modeling_diffusion.py
index 1085c9e81c..816596def9 100644
--- a/optimum/intel/openvino/modeling_diffusion.py
+++ b/optimum/intel/openvino/modeling_diffusion.py
@@ -92,7 +92,12 @@ def __init__(
         self._model_save_dir = (
             Path(model_save_dir.name) if isinstance(model_save_dir, TemporaryDirectory) else model_save_dir
         )
-        self.vae_decoder = OVModelVaeDecoder(vae_decoder, self)
+        # Request fp32 VAE decoding on GPU when the caller asks for it or when the
+        # scheduler's prediction type is anything other than "epsilon".
+        vae_decoder_fp32 = (
+            kwargs.get("vae_decoder_fp32_on_gpu", False) or scheduler.config.prediction_type != "epsilon"
+        )
+        self.vae_decoder = OVModelVaeDecoder(vae_decoder, self, fp32_gpu=vae_decoder_fp32)
         self.unet = OVModelUnet(unet, self)
         self.text_encoder = OVModelTextEncoder(text_encoder, self) if text_encoder is not None else None
         self.text_encoder_2 = (
@@ -593,8 +598,14 @@ def __call__(
 
 class OVModelVaeDecoder(OVModelPart):
     def __init__(
-        self, model: openvino.runtime.Model, parent_model: OVBaseModel, ov_config: Optional[Dict[str, str]] = None
+        self,
+        model: openvino.runtime.Model,
+        parent_model: OVBaseModel,
+        ov_config: Optional[Dict[str, str]] = None,
+        fp32_gpu: bool = False,
     ):
+        # When true, `_compile` forces f32 inference precision on GPU devices.
+        self.fp32_gpu = fp32_gpu
         super().__init__(model, parent_model, ov_config, "vae_decoder")
 
     def __call__(self, latent_sample: np.ndarray):
@@ -606,6 +617,12 @@ def __call__(self, latent_sample: np.ndarray):
         outputs = self.request(inputs, shared_memory=True)
         return list(outputs.values())
 
+    def _compile(self):
+        """Compile the decoder, forcing f32 inference precision on GPU when `fp32_gpu` is set."""
+        if "GPU" in self.device and self.fp32_gpu:
+            self.ov_config["INFERENCE_PRECISION_HINT"] = "f32"
+        super()._compile()
+
 
 class OVModelVaeEncoder(OVModelPart):
     def __init__(
@@ -691,6 +708,8 @@ class OVStableDiffusionXLPipelineBase(OVStableDiffusionPipelineBase):
     export_feature = "stable-diffusion-xl"
 
     def __init__(self, *args, add_watermarker: Optional[bool] = None, **kwargs):
+        # Always request fp32 VAE decoding on GPU; this overrides any caller-supplied value.
+        kwargs["vae_decoder_fp32_on_gpu"] = True
         super().__init__(*args, **kwargs)
 
         add_watermarker = add_watermarker if add_watermarker is not None else is_invisible_watermark_available()