Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

sd3 pipeline support #916

Merged
merged 24 commits into from
Oct 24, 2024
Merged
Show file tree
Hide file tree
Changes from 22 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions optimum/commands/export/openvino.py
Original file line number Diff line number Diff line change
Expand Up @@ -318,6 +318,10 @@ def run(self):
from optimum.intel import OVStableDiffusionPipeline

model_cls = OVStableDiffusionPipeline
elif class_name == "StableDiffusion3Pipeline":
from optimum.intel import OVStableDiffusion3Pipeline

model_cls = OVStableDiffusion3Pipeline
else:
raise NotImplementedError(f"Quantization in hybrid mode isn't supported for class {class_name}.")

Expand Down
2 changes: 1 addition & 1 deletion optimum/exporters/openvino/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -491,7 +491,7 @@ def maybe_convert_tokenizers(library_name: str, output: Path, model=None, prepro
f"models won't be generated. Exception: {exception}"
)
elif model:
for tokenizer_name in ("tokenizer", "tokenizer_2"):
for tokenizer_name in ("tokenizer", "tokenizer_2", "tokenizer_3"):
tokenizer = getattr(model, tokenizer_name, None)
if tokenizer:
export_tokenizer(tokenizer, output / tokenizer_name, task=task)
Expand Down
265 changes: 247 additions & 18 deletions optimum/exporters/openvino/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import copy
import functools
import gc
import logging
Expand All @@ -31,7 +32,12 @@
from optimum.exporters.onnx.convert import check_dummy_inputs_are_allowed
from optimum.exporters.onnx.convert import export_pytorch as export_pytorch_to_onnx
from optimum.exporters.onnx.convert import export_tensorflow as export_tensorflow_onnx
from optimum.exporters.utils import _get_submodels_and_export_configs as _default_get_submodels_and_export_configs
from optimum.exporters.utils import (
_get_submodels_and_export_configs as _default_get_submodels_and_export_configs,
)
from optimum.exporters.utils import (
get_diffusion_models_for_export,
)
from optimum.intel.utils.import_utils import (
_nncf_version,
_open_clip_version,
Expand Down Expand Up @@ -618,23 +624,27 @@ def export_from_model(
model, library_name, task, preprocessors, custom_export_configs, fn_get_submodels
)

logging.disable(logging.INFO)
export_config, models_and_export_configs, stateful_submodels = _get_submodels_and_export_configs(
model=model,
task=task,
monolith=False,
custom_export_configs=custom_export_configs if custom_export_configs is not None else {},
custom_architecture=custom_architecture,
fn_get_submodels=fn_get_submodels,
preprocessors=preprocessors,
library_name=library_name,
model_kwargs=model_kwargs,
_variant="default",
legacy=False,
exporter="openvino",
stateful=stateful,
)
logging.disable(logging.NOTSET)
if library_name == "diffusers":
export_config, models_and_export_configs = get_diffusion_models_for_export_ext(model, exporter="openvino")
stateful_submodels = False
else:
logging.disable(logging.INFO)
export_config, models_and_export_configs, stateful_submodels = _get_submodels_and_export_configs(
model=model,
task=task,
monolith=False,
custom_export_configs=custom_export_configs if custom_export_configs is not None else {},
custom_architecture=custom_architecture,
fn_get_submodels=fn_get_submodels,
preprocessors=preprocessors,
library_name=library_name,
model_kwargs=model_kwargs,
_variant="default",
legacy=False,
exporter="openvino",
stateful=stateful,
)
logging.disable(logging.NOTSET)

if library_name == "open_clip":
if hasattr(model.config, "save_pretrained"):
Expand Down Expand Up @@ -700,6 +710,10 @@ def export_from_model(
if tokenizer_2 is not None:
tokenizer_2.save_pretrained(output.joinpath("tokenizer_2"))

tokenizer_3 = getattr(model, "tokenizer_3", None)
if tokenizer_3 is not None:
tokenizer_3.save_pretrained(output.joinpath("tokenizer_3"))

model.save_config(output)

export_models(
Expand Down Expand Up @@ -888,3 +902,218 @@ def _get_submodels_and_export_configs(
)
stateful_per_model = [stateful] * len(models_for_export)
return export_config, models_for_export, stateful_per_model


def get_diffusion_models_for_export_ext(
    pipeline: "DiffusionPipeline", int_dtype: str = "int64", float_dtype: str = "fp32", exporter: str = "openvino"
):
    """Pick the sub-model collector that matches the type of a diffusers pipeline.

    Stable Diffusion 3 pipelines (text-to-image, inpaint and img2img) are handled
    by ``get_sd3_models_for_export`` and Flux pipelines by
    ``get_flux_models_for_export``. Any other pipeline is delegated to
    ``get_diffusion_models_for_export``. Pipeline classes that cannot be
    imported from the installed ``diffusers`` are treated as non-matching.

    Args:
        pipeline: The diffusers pipeline whose sub-models should be exported.
        int_dtype: Integer dtype name forwarded to the export configs.
        float_dtype: Float dtype name forwarded to the export configs.
        exporter: Exporter backend name forwarded to the collectors.

    Returns:
        A 2-tuple ``(None, models_for_export)``; the first element is always
        ``None``.
    """
    # An empty class tuple makes ``isinstance`` evaluate to False, which stands
    # in for "this pipeline family is unavailable in the installed diffusers".
    try:
        from diffusers import (
            StableDiffusion3Img2ImgPipeline,
            StableDiffusion3InpaintPipeline,
            StableDiffusion3Pipeline,
        )
    except ImportError:
        sd3_pipeline_types = ()
    else:
        sd3_pipeline_types = (
            StableDiffusion3Pipeline,
            StableDiffusion3InpaintPipeline,
            StableDiffusion3Img2ImgPipeline,
        )

    try:
        from diffusers import FluxPipeline
    except ImportError:
        flux_pipeline_types = ()
    else:
        flux_pipeline_types = (FluxPipeline,)

    # SD3 takes precedence over Flux; everything else uses the generic collector.
    if isinstance(pipeline, sd3_pipeline_types):
        return None, get_sd3_models_for_export(pipeline, exporter, int_dtype, float_dtype)
    if isinstance(pipeline, flux_pipeline_types):
        return None, get_flux_models_for_export(pipeline, exporter, int_dtype, float_dtype)
    return None, get_diffusion_models_for_export(pipeline, int_dtype, float_dtype, exporter)


def get_sd3_models_for_export(pipeline, exporter, int_dtype, float_dtype):
    """Collect the sub-models of a Stable Diffusion 3 pipeline with their export configs.

    Side effects on ``pipeline``: ``output_hidden_states`` is switched on in the
    configs of ``text_encoder`` and ``text_encoder_2`` (when present), and the
    transformer config gets ``text_encoder_projection_dim``,
    ``requires_aesthetics_score`` and ``time_cond_proj_dim`` assigned.

    Args:
        pipeline: Pipeline object exposing ``transformer`` and ``vae`` and,
            optionally, ``text_encoder``, ``text_encoder_2`` and ``text_encoder_3``.
        exporter: Exporter backend name passed to ``TasksManager``.
        int_dtype: Integer dtype name passed to every export config.
        float_dtype: Float dtype name passed to every export config.

    Returns:
        A dict mapping sub-model name to a ``(model, export_config)`` tuple, filled
        in the order: ``text_encoder``, ``transformer``, ``vae_encoder``,
        ``vae_decoder``, ``text_encoder_2``, ``text_encoder_3`` (missing text
        encoders are skipped).
    """

    def _export_config_for(submodel, task, model_type):
        # Look up the config class registered for this model type, then build it
        # from the sub-model's own config.
        constructor = TasksManager.get_exporter_config_constructor(
            model=submodel,
            exporter=exporter,
            library_name="diffusers",
            task=task,
            model_type=model_type,
        )
        return constructor(submodel.config, int_dtype=int_dtype, float_dtype=float_dtype)

    components = {}

    # First text encoder
    first_encoder = getattr(pipeline, "text_encoder", None)
    if first_encoder is not None:
        first_encoder.config.output_hidden_states = True
        first_encoder.text_model.config.output_hidden_states = True
        first_encoder_config = _export_config_for(
            first_encoder, "feature-extraction", "clip-text-with-projection"
        )
        components["text_encoder"] = (first_encoder, first_encoder_config)

    # Transformer: the config is patched before the export config is created from it.
    denoiser = pipeline.transformer
    denoiser.config.text_encoder_projection_dim = denoiser.config.joint_attention_dim
    denoiser.config.requires_aesthetics_score = getattr(pipeline.config, "requires_aesthetics_score", False)
    denoiser.config.time_cond_proj_dim = None
    denoiser_config = _export_config_for(denoiser, "semantic-segmentation", "sd3-transformer")
    components["transformer"] = (denoiser, denoiser_config)

    # VAE Encoder https://github.com/huggingface/diffusers/blob/v0.11.1/src/diffusers/models/vae.py#L565
    # A private copy of the VAE gets its forward redirected to the encode path.
    encoder_vae = copy.deepcopy(pipeline.vae)
    encoder_vae.forward = lambda sample: {"latent_parameters": encoder_vae.encode(x=sample)["latent_dist"].parameters}
    encoder_vae_config = _export_config_for(encoder_vae, "semantic-segmentation", "vae-encoder")
    components["vae_encoder"] = (encoder_vae, encoder_vae_config)

    # VAE Decoder https://github.com/huggingface/diffusers/blob/v0.11.1/src/diffusers/models/vae.py#L600
    # A second private copy gets its forward redirected to the decode path.
    decoder_vae = copy.deepcopy(pipeline.vae)
    decoder_vae.forward = lambda latent_sample: decoder_vae.decode(z=latent_sample)
    decoder_vae_config = _export_config_for(decoder_vae, "semantic-segmentation", "vae-decoder")
    components["vae_decoder"] = (decoder_vae, decoder_vae_config)

    # Second text encoder
    second_encoder = getattr(pipeline, "text_encoder_2", None)
    if second_encoder is not None:
        second_encoder.config.output_hidden_states = True
        second_encoder.text_model.config.output_hidden_states = True
        second_encoder_config = _export_config_for(
            second_encoder, "feature-extraction", "clip-text-with-projection"
        )
        components["text_encoder_2"] = (second_encoder, second_encoder_config)

    # Third text encoder (exported with the "t5-encoder-model" config)
    third_encoder = getattr(pipeline, "text_encoder_3", None)
    if third_encoder is not None:
        third_encoder_config = _export_config_for(third_encoder, "feature-extraction", "t5-encoder-model")
        components["text_encoder_3"] = (third_encoder, third_encoder_config)

    return components


def get_flux_models_for_export(pipeline, exporter, int_dtype, float_dtype):
    """Collect the sub-models of a Flux pipeline with their export configs.

    Side effects on ``pipeline``: the transformer config gets
    ``text_encoder_projection_dim``, ``requires_aesthetics_score`` and
    ``time_cond_proj_dim`` assigned before its export config is created.

    Args:
        pipeline: Pipeline object exposing ``transformer`` and ``vae`` and,
            optionally, ``text_encoder`` and ``text_encoder_2``.
        exporter: Exporter backend name passed to ``TasksManager``.
        int_dtype: Integer dtype name passed to every export config.
        float_dtype: Float dtype name passed to every export config.

    Returns:
        A dict mapping sub-model name to a ``(model, export_config)`` tuple, filled
        in the order: ``text_encoder``, ``transformer``, ``vae_encoder``,
        ``vae_decoder``, ``text_encoder_2`` (missing text encoders are skipped).
    """

    def _make_config(part, task, model_type):
        # Resolve the registered config constructor and instantiate it with the
        # part's own config and the requested dtypes.
        build = TasksManager.get_exporter_config_constructor(
            model=part,
            exporter=exporter,
            library_name="diffusers",
            task=task,
            model_type=model_type,
        )
        return build(part.config, int_dtype=int_dtype, float_dtype=float_dtype)

    exported = {}

    # Text encoder (exported with the "clip-text-model" config)
    clip_part = getattr(pipeline, "text_encoder", None)
    if clip_part is not None:
        exported["text_encoder"] = (
            clip_part,
            _make_config(clip_part, "feature-extraction", "clip-text-model"),
        )

    # Transformer: patch the config first, then derive the export config from it.
    flux_transformer = pipeline.transformer
    transformer_cfg = flux_transformer.config
    transformer_cfg.text_encoder_projection_dim = transformer_cfg.joint_attention_dim
    transformer_cfg.requires_aesthetics_score = getattr(pipeline.config, "requires_aesthetics_score", False)
    transformer_cfg.time_cond_proj_dim = None
    exported["transformer"] = (
        flux_transformer,
        _make_config(flux_transformer, "semantic-segmentation", "flux-transformer"),
    )

    # VAE Encoder https://github.com/huggingface/diffusers/blob/v0.11.1/src/diffusers/models/vae.py#L565
    # Works on a deep copy so the pipeline's own VAE keeps its forward untouched.
    vae_for_encoding = copy.deepcopy(pipeline.vae)
    vae_for_encoding.forward = lambda sample: {
        "latent_parameters": vae_for_encoding.encode(x=sample)["latent_dist"].parameters
    }
    exported["vae_encoder"] = (
        vae_for_encoding,
        _make_config(vae_for_encoding, "semantic-segmentation", "vae-encoder"),
    )

    # VAE Decoder https://github.com/huggingface/diffusers/blob/v0.11.1/src/diffusers/models/vae.py#L600
    # A separate deep copy whose forward is redirected to the decode path.
    vae_for_decoding = copy.deepcopy(pipeline.vae)
    vae_for_decoding.forward = lambda latent_sample: vae_for_decoding.decode(z=latent_sample)
    exported["vae_decoder"] = (
        vae_for_decoding,
        _make_config(vae_for_decoding, "semantic-segmentation", "vae-decoder"),
    )

    # Second text encoder (exported with the "t5-encoder-model" config)
    t5_part = getattr(pipeline, "text_encoder_2", None)
    if t5_part is not None:
        exported["text_encoder_2"] = (
            t5_part,
            _make_config(t5_part, "feature-extraction", "t5-encoder-model"),
        )

    return exported
Loading
Loading