Skip to content

Commit

Permalink
Add hybrid quantization for Flux model (#1060)
Browse the repository at this point in the history
* Add hybrid quantization for Flux model

* Update optimum/intel/openvino/quantization.py

Co-authored-by: Nikita Savelyev <[email protected]>

---------

Co-authored-by: Nikita Savelyev <[email protected]>
Branch information:
l-bat and nikita-savelyevv authored Dec 11, 2024
1 parent f6b73d0 commit d7b1e1d
Show file tree
Hide file tree
Showing 4 changed files with 10 additions and 1 deletion.
4 changes: 4 additions & 0 deletions optimum/commands/export/openvino.py
Original file line number Diff line number Diff line change
Expand Up @@ -354,6 +354,10 @@ def run(self):
from optimum.intel import OVStableDiffusion3Pipeline

model_cls = OVStableDiffusion3Pipeline
elif class_name == "FluxPipeline":
from optimum.intel import OVFluxPipeline

model_cls = OVFluxPipeline
else:
raise NotImplementedError(f"Quantization in hybrid mode isn't supported for class {class_name}.")

Expand Down
4 changes: 3 additions & 1 deletion optimum/intel/openvino/quantization.py
Original file line number Diff line number Diff line change
Expand Up @@ -1150,7 +1150,9 @@ def _hybrid_quantization(

wc_config = copy.deepcopy(quantization_config)
wc_config.ignored_scope = wc_config.ignored_scope or {}
wc_config.ignored_scope["types"] = wc_config.ignored_scope.get("types", []) + ["Convolution"]

wc_ignored_types = ["Convolution"] if any(op.get_type_name() == "Convolution" for op in model.get_ops()) else []
wc_config.ignored_scope["types"] = wc_config.ignored_scope.get("types", []) + wc_ignored_types
compressed_model = _weight_only_quantization(model, wc_config, **kwargs)

ptq_ignored_scope = quantization_config.get_ignored_scope_instance()
Expand Down
1 change: 1 addition & 0 deletions tests/openvino/test_exporters_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ class OVCLIExportTestCase(unittest.TestCase):

if is_transformers_version(">=", "4.45"):
SUPPORTED_SD_HYBRID_ARCHITECTURES.append(("stable-diffusion-3", 9, 65))
SUPPORTED_SD_HYBRID_ARCHITECTURES.append(("flux", 7, 56))

TEST_4BIT_CONFIGURATIONS = [
("text-generation-with-past", "opt125m", "int4 --sym --group-size 128", {"int8": 4, "int4": 72}),
Expand Down
2 changes: 2 additions & 0 deletions tests/openvino/test_quantization.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@

from optimum.intel import (
OVConfig,
OVFluxPipeline,
OVLatentConsistencyModelPipeline,
OVModelForAudioClassification,
OVModelForCausalLM,
Expand Down Expand Up @@ -491,6 +492,7 @@ class OVWeightCompressionTest(unittest.TestCase):
SUPPORTED_ARCHITECTURES_WITH_HYBRID_QUANTIZATION.extend(
[
(OVStableDiffusion3Pipeline, "stable-diffusion-3", 9, 65),
(OVFluxPipeline, "flux", 7, 56),
]
)

Expand Down

0 comments on commit d7b1e1d

Please sign in to comment.