
Commit

Add LoRA
nikita-savelyevv committed Nov 22, 2024
1 parent ee96c82 commit 4784292
Showing 5 changed files with 53 additions and 21 deletions.
12 changes: 12 additions & 0 deletions optimum/commands/export/openvino.py
@@ -167,6 +167,16 @@ def parse_args_openvino(parser: "ArgumentParser"):
"applying GPTQ takes additional memory and time."
),
)
optional_group.add_argument(
"--lora",
action="store_true",
default=None,
help=(
"Indicates whether to apply LoRA Correction algorithm. When enabled, this algorithm mitigates quantization "
"noise introduced during weight compression by leveraging low-rank adaptation. Please note, that applying "
"LoRA algorithm takes additional memory and time."
),
)
optional_group.add_argument(
"--sensitivity-metric",
type=str,
@@ -215,6 +225,7 @@ def no_compression_parameter_provided(args):
args.awq,
args.scale_estimation,
args.gptq,
args.lora,
args.sensitivity_metric,
)
)
@@ -287,6 +298,7 @@ def run(self):
"sensitivity_metric": self.args.sensitivity_metric,
"scale_estimation": self.args.scale_estimation,
"gptq": self.args.gptq,
"lora": self.args.lora,
"weight_format": self.args.weight_format,
}

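The new flag plugs into the existing `optimum-cli export openvino` entry point. A hedged usage sketch, assuming a llama-style model id (placeholder) and the flags that already exist in this CLI (`--model`, `--weight-format`, `--dataset`), with LoRA Correction requiring a calibration dataset as in the test case added below:

optimum-cli export openvino --model meta-llama/Llama-2-7b-hf --weight-format int4 --dataset wikitext2 --lora llama_int4_lora_ov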
26 changes: 18 additions & 8 deletions optimum/intel/openvino/configuration.py
@@ -356,6 +356,11 @@ class OVWeightQuantizationConfig(OVQuantizationConfigBase):
- A string, the *model id* of a predefined processor hosted inside a model repo on huggingface.co.
- A path to a *directory* containing files required by the processor, for instance saved
using the [`~AutoProcessor.save_pretrained`] method, e.g., `./my_model_directory/`.
lora (`bool`, *optional*):
If True, apply the LoRA Correction algorithm. When enabled, this algorithm mitigates quantization noise
introduced during weight compression by leveraging low-rank adaptation. It calculates low-rank matrices via
singular value decomposition (SVD) on the difference between the original and quantized weights. These
matrices are iteratively refined by solving a system of linear equations to improve accuracy.
"""

def __init__(
@@ -376,6 +381,7 @@ def __init__(
weight_format: Optional[str] = None,
gptq: bool = None,
processor: Optional[str] = None,
lora: bool = None,
**kwargs,
):
super().__init__(bits=bits, sym=sym, ignored_scope=ignored_scope, num_samples=num_samples)
@@ -391,6 +397,7 @@ def __init__(
self.weight_format = weight_format
self.gptq = gptq
self.processor = processor
self.lora = lora
self.post_init()

def post_init(self):
@@ -464,14 +471,17 @@ def post_init(self):
raise ValueError(
f"When applying weight compression with '{self.weight_format}' weight format, the `bits` parameter must be set to 4, but found {self.bits}"
)
if self.quant_method == OVQuantizationMethod.AWQ:
raise ValueError(f"The AWQ algorithm is not supported for '{self.weight_format}' weight format")
if self.scale_estimation:
raise ValueError(
f"The Scale Estimation algorithm is not supported for '{self.weight_format}' weight format"
)
if self.weight_format == "mxfp4" and self.gptq:
raise ValueError("The GPTQ algorithm is not supported for 'mxfp4' weight format")
if self.weight_format == "mxfp4":
if self.quant_method == OVQuantizationMethod.AWQ:
raise ValueError("The AWQ algorithm is not supported for 'mxpf4' weight format")
if self.scale_estimation:
raise ValueError("The Scale Estimation algorithm is not supported for 'mxpf4' weight format")
if self.gptq:
raise ValueError("The GPTQ algorithm is not supported for 'mxfp4' weight format")
if self.lora:
raise ValueError("The LoRA algorithm is not supported for 'mxfp4' weight format")
if self.gptq and self.lora:
raise ValueError("The GPTQ and LoRA algorithms can't be applied simultaneously")


@dataclass
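The `lora` docstring above describes the mechanics: take the difference between the original and quantized weights, factor it with SVD, then iteratively refine the low-rank factors by solving a system of linear equations. A toy numpy sketch of that idea only (not NNCF's implementation; the shapes, rank, and crude rounding "quantizer" are illustrative):

import numpy as np

rank = 8
W = np.random.randn(256, 256).astype(np.float32)   # original weight
W_q = np.round(W * 4) / 4                           # crude stand-in for a quantized weight
R = W - W_q                                         # quantization residual

U, S, Vt = np.linalg.svd(R, full_matrices=False)
A = U[:, :rank] * S[:rank]                          # low-rank adapter factor A (256 x rank)
B = Vt[:rank, :]                                    # low-rank adapter factor B (rank x 256)

# W_q + A @ B approximates W better than W_q alone; per the docstring, NNCF then
# refines these matrices further to improve accuracy.
print(np.linalg.norm(W - W_q), np.linalg.norm(W - (W_q + A @ B)))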
16 changes: 3 additions & 13 deletions optimum/intel/openvino/quantization.py
@@ -453,20 +453,13 @@ def _quantize_ovbasemodel(
if calibration_dataset is None:
raise ValueError("Calibration dataset is required to run quantization.")

# TODO: remove after update to NNCF 2.14
model_type = nncf.ModelType(quantization_config.model_type)
ignored_scope = quantization_config.get_ignored_scope_instance()
if model_type == nncf.ModelType.TRANSFORMER:
ignored_scope.types += ["GroupNormalization"]
ignored_scope.validate = False

# Actual model quantization
quantized_model = nncf.quantize(
self.model.model,
calibration_dataset,
subset_size=quantization_config.num_samples,
ignored_scope=ignored_scope,
model_type=model_type,
ignored_scope=quantization_config.get_ignored_scope_instance(),
model_type=nncf.ModelType(quantization_config.model_type),
preset=nncf.QuantizationPreset.PERFORMANCE if quantization_config.sym else nncf.QuantizationPreset.MIXED,
fast_bias_correction=quantization_config.fast_bias_correction,
advanced_parameters=nncf.AdvancedQuantizationParameters(
@@ -951,6 +944,7 @@ def _weight_only_quantization(
subset_size=config.num_samples if config.num_samples else 128,
scale_estimation=config.scale_estimation,
gptq=config.gptq,
lora_correction=config.lora,
)


Expand Down Expand Up @@ -1027,10 +1021,6 @@ def _hybrid_quantization(
ptq_ignored_scope = quantization_config.get_ignored_scope_instance()
ptq_ignored_scope.names += ops_to_compress

# TODO: remove after update to NNCF 2.14
ptq_ignored_scope.types += ["GroupNormalization"]
ptq_ignored_scope.validate = False

subset_size = quantization_config.num_samples if quantization_config.num_samples else 200
quantized_model = nncf.quantize(
model=compressed_model,
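On the Python side, `config.lora` is forwarded to `nncf.compress_weights(..., lora_correction=...)` as shown above. A hedged end-to-end sketch using the public optimum-intel API (the model id and output path are placeholders; the config fields mirror the test case added below):

from optimum.intel import OVModelForCausalLM, OVWeightQuantizationConfig

quantization_config = OVWeightQuantizationConfig(
    bits=4,
    dataset="wikitext2",
    lora=True,  # forwarded to nncf.compress_weights(lora_correction=True)
)
model = OVModelForCausalLM.from_pretrained(
    "meta-llama/Llama-2-7b-hf",  # placeholder model id
    export=True,
    quantization_config=quantization_config,
)
model.save_pretrained("llama_int4_lora_ov")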
7 changes: 7 additions & 0 deletions tests/openvino/test_exporters_cli.py
@@ -131,6 +131,12 @@ class OVCLIExportTestCase(unittest.TestCase):
"int4 --ratio 1.0 --sym --group-size 16 --gptq --dataset wikitext2 --num-samples 100 ",
{"int8": 4, "int4": 14},
),
(
"text-generation-with-past",
"llama_awq",
"int4 --ratio 1.0 --sym --group-size 16 --lora --dataset wikitext2 --num-samples 1",
{"int8": 4, "int4": 14},
),
]

if is_transformers_version(">=", "4.40.0"):
@@ -317,6 +323,7 @@ def test_exporters_cli_4bit(self, task: str, model_type: str, option: str, expec
self.assertTrue("--awq" not in option or b"Applying AWQ" in result.stdout)
self.assertTrue("--scale-estimation" not in option or b"Applying Scale Estimation" in result.stdout)
self.assertTrue("--gptq" not in option or b"Applying GPTQ" in result.stdout)
self.assertTrue("--lora" not in option or b"with correction of low-rank adapters" in result.stdout)

def test_exporters_cli_int4_with_local_model_and_default_config(self):
with TemporaryDirectory() as tmpdir:
13 changes: 13 additions & 0 deletions tests/openvino/test_quantization.py
@@ -306,6 +306,18 @@ class OVWeightCompressionTest(unittest.TestCase):
),
{"int4": 12, "int8": 8},
),
(
OVModelForCausalLM,
"llama_awq",
False,
dict(
bits=4,
num_samples=1,
dataset="c4",
lora=True,
),
{"int4": 12, "int8": 8},
),
]

if is_transformers_version(">=", "4.40.0"):
@@ -685,6 +697,7 @@ def test_ovmodel_4bit_auto_compression_with_config(
quantization_config.scale_estimation or False, wc_rt_info["scale_estimation"].value == "True"
)
self.assertEqual(quantization_config.gptq or False, wc_rt_info["gptq"].value == "True")
self.assertEqual(quantization_config.lora or False, wc_rt_info["lora_correction"].value == "True")

openvino_config = OVConfig.from_pretrained(tmp_dir)
self.assertEqual(openvino_config.quantization_config.bits, 4)
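The new assertion checks that the applied setting is recorded in the exported model's runtime info under the `lora_correction` key. A hedged sketch of the same check outside the test harness, assuming `model` is the OVModelForCausalLM produced above and that the weight-compression section is read the same way the surrounding assertions imply (`model.model` being the underlying openvino model is an assumption here):

wc_rt_info = model.model.get_rt_info()["nncf"]["weight_compression"]
print(wc_rt_info["lora_correction"].value)  # "True" when LoRA Correction was applied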
