diff --git a/neural_compressor/transformers/quantization/utils.py b/neural_compressor/transformers/quantization/utils.py
index f09262fb01d..df7785183c2 100644
--- a/neural_compressor/transformers/quantization/utils.py
+++ b/neural_compressor/transformers/quantization/utils.py
@@ -238,9 +238,16 @@ def _replace_linear(
                         dtype=torch.int32,
                         device=torch.device(device),
                     )
+
+                    # Note: update_g_idx is only applicable for ipex versions >=2.7
                     model._modules[name].set_weights_bias(
                         module.qweight.data if hasattr(module, "qweight") else weight,
                         None if module.bias is None else module.bias.data,
+                        **(
+                            {"update_g_idx": not empty_weights}
+                            if "update_g_idx" in model._modules[name].set_weights_bias.__code__.co_varnames
+                            else {}
+                        ),
                     )
                 else:
                     raise Exception("{} device Unsupported weight only quantization!".format(device))
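
The change above passes `update_g_idx` only when the installed ipex build's `set_weights_bias` actually accepts that keyword, by checking the parameter names recorded in the function's code object. Below is a minimal, self-contained sketch of that feature-detection pattern; `OldKernel`, `NewKernel`, and `assign_weights` are hypothetical stand-ins (not part of the patch or of ipex) for the pre-2.7 and 2.7+ signatures.

```python
class OldKernel:
    # Stand-in for the older set_weights_bias signature (no update_g_idx).
    def set_weights_bias(self, weight, bias):
        print("old signature: weights/bias set")


class NewKernel:
    # Stand-in for the newer signature that accepts update_g_idx.
    def set_weights_bias(self, weight, bias, update_g_idx=False):
        print(f"new signature: weights/bias set, update_g_idx={update_g_idx}")


def assign_weights(kernel, weight, bias, empty_weights=False):
    # Pass update_g_idx only when the target function names it as a
    # variable; __code__.co_varnames lists a function's parameters
    # (followed by its locals), so membership testing works as a cheap
    # signature probe without a hard version check.
    extra = (
        {"update_g_idx": not empty_weights}
        if "update_g_idx" in kernel.set_weights_bias.__code__.co_varnames
        else {}
    )
    kernel.set_weights_bias(weight, bias, **extra)


assign_weights(OldKernel(), weight=None, bias=None)  # old signature: weights/bias set
assign_weights(NewKernel(), weight=None, bias=None)  # new signature: ..., update_g_idx=True
```

One caveat with this approach: `co_varnames` also contains local variable names, so a local named `update_g_idx` in an old-style implementation would be a false positive. `inspect.signature(fn).parameters` inspects only the declared parameters and is the stricter alternative, at the cost of an `inspect` import.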