
Commit 9e438b5

update qconfig for xpu

Signed-off-by: zhenwei-intel <[email protected]>
zhenwei-intel committed on Jul 4, 2024
1 parent: 51a50d4

Showing 1 changed file with 4 additions and 3 deletions.
@@ -1872,7 +1872,10 @@ def load_low_bit(cls, pretrained_model_name_or_path, *model_args, **kwargs):
         # weight dtype is higher priority than bits in config.json when both existed.
         if quantization_config.weight_dtype is None:
             if quantization_config.bits == 4:
-                quantization_config.weight_dtype = "int4_clip"
+                if use_xpu:
+                    quantization_config.weight_dtype = "int4_fullrange"
+                else:
+                    quantization_config.weight_dtype = "int4_clip"
                 logger.info(
                     "{} quantization weight_dtype is used due to bits is 4 in config.json.".format(
                         quantization_config.weight_dtype)
@@ -1918,7 +1921,6 @@ def load_low_bit(cls, pretrained_model_name_or_path, *model_args, **kwargs):
                 "fp4_e2m1",
                 "fp4_e2m1_bnb",
                 "nf4",
-                "int4_fullrange",
             ]:
                 model = build_woq_model(model, quantization_config)
             else:
@@ -2026,7 +2028,6 @@ def replace_ipex_cpu_woq_linear(model, current_name=[]):
                 "nf4",
                 "fp4_e2m1",
                 "fp4_e2m1_bnb",
-                "int4_fullrange",
             ] and not quantization_config.use_ipex:
                 model = replace_linear(
                     model,
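Taken together, the hunks change two things: 4-bit loads with no explicit weight dtype now default to "int4_fullrange" on XPU (and keep "int4_clip" elsewhere), and "int4_fullrange" is removed from the two dtype allow-lists, so models using it no longer go through build_woq_model or the non-IPEX replace_linear branch. Below is a minimal, self-contained sketch of the new default-dtype selection; QuantConfig and resolve_weight_dtype are simplified stand-ins for illustration, not the library's actual API.

# Minimal sketch of the default weight-dtype selection after this commit.
# QuantConfig and resolve_weight_dtype are illustrative names, not the
# real classes from the repository.
from dataclasses import dataclass
from typing import Optional

@dataclass
class QuantConfig:
    bits: int = 4
    weight_dtype: Optional[str] = None

def resolve_weight_dtype(config: QuantConfig, use_xpu: bool) -> Optional[str]:
    # Mirrors the first hunk: only fill in a default when no dtype was given.
    if config.weight_dtype is None and config.bits == 4:
        # XPU now defaults to full-range int4; other devices keep int4_clip.
        config.weight_dtype = "int4_fullrange" if use_xpu else "int4_clip"
    return config.weight_dtype

assert resolve_weight_dtype(QuantConfig(), use_xpu=True) == "int4_fullrange"
assert resolve_weight_dtype(QuantConfig(), use_xpu=False) == "int4_clip"

An explicitly set weight_dtype in config.json still wins, matching the comment in the diff that weight dtype takes priority over bits when both are present.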
