Commit f5363e7: Update code
PenghuiCheng committed Oct 19, 2023
1 parent 2f9916b
Showing 8 changed files with 58 additions and 17 deletions.
2 changes: 2 additions & 0 deletions examples/neural_compressor/language-modeling/requirements.txt
@@ -3,3 +3,5 @@ torch >= 1.9
datasets >= 1.8.0
sentencepiece != 0.1.92
protobuf
intel-extension-for-transformers
peft
14 changes: 7 additions & 7 deletions examples/neural_compressor/language-modeling/run_clm.py
@@ -33,6 +33,7 @@
import torch
import transformers
from datasets import load_dataset
from intel_extension_for_transformers.transformers.utils.quantization_config import WeightOnlyQuantConfig
from neural_compressor import (
DistillationConfig,
PostTrainingQuantConfig,
@@ -56,7 +57,7 @@
from transformers.utils import check_min_version
from transformers.utils.versions import require_version

from optimum.intel.neural_compressor import INCModelForCausalLM, INCQuantizer, INCTrainer, WeightOnlyQuantConfig
from optimum.intel.neural_compressor import INCModelForCausalLM, INCQuantizer, INCTrainer


os.environ["CUDA_VISIBLE_DEVICES"] = ""
@@ -626,7 +627,7 @@ def compute_metrics(eval_preds):
recipes = {}
if optim_args.quantization_approach == "weight_only":
if optim_args.apply_pruning or optim_args.apply_distillation:
raise ValueError("Can't mixture weight only quantization and pruning, distillation.")
raise ValueError("Weight only quantization and pruning or distillation cannot be combined.")
quantization_config = WeightOnlyQuantConfig(
weight_dtype=optim_args.weight_dtype,
group_size=optim_args.group_size,
@@ -716,12 +717,11 @@ def compute_metrics(eval_preds):
if optim_args.apply_quantization and optim_args.quantization_approach in {"static", "dynamic", "weight_only"}:
model = trainer.model if isinstance(trainer.model, PreTrainedModel) else trainer.model._model
quantizer = INCQuantizer.from_pretrained(model)
if optim_args.quantization_approach == "static":
if optim_args.quantization_approach != "dynamic":
num_calibration_samples = min(len(train_dataset), optim_args.num_calibration_samples)
train_dataset = train_dataset.select(range(num_calibration_samples))
quantization_config.calibration_sampling_size = num_calibration_samples
elif optim_args.quantization_approach == "weight_only":
train_dataset = train_dataset.select(range(num_calibration_samples))
if optim_args.quantization_approach == "static":
quantization_config.calibration_sampling_size = num_calibration_samples

quantizer.quantize(
quantization_config=quantization_config,
Expand All @@ -735,7 +735,7 @@ def compute_metrics(eval_preds):
)
trainer.model = quantizer._quantized_model

# Weight only quantization didn't support save/load function due to weight only model has private linear operator.
# TODO: Weight-only quantization does not support save/load yet. Will implement it soon.
if (
optim_args.apply_quantization
and optim_args.verify_loading
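Not part of the commit: a minimal usage sketch of the weight-only path run_clm.py now drives, assuming the WeightOnlyQuantConfig API imported above; the model name and the weight_dtype/group_size values are illustrative only.

from transformers import AutoModelForCausalLM
from intel_extension_for_transformers.transformers.utils.quantization_config import WeightOnlyQuantConfig
from optimum.intel.neural_compressor import INCQuantizer

model = AutoModelForCausalLM.from_pretrained("gpt2")  # any causal LM; "gpt2" is illustrative
# Mirrors the optim_args fields wired through above; values are examples only.
quantization_config = WeightOnlyQuantConfig(weight_dtype="int8", group_size=32)
quantizer = INCQuantizer.from_pretrained(model)
# quantize() writes the weight-only quantized model to save_directory.
quantizer.quantize(quantization_config=quantization_config, save_directory="clm_woq")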
2 changes: 1 addition & 1 deletion optimum/intel/neural_compressor/__init__.py
@@ -13,7 +13,7 @@
# limitations under the License.

from ..utils.import_utils import is_diffusers_available
from .configuration import INCConfig, WeightOnlyQuantConfig
from .configuration import INCConfig
from .modeling_base import (
INCModel,
INCModelForMaskedLM,
5 changes: 2 additions & 3 deletions optimum/intel/neural_compressor/configuration.py
@@ -14,7 +14,6 @@

from typing import Dict, Optional, Union

from intel_extension_for_transformers.transformers.utils import WeightOnlyQuantConfig
from neural_compressor.config import DistillationConfig, WeightPruningConfig, _BaseQuantizationConfig

from optimum.configuration_utils import BaseConfig
@@ -36,7 +35,7 @@ class INCConfig(BaseConfig):

def __init__(
self,
quantization: Optional[Union[Dict, _BaseQuantizationConfig, WeightOnlyQuantConfig]] = None,
quantization=None,
pruning: Optional[Union[Dict, _BaseQuantizationConfig]] = None,
distillation: Optional[Union[Dict, _BaseQuantizationConfig]] = None,
save_onnx_model: bool = False,
@@ -51,7 +50,7 @@ def __init__(
self.save_onnx_model = save_onnx_model

@staticmethod
def _create_quantization_config(config: Union[Dict, _BaseQuantizationConfig, WeightOnlyQuantConfig]):
def _create_quantization_config(config):
# TODO : add activations_dtype and weights_dtype
if isinstance(config, _BaseQuantizationConfig):
approach = _quantization_model[config.approach]
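A sketch, not part of the commit: with the annotation loosened, either config flavour passes through the untyped quantization argument unchanged.

from neural_compressor.config import PostTrainingQuantConfig
from optimum.intel.neural_compressor import INCConfig

# A neural-compressor config still works; an ITREX WeightOnlyQuantConfig can now
# be passed too, without the hard import this file previously required.
inc_config = INCConfig(quantization=PostTrainingQuantConfig(approach="dynamic"), save_onnx_model=False)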
15 changes: 12 additions & 3 deletions optimum/intel/neural_compressor/quantization.py
@@ -22,7 +22,6 @@

import torch
from datasets import Dataset, load_dataset
from intel_extension_for_transformers.llm.quantization.utils import convert_to_quantized_model
from neural_compressor.adaptor.pytorch import PyTorch_FXAdaptor, _cfg_to_qconfig, _propagate_qconfig
from neural_compressor.config import PostTrainingQuantConfig
from neural_compressor.experimental.export import torch_to_int8_onnx
@@ -49,6 +48,7 @@
from ..utils.import_utils import (
_ipex_version,
_neural_compressor_version,
is_intel_extension_for_transformers_available,
is_ipex_version,
is_neural_compressor_version,
)
@@ -67,6 +67,10 @@
from .utils import INCDataLoader, _cfgs_to_fx_cfgs


if is_intel_extension_for_transformers_available():
from intel_extension_for_transformers.llm.quantization.utils import convert_to_quantized_model
from intel_extension_for_transformers.transformers.utils.quantization_config import WeightOnlyQuantConfig

logger = logging.getLogger(__name__)

NEURAL_COMPRESSOR_MINIMUM_VERSION = "2.1.0"
@@ -135,7 +139,7 @@ def from_pretrained(cls, model: PreTrainedModel, **kwargs):
def quantize(
self,
save_directory: Union[str, Path],
quantization_config: Union[PostTrainingQuantConfig, WeightOnlyQuantConfig] = None,
quantization_config=None,
calibration_dataset: Dataset = None,
batch_size: int = 8,
data_collator: Optional[DataCollator] = None,
@@ -176,13 +180,18 @@ def quantize(
calibration_dataloader = None
self._set_task()

if weight_only or isinstance(quantization_config, WeightOnlyQuantConfig):
if weight_only or not isinstance(quantization_config, PostTrainingQuantConfig):
# check neural-compressor version
if is_neural_compressor_version("<", NEURAL_COMPRESSOR_WEIGHT_ONLY_MINIMUM_VERSION):
raise ImportError(
f"Found an incompatible version of neural-compressor. Found version {_neural_compressor_version}, "
f"but only version {NEURAL_COMPRESSOR_WEIGHT_ONLY_MINIMUM_VERSION} or higher supports weight-only quantization."
)
if not is_intel_extension_for_transformers_available():
raise ImportError(
"Didn't find out intel-etension-for-transformers package. "
"Please install packages: pip install intel-etension-for-transformers and pip install peft."
)

if quantization_config is None:
quantization_config = WeightOnlyQuantConfig()
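A sketch of the new fallback (assuming quantize() exposes the weight_only flag used in its body; the model and directory names are illustrative):

from transformers import AutoModelForCausalLM
from optimum.intel.neural_compressor import INCQuantizer

model = AutoModelForCausalLM.from_pretrained("gpt2")  # illustrative
quantizer = INCQuantizer.from_pretrained(model)
# With no quantization_config supplied, the branch above defaults to WeightOnlyQuantConfig().
quantizer.quantize(save_directory="woq_default", weight_only=True)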
31 changes: 31 additions & 0 deletions optimum/intel/utils/import_utils.py
@@ -57,6 +57,15 @@
_neural_compressor_available = False


_intel_extension_for_transformers_available = importlib.util.find_spec("intel_extension_for_transformers") is not None
_intel_extension_for_transformers_version = "N/A"
if _intel_extension_for_transformers_available:
try:
_intel_extension_for_transformers_version = importlib_metadata.version("intel_extension_for_transformers")
except importlib_metadata.PackageNotFoundError:
_intel_extension_for_transformers_available = False


_ipex_available = importlib.util.find_spec("intel_extension_for_pytorch") is not None
_ipex_version = "N/A"
if _ipex_available:
@@ -118,6 +127,10 @@ def is_neural_compressor_available():
return _neural_compressor_available


def is_intel_extension_for_transformers_available():
return _intel_extension_for_transformers_available


def is_ipex_available():
return _ipex_available

@@ -181,6 +194,15 @@ def is_neural_compressor_version(operation: str, version: str):
return compare_versions(parse(_neural_compressor_version), operation, version)


def is_intel_extension_for_transformers_version(operation: str, version: str):
"""
Compare the current intel_extension_for_transformers version to a given reference with an operation.
"""
if not _intel_extension_for_transformers_available:
return False
return compare_versions(parse(_intel_extension_for_transformers_version), operation, version)


def is_openvino_version(operation: str, version: str):
"""
Compare the current OpenVINO version to a given reference with an operation.
@@ -245,13 +267,22 @@ def is_ipex_version(operation: str, version: str):
`pip install neural-compressor`. Please note that you may need to restart your runtime after installation.
"""

INTEL_EXTENSION_FOR_TRANSFORMERS_IMPORT_ERROR = """
{0} requires the intel-extension-for-transformers library but it was not found in your environment. You can install it with pip:
`pip install intel-extension-for-transformers`. Please note that you may need to restart your runtime after installation.
"""

BACKENDS_MAPPING = OrderedDict(
[
("diffusers", (is_diffusers_available, DIFFUSERS_IMPORT_ERROR)),
("ipex", (is_ipex_available, IPEX_IMPORT_ERROR)),
("nncf", (is_nncf_available, NNCF_IMPORT_ERROR)),
("openvino", (is_openvino_available, OPENVINO_IMPORT_ERROR)),
("neural_compressor", (is_neural_compressor_available, NEURAL_COMPRESSOR_IMPORT_ERROR)),
(
"intel_extension_for_transformers",
(is_intel_extension_for_transformers_available, INTEL_EXTENSION_FOR_TRANSFORMERS_IMPORT_ERROR),
),
]
)

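An illustrative guard built from the helpers added above; the minimum version string is a placeholder, not a documented requirement.

from optimum.intel.utils.import_utils import (
    is_intel_extension_for_transformers_available,
    is_intel_extension_for_transformers_version,
)

if not is_intel_extension_for_transformers_available():
    raise ImportError("intel-extension-for-transformers is required for weight-only quantization.")
if is_intel_extension_for_transformers_version("<", "1.2"):  # placeholder minimum version
    raise ImportError("Please upgrade intel-extension-for-transformers to a newer release.")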
4 changes: 2 additions & 2 deletions setup.py
@@ -32,6 +32,8 @@
"rjieba",
"timm",
"invisible-watermark>=0.2.0",
"intel-extension-for-transformers",
"peft",
]

QUALITY_REQUIRE = ["black~=23.1", "ruff>=0.0.241"]
@@ -41,8 +43,6 @@
"neural-compressor>=2.2.0",
"onnx",
"onnxruntime<1.15.0",
"intel-extension-for-transformers",
"peft",
],
"openvino": ["openvino>=2023.1.0", "onnx", "onnxruntime"],
"nncf": ["nncf>=2.6.0"],
2 changes: 1 addition & 1 deletion tests/neural_compressor/test_optimization.py
@@ -56,7 +56,7 @@
INCSeq2SeqTrainer,
INCStableDiffusionPipeline,
)
from optimum.intel.neural_compressor import WeightOnlyQuantConfig
from intel_extension_for_transformers.transformers.utils.quantization_config import WeightOnlyQuantConfig
from optimum.intel.utils.constant import DIFFUSION_WEIGHTS_NAME
from optimum.onnxruntime import ORTModelForCausalLM, ORTModelForSequenceClassification
from optimum.pipelines import ORT_SUPPORTED_TASKS
