Commit 1343a34: fix conflicts
echarlaix committed Nov 6, 2023 (2 parents: c21f736 + 9562235)
Showing 8 changed files with 81 additions and 40 deletions.
6 changes: 6 additions & 0 deletions .github/workflows/test_openvino.yml
@@ -36,3 +36,9 @@ jobs:
- name: Test with Pytest
run: |
pytest tests/openvino/ --ignore test_modeling_basic
- name: Test openvino-nightly import
run: |
pip uninstall -y openvino
pip install openvino-nightly
python -c "from optimum.intel import OVModelForCausalLM; OVModelForCausalLM.from_pretrained('hf-internal-testing/tiny-random-gpt2', export=True, compile=False)"
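
The new step swaps the stable openvino wheel for openvino-nightly and verifies that a model can still be exported. A minimal sketch of what the one-line python -c command exercises, assuming access to the Hugging Face Hub:

from optimum.intel import OVModelForCausalLM

# Export a tiny GPT-2 checkpoint to OpenVINO IR against the nightly wheel.
model = OVModelForCausalLM.from_pretrained(
    "hf-internal-testing/tiny-random-gpt2",
    export=True,    # convert the transformers checkpoint to OpenVINO IR
    compile=False,  # skip device compilation; only the import/export path is tested
)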
42 changes: 41 additions & 1 deletion optimum/exporters/openvino/__main__.py
@@ -18,7 +18,7 @@
from typing import Any, Callable, Dict, Optional, Union

from requests.exceptions import ConnectionError as RequestsConnectionError
from transformers import AutoTokenizer
from transformers import AutoConfig, AutoTokenizer

from optimum.exporters import TasksManager
from optimum.exporters.onnx import __main__ as optimum_main
@@ -136,6 +136,41 @@ def main_export(
original_task = task
task = TasksManager.map_from_synonym(task)

# Patch modules to allow exporting GPTQ models without a GPU
do_gptq_patching = False
try:
config = AutoConfig.from_pretrained(model_name_or_path, trust_remote_code=trust_remote_code)
config_dict = config.to_dict()
quantization_config = config_dict.get("quantization_config", None)
do_gptq_patching = quantization_config and quantization_config["quant_method"] == "gptq"
except Exception:
pass

if do_gptq_patching:
import torch

torch.set_default_dtype(torch.float32)
orig_cuda_check = torch.cuda.is_available
torch.cuda.is_available = lambda: True

from optimum.gptq import GPTQQuantizer

orig_post_init_model = GPTQQuantizer.post_init_model

def post_init_model(self, model):
from auto_gptq import exllama_set_max_input_length

class StoreAttr(object):
pass

model.quantize_config = StoreAttr()
model.quantize_config.desc_act = self.desc_act
if self.desc_act and not self.disable_exllama and self.max_input_length is not None:
model = exllama_set_max_input_length(model, self.max_input_length)
return model

GPTQQuantizer.post_init_model = post_init_model

framework = TasksManager.determine_framework(model_name_or_path, subfolder=subfolder, framework=framework)

# get the shapes to be used to generate dummy inputs
@@ -317,3 +352,8 @@ def main_export(
int8=int8,
model_kwargs=model_kwargs,
)

# Unpatch modules after GPTQ export
if do_gptq_patching:
torch.cuda.is_available = orig_cuda_check
GPTQQuantizer.post_init_model = orig_post_init_model
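
GPTQ-quantized checkpoints ordinarily require CUDA to load, so the exporter temporarily reports a GPU as available and overrides GPTQQuantizer.post_init_model to bypass the exllama re-initialization; both patches are reverted after the export, as the block above shows. The patch/unpatch pattern in isolation, as a minimal sketch with a placeholder for the actual export:

import torch

# Fake CUDA availability so GPTQ weights load on a CPU-only host, then
# restore the original check. A try/finally is used here for safety; the
# exporter above patches and unpatches inline instead.
orig_cuda_check = torch.cuda.is_available
torch.cuda.is_available = lambda: True
try:
    pass  # load and export the GPTQ model here
finally:
    torch.cuda.is_available = orig_cuda_check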
11 changes: 11 additions & 0 deletions optimum/intel/neural_compressor/trainer.py
@@ -22,6 +22,14 @@
from itertools import chain
from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Tuple, Union


# Integrations must be imported before ML frameworks:
# isort: off
from transformers.integrations import hp_params
from transformers.integrations.deepspeed import deepspeed_init, deepspeed_load_checkpoint, is_deepspeed_available

# isort: on

import datasets
import torch
import torch.distributed as dist
@@ -36,6 +44,7 @@
from transformers.data.data_collator import DataCollator
from transformers.debug_utils import DebugOption, DebugUnderflowOverflow


# Integrations must be imported before ML frameworks:
from transformers.integrations import deepspeed_init, deepspeed_load_checkpoint, hp_params, is_deepspeed_available
from transformers.modeling_utils import PreTrainedModel, get_parameter_dtype, unwrap_model
@@ -129,6 +138,8 @@ def __init__(
task: Optional[str] = None,
save_onnx_model: bool = False,
):
self.neftune_noise_alpha = None

super().__init__(
model,
args,
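
Both additions track recent transformers releases: the deepspeed helpers now live in transformers.integrations.deepspeed (hence the isort-off block), and the base Trainer reads self.neftune_noise_alpha during initialization, so the attribute is defined before super().__init__ runs. A minimal sketch of that shim, assuming a plain Trainer subclass:

from transformers import Trainer

class CompatTrainer(Trainer):
    def __init__(self, *args, **kwargs):
        # Recent transformers Trainer internals expect this attribute;
        # define it before the parent constructor touches it.
        self.neftune_noise_alpha = None
        super().__init__(*args, **kwargs)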
35 changes: 1 addition & 34 deletions optimum/intel/openvino/modeling_decoder.py
@@ -229,34 +229,6 @@ def _from_transformers(
if use_cache:
task = task + "-with-past"

# Patch modules to allow exporting GPTQ models without a GPU
do_gptq_patching = False
config_dict = config.to_dict()
quantization_config = config_dict.get("quantization_config", None)
do_gptq_patching = quantization_config and quantization_config["quant_method"] == "gptq"
if do_gptq_patching:
torch.set_default_dtype(torch.float32)
orig_cuda_check = torch.cuda.is_available
torch.cuda.is_available = lambda: True

from optimum.gptq import GPTQQuantizer

orig_post_init_model = GPTQQuantizer.post_init_model

def post_init_model(self, model):
from auto_gptq import exllama_set_max_input_length

class StoreAttr(object):
pass

model.quantize_config = StoreAttr()
model.quantize_config.desc_act = self.desc_act
if self.desc_act and not self.disable_exllama and self.max_input_length is not None:
model = exllama_set_max_input_length(model, self.max_input_length)
return model

GPTQQuantizer.post_init_model = post_init_model

main_export(
model_name_or_path=model_id,
output=save_dir_path,
@@ -271,11 +243,6 @@ class StoreAttr(object):
int8=load_in_8bit,
)

# Unpatch modules after GPTQ export
if do_gptq_patching:
torch.cuda.is_available = orig_cuda_check
GPTQQuantizer.post_init_model = orig_post_init_model

config.is_decoder = True
config.is_encoder_decoder = False
config.save_pretrained(save_dir_path)
@@ -519,7 +486,7 @@ def _from_pretrained(
elif model_type == "gpt-bigcode":
init_cls = OVGPTBigCodeForCausalLM
else:
init_cls = OVModelForCausalLM
init_cls = cls

return init_cls(model=model, config=config, model_save_dir=model_cache_path.parent, **kwargs)

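
Besides moving the GPTQ patching into main_export (shown above), this file fixes _from_pretrained so a subclass is instantiated as itself rather than being downcast to OVModelForCausalLM. A sketch of the behavior the one-line init_cls = cls change enables, using a hypothetical subclass:

from optimum.intel import OVModelForCausalLM

class MyOVCausalLM(OVModelForCausalLM):  # hypothetical subclass
    pass

model = MyOVCausalLM.from_pretrained(
    "hf-internal-testing/tiny-random-gpt2", export=True, compile=False
)
assert isinstance(model, MyOVCausalLM)  # holds with init_cls = cls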
15 changes: 12 additions & 3 deletions optimum/intel/openvino/trainer.py
@@ -24,8 +24,15 @@
from pathlib import Path
from typing import Callable, Dict, List, Optional, Tuple, Type, Union


# Integrations must be imported before ML frameworks:
# isort: off
from transformers.integrations import hp_params
from transformers.integrations.deepspeed import deepspeed_init, deepspeed_load_checkpoint, is_deepspeed_available

# isort: on

import openvino
import openvino.runtime
import torch
import torch.distributed as dist
import torch.nn.functional as F
@@ -190,6 +197,8 @@ def __init__(
task: Optional[str] = None,
feature: Optional[str] = None,
):
self.neftune_noise_alpha = None

super().__init__(
model,
args,
@@ -821,12 +830,12 @@ def _save(self, output_dir: Optional[str] = None, state_dict=None):
if state_dict is None:
state_dict = self.model.state_dict()
if is_pretrained_model:
unwrapped_model.save_pretrained(output_dir, state_dict=state_dict)
unwrapped_model.save_pretrained(output_dir, state_dict=state_dict, safe_serialization=False)
else:
logger.info("Trainer.model is not a `PreTrainedModel`, only saving its state dict.")
torch.save(state_dict, os.path.join(output_dir, WEIGHTS_NAME))
else:
self.model.save_pretrained(output_dir, state_dict=state_dict)
self.model.save_pretrained(output_dir, state_dict=state_dict, safe_serialization=False)

if self.tokenizer is not None:
self.tokenizer.save_pretrained(output_dir)
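
The _save change pins the checkpoint format explicitly: newer transformers releases default to safetensors on save, while safe_serialization=False keeps the legacy pytorch_model.bin that the surrounding save logic (WEIGHTS_NAME, torch.save) assumes. The call in isolation, assuming any PreTrainedModel:

from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-gpt2")
# Write a pickle-based pytorch_model.bin instead of model.safetensors.
model.save_pretrained("./checkpoint", safe_serialization=False)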
5 changes: 4 additions & 1 deletion optimum/intel/utils/import_utils.py
@@ -71,7 +71,10 @@
try:
_openvino_version = importlib_metadata.version("openvino")
except importlib_metadata.PackageNotFoundError:
_openvino_available = False
try:
_openvino_version = importlib_metadata.version("openvino-nightly")
except importlib_metadata.PackageNotFoundError:
_openvino_available = False


_nncf_available = importlib.util.find_spec("nncf") is not None
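
With the nested fallback, OpenVINO is only reported missing when neither the stable nor the nightly distribution is installed, which is what the new CI step depends on. The lookup pattern in isolation:

import importlib.metadata as importlib_metadata

# Try the stable wheel first, then the nightly build, before declaring
# OpenVINO unavailable.
_openvino_available = True
try:
    _openvino_version = importlib_metadata.version("openvino")
except importlib_metadata.PackageNotFoundError:
    try:
        _openvino_version = importlib_metadata.version("openvino-nightly")
    except importlib_metadata.PackageNotFoundError:
        _openvino_available = False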
4 changes: 3 additions & 1 deletion setup.py
@@ -12,6 +12,7 @@
assert False, "Error: Could not open '%s' due to %s\n" % (filepath, error)

INSTALL_REQUIRE = [
<<<<<<< HEAD
"optimum @ git+https://github.com/huggingface/optimum.git",
"transformers>=4.20.0",
"datasets>=1.4.0",
@@ -41,8 +42,9 @@
"neural-compressor>=2.2.0",
"onnx",
"onnxruntime<1.15.0",
"transformers>=4.33.0",
],
"openvino": ["openvino>=2023.1.0", "onnx", "onnxruntime"],
"openvino": ["openvino>=2023.1.0", "onnx", "onnxruntime", "transformers>=4.33.0"],
"nncf": ["nncf>=2.6.0"],
"ipex": ["transformers<4.32.0", "intel-extension-for-pytorch", "onnx"],
"diffusers": ["diffusers"],
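
The neural-compressor and openvino extras now both require transformers>=4.33.0, matching the APIs the trainers above import (transformers.integrations.deepspeed, the neftune attribute). A hedged sketch of an equivalent runtime guard, assuming packaging is available:

import transformers
from packaging.version import Version

# Mirrors the setup.py floor declared above.
if Version(transformers.__version__) < Version("4.33.0"):
    raise ImportError("this integration requires transformers>=4.33.0")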
3 changes: 3 additions & 0 deletions tests/neural_compressor/test_modeling.py
@@ -19,6 +19,7 @@
import unittest

import torch
from packaging.version import Version, parse
from parameterized import parameterized
from transformers import AutoTokenizer, pipeline, set_seed

@@ -39,6 +40,7 @@
INCTrainer,
)
from optimum.intel.neural_compressor.utils import _HEAD_TO_AUTOMODELS, WEIGHTS_NAME
from optimum.version import __version__ as _optimum_version


os.environ["CUDA_VISIBLE_DEVICES"] = ""
@@ -133,6 +135,7 @@ def test_pipeline(self, model_id, task):

pipe(*inputs)

@unittest.skipIf(parse(_optimum_version) < Version("1.14.0"), "not supported, needs optimum>=v1.14.0")
def test_compare_with_and_without_past_key_values(self):
model_id = "echarlaix/tiny-random-gpt2-torchscript"
tokenizer = AutoTokenizer.from_pretrained(model_id)
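
The new skipIf decorator gates the past-key-values comparison on the installed optimum version. The same version-gating pattern in a self-contained form:

import unittest

from packaging.version import Version, parse

from optimum.version import __version__ as _optimum_version

class VersionGatedTest(unittest.TestCase):
    @unittest.skipIf(
        parse(_optimum_version) < Version("1.14.0"),
        "not supported, needs optimum>=v1.14.0",
    )
    def test_requires_recent_optimum(self):
        self.assertTrue(True)  # placeholder; the real test compares model outputs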
