Support Transformers 4.43 (#856)
* install from pr

* updates

* fix

* update TRANSFORMERS_MAX_VERSION

* fix sdpa in training

* fix whisper

* fix

* whisper calibration checks

* fix OVTrainerTextClassificationTrainingTest's expected fake quantize

* fix OVCLIExportTestCase's expected_int4

* update min ci transformers version to 4.37

* fix OVQuantizerTest's expected fake quantize

* reorder_cache

* fix expected compressed matmuls

* fix test_exporters_cli_int4_with_local_model_and_default_config

* fix qwen custom modeling test

* fix failing ipex tests

* fix ipex

* fix the last ipex failing test_compare_with_and_without_past_key_values

* use minimal prepare_inputs_for_generation in OVModelForSpeechSeq2Seq

* keeping compatibility with transformers 4.36

* keep support of whisper using WhisperGenerationMixin.generate and dummy model fix

* trigger

* fix

* device property

* standardize .device and ._device attributes/properties

* fix

* fix

* revert

Co-authored-by: Ella Charlaix <[email protected]>

* use falcon

* torch.device property always cpu

* style

* resolve conflicts

* decoder_attention_mask for older versions

* optimum main

* limit inc transformers version

* fix pipeline missing dtype

* add dtype for seq to seq models

* pass phi beam search test and skip internlm2

* fix for internlm2

---------

Co-authored-by: Ella Charlaix <[email protected]>
IlyasMoutawwakil and echarlaix committed Aug 6, 2024
1 parent 6b9ea23 commit 71ae2da
Showing 18 changed files with 300 additions and 558 deletions.
42 changes: 24 additions & 18 deletions .github/workflows/test_ipex.yml
@@ -17,26 +17,32 @@ concurrency:
 
 jobs:
   build:
-    runs-on: ubuntu-latest
     strategy:
       fail-fast: false
       matrix:
-        python-version: [3.8, 3.9]
-        transformers-version: [4.39.0, 4.41.2]
+        os: [ubuntu-latest]
+        python-version: [3.9]
+        transformers-version: ["4.39.0", "4.43.*"]
+        ipex-version: ["2.2.0", "2.3.*"]
+        include:
+          - python-version: 3.8
+            transformers-version: 4.39.0
+            ipex-version: 2.2.0
+
+    runs-on: ${{ matrix.os }}
     steps:
-      - uses: actions/checkout@v2
-      - name: Setup Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v2
-        with:
-          python-version: ${{ matrix.python-version }}
-      - name: Install dependencies
-        run: |
-          python -m pip install --upgrade pip
-          pip install torch torchaudio torchvision --extra-index-url https://download.pytorch.org/whl/cpu
-          pip install .[ipex,tests]
-          pip install transformers==${{ matrix.transformers-version }}
-      - name: Test with Pytest
-        run: |
-          pytest tests/ipex/
+      - uses: actions/checkout@v2
+      - name: Setup Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v2
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install torch==${{ matrix.ipex-version }} --extra-index-url https://download.pytorch.org/whl/cpu
+          pip install intel_extension_for_pytorch==${{ matrix.ipex-version }}
+          pip install Pillow parameterized
+          pip install transformers[testing]==${{ matrix.transformers-version }}
+          pip install .[ipex]
+      - name: Test with Pytest
+        run: |
+          pytest tests/ipex/
57 changes: 29 additions & 28 deletions .github/workflows/test_openvino.yml
@@ -21,36 +21,37 @@ jobs:
       fail-fast: false
       matrix:
         python-version: ["3.8", "3.12"]
-        transformers-version: ["4.36.0", "4.42.*"]
+        transformers-version: ["4.36.0", "4.43.*"]
         os: [ubuntu-latest]
 
     runs-on: ${{ matrix.os }}
     steps:
-      - uses: actions/checkout@v4
-      - name: Setup Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v5
-        with:
-          python-version: ${{ matrix.python-version }}
-      - name: Install dependencies
-        run: |
-          python -m pip install --upgrade pip
-          # install PyTorch CPU version to avoid installing CUDA packages on GitHub runner without GPU
-          pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
-          pip install transformers==${{ matrix.transformers-version }}
-          pip install .[openvino,openvino-tokenizers,tests,diffusers] onnxruntime
-      - name: Test with Pytest
-        env:
-          HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
-        run: |
-          pytest tests/openvino/ --ignore tests/openvino/test_modeling_basic.py --durations=0
-      - name: Test basic
-        run: |
-          pip uninstall -y nncf
-          pytest tests/openvino/test_modeling_basic.py
-      - name: Test openvino-nightly
-        run: |
-          pip uninstall -y openvino
-          pip install openvino-nightly
-          python -c "from optimum.intel import OVModelForCausalLM; OVModelForCausalLM.from_pretrained('hf-internal-testing/tiny-random-gpt2', export=True, compile=False)"
-          optimum-cli export openvino -m hf-internal-testing/tiny-random-gpt2 gpt2-ov
+      - uses: actions/checkout@v4
+      - name: Setup Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          # install PyTorch CPU version to avoid installing CUDA packages on GitHub runner without GPU
+          pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
+          pip install .[openvino,openvino-tokenizers,tests,diffusers] onnxruntime
+          pip install transformers==${{ matrix.transformers-version }}
+      - name: Test with Pytest
+        env:
+          HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
+        run: |
+          pytest tests/openvino/ --ignore tests/openvino/test_modeling_basic.py --durations=0
+      - name: Test basic
+        run: |
+          pip uninstall -y nncf
+          pytest tests/openvino/test_modeling_basic.py
+      - name: Test openvino-nightly
+        run: |
+          pip uninstall -y openvino
+          pip install openvino-nightly
+          python -c "from optimum.intel import OVModelForCausalLM; OVModelForCausalLM.from_pretrained('hf-internal-testing/tiny-random-gpt2', export=True, compile=False)"
+          optimum-cli export openvino -m hf-internal-testing/tiny-random-gpt2 gpt2-ov
64 changes: 35 additions & 29 deletions .github/workflows/test_openvino_basic.yml
@@ -3,7 +3,7 @@ name: OpenVINO - Basic Test
 on:
   workflow_dispatch:
   schedule:
-    - cron: '41 1 * * *' # run every day at 1:41
+    - cron: "41 1 * * *" # run every day at 1:41
   push:
     branches:
       - v*-release
@@ -23,36 +23,42 @@ jobs:
         # Testing lower and upper bound of supported Python versions
         # This also ensures that the test fails if dependencies break for Python 3.7
         python-version: ["3.8", "3.12"]
-        optimum: ['optimum', 'git+https://github.com/huggingface/optimum.git']
         os: ["ubuntu-22.04", "windows-latest"]
+        transformers-version: ["4.43.*"]
+        include:
+          - python-version: "3.12"
+            os: "ubuntu-22.04"
+            transformers-version: "4.36.0"
 
     runs-on: ${{ matrix.os }}
 
     steps:
-      - uses: actions/checkout@v4
-      - name: Setup Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v5
-        with:
-          python-version: ${{ matrix.python-version }}
-
-      - name: Install dependencies
-        run: |
-          # Install openvino manually to prevent dependency conflicts when .[openvino] pins
-          # optimum or transformers to a specific version
-          # Install PyTorch CPU to prevent unnecessary downloading/installing of CUDA packages
-          pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
-          pip install .[tests] openvino ${{ matrix.optimum }}
-
-      - name: Pip freeze
-        run: pip freeze
-
-      - name: Test with Pytest
-        run: |
-          pytest tests/openvino/test_modeling_basic.py
-      - name: Slow tests
-        run: |
-          pip install nncf
-          pytest tests/openvino -s -m "run_slow" --durations=0
-        env:
-          RUN_SLOW: 1
+      - uses: actions/checkout@v4
+      - name: Setup Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          # Install PyTorch CPU to prevent unnecessary downloading/installing of CUDA packages
+          pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
+          # Install openvino manually to prevent dependency conflicts when .[openvino] pins
+          # optimum or transformers to a specific version
+          pip install .[tests] openvino
+          pip install transformers==${{ matrix.transformers-version }}
+      - name: Pip freeze
+        run: pip freeze
+
+      - name: Test with Pytest
+        run: |
+          pytest tests/openvino/test_modeling_basic.py
+      - name: Slow tests
+        run: |
+          pip install nncf
+          pytest tests/openvino -s -m "run_slow" --durations=0
+        env:
+          RUN_SLOW: 1
2 changes: 1 addition & 1 deletion optimum/exporters/ipex/model_patcher.py
@@ -31,7 +31,7 @@
 
 # Please also update in the setup.py and .github/workflows/test_ipex.yml if you change the transformers version
 _TRANSFORMERS_MIN_VERSION = "4.39.0"
-_TRANSFORMERS_MAX_VERSION = "4.41.2"
+_TRANSFORMERS_MAX_VERSION = "4.43.99"
 
 _IPEX_EXPORTED_ARCH = ("LlamaForCausalLM",)
 _IPEX_EXPORTED_TASK = ("text-generation",)
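The bump above widens the tested window to the 4.43 patch series. As a rough sketch, bounds like these are typically enforced with the repo's `is_transformers_version` helper (the helper exists in `optimum.intel.utils.import_utils`; the check and error message below are illustrative, not the patcher's actual code):

    from optimum.intel.utils.import_utils import is_transformers_version

    _TRANSFORMERS_MIN_VERSION = "4.39.0"
    _TRANSFORMERS_MAX_VERSION = "4.43.99"


    def _check_transformers_version():
        # Reject installed transformers versions outside the tested [min, max] window.
        if is_transformers_version("<", _TRANSFORMERS_MIN_VERSION) or is_transformers_version(
            ">", _TRANSFORMERS_MAX_VERSION
        ):
            raise ImportError(
                f"The IPEX model patcher requires transformers>={_TRANSFORMERS_MIN_VERSION} "
                f"and <={_TRANSFORMERS_MAX_VERSION}, check your installed version."
            )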
6 changes: 4 additions & 2 deletions optimum/intel/ipex/modeling_base.py
@@ -470,9 +470,11 @@ def __init__(
             self._reorder_cache = _ipex_reorder_cache
         else:
             # Check if _reorder_cache is a static method
-            if isinstance(self.model_cls.__dict__["_reorder_cache"], staticmethod):
+            if "_reorder_cache" in self.model_cls.__dict__ and isinstance(
+                self.model_cls.__dict__["_reorder_cache"], staticmethod
+            ):
                 self._reorder_cache = self.model_cls._reorder_cache
-            else:
+            elif "_reorder_cache" in self.model_cls.__dict__:
                 self._reorder_cache = self.model_cls._reorder_cache.__get__(self)
 
         if is_transformers_version(">=", "4.38.0") and model_type in {"llama", "phi", "persimmon"}:
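For context, a small self-contained sketch of the descriptor handling the guard above relies on; `_StaticDemo`, `_InstanceDemo`, and `resolve_reorder_cache` are hypothetical stand-ins for `model_cls` and the constructor logic:

    class _StaticDemo:
        @staticmethod
        def _reorder_cache(past_key_values, beam_idx):
            return past_key_values  # placeholder body


    class _InstanceDemo:
        def _reorder_cache(self, past_key_values, beam_idx):
            return past_key_values  # placeholder body


    def resolve_reorder_cache(model_cls, instance):
        # `__dict__` only contains attributes defined directly on the class, so the
        # membership test added in this patch avoids a KeyError for models that
        # merely inherit `_reorder_cache` (or define none at all).
        if "_reorder_cache" in model_cls.__dict__ and isinstance(
            model_cls.__dict__["_reorder_cache"], staticmethod
        ):
            return model_cls._reorder_cache  # staticmethod: callable without binding
        elif "_reorder_cache" in model_cls.__dict__:
            return model_cls._reorder_cache.__get__(instance)  # bind plain function
        return None


    print(resolve_reorder_cache(_StaticDemo, _StaticDemo()))      # plain function
    print(resolve_reorder_cache(_InstanceDemo, _InstanceDemo()))  # bound method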
1 change: 0 additions & 1 deletion optimum/intel/openvino/modeling.py
@@ -129,7 +129,6 @@ def __init__(self, model: openvino.runtime.Model, config: transformers.PretrainedConfig
         # Avoid warnings when creating a transformers pipeline
         AutoConfig.register(self.base_model_prefix, AutoConfig)
         self.auto_model_class.register(AutoConfig, self.__class__)
-        self.device = torch.device("cpu")
 
     def to(self, device: str):
         """
35 changes: 34 additions & 1 deletion optimum/intel/openvino/modeling_base.py
@@ -20,6 +20,7 @@
 from typing import Dict, Optional, Union
 
 import openvino
+import torch
 from huggingface_hub import hf_hub_download
 from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE
 from openvino import Core, convert_model
@@ -34,7 +35,7 @@
 from ...exporters.openvino import export, main_export
 from ..utils.import_utils import is_nncf_available
 from .configuration import OVConfig, OVDynamicQuantizationConfig, OVWeightQuantizationConfig
-from .utils import ONNX_WEIGHTS_NAME, OV_XML_FILE_NAME, _print_compiled_model_properties
+from .utils import ONNX_WEIGHTS_NAME, OV_TO_PT_TYPE, OV_XML_FILE_NAME, _print_compiled_model_properties
 
 
 core = Core()
@@ -77,16 +78,27 @@ def __init__(
             model = self._reshape(model, -1, -1, height, width)
 
         input_names = {}
+        input_dtypes = {}
         for idx, key in enumerate(model.inputs):
             names = tuple(key.get_names())
             input_names[next((name for name in names if "/" not in name), names[0])] = idx
+            input_dtypes[
+                next((name for name in names if "/" not in name), names[0])
+            ] = key.get_element_type().get_type_name()
         self.input_names = input_names
+        self.input_dtypes = input_dtypes
 
         output_names = {}
+        output_dtypes = {}
         for idx, key in enumerate(model.outputs):
             names = tuple(key.get_names())
             output_names[next((name for name in names if "/" not in name), names[0])] = idx
+            output_dtypes[
+                next((name for name in names if "/" not in name), names[0])
+            ] = key.get_element_type().get_type_name()
+
         self.output_names = output_names
+        self.output_dtypes = output_dtypes
 
         self.model = model
         self.request = None
@@ -103,6 +115,27 @@ def __init__(
         if enable_compilation:
             self.compile()
 
+    @property
+    def device(self) -> torch.device:
+        """
+        `torch.device`: The device on which the module is (for torch compatibility).
+        """
+        return torch.device("cpu")
+
+    @property
+    def dtype(self) -> Optional[torch.dtype]:
+        for dtype in self.input_dtypes.values():
+            torch_dtype = OV_TO_PT_TYPE.get(dtype)
+            if torch_dtype.is_floating_point:
+                return torch_dtype
+
+        for dtype in self.output_dtypes.values():
+            torch_dtype = OV_TO_PT_TYPE.get(dtype)
+            if torch_dtype.is_floating_point:
+                return torch_dtype
+
+        return None
+
     @staticmethod
     def load_model(
         file_name: Union[str, Path],
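The new `device` property replaces the per-instance attribute deleted from `optimum/intel/openvino/modeling.py` above, and `dtype` infers a torch dtype from the first floating-point input or output. A rough sketch of the kind of table `OV_TO_PT_TYPE` provides, with a usage example mirroring the property (the real mapping lives in `optimum/intel/openvino/utils.py`; the entries below are assumptions):

    import torch

    # Assumed subset: OpenVINO element-type names -> torch dtypes.
    OV_TO_PT_TYPE = {
        "f32": torch.float32,
        "f16": torch.float16,
        "bf16": torch.bfloat16,
        "i64": torch.int64,
        "i32": torch.int32,
        "i8": torch.int8,
        "u8": torch.uint8,
        "boolean": torch.bool,
    }

    # Usage mirroring the `dtype` property: report the first floating-point type.
    input_dtypes = {"input_ids": "i64", "inputs_embeds": "f32"}
    dtype = next(
        (OV_TO_PT_TYPE[t] for t in input_dtypes.values() if OV_TO_PT_TYPE[t].is_floating_point),
        None,
    )
    print(dtype)  # torch.float32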
2 changes: 2 additions & 0 deletions optimum/intel/openvino/modeling_base_seq2seq.py
@@ -350,6 +350,8 @@ def _reshape(self, model: openvino.runtime.Model, batch_size: int, sequence_length
                 shapes[inputs][0] = batch_size if not is_decoder else -1
                 if inputs.get_any_name().startswith("past_key_values"):
                     shapes[inputs][2] = -1
+                elif inputs.get_any_name().startswith("cache_position"):
+                    shapes[inputs][0] = sequence_length
                 elif is_decoder and not inputs.get_any_name().startswith("encoder"):
                     shapes[inputs][1] = -1
                 else:
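A hedged sketch of the effect of the new branch: when reshaping a decoder, the single dimension of the 1-D `cache_position` input is pinned to the requested sequence length while other dynamic dimensions are left untouched (the IR path below is hypothetical):

    import openvino

    core = openvino.Core()
    model = core.read_model("decoder_model.xml")  # hypothetical decoder IR

    sequence_length = 16
    shapes = {}
    for inputs in model.inputs:
        shapes[inputs] = inputs.get_partial_shape()
        if inputs.get_any_name().startswith("cache_position"):
            shapes[inputs][0] = sequence_length  # its only dimension = decoded length
    model.reshape(shapes)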
23 changes: 12 additions & 11 deletions optimum/intel/openvino/modeling_diffusion.py
@@ -25,6 +25,7 @@
 import numpy as np
 import openvino
 import PIL
+import torch
 from diffusers import (
     DDIMScheduler,
     LMSDiscreteScheduler,
@@ -420,10 +421,6 @@ def to(self, device: str):
 
         return self
 
-    @property
-    def device(self) -> str:
-        return self._device.lower()
-
     @property
     def height(self) -> int:
         height = self.unet.model.inputs[0].get_partial_shape()[2]
@@ -629,21 +626,25 @@ def _compile(self):
         if (
             "CACHE_DIR" not in self.ov_config.keys()
             and not str(self._model_dir).startswith(gettempdir())
-            and "gpu" in self.device.lower()
+            and "GPU" in self._device
         ):
             self.ov_config["CACHE_DIR"] = os.path.join(self._model_dir, self._model_name, "model_cache")
 
-        logger.info(f"Compiling the {self._model_name} to {self.device} ...")
-        self.request = core.compile_model(self.model, self.device, self.ov_config)
+        logger.info(f"Compiling the {self._model_name} to {self._device} ...")
+        self.request = core.compile_model(self.model, self._device, self.ov_config)
         # OPENVINO_LOG_LEVEL can be found in https://docs.openvino.ai/2023.2/openvino_docs_OV_UG_supported_plugins_AUTO_debugging.html
         if "OPENVINO_LOG_LEVEL" in os.environ and int(os.environ["OPENVINO_LOG_LEVEL"]) > 2:
-            logger.info(f"{self.device} SUPPORTED_PROPERTIES:")
+            logger.info(f"{self._device} SUPPORTED_PROPERTIES:")
             _print_compiled_model_properties(self.request)
 
     @property
-    def device(self):
+    def _device(self) -> str:
         return self.parent_model._device
 
+    @property
+    def device(self) -> torch.device:
+        return self.parent_model.device
+
 
 class OVModelTextEncoder(OVModelPart):
     def __init__(
@@ -715,7 +716,7 @@ def __call__(self, latent_sample: np.ndarray):
         return list(outputs.values())
 
     def _compile(self):
-        if "GPU" in self.device:
+        if "GPU" in self._device:
             self.ov_config.update({"INFERENCE_PRECISION_HINT": "f32"})
         super()._compile()
 
@@ -736,7 +737,7 @@ def __call__(self, sample: np.ndarray):
         return list(outputs.values())
 
     def _compile(self):
-        if "GPU" in self.device:
+        if "GPU" in self._device:
             self.ov_config.update({"INFERENCE_PRECISION_HINT": "f32"})
         super()._compile()
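Together with the base-class property above, `.device` now always reports a `torch.device` for transformers/diffusers compatibility, while the OpenVINO target string lives in `._device`. An illustrative, unverified usage sketch (the model id is a placeholder):

    from optimum.intel import OVStableDiffusionPipeline

    pipe = OVStableDiffusionPipeline.from_pretrained("model-id", compile=False)
    pipe.to("gpu")

    print(pipe._device)  # "GPU" - the string handed to core.compile_model(...)
    print(pipe.device)   # device(type='cpu') - torch-facing device, always CPU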