
Commit

Merge branch 'ea/qwen2vl' of https://github.com/eaidova/optimum-intel into ea/qwen2vl
eaidova committed Dec 17, 2024
2 parents f6fdfba + b6deb1a commit d7ba440
Showing 53 changed files with 2,320 additions and 1,114 deletions.
8 changes: 4 additions & 4 deletions .github/workflows/dockerfile_sanity.yml
@@ -5,13 +5,13 @@ on:
     branches:
       - main
     paths:
-      - "docker/Dockerfile.intel"
+      - 'Dockerfile.ipex'
   pull_request:
     branches:
       - main
     paths:
-      - "docker/Dockerfile.intel"
+      - 'Dockerfile.ipex'

 jobs:
   build_and_run:
@@ -27,7 +27,7 @@ jobs:
       - name: Build and Run Docker Image
         run: |
           IMAGE_NAME="intel_image:latest"
-          docker build -f docker/Dockerfile.intel -t $IMAGE_NAME .
+          docker build -f Dockerfile.ipex -t $IMAGE_NAME .
           if [ $? -ne 0 ]; then
             echo "Docker image build failed."
             exit 1
4 changes: 2 additions & 2 deletions .github/workflows/test_inc.yml
@@ -18,7 +18,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        torch-version: ["2.4.*", "2.5.0"]
+        torch-version: ["2.4.0", "2.5.*"]

     runs-on: ubuntu-22.04
@@ -35,7 +35,7 @@ jobs:
         run: |
           pip install --upgrade pip
           pip install torch==${{ matrix.torch-version }} torchaudio torchvision --index-url https://download.pytorch.org/whl/cpu
-          pip install .[neural-compressor,ipex,diffusers,peft,tests] transformers[testing] intel-extension-for-pytorch==${{ matrix.torch-version }}
+          pip install .[neural-compressor,diffusers,peft,tests] transformers[testing] intel-extension-for-pytorch==${{ matrix.torch-version }}
       - name: Assert versions
         run: |
10 changes: 3 additions & 7 deletions .github/workflows/test_ipex.yml
@@ -18,8 +18,8 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        torch-version: ["2.2.0", "2.3.*", "2.4.*"]
-        transformers-version: ["4.39.0", "4.44.*"]
+        transformers-version: ["4.46.0", "4.46.3"]
+        torch-version: ["2.4.0", "2.5.*"]

     runs-on: ubuntu-22.04
@@ -38,10 +38,6 @@ jobs:
           pip install torch==${{ matrix.torch-version }} torchaudio torchvision --extra-index-url https://download.pytorch.org/whl/cpu
           pip install .[ipex,tests] transformers[testing]==${{ matrix.transformers-version }} intel_extension_for_pytorch==${{ matrix.torch-version }}
-      - if: ${{ matrix.torch-version == '2.2.0' }}
-        name: Downgrade Numpy
-        run: pip install numpy==1.*
-
       - name: Assert versions
         run: |
           python -c "import torch; print(torch.__version__); assert torch.__version__.startswith('${{ matrix.torch-version }}'.replace('.*', ''))"
@@ -50,4 +46,4 @@ jobs:
       - name: Test with Pytest
         run: |
-          pytest tests/ipex
+          pytest tests/ipex
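
As a side note, the `Assert versions` step above packs its check into a `python -c` one-liner. Here is a rough standalone sketch of the same wildcard-tolerant check; the pinned values below are hypothetical stand-ins for the matrix entries, not values taken from this diff:

```python
# Minimal sketch of the workflow's version assertion. Convention assumed from
# the workflow above: a trailing ".*" in a pin means "match the version prefix".
import torch
import transformers

def assert_version(actual: str, pinned: str) -> None:
    # "2.5.*" becomes the prefix "2.5"; exact pins like "4.46.0" are kept whole.
    prefix = pinned.replace(".*", "")
    assert actual.startswith(prefix), f"{actual} does not satisfy pin {pinned}"

assert_version(torch.__version__, "2.5.*")          # hypothetical torch-version entry
assert_version(transformers.__version__, "4.46.0")  # hypothetical transformers-version entry
print(torch.__version__, transformers.__version__)
```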
5 changes: 3 additions & 2 deletions .github/workflows/test_openvino.yml
@@ -1,6 +1,7 @@
 name: OpenVINO - Test

 on:
+  workflow_dispatch:
   push:
     branches:
       - main
@@ -46,9 +47,9 @@ jobs:
           pip install .[openvino,openvino-tokenizers,diffusers,tests] transformers[testing]
       - if: ${{ matrix.transformers-version != 'latest' }}
-        name: Downgrade Transformers and Accelerate
+        name: Install specific dependencies and versions required for older transformers
         run: |
-          pip install transformers==${{ matrix.transformers-version }} accelerate==0.*
+          pip install transformers==${{ matrix.transformers-version }} accelerate==0.* peft==0.13.* diffusers==0.30.* transformers_stream_generator
       - if: ${{ matrix.test-pattern == '*modeling*' }}
         name: Uninstall NNCF
88 changes: 88 additions & 0 deletions .github/workflows/test_openvino_full.yml
@@ -0,0 +1,88 @@
name: OpenVINO - Full Test

on:
  workflow_dispatch:
  schedule:
    - cron: "41 3 * * *" # run every day at 3:41
  push:
    branches:
      - v*-release
  pull_request:
    types: [opened, synchronize, reopened, labeled]

concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true

jobs:
  build:
    if: ${{ (github.event_name == 'workflow_dispatch') || (github.event_name == 'schedule') || (github.event_name == 'push') || contains( github.event.pull_request.labels.*.name, 'openvino-test') }}
    strategy:
      fail-fast: false
      matrix:
        include:
          - python-version: "3.9"
            os: "ubuntu-22.04"
            transformers-version: "latest"
            openvino: "ov-stable"
            nncf: "nncf-stable"
          - python-version: "3.9"
            os: "ubuntu-22.04"
            transformers-version: "latest"
            openvino: "ov-nightly"
            nncf: "nncf-stable"
          - python-version: "3.9"
            os: "ubuntu-22.04"
            transformers-version: "latest"
            openvino: "ov-stable"
            nncf: "nncf-develop"
          - python-version: "3.9"
            os: "ubuntu-22.04"
            transformers-version: "latest"
            openvino: "ov-nightly"
            nncf: "nncf-develop"

    runs-on: ${{ matrix.os }}

    steps:
      - uses: actions/checkout@v4
      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          # Install PyTorch CPU to prevent unnecessary downloading/installing of CUDA packages
          pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
          pip install .[tests]

      - name: Install openvino-nightly
        if: ${{ matrix.openvino == 'ov-nightly' }}
        run: pip install --pre -U openvino openvino-tokenizers --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly

      - name: Install openvino release
        if: ${{ matrix.openvino == 'ov-stable' }}
        run: pip install .[openvino]

      - name: Install nncf develop
        if: ${{ matrix.nncf == 'nncf-develop' }}
        run: pip install git+https://github.com/openvinotoolkit/nncf.git

      - name: Install nncf release
        if: ${{ matrix.nncf == 'nncf-stable' }}
        run: pip install .[nncf]

      - name: Install the lowest compatible transformers version
        if: ${{ matrix.transformers-version != 'latest' }}
        run: pip install transformers==${{ matrix.transformers-version }}

      - name: Pip freeze
        run: pip freeze

      - name: OpenVINO tests
        run: pytest tests/openvino --durations=0
        env:
          RUN_SLOW: 1
          HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
17 changes: 7 additions & 10 deletions .github/workflows/test_openvino_slow.yml
@@ -25,9 +25,7 @@ jobs:
       fail-fast: false
       matrix:
         os: ["ubuntu-22.04", "windows-2019"]
-        openvino-version: ["stable", "nightly"]
         transformers-version: ["4.36.0", "latest"]
-        nncf: ["nncf", "git+https://github.com/openvinotoolkit/nncf.git"]

     runs-on: ${{ matrix.os }}
@@ -47,14 +45,9 @@ jobs:
           pip install .[openvino,tests] transformers[testing]
           pip uninstall -y nncf
-      - if: ${{ matrix.openvino-version == 'nightly' }}
-        name: Install nightly OpenVINO
-        run: |
-          pip install openvino openvino-tokenizers --pre --upgrade --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly
       - if: ${{ matrix.transformers-version != 'latest' }}
-        name: Downgrade Transformers and Accelerate
-        run: pip install transformers==${{ matrix.transformers-version }} accelerate==0.*
+        name: Install specific dependencies and versions required for older transformers
+        run: pip install transformers==${{ matrix.transformers-version }} accelerate==0.* peft==0.13.* diffusers==0.30.* transformers_stream_generator

       - name: Pip freeze
         run: pip freeze
@@ -65,7 +58,11 @@ jobs:
       - name: Install dependencies (slow)
         run: |
-          pip install ${{ matrix.nncf }}
+          pip install .[nncf]
+      - if: ${{ matrix.transformers-version != 'latest' }}
+        name: Downgrade Transformers and Accelerate
+        run: pip install transformers==${{ matrix.transformers-version }} accelerate==0.*

       - name: Test with Pytest (slow)
         run: |
73 changes: 73 additions & 0 deletions Dockerfile.ipex
@@ -0,0 +1,73 @@
ARG PLATFORM=cpu

FROM ubuntu:22.04 as cpu
WORKDIR /usr/src/
RUN --mount=type=cache,id=apt-dev,target=/var/cache/apt \
sh -c "apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \
ca-certificates \
git \
curl \
vim \
build-essential \
ccache \
libgoogle-perftools-dev \
numactl \
cmake \
libjpeg-dev \
pybind11-dev \
libpng-dev \
python3 \
python3-pip \
&& rm -rf /var/lib/apt/lists/*"
RUN /usr/sbin/update-ccache-symlinks
RUN mkdir /opt/ccache && ccache --set-config=cache_dir=/opt/ccache

ARG IPEX_VERSION=2.5.0
ARG PYTORCH_VERSION=2.5.1
ARG TORCHVISION_VERSION=0.20.1+cpu
ARG TORCHAUDIO_VERSION=2.5.1+cpu

RUN python3 -m pip install --no-cache-dir \
torch==${PYTORCH_VERSION}+cpu \
torchvision==${TORCHVISION_VERSION} \
torchaudio==${TORCHAUDIO_VERSION} \
--index-url https://download.pytorch.org/whl/cpu && \
python3 -m pip install intel-openmp -f https://download.pytorch.org/whl/torch_stable.html && \
python3 -m pip install intel-extension-for-pytorch==$IPEX_VERSION && \
python3 -m pip install oneccl_bind_pt --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/cpu/cn/ && \
python3 -m pip install --no-cache-dir py-libnuma

ARG KMP_BLOCKTIME=1
ENV KMP_BLOCKTIME=${KMP_BLOCKTIME}
ARG KMP_HW_SUBSET=1T
ENV KMP_HW_SUBSET=${KMP_HW_SUBSET}
ENV LD_PRELOAD="/usr/lib/x86_64-linux-gnu/libtcmalloc.so"

FROM intel/intel-extension-for-pytorch:2.3.110-xpu as xpu
WORKDIR /usr/src/

RUN --mount=type=cache,id=apt-dev,target=/var/cache/apt \
sh -c "apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \
ca-certificates \
git \
curl \
vim \
ccache \
libgoogle-perftools-dev \
numactl \
libjpeg-dev \
pybind11-dev \
libpng-dev \
&& rm -rf /var/lib/apt/lists/*"
RUN wget -qO - https://repositories.intel.com/gpu/intel-graphics.key | gpg --dearmor | tee /usr/share/keyrings/intel-graphics.gpg > /dev/null

RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB \
| gpg --dearmor | tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null && echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | tee /etc/apt/sources.list.d/oneAPI.list

RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt install -y intel-basekit xpu-smi cmake ninja-build pciutils

FROM ${PLATFORM}

COPY optimum optimum
COPY Makefile setup.cfg setup.py pyproject.toml README.md ./
RUN pip install .
6 changes: 3 additions & 3 deletions README.md
@@ -6,7 +6,7 @@

 🤗 Optimum Intel is the interface between the 🤗 Transformers and Diffusers libraries and the different tools and libraries provided by Intel to accelerate end-to-end pipelines on Intel architectures.

-[Intel Extension for PyTorch](https://intel.github.io/intel-extension-for-pytorch/#introduction) is an open-source library which provides optimizations for both eager mode and graph mode, however, compared to eager mode, graph mode in PyTorch* normally yields better performance from optimization techniques, such as operation fusion.
+[Intel Extension for PyTorch](https://intel.github.io/intel-extension-for-pytorch/#introduction) is an open-source library which provides optimizations like faster attention and operators fusion.

 Intel [Neural Compressor](https://www.intel.com/content/www/us/en/developer/tools/oneapi/neural-compressor.html) is an open-source library enabling the usage of the most popular compression techniques such as quantization, pruning and knowledge distillation. It supports automatic accuracy-driven tuning strategies in order for users to easily generate quantized model. The users can easily apply static, dynamic and aware-training quantization approaches while giving an expected accuracy criteria. It also supports different weight pruning techniques enabling the creation of pruned model giving a predefined sparsity target.
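
As context for the Neural Compressor paragraph above, a minimal post-training dynamic quantization sketch using the library's INC integration might look like the following; the model id and save directory are illustrative choices, not part of this diff:

```python
# Hedged sketch: dynamic post-training quantization via optimum-intel's
# Neural Compressor integration; the dynamic approach needs no calibration data.
from transformers import AutoModelForSequenceClassification
from neural_compressor.config import PostTrainingQuantConfig
from optimum.intel import INCQuantizer

model_id = "distilbert-base-uncased-finetuned-sst-2-english"  # illustrative model
model = AutoModelForSequenceClassification.from_pretrained(model_id)

quantization_config = PostTrainingQuantConfig(approach="dynamic")
quantizer = INCQuantizer.from_pretrained(model)
# Quantize the model and save the result alongside its config/tokenizer files.
quantizer.quantize(quantization_config=quantization_config, save_directory="quantized_model")
```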

@@ -159,7 +159,7 @@ optimized_model = OVModelForSequenceClassification.from_pretrained(save_dir)


 ## IPEX
-To load your IPEX model, you can just replace your `AutoModelForXxx` class with the corresponding `IPEXModelForXxx` class. You can set `export=True` to load a PyTorch checkpoint, export your model via TorchScript and apply IPEX optimizations : both operators optimization (replaced with customized IPEX operators) and graph-level optimization (like operators fusion) will be applied on your model.
+To load your IPEX model, you can just replace your `AutoModelForXxx` class with the corresponding `IPEXModelForXxx` class. It will load a PyTorch checkpoint, and apply IPEX operators optimization (replaced with customized IPEX operators).
 ```diff
 from transformers import AutoTokenizer, pipeline
 - from transformers import AutoModelForCausalLM
@@ -168,7 +168,7 @@ To load your IPEX model, you can just replace your `AutoModelForXxx` class with

 model_id = "gpt2"
 - model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16)
-+ model = IPEXModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16, export=True)
++ model = IPEXModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16)
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
 results = pipe("He's a dreadful magician and")
Expand Down
53 changes: 0 additions & 53 deletions docker/Dockerfile.intel

This file was deleted.

