Merge branch 'main' into hengguo/h2o

intel · Jul 2, 2024 · 2618e6f · 2618e6f
2 parents 523ca76 + 816f475
commit 2618e6f
Show file tree

Hide file tree

Showing 17 changed files with 42 additions and 17 deletions.
diff --git a/.github/workflows/script/install_binary.sh b/.github/workflows/script/install_binary.sh
@@ -4,6 +4,7 @@ source /intel-extension-for-transformers/.github/workflows/script/change_color.s
 cd /intel-extension-for-transformers
 export CMAKE_ARGS="-DNE_DNNL_CACHE_DIR=/cache"
 pip install -U pip
+pip install -r requirements.txt
 $BOLD_YELLOW && echo "---------------- git submodule update --init --recursive -------------" && $RESET
 git config --global --add safe.directory "*"
 git submodule update --init --recursive

diff --git a/examples/huggingface/neural_speed/requirements.txt b/examples/huggingface/neural_speed/requirements.txt
@@ -1,12 +1,11 @@
 intel_extension_for_transformers
 neural-speed
-lm-eval
+lm-eval==0.4.2
 sentencepiece
 gguf
 --extra-index-url https://download.pytorch.org/whl/cpu
 torch==2.3.0+cpu
 transformers
-intel_extension_for_pytorch==2.3.0
 tiktoken
 transformers_stream_generator
-zipfile38
+zipfile38
diff --git a/...e/pytorch/text-classification/deployment/emotion/distilbert_base_uncased/requirements.txt b/...e/pytorch/text-classification/deployment/emotion/distilbert_base_uncased/requirements.txt
@@ -1,4 +1,5 @@
-neural-compressor
+intel-extension-for-transformers==1.4.2
+neural-compressor==2.6
 transformers
 accelerate
 datasets >= 1.8.0

diff --git a/examples/huggingface/pytorch/text-classification/deployment/mrpc/bert_base/requirements.txt b/examples/huggingface/pytorch/text-classification/deployment/mrpc/bert_base/requirements.txt
@@ -1,4 +1,5 @@
-neural-compressor
+intel-extension-for-transformers==1.4.2
+neural-compressor==2.6
 transformers
 accelerate
 datasets >= 1.8.0

diff --git a/.../huggingface/pytorch/text-classification/deployment/mrpc/bert_base_cased/requirements.txt b/.../huggingface/pytorch/text-classification/deployment/mrpc/bert_base_cased/requirements.txt
@@ -1,4 +1,5 @@
-neural-compressor
+intel-extension-for-transformers==1.4.2
+neural-compressor==2.6
 transformers
 accelerate
 datasets >= 1.8.0

diff --git a/examples/huggingface/pytorch/text-classification/deployment/mrpc/bert_base_cased/run_glue.py b/examples/huggingface/pytorch/text-classification/deployment/mrpc/bert_base_cased/run_glue.py
@@ -468,7 +468,7 @@ def preprocess_function(examples):
 
     # Get the metric function
     if data_args.task_name is not None:
-        metric = load_metric("glue", data_args.task_name)
+        metric = load_metric("glue", data_args.task_name,trust_remote_code=True)
     else:
         metric = load_metric("accuracy")
 

diff --git a/examples/huggingface/pytorch/text-classification/deployment/mrpc/bert_mini/requirements.txt b/examples/huggingface/pytorch/text-classification/deployment/mrpc/bert_mini/requirements.txt
@@ -1,4 +1,5 @@
-neural-compressor
+intel-extension-for-transformers==1.4.2
+neural-compressor==2.6
 transformers
 accelerate
 datasets >= 1.8.0

diff --git a/...face/pytorch/text-classification/deployment/mrpc/distilbert_base_uncased/requirements.txt b/...face/pytorch/text-classification/deployment/mrpc/distilbert_base_uncased/requirements.txt
@@ -1,4 +1,5 @@
-neural-compressor
+intel-extension-for-transformers==1.4.2
+neural-compressor==2.6
 transformers
 accelerate
 datasets >= 1.8.0

diff --git a/...les/huggingface/pytorch/text-classification/deployment/mrpc/roberta_base/requirements.txt b/...les/huggingface/pytorch/text-classification/deployment/mrpc/roberta_base/requirements.txt
@@ -1,4 +1,5 @@
-neural-compressor
+intel-extension-for-transformers==1.4.2
+neural-compressor==2.6
 transformers
 accelerate
 datasets >= 1.8.0

diff --git a/...ples/huggingface/pytorch/text-classification/deployment/sparse/bert_mini/requirements.txt b/...ples/huggingface/pytorch/text-classification/deployment/sparse/bert_mini/requirements.txt
@@ -1,4 +1,5 @@
-neural-compressor
+intel-extension-for-transformers==1.4.2
+neural-compressor==2.6
 transformers
 accelerate
 datasets >= 1.8.0

diff --git a/...ce/pytorch/text-classification/deployment/sparse/distilbert_base_uncased/requirements.txt b/...ce/pytorch/text-classification/deployment/sparse/distilbert_base_uncased/requirements.txt
@@ -1,4 +1,5 @@
-neural-compressor
+intel-extension-for-transformers==1.4.2
+neural-compressor==2.6
 transformers
 accelerate
 datasets >= 1.8.0

diff --git a/examples/huggingface/pytorch/text-classification/deployment/sst2/bert_mini/requirements.txt b/examples/huggingface/pytorch/text-classification/deployment/sst2/bert_mini/requirements.txt
@@ -1,4 +1,5 @@
-neural-compressor
+intel-extension-for-transformers==1.4.2
+neural-compressor==2.6
 transformers
 accelerate
 datasets >= 1.8.0

diff --git a/...face/pytorch/text-classification/deployment/sst2/distilbert_base_uncased/requirements.txt b/...face/pytorch/text-classification/deployment/sst2/distilbert_base_uncased/requirements.txt
@@ -1,4 +1,5 @@
-neural-compressor
+intel-extension-for-transformers==1.4.2
+neural-compressor==2.6
 transformers
 accelerate
 datasets >= 1.8.0

diff --git a/...gface/pytorch/text-classification/deployment/sst2/minilm_l6_h384_uncased/requirements.txt b/...gface/pytorch/text-classification/deployment/sst2/minilm_l6_h384_uncased/requirements.txt
@@ -1,4 +1,5 @@
-neural-compressor
+intel-extension-for-transformers==1.4.2
+neural-compressor==2.6
 transformers
 accelerate
 datasets >= 1.8.0

diff --git a/intel_extension_for_transformers/qbits/__init__.py b/intel_extension_for_transformers/qbits/__init__.py
@@ -16,5 +16,6 @@
 # limitations under the License.
 
 import torch
-if not torch.xpu._is_compiled():
-    from intel_extension_for_transformers.qbits_py import * # pylint: disable=E0401, E0611
+import intel_extension_for_transformers
+if "gpu" not in intel_extension_for_transformers.__version__:
+    from intel_extension_for_transformers.qbits_py import *  # pylint: disable=E0401, E0611
diff --git a/intel_extension_for_transformers/transformers/modeling/modeling_auto.py b/intel_extension_for_transformers/transformers/modeling/modeling_auto.py
@@ -840,6 +840,12 @@ def forward(self, input: torch.Tensor) -> tuple[torch.Tensor, None]:
                 or device_map == torch.device("cpu")
             ) and model.config.model_type == "chatglm":
                 model = model.float()
+            if (
+                not torch.cuda.is_available()
+                or device_map == "cpu"
+                or device_map == torch.device("cpu")
+            ) and model.config.model_type == "mpt":
+                model.config.architectures = ["MptForCausalLM"]
             model.eval()
             model_type = model.config.model_type.replace("_", "-")
 
@@ -1077,6 +1083,7 @@ def calib_func(model):
                 recipes=quantization_config.recipes,
                 example_inputs=example_inputs,
             )
+
             model = quantization.fit(
                 model,
                 conf,

diff --git a/setup.py b/setup.py
@@ -8,10 +8,12 @@
 from pathlib import Path
 from setuptools import Extension, find_packages, setup
 from setuptools.command.build_ext import build_ext
+from setuptools_scm import get_version
 
 result = subprocess.Popen("pip install -r requirements.txt", shell=True)
 result.wait()
 
+
 def is_intel_gpu_available():
     import torch
     import intel_extension_for_pytorch as ipex
@@ -286,6 +288,9 @@ def check_submodules():
                            "intel_extension_for_transformers/transformers/runtime/"),
         ])
     cmdclass = {'build_ext': CMakeBuild}
+    itrex_version = get_version()
+    if IS_INTEL_GPU:
+        itrex_version = itrex_version + "-gpu"
 
     setup(
         name="intel-extension-for-transformers",
@@ -324,4 +329,5 @@ def check_submodules():
         ],
         setup_requires=['setuptools_scm'],
         use_scm_version=True,
+        version=itrex_version
     )