From f44bf957d5097d97be7ff9ee13f81923b3a0f51c Mon Sep 17 00:00:00 2001 From: "Wang, Chang" Date: Thu, 27 Jun 2024 10:59:31 +0800 Subject: [PATCH 1/9] Fix docs online build (#1637) Signed-off-by: Wang, Chang Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- docs/api_doc/optimization/config.rst | 5 +- docs/api_doc/optimization/tf_optimization.rst | 6 - docs/api_doc/user_api.rst | 2 - examples/.config/tensorflow_optimize.json | 255 ------------------ 4 files changed, 1 insertion(+), 267 deletions(-) delete mode 100644 docs/api_doc/optimization/tf_optimization.rst delete mode 100644 examples/.config/tensorflow_optimize.json diff --git a/docs/api_doc/optimization/config.rst b/docs/api_doc/optimization/config.rst index 435f5d6ddad..2ca607c03dc 100644 --- a/docs/api_doc/optimization/config.rst +++ b/docs/api_doc/optimization/config.rst @@ -4,7 +4,4 @@ Config .. autoapisummary:: intel_extension_for_transformers.transformers.utils.metrics intel_extension_for_transformers.transformers.utils.objectives - intel_extension_for_transformers.transformers.config - intel_extension_for_transformers.transformers.quantization - intel_extension_for_transformers.transformers.distillation - intel_extension_for_transformers.transformers.pruning + intel_extension_for_transformers.transformers.utils.config diff --git a/docs/api_doc/optimization/tf_optimization.rst b/docs/api_doc/optimization/tf_optimization.rst deleted file mode 100644 index 3aa7cb7864a..00000000000 --- a/docs/api_doc/optimization/tf_optimization.rst +++ /dev/null @@ -1,6 +0,0 @@ -TensorFlow Optimizer -============== - -.. autoapisummary:: - - intel_extension_for_transformers.transformers.optimizer_tf diff --git a/docs/api_doc/user_api.rst b/docs/api_doc/user_api.rst index 712132f5d55..80a7ead6078 100644 --- a/docs/api_doc/user_api.rst +++ b/docs/api_doc/user_api.rst @@ -7,7 +7,5 @@ The following Python API information is available: :maxdepth: 1 optimization/trainer.rst - optimization/optimizer.rst optimization/model.rst - optimization/tf_optimization.rst optimization/config.rst diff --git a/examples/.config/tensorflow_optimize.json b/examples/.config/tensorflow_optimize.json deleted file mode 100644 index ab0aacaf6ce..00000000000 --- a/examples/.config/tensorflow_optimize.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "bert_base_mrpc_static": { - "working_dir": "huggingface/tensorflow/text-classification/quantization/ptq", - "tune":{ - "cmd": "bash run_tuning.sh", - "params": { - "topology": "bert_base_mrpc_static", - "output_model": "saved_results", - "cache_dir": "${HOME}/.cache/nlp_toolkit/text-classification" - } - }, - "benchmark": { - "cmd": "bash run_benchmark.sh", - "params": { - "topology": "bert_base_mrpc_static", - "mode": "accuracy", - "batch_size": "64", - "iters": "100", - "int8": "false", - "config": "saved_results", - "cache_dir": "${HOME}/.cache/nlp_toolkit/text-classification" - } - } - }, - "distilgpt2_clm": { - "working_dir": "huggingface/tensorflow/language-modeling/quantization/ptq", - "tune":{ - "cmd": "bash run_tuning.sh", - "params": { - "topology": "distilgpt2_clm", - "output_model": "saved_results", - "cache_dir": "${HOME}/.cache/nlp_toolkit/language-modeling" - } - }, - "benchmark": { - "cmd": "bash run_benchmark.sh", - "params": { - "topology": "distilgpt2_clm", - "mode": "accuracy", - "batch_size": "16", - "iters": "100", - "int8": "false", - "config": "saved_results", - "cache_dir": "${HOME}/.cache/nlp_toolkit/language-modeling" - } - } - }, - "distilbert_mlm": { - "working_dir": 
"huggingface/tensorflow/language-modeling/quantization/ptq", - "tune":{ - "cmd": "bash run_tuning.sh", - "params": { - "topology": "distilbert_mlm", - "output_model": "saved_results", - "cache_dir": "${HOME}/.cache/nlp_toolkit/language-modeling" - } - }, - "benchmark": { - "cmd": "bash run_benchmark.sh", - "params": { - "topology": "distilbert_mlm", - "mode": "accuracy", - "batch_size": "16", - "iters": "100", - "int8": "false", - "config": "saved_results", - "cache_dir": "${HOME}/.cache/nlp_toolkit/language-modeling" - } - } - }, - "bert_base_ner": { - "working_dir": "huggingface/tensorflow/token-classification/quantization", - "tune":{ - "cmd": "bash run_tuning.sh", - "params": { - "topology": "bert_base_ner", - "output_model": "saved_results", - "cache_dir": "${HOME}/.cache/nlp_toolkit/token-classification" - } - }, - "benchmark": { - "cmd": "bash run_benchmark.sh", - "params": { - "topology": "bert_base_ner", - "mode": "accuracy", - "batch_size": "16", - "iters": "100", - "int8": "false", - "config": "saved_results", - "cache_dir": "${HOME}/.cache/nlp_toolkit/token-classification" - } - } - }, - "distilbert_qa": { - "working_dir": "huggingface/tensorflow/question-answering/quantization", - "tune":{ - "cmd": "bash run_tuning.sh", - "params": { - "topology": "distilbert_qa", - "output_model": "saved_results", - "cache_dir": "${HOME}/.cache/nlp_toolkit/question-answering" - } - }, - "benchmark": { - "cmd": "bash run_benchmark.sh", - "params": { - "topology": "distilbert_qa", - "mode": "accuracy", - "batch_size": "16", - "iters": "100", - "int8": "false", - "config": "saved_results", - "cache_dir": "${HOME}/.cache/nlp_toolkit/question-answering" - } - } - }, - "distilbert_swag": { - "working_dir": "huggingface/tensorflow/multiple-choice/quantization", - "tune":{ - "cmd": "bash run_tuning.sh", - "params": { - "topology": "distilbert_swag", - "output_model": "saved_results", - "cache_dir": "${HOME}/.cache/nlp_toolkit/multiple-choice" - } - }, - "benchmark": { - "cmd": "bash run_benchmark.sh", - "params": { - "topology": "distilbert_swag", - "mode": "accuracy", - "batch_size": "16", - "iters": "100", - "int8": "false", - "config": "saved_results", - "cache_dir": "${HOME}/.cache/nlp_toolkit/multiple-choice" - } - } - }, - "roberta_qa": { - "working_dir": "huggingface/tensorflow/question-answering/quantization", - "tune":{ - "cmd": "bash run_tuning.sh", - "params": { - "topology": "roberta_qa", - "output_model": "saved_results", - "cache_dir": "${HOME}/.cache/nlp_toolkit/question-answering" - } - }, - "benchmark": { - "cmd": "bash run_benchmark.sh", - "params": { - "topology": "roberta_qa", - "mode": "accuracy", - "batch_size": "16", - "iters": "100", - "int8": "false", - "config": "saved_results", - "cache_dir": "${HOME}/.cache/nlp_toolkit/question-answering" - } - } - }, - "distilroberta_mlm": { - "working_dir": "huggingface/tensorflow/language-modeling/quantization/ptq", - "tune":{ - "cmd": "bash run_tuning.sh", - "params": { - "topology": "distilroberta_mlm", - "output_model": "saved_results", - "cache_dir": "${HOME}/.cache/nlp_toolkit/language-modeling" - } - }, - "benchmark": { - "cmd": "bash run_benchmark.sh", - "params": { - "topology": "distilroberta_mlm", - "mode": "accuracy", - "batch_size": "16", - "iters": "100", - "int8": "false", - "config": "saved_results", - "cache_dir": "${HOME}/.cache/nlp_toolkit/language-modeling" - } - } - }, - "legalbert_mrpc": { - "working_dir": "huggingface/tensorflow/text-classification/quantization/ptq", - "tune":{ - "cmd": "bash run_tuning.sh", - 
"params": { - "topology": "legalbert_mrpc", - "output_model": "saved_results", - "cache_dir": "${HOME}/.cache/nlp_toolkit/text-classification" - } - }, - "benchmark": { - "cmd": "bash run_benchmark.sh", - "params": { - "topology": "legalbert_mrpc", - "mode": "accuracy", - "batch_size": "16", - "iters": "100", - "int8": "false", - "config": "saved_results", - "cache_dir": "${HOME}/.cache/nlp_toolkit/text-classification" - } - } - }, - "xlnet_mrpc": { - "working_dir": "huggingface/tensorflow/text-classification/quantization/ptq", - "tune":{ - "cmd": "bash run_tuning.sh", - "params": { - "topology": "xlnet_mrpc", - "output_model": "saved_results", - "cache_dir": "${HOME}/.cache/nlp_toolkit/text-classification" - } - }, - "benchmark": { - "cmd": "bash run_benchmark.sh", - "params": { - "topology": "xlnet_mrpc", - "mode": "accuracy", - "batch_size": "16", - "iters": "100", - "int8": "false", - "config": "saved_results", - "cache_dir": "${HOME}/.cache/nlp_toolkit/text-classification" - } - } - }, - "albert_large_mrpc": { - "working_dir": "huggingface/tensorflow/text-classification/quantization/ptq", - "tune":{ - "cmd": "bash run_tuning.sh", - "params": { - "topology": "albert_large_mrpc", - "output_model": "saved_results", - "cache_dir": "${HOME}/.cache/nlp_toolkit/text-classification" - } - }, - "benchmark": { - "cmd": "bash run_benchmark.sh", - "params": { - "topology": "albert_large_mrpc", - "mode": "accuracy", - "batch_size": "16", - "iters": "100", - "int8": "false", - "config": "saved_results", - "cache_dir": "${HOME}/.cache/nlp_toolkit/text-classification" - } - } - } -} From b1168c13dedf5b110a786649ffcca615894fdb70 Mon Sep 17 00:00:00 2001 From: "Wang, Chang" Date: Thu, 27 Jun 2024 11:30:51 +0800 Subject: [PATCH 2/9] Fix SQ bloom (#1636) Signed-off-by: changwangss --- .../transformers/llm/evaluation/models.py | 3 +-- .../transformers/modeling/modeling_auto.py | 6 +----- .../transformers/utils/utility.py | 6 +----- 3 files changed, 3 insertions(+), 12 deletions(-) diff --git a/intel_extension_for_transformers/transformers/llm/evaluation/models.py b/intel_extension_for_transformers/transformers/llm/evaluation/models.py index 98dc24e3673..61b301a380a 100644 --- a/intel_extension_for_transformers/transformers/llm/evaluation/models.py +++ b/intel_extension_for_transformers/transformers/llm/evaluation/models.py @@ -38,8 +38,7 @@ def _reorder_cache( This is required to match `past_key_values` with the correct beam_idx at every generation step. 
""" - if self.config.model_type == "bloom": - return self._reorder_cache_bloom(past_key_values, beam_idx) + if self.config.model_type == "chatglm": return tuple( tuple( diff --git a/intel_extension_for_transformers/transformers/modeling/modeling_auto.py b/intel_extension_for_transformers/transformers/modeling/modeling_auto.py index c0a9925494a..263e4784d92 100644 --- a/intel_extension_for_transformers/transformers/modeling/modeling_auto.py +++ b/intel_extension_for_transformers/transformers/modeling/modeling_auto.py @@ -946,11 +946,7 @@ def collate_batch(batch): ) last_ind.append(input_ids.shape[0] - 1) - if model_type in ["bloom"]: - attention_mask = torch.ones(len(input_ids) + 1) - attention_mask[0] = 0 - else: - attention_mask = torch.ones(len(input_ids)) + attention_mask = torch.ones(len(input_ids)) position_ids = torch.arange(len(input_ids)) input_ids_padded.append(input_ids) attention_mask_padded.append(attention_mask) diff --git a/intel_extension_for_transformers/transformers/utils/utility.py b/intel_extension_for_transformers/transformers/utils/utility.py index 2467531fab2..78fe5f2063d 100644 --- a/intel_extension_for_transformers/transformers/utils/utility.py +++ b/intel_extension_for_transformers/transformers/utils/utility.py @@ -375,11 +375,7 @@ def get_example_inputs(model_config, batch_size=1, tokenizer=None, num_beams=4): past_key_values = generate_dummy_past_key_values(config=model_config, input_bs=batch_size) input_ids = input_ids[:, :512] - if model_type in ["bloom", "qwen"]: - attention_mask = torch.ones(input_ids.shape[0], input_ids.shape[1] + 1) - attention_mask[:,0] = 0 - else: - attention_mask = torch.ones(input_ids.shape) + attention_mask = torch.ones(input_ids.shape) position_ids = torch.arange(input_ids.shape[1]).repeat(batch_size, 1) if model_type in MODEL_TYPES_REQUIRING_POSITION_IDS: From c47dcb8466ee1aee5a7fa6d5e47461331cb56db5 Mon Sep 17 00:00:00 2001 From: "Wang, Chang" Date: Fri, 28 Jun 2024 09:29:26 +0800 Subject: [PATCH 3/9] Support phi series SQ (#1639) Signed-off-by: changwangss --- examples/.config/pytorch_optimize.json | 52 +++++++++++++++++++ .../quantization/run_benchmark.sh | 13 +++-- .../quantization/run_tuning.sh | 17 ++++-- 3 files changed, 75 insertions(+), 7 deletions(-) diff --git a/examples/.config/pytorch_optimize.json b/examples/.config/pytorch_optimize.json index 1c1573ceb41..791dc367033 100644 --- a/examples/.config/pytorch_optimize.json +++ b/examples/.config/pytorch_optimize.json @@ -2450,6 +2450,58 @@ } } }, + "phi_2b_gen_ipex_static": { + "working_dir": "huggingface/pytorch/text-generation/quantization", + "tune": { + "cmd": "bash run_tuning.sh", + "params": { + "topology": "phi_2b", + "task": "generation", + "approach": "static", + "output_model": "saved_results" + } + }, + "benchmark": { + "cmd": "bash run_benchmark.sh", + "params": { + "topology": "phi_2b", + "task": "generation", + "approach": "static", + "backend": "ipex", + "mode": "benchmark", + "batch_size": "112", + "iters": "100", + "int8": "false", + "config": "saved_results" + } + } + }, + "phi_3b_gen_ipex_static": { + "working_dir": "huggingface/pytorch/text-generation/quantization", + "tune": { + "cmd": "bash run_tuning.sh", + "params": { + "topology": "phi_3b", + "task": "generation", + "approach": "static", + "output_model": "saved_results" + } + }, + "benchmark": { + "cmd": "bash run_benchmark.sh", + "params": { + "topology": "phi_3b", + "task": "generation", + "approach": "static", + "backend": "ipex", + "mode": "benchmark", + "batch_size": "112", + "iters": "100", + 
"int8": "false", + "config": "saved_results" + } + } + }, "flan-t5-large_gen_ipex_static": { "working_dir": "huggingface/pytorch/text2text-generation", "tune": { diff --git a/examples/huggingface/pytorch/text-generation/quantization/run_benchmark.sh b/examples/huggingface/pytorch/text-generation/quantization/run_benchmark.sh index c92e733e9fe..61cd923588b 100644 --- a/examples/huggingface/pytorch/text-generation/quantization/run_benchmark.sh +++ b/examples/huggingface/pytorch/text-generation/quantization/run_benchmark.sh @@ -166,13 +166,18 @@ function run_benchmark { model_name_or_path="Intel/neural-chat-7b-v3" script="run_generation_sq.py" elif [ "${topology}" = "phi_1b" ]; then - model_name_or_path="susnato/phi-1_dev" - pip install transformers==4.36.1 + model_name_or_path="microsoft/phi-1" script="run_generation_sq.py" elif [ "${topology}" = "phi_1_5b" ]; then - model_name_or_path="susnato/phi-1_5_dev" - pip install transformers==4.36.1 + model_name_or_path="microsoft/phi-1_5" script="run_generation_sq.py" + elif [ "${topology}" = "phi_2b" ]; then + model_name_or_path="microsoft/phi-2" + script="run_generation_sq.py" + elif [ "${topology}" = "phi_3b" ]; then + model_name_or_path="microsoft/Phi-3-mini-4k-instruct" + script="run_generation_sq.py" + extra_cmd=$extra_cmd" --trust_remote_code" elif [ "${topology}" = "llama2_7b_gptq" ] && [ "$model_source" != "huggingface" ]; then model_name_or_path="/tf_dataset2/models/nlp_toolkit/llama-2-7b-chat/Llama-2-7b-chat-hf" script="run_generation_cpu_woq.py" diff --git a/examples/huggingface/pytorch/text-generation/quantization/run_tuning.sh b/examples/huggingface/pytorch/text-generation/quantization/run_tuning.sh index 7c3919a132a..7dfa912f90e 100644 --- a/examples/huggingface/pytorch/text-generation/quantization/run_tuning.sh +++ b/examples/huggingface/pytorch/text-generation/quantization/run_tuning.sh @@ -216,14 +216,25 @@ function run_tuning { script="run_generation_sq.py" elif [ "${topology}" = "phi_1b" ]; then alpha=0.5 - model_name_or_path="susnato/phi-1_dev" + model_name_or_path="microsoft/phi-1" extra_cmd=$extra_cmd" --sq --alpha ${alpha}" extra_cmd=$extra_cmd" --output_dir ${tuned_checkpoint}" - extra_cmd=$extra_cmd" --trust_remote_code" script="run_generation_sq.py" elif [ "${topology}" = "phi_1_5b" ]; then alpha=0.5 - model_name_or_path="susnato/phi-1_5_dev" + model_name_or_path="microsoft/phi-1_5" + extra_cmd=$extra_cmd" --sq --alpha ${alpha}" + extra_cmd=$extra_cmd" --output_dir ${tuned_checkpoint}" + script="run_generation_sq.py" + elif [ "${topology}" = "phi_2b" ]; then + alpha=0.5 + model_name_or_path="microsoft/phi-2" + extra_cmd=$extra_cmd" --sq --alpha ${alpha}" + extra_cmd=$extra_cmd" --output_dir ${tuned_checkpoint}" + script="run_generation_sq.py" + elif [ "${topology}" = "phi_3b" ]; then + alpha=0.5 + model_name_or_path="microsoft/Phi-3-mini-4k-instruct" extra_cmd=$extra_cmd" --sq --alpha ${alpha}" extra_cmd=$extra_cmd" --output_dir ${tuned_checkpoint}" extra_cmd=$extra_cmd" --trust_remote_code" From a9a0e9321ef31228e8f735374ba52ba7af3dc462 Mon Sep 17 00:00:00 2001 From: "Wang, Zhe" Date: Fri, 28 Jun 2024 17:07:12 +0800 Subject: [PATCH 4/9] fix xpu version itrex detect (#1638) --- .github/workflows/script/install_binary.sh | 1 + intel_extension_for_transformers/qbits/__init__.py | 5 +++-- setup.py | 6 ++++++ 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/.github/workflows/script/install_binary.sh b/.github/workflows/script/install_binary.sh index bbd6b7df2f1..7bca0d4d2f3 100644 --- 
a/.github/workflows/script/install_binary.sh +++ b/.github/workflows/script/install_binary.sh @@ -4,6 +4,7 @@ source /intel-extension-for-transformers/.github/workflows/script/change_color.s cd /intel-extension-for-transformers export CMAKE_ARGS="-DNE_DNNL_CACHE_DIR=/cache" pip install -U pip +pip install -r requirements.txt $BOLD_YELLOW && echo "---------------- git submodule update --init --recursive -------------" && $RESET git config --global --add safe.directory "*" git submodule update --init --recursive diff --git a/intel_extension_for_transformers/qbits/__init__.py b/intel_extension_for_transformers/qbits/__init__.py index c23599090dc..5cd39d26a7f 100644 --- a/intel_extension_for_transformers/qbits/__init__.py +++ b/intel_extension_for_transformers/qbits/__init__.py @@ -16,5 +16,6 @@ # limitations under the License. import torch -if not torch.xpu._is_compiled(): - from intel_extension_for_transformers.qbits_py import * # pylint: disable=E0401, E0611 +import intel_extension_for_transformers +if "gpu" not in intel_extension_for_transformers.__version__: + from intel_extension_for_transformers.qbits_py import * # pylint: disable=E0401, E0611 diff --git a/setup.py b/setup.py index 17700afeeb3..13aec7b7025 100644 --- a/setup.py +++ b/setup.py @@ -8,10 +8,12 @@ from pathlib import Path from setuptools import Extension, find_packages, setup from setuptools.command.build_ext import build_ext +from setuptools_scm import get_version result = subprocess.Popen("pip install -r requirements.txt", shell=True) result.wait() + def is_intel_gpu_available(): import torch import intel_extension_for_pytorch as ipex @@ -286,6 +288,9 @@ def check_submodules(): "intel_extension_for_transformers/transformers/runtime/"), ]) cmdclass = {'build_ext': CMakeBuild} + itrex_version = get_version() + if IS_INTEL_GPU: + itrex_version = itrex_version + "-gpu" setup( name="intel-extension-for-transformers", @@ -324,4 +329,5 @@ def check_submodules(): ], setup_requires=['setuptools_scm'], use_scm_version=True, + version=itrex_version ) From 63056ece9ad6430b956ac995b61f519fd2b7b4a8 Mon Sep 17 00:00:00 2001 From: "Wang, Chang" Date: Fri, 28 Jun 2024 17:54:14 +0800 Subject: [PATCH 5/9] Improve MPT series SQ (#1640) Signed-off-by: Wang, Chang --- .../transformers/modeling/modeling_auto.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/intel_extension_for_transformers/transformers/modeling/modeling_auto.py b/intel_extension_for_transformers/transformers/modeling/modeling_auto.py index 263e4784d92..a5be8cdc519 100644 --- a/intel_extension_for_transformers/transformers/modeling/modeling_auto.py +++ b/intel_extension_for_transformers/transformers/modeling/modeling_auto.py @@ -840,6 +840,12 @@ def forward(self, input: torch.Tensor) -> tuple[torch.Tensor, None]: or device_map == torch.device("cpu") ) and model.config.model_type == "chatglm": model = model.float() + if ( + not torch.cuda.is_available() + or device_map == "cpu" + or device_map == torch.device("cpu") + ) and model.config.model_type == "mpt": + model.config.architectures = ["MptForCausalLM"] model.eval() model_type = model.config.model_type.replace("_", "-") @@ -1077,6 +1083,7 @@ def calib_func(model): recipes=quantization_config.recipes, example_inputs=example_inputs, ) + model = quantization.fit( model, conf, From 76aa9b16ee27325bc2db7d206757342284ed9148 Mon Sep 17 00:00:00 2001 From: "Wang, Chang" Date: Fri, 28 Jun 2024 18:55:29 +0800 Subject: [PATCH 6/9] Fix Qwen neural speed (#1641) Signed-off-by: changwangss --- 
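The lm-eval pin below (repeated repo-wide in patch 9/9) keeps evaluation behavior reproducible, since the harness Python API has shifted between 0.4.x releases. A minimal sketch of a 0.4.2-style call, assuming the upstream simple_evaluate entry point; the checkpoint and task names are placeholders, not values taken from this repository:

    import lm_eval

    # Sketch only: evaluate a Hugging Face model on CPU with lm-eval 0.4.2.
    # "facebook/opt-125m" and "lambada_openai" are illustrative placeholders.
    results = lm_eval.simple_evaluate(
        model="hf",                                 # Hugging Face backend
        model_args="pretrained=facebook/opt-125m",  # placeholder checkpoint
        tasks=["lambada_openai"],                   # placeholder task
        batch_size=8,
    )
    print(results["results"])                       # per-task metrics dict
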
examples/huggingface/neural_speed/requirements.txt | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/examples/huggingface/neural_speed/requirements.txt b/examples/huggingface/neural_speed/requirements.txt index 62d596fa100..3f7fca6d65d 100644 --- a/examples/huggingface/neural_speed/requirements.txt +++ b/examples/huggingface/neural_speed/requirements.txt @@ -1,12 +1,11 @@ intel_extension_for_transformers neural-speed -lm-eval +lm-eval==0.4.2 sentencepiece gguf --extra-index-url https://download.pytorch.org/whl/cpu torch==2.3.0+cpu transformers -intel_extension_for_pytorch==2.3.0 tiktoken transformers_stream_generator -zipfile38 \ No newline at end of file +zipfile38 From 816f475b1ec3f4d6c9c4328eb132b30af31c083f Mon Sep 17 00:00:00 2001 From: intellinjun <105184542+intellinjun@users.noreply.github.com> Date: Fri, 28 Jun 2024 18:56:06 +0800 Subject: [PATCH 7/9] fix neural engine error (#1642) Signed-off-by: intellinjun --- .../emotion/distilbert_base_uncased/requirements.txt | 3 ++- .../deployment/mrpc/bert_base/requirements.txt | 3 ++- .../deployment/mrpc/bert_base_cased/requirements.txt | 3 ++- .../deployment/mrpc/bert_base_cased/run_glue.py | 2 +- .../deployment/mrpc/bert_mini/requirements.txt | 3 ++- .../deployment/mrpc/distilbert_base_uncased/requirements.txt | 3 ++- .../deployment/mrpc/roberta_base/requirements.txt | 3 ++- .../deployment/sparse/bert_mini/requirements.txt | 3 ++- .../deployment/sparse/distilbert_base_uncased/requirements.txt | 3 ++- .../deployment/sst2/bert_mini/requirements.txt | 3 ++- .../deployment/sst2/distilbert_base_uncased/requirements.txt | 3 ++- .../deployment/sst2/minilm_l6_h384_uncased/requirements.txt | 3 ++- 12 files changed, 23 insertions(+), 12 deletions(-) diff --git a/examples/huggingface/pytorch/text-classification/deployment/emotion/distilbert_base_uncased/requirements.txt b/examples/huggingface/pytorch/text-classification/deployment/emotion/distilbert_base_uncased/requirements.txt index 6cf73c3deae..5d46e354048 100644 --- a/examples/huggingface/pytorch/text-classification/deployment/emotion/distilbert_base_uncased/requirements.txt +++ b/examples/huggingface/pytorch/text-classification/deployment/emotion/distilbert_base_uncased/requirements.txt @@ -1,4 +1,5 @@ -neural-compressor +intel-extension-for-transformers==1.4.2 +neural-compressor==2.6 transformers accelerate datasets >= 1.8.0 diff --git a/examples/huggingface/pytorch/text-classification/deployment/mrpc/bert_base/requirements.txt b/examples/huggingface/pytorch/text-classification/deployment/mrpc/bert_base/requirements.txt index 94b03297a4a..3e3e2e9d604 100644 --- a/examples/huggingface/pytorch/text-classification/deployment/mrpc/bert_base/requirements.txt +++ b/examples/huggingface/pytorch/text-classification/deployment/mrpc/bert_base/requirements.txt @@ -1,4 +1,5 @@ -neural-compressor +intel-extension-for-transformers==1.4.2 +neural-compressor==2.6 transformers accelerate datasets >= 1.8.0 diff --git a/examples/huggingface/pytorch/text-classification/deployment/mrpc/bert_base_cased/requirements.txt b/examples/huggingface/pytorch/text-classification/deployment/mrpc/bert_base_cased/requirements.txt index 94b03297a4a..3e3e2e9d604 100644 --- a/examples/huggingface/pytorch/text-classification/deployment/mrpc/bert_base_cased/requirements.txt +++ b/examples/huggingface/pytorch/text-classification/deployment/mrpc/bert_base_cased/requirements.txt @@ -1,4 +1,5 @@ -neural-compressor +intel-extension-for-transformers==1.4.2 +neural-compressor==2.6 transformers accelerate datasets >= 
1.8.0 diff --git a/examples/huggingface/pytorch/text-classification/deployment/mrpc/bert_base_cased/run_glue.py b/examples/huggingface/pytorch/text-classification/deployment/mrpc/bert_base_cased/run_glue.py index 9374620302a..f3645880317 100644 --- a/examples/huggingface/pytorch/text-classification/deployment/mrpc/bert_base_cased/run_glue.py +++ b/examples/huggingface/pytorch/text-classification/deployment/mrpc/bert_base_cased/run_glue.py @@ -468,7 +468,7 @@ def preprocess_function(examples): # Get the metric function if data_args.task_name is not None: - metric = load_metric("glue", data_args.task_name) + metric = load_metric("glue", data_args.task_name,trust_remote_code=True) else: metric = load_metric("accuracy") diff --git a/examples/huggingface/pytorch/text-classification/deployment/mrpc/bert_mini/requirements.txt b/examples/huggingface/pytorch/text-classification/deployment/mrpc/bert_mini/requirements.txt index 94b03297a4a..3e3e2e9d604 100644 --- a/examples/huggingface/pytorch/text-classification/deployment/mrpc/bert_mini/requirements.txt +++ b/examples/huggingface/pytorch/text-classification/deployment/mrpc/bert_mini/requirements.txt @@ -1,4 +1,5 @@ -neural-compressor +intel-extension-for-transformers==1.4.2 +neural-compressor==2.6 transformers accelerate datasets >= 1.8.0 diff --git a/examples/huggingface/pytorch/text-classification/deployment/mrpc/distilbert_base_uncased/requirements.txt b/examples/huggingface/pytorch/text-classification/deployment/mrpc/distilbert_base_uncased/requirements.txt index 94b03297a4a..3e3e2e9d604 100644 --- a/examples/huggingface/pytorch/text-classification/deployment/mrpc/distilbert_base_uncased/requirements.txt +++ b/examples/huggingface/pytorch/text-classification/deployment/mrpc/distilbert_base_uncased/requirements.txt @@ -1,4 +1,5 @@ -neural-compressor +intel-extension-for-transformers==1.4.2 +neural-compressor==2.6 transformers accelerate datasets >= 1.8.0 diff --git a/examples/huggingface/pytorch/text-classification/deployment/mrpc/roberta_base/requirements.txt b/examples/huggingface/pytorch/text-classification/deployment/mrpc/roberta_base/requirements.txt index 94b03297a4a..3e3e2e9d604 100644 --- a/examples/huggingface/pytorch/text-classification/deployment/mrpc/roberta_base/requirements.txt +++ b/examples/huggingface/pytorch/text-classification/deployment/mrpc/roberta_base/requirements.txt @@ -1,4 +1,5 @@ -neural-compressor +intel-extension-for-transformers==1.4.2 +neural-compressor==2.6 transformers accelerate datasets >= 1.8.0 diff --git a/examples/huggingface/pytorch/text-classification/deployment/sparse/bert_mini/requirements.txt b/examples/huggingface/pytorch/text-classification/deployment/sparse/bert_mini/requirements.txt index 94b03297a4a..3e3e2e9d604 100644 --- a/examples/huggingface/pytorch/text-classification/deployment/sparse/bert_mini/requirements.txt +++ b/examples/huggingface/pytorch/text-classification/deployment/sparse/bert_mini/requirements.txt @@ -1,4 +1,5 @@ -neural-compressor +intel-extension-for-transformers==1.4.2 +neural-compressor==2.6 transformers accelerate datasets >= 1.8.0 diff --git a/examples/huggingface/pytorch/text-classification/deployment/sparse/distilbert_base_uncased/requirements.txt b/examples/huggingface/pytorch/text-classification/deployment/sparse/distilbert_base_uncased/requirements.txt index 94b03297a4a..3e3e2e9d604 100644 --- a/examples/huggingface/pytorch/text-classification/deployment/sparse/distilbert_base_uncased/requirements.txt +++ 
b/examples/huggingface/pytorch/text-classification/deployment/sparse/distilbert_base_uncased/requirements.txt @@ -1,4 +1,5 @@ -neural-compressor +intel-extension-for-transformers==1.4.2 +neural-compressor==2.6 transformers accelerate datasets >= 1.8.0 diff --git a/examples/huggingface/pytorch/text-classification/deployment/sst2/bert_mini/requirements.txt b/examples/huggingface/pytorch/text-classification/deployment/sst2/bert_mini/requirements.txt index 94b03297a4a..3e3e2e9d604 100644 --- a/examples/huggingface/pytorch/text-classification/deployment/sst2/bert_mini/requirements.txt +++ b/examples/huggingface/pytorch/text-classification/deployment/sst2/bert_mini/requirements.txt @@ -1,4 +1,5 @@ -neural-compressor +intel-extension-for-transformers==1.4.2 +neural-compressor==2.6 transformers accelerate datasets >= 1.8.0 diff --git a/examples/huggingface/pytorch/text-classification/deployment/sst2/distilbert_base_uncased/requirements.txt b/examples/huggingface/pytorch/text-classification/deployment/sst2/distilbert_base_uncased/requirements.txt index 94b03297a4a..3e3e2e9d604 100644 --- a/examples/huggingface/pytorch/text-classification/deployment/sst2/distilbert_base_uncased/requirements.txt +++ b/examples/huggingface/pytorch/text-classification/deployment/sst2/distilbert_base_uncased/requirements.txt @@ -1,4 +1,5 @@ -neural-compressor +intel-extension-for-transformers==1.4.2 +neural-compressor==2.6 transformers accelerate datasets >= 1.8.0 diff --git a/examples/huggingface/pytorch/text-classification/deployment/sst2/minilm_l6_h384_uncased/requirements.txt b/examples/huggingface/pytorch/text-classification/deployment/sst2/minilm_l6_h384_uncased/requirements.txt index 94b03297a4a..3e3e2e9d604 100644 --- a/examples/huggingface/pytorch/text-classification/deployment/sst2/minilm_l6_h384_uncased/requirements.txt +++ b/examples/huggingface/pytorch/text-classification/deployment/sst2/minilm_l6_h384_uncased/requirements.txt @@ -1,4 +1,5 @@ -neural-compressor +intel-extension-for-transformers==1.4.2 +neural-compressor==2.6 transformers accelerate datasets >= 1.8.0 From 317b9133ccde44d82c99c4b312cf8e3e4afc925f Mon Sep 17 00:00:00 2001 From: "Wang, Chang" Date: Tue, 2 Jul 2024 13:13:46 +0800 Subject: [PATCH 8/9] Clean INC import (#1645) Signed-off-by: changwangss --- .../transformers/config.py | 2 +- .../transformers/utils/utility_tf.py | 107 ------------------ 2 files changed, 1 insertion(+), 108 deletions(-) delete mode 100644 intel_extension_for_transformers/transformers/utils/utility_tf.py diff --git a/intel_extension_for_transformers/transformers/config.py b/intel_extension_for_transformers/transformers/config.py index a0009e7d3ed..f5918267491 100644 --- a/intel_extension_for_transformers/transformers/config.py +++ b/intel_extension_for_transformers/transformers/config.py @@ -19,7 +19,7 @@ import yaml from enum import Enum -from neural_compressor.conf.dotdict import DotDict +from neural_compressor.utils.utility import DotDict from .utils.metrics import Metric from .utils.objectives import Objective, performance diff --git a/intel_extension_for_transformers/transformers/utils/utility_tf.py b/intel_extension_for_transformers/transformers/utils/utility_tf.py deleted file mode 100644 index f19785740af..00000000000 --- a/intel_extension_for_transformers/transformers/utils/utility_tf.py +++ /dev/null @@ -1,107 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Copyright (c) 2022 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file 
except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Utils for tensorflow framework.""" - -import os -import json -from collections import OrderedDict, UserDict -from neural_compressor.experimental import common - -TMPPATH = os.path.join('tmp', 'model') -TEACHERPATH = os.path.join('tmp', 'teacher_model') -class TFDataloader(object): - """Tensorflow dataloader. - - Args: - dataset (string): Dataset - """ - - def __init__(self, dataset, batch_size=None): - """Init an instance.""" - self.dataset = dataset - self.batch_size = batch_size - - def __iter__(self): - """Get the iteration of dataset.""" - for inputs, labels in self.dataset: - if isinstance(inputs, dict) or isinstance(inputs, OrderedDict) \ - or isinstance(inputs, UserDict): - for name in inputs.keys(): - inputs[name] = inputs[name].numpy() - elif isinstance(inputs, list) or isinstance(inputs, tuple): - inputs = [input.numpy() for input in inputs] - else: - inputs = inputs.numpy() - - if isinstance(labels, dict) or isinstance(labels, OrderedDict) \ - or isinstance(labels, UserDict): # pragma: no cover - for name in labels.keys(): - labels[name] = labels[name].numpy() - elif isinstance(labels, list) or isinstance(labels, tuple): - labels = [label.numpy() for label in labels] - else: - labels = labels.numpy() - yield inputs, labels - - def __len__(self): - """Return the length of dataset.""" - return len(self.dataset) - - -def distributed_init(worker_addresses, type='worker', index=0): - """Init distribute environment. - - Args: - worker_addresses: Addresses of all nodes. - type: The type of node, such as worker. - index: When index is 0, the node treat as a chief. - """ - tf_config = { - 'cluster': { - 'worker': worker_addresses - }, - 'task': {'type': type, 'index': index} - } - os.environ['TF_CONFIG'] = json.dumps(tf_config) - -def _is_chief(task_type, task_id): - # here only consider the case in which TF_CONFIG task_type is set as worker - # and task_id=0 represents the chief - return (task_type == 'worker' and task_id == 0) - -# get model folder path for the distributed environment -def get_filepath(base_dirpath, task_type, task_id): - """Get model folder path for the distributed environment. - - Args: - base_dirpath: The basic folder path. - task_type: Task_type is set as worker. - task_id: Task id. When task_id=0, the node treat as a chief. 
- """ - if task_type is None: # single node - return base_dirpath - elif _is_chief(task_type, task_id): - return os.path.join(base_dirpath, 'chief') - else: - return os.path.join(base_dirpath, 'worker_' + str(task_id)) - - -# convert a Keras model to SavedModel -def keras2SavedModel(model): # pragma: no cover - """Transfer keras model into save_model.""" - model = common.Model(model) - return model.model From 86087dc4a1d0ed74c1360c7906cd4eae9a59704e Mon Sep 17 00:00:00 2001 From: "Wang, Chang" Date: Tue, 2 Jul 2024 17:29:30 +0800 Subject: [PATCH 9/9] Set lm-eval to 0.4.2 (#1647) Signed-off-by: changwangss --- .github/workflows/script/formatScan/pylint.sh | 2 +- .../pytorch/language-modeling/inference/requirements.txt | 2 +- .../pytorch/language-modeling/pruning/requirements.txt | 2 +- .../pytorch/language-modeling/quantization/requirements.txt | 2 +- .../huggingface/pytorch/text2text-generation/requirements.txt | 2 +- examples/modelscope/requirements.txt | 2 +- .../neural_chat/requirements_cpu.txt | 2 +- .../neural_chat/requirements_hpu.txt | 2 +- .../neural_chat/requirements_win.txt | 2 +- .../neural_chat/tests/requirements.txt | 2 +- 10 files changed, 10 insertions(+), 10 deletions(-) diff --git a/.github/workflows/script/formatScan/pylint.sh b/.github/workflows/script/formatScan/pylint.sh index eeb71beb604..41e22c470b9 100644 --- a/.github/workflows/script/formatScan/pylint.sh +++ b/.github/workflows/script/formatScan/pylint.sh @@ -28,7 +28,7 @@ else echo "Not found requirements.txt file." fi # install packages -pip install lm-eval +pip install lm-eval==0.4.2 pip install accelerate nlpaug nltk schema optimum-intel optimum peft pip install --upgrade --force-reinstall transformers==4.36.2 pip install optimum-habana diff --git a/examples/huggingface/pytorch/language-modeling/inference/requirements.txt b/examples/huggingface/pytorch/language-modeling/inference/requirements.txt index e87bc861ca8..cd6cd604899 100644 --- a/examples/huggingface/pytorch/language-modeling/inference/requirements.txt +++ b/examples/huggingface/pytorch/language-modeling/inference/requirements.txt @@ -1,4 +1,4 @@ transformers accelerate sentencepiece != 0.1.92 -lm-eval +lm-eval==0.4.2 diff --git a/examples/huggingface/pytorch/language-modeling/pruning/requirements.txt b/examples/huggingface/pytorch/language-modeling/pruning/requirements.txt index b60bac56d76..a1ea63132a8 100644 --- a/examples/huggingface/pytorch/language-modeling/pruning/requirements.txt +++ b/examples/huggingface/pytorch/language-modeling/pruning/requirements.txt @@ -7,5 +7,5 @@ transformers torch==2.0.1 tqdm neural_compressor -lm-eval +lm-eval==0.4.2 diff --git a/examples/huggingface/pytorch/language-modeling/quantization/requirements.txt b/examples/huggingface/pytorch/language-modeling/quantization/requirements.txt index c7b5b6fcf83..36ee5a1b55a 100644 --- a/examples/huggingface/pytorch/language-modeling/quantization/requirements.txt +++ b/examples/huggingface/pytorch/language-modeling/quantization/requirements.txt @@ -9,5 +9,5 @@ wandb einops neural-compressor pytest==8.0.0 -lm-eval +lm-eval==0.4.2 git+https://github.com/huggingface/peft.git@6c44096c7b8d55a2ecf24be9bc68393467e1584a diff --git a/examples/huggingface/pytorch/text2text-generation/requirements.txt b/examples/huggingface/pytorch/text2text-generation/requirements.txt index 8a585f9fd9e..73e4ae2e655 100644 --- a/examples/huggingface/pytorch/text2text-generation/requirements.txt +++ b/examples/huggingface/pytorch/text2text-generation/requirements.txt @@ -11,4 +11,4 @@ neural-compressor 
optimum-intel > 1.12.0 onnxruntime intel-extension-for-pytorch -lm-eval +lm-eval==0.4.2 diff --git a/examples/modelscope/requirements.txt b/examples/modelscope/requirements.txt index bc7a3e65de6..b04bd189db0 100644 --- a/examples/modelscope/requirements.txt +++ b/examples/modelscope/requirements.txt @@ -1,6 +1,6 @@ intel_extension_for_transformers neural-speed -lm-eval +lm-eval==0.4.2 sentencepiece gguf --extra-index-url https://download.pytorch.org/whl/cpu diff --git a/intel_extension_for_transformers/neural_chat/requirements_cpu.txt b/intel_extension_for_transformers/neural_chat/requirements_cpu.txt index 6097d2e2a0d..7b38113697b 100644 --- a/intel_extension_for_transformers/neural_chat/requirements_cpu.txt +++ b/intel_extension_for_transformers/neural_chat/requirements_cpu.txt @@ -7,7 +7,7 @@ fastapi fschat==0.2.32 huggingface_hub intel_extension_for_pytorch==2.3.0 -lm-eval +lm-eval==0.4.2 neural-compressor neural_speed==1.0a0 numpy==1.23.5 diff --git a/intel_extension_for_transformers/neural_chat/requirements_hpu.txt b/intel_extension_for_transformers/neural_chat/requirements_hpu.txt index 1c6dfa0d47a..f3983b6d3c5 100644 --- a/intel_extension_for_transformers/neural_chat/requirements_hpu.txt +++ b/intel_extension_for_transformers/neural_chat/requirements_hpu.txt @@ -4,7 +4,7 @@ evaluate fastapi fschat==0.2.35 huggingface_hub -lm-eval +lm-eval==0.4.2 neural-compressor numpy==1.23.5 optimum diff --git a/intel_extension_for_transformers/neural_chat/requirements_win.txt b/intel_extension_for_transformers/neural_chat/requirements_win.txt index c417c5ca01a..56ac6027ab4 100644 --- a/intel_extension_for_transformers/neural_chat/requirements_win.txt +++ b/intel_extension_for_transformers/neural_chat/requirements_win.txt @@ -6,7 +6,7 @@ fastapi fschat==0.2.35 huggingface_hub intel-extension-for-transformers -lm-eval +lm-eval==0.4.2 neural-compressor numpy==1.23.5 optimum diff --git a/intel_extension_for_transformers/neural_chat/tests/requirements.txt b/intel_extension_for_transformers/neural_chat/tests/requirements.txt index a4243865087..97a46d2e502 100644 --- a/intel_extension_for_transformers/neural_chat/tests/requirements.txt +++ b/intel_extension_for_transformers/neural_chat/tests/requirements.txt @@ -38,7 +38,7 @@ langchain-community==0.0.27 langchain_core==0.1.35 langid librosa -lm-eval +lm-eval==0.4.2 markdown neural-compressor neural_speed==1.0a0
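Whether a given environment actually resolved the pinned release can be checked with the standard library alone. This closing snippet is a generic sketch, not a script shipped in this repository:

    from importlib.metadata import PackageNotFoundError, version

    # Sketch only: confirm the pinned evaluation-harness release is installed.
    try:
        installed = version("lm-eval")  # pip distribution name (normalized lookup)
    except PackageNotFoundError:
        installed = None

    assert installed == "0.4.2", f"expected lm-eval 0.4.2, found {installed}"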