From c47dcb8466ee1aee5a7fa6d5e47461331cb56db5 Mon Sep 17 00:00:00 2001 From: "Wang, Chang" Date: Fri, 28 Jun 2024 09:29:26 +0800 Subject: [PATCH] Support phi series SQ (#1639) Signed-off-by: changwangss --- examples/.config/pytorch_optimize.json | 52 +++++++++++++++++++ .../quantization/run_benchmark.sh | 13 +++-- .../quantization/run_tuning.sh | 17 ++++-- 3 files changed, 75 insertions(+), 7 deletions(-) diff --git a/examples/.config/pytorch_optimize.json b/examples/.config/pytorch_optimize.json index 1c1573ceb41..791dc367033 100644 --- a/examples/.config/pytorch_optimize.json +++ b/examples/.config/pytorch_optimize.json @@ -2450,6 +2450,58 @@ } } }, + "phi_2b_gen_ipex_static": { + "working_dir": "huggingface/pytorch/text-generation/quantization", + "tune": { + "cmd": "bash run_tuning.sh", + "params": { + "topology": "phi_2b", + "task": "generation", + "approach": "static", + "output_model": "saved_results" + } + }, + "benchmark": { + "cmd": "bash run_benchmark.sh", + "params": { + "topology": "phi_2b", + "task": "generation", + "approach": "static", + "backend": "ipex", + "mode": "benchmark", + "batch_size": "112", + "iters": "100", + "int8": "false", + "config": "saved_results" + } + } + }, + "phi_3b_gen_ipex_static": { + "working_dir": "huggingface/pytorch/text-generation/quantization", + "tune": { + "cmd": "bash run_tuning.sh", + "params": { + "topology": "phi_3b", + "task": "generation", + "approach": "static", + "output_model": "saved_results" + } + }, + "benchmark": { + "cmd": "bash run_benchmark.sh", + "params": { + "topology": "phi_3b", + "task": "generation", + "approach": "static", + "backend": "ipex", + "mode": "benchmark", + "batch_size": "112", + "iters": "100", + "int8": "false", + "config": "saved_results" + } + } + }, "flan-t5-large_gen_ipex_static": { "working_dir": "huggingface/pytorch/text2text-generation", "tune": { diff --git a/examples/huggingface/pytorch/text-generation/quantization/run_benchmark.sh b/examples/huggingface/pytorch/text-generation/quantization/run_benchmark.sh index c92e733e9fe..61cd923588b 100644 --- a/examples/huggingface/pytorch/text-generation/quantization/run_benchmark.sh +++ b/examples/huggingface/pytorch/text-generation/quantization/run_benchmark.sh @@ -166,13 +166,18 @@ function run_benchmark { model_name_or_path="Intel/neural-chat-7b-v3" script="run_generation_sq.py" elif [ "${topology}" = "phi_1b" ]; then - model_name_or_path="susnato/phi-1_dev" - pip install transformers==4.36.1 + model_name_or_path="microsoft/phi-1" script="run_generation_sq.py" elif [ "${topology}" = "phi_1_5b" ]; then - model_name_or_path="susnato/phi-1_5_dev" - pip install transformers==4.36.1 + model_name_or_path="microsoft/phi-1_5" script="run_generation_sq.py" + elif [ "${topology}" = "phi_2b" ]; then + model_name_or_path="microsoft/phi-2" + script="run_generation_sq.py" + elif [ "${topology}" = "phi_3b" ]; then + model_name_or_path="microsoft/Phi-3-mini-4k-instruct" + script="run_generation_sq.py" + extra_cmd=$extra_cmd" --trust_remote_code" elif [ "${topology}" = "llama2_7b_gptq" ] && [ "$model_source" != "huggingface" ]; then model_name_or_path="/tf_dataset2/models/nlp_toolkit/llama-2-7b-chat/Llama-2-7b-chat-hf" script="run_generation_cpu_woq.py" diff --git a/examples/huggingface/pytorch/text-generation/quantization/run_tuning.sh b/examples/huggingface/pytorch/text-generation/quantization/run_tuning.sh index 7c3919a132a..7dfa912f90e 100644 --- a/examples/huggingface/pytorch/text-generation/quantization/run_tuning.sh +++ b/examples/huggingface/pytorch/text-generation/quantization/run_tuning.sh @@ -216,14 +216,25 @@ function run_tuning { script="run_generation_sq.py" elif [ "${topology}" = "phi_1b" ]; then alpha=0.5 - model_name_or_path="susnato/phi-1_dev" + model_name_or_path="microsoft/phi-1" extra_cmd=$extra_cmd" --sq --alpha ${alpha}" extra_cmd=$extra_cmd" --output_dir ${tuned_checkpoint}" - extra_cmd=$extra_cmd" --trust_remote_code" script="run_generation_sq.py" elif [ "${topology}" = "phi_1_5b" ]; then alpha=0.5 - model_name_or_path="susnato/phi-1_5_dev" + model_name_or_path="microsoft/phi-1_5" + extra_cmd=$extra_cmd" --sq --alpha ${alpha}" + extra_cmd=$extra_cmd" --output_dir ${tuned_checkpoint}" + script="run_generation_sq.py" + elif [ "${topology}" = "phi_2b" ]; then + alpha=0.5 + model_name_or_path="microsoft/phi-2" + extra_cmd=$extra_cmd" --sq --alpha ${alpha}" + extra_cmd=$extra_cmd" --output_dir ${tuned_checkpoint}" + script="run_generation_sq.py" + elif [ "${topology}" = "phi_3b" ]; then + alpha=0.5 + model_name_or_path="microsoft/Phi-3-mini-4k-instruct" extra_cmd=$extra_cmd" --sq --alpha ${alpha}" extra_cmd=$extra_cmd" --output_dir ${tuned_checkpoint}" extra_cmd=$extra_cmd" --trust_remote_code"