From c47dcb8466ee1aee5a7fa6d5e47461331cb56db5 Mon Sep 17 00:00:00 2001
From: "Wang, Chang" <chang1.wang@intel.com>
Date: Fri, 28 Jun 2024 09:29:26 +0800
Subject: [PATCH] Support SmoothQuant (SQ) for phi series models (#1639)

Signed-off-by: changwangss <chang1.wang@intel.com>
---
 examples/.config/pytorch_optimize.json        | 52 +++++++++++++++++++
 .../quantization/run_benchmark.sh             | 13 +++--
 .../quantization/run_tuning.sh                | 17 ++++--
 3 files changed, 75 insertions(+), 7 deletions(-)

diff --git a/examples/.config/pytorch_optimize.json b/examples/.config/pytorch_optimize.json
index 1c1573ceb41..791dc367033 100644
--- a/examples/.config/pytorch_optimize.json
+++ b/examples/.config/pytorch_optimize.json
@@ -2450,6 +2450,58 @@
       }
     }
   },
+  "phi_2b_gen_ipex_static": {
+    "working_dir": "huggingface/pytorch/text-generation/quantization",
+    "tune": {
+      "cmd": "bash run_tuning.sh",
+      "params": {
+        "topology": "phi_2b",
+        "task": "generation",
+        "approach": "static",
+        "output_model": "saved_results"
+      }
+    },
+    "benchmark": {
+      "cmd": "bash run_benchmark.sh",
+      "params": {
+        "topology": "phi_2b",
+        "task": "generation",
+        "approach": "static",
+        "backend": "ipex",
+        "mode": "benchmark",
+        "batch_size": "112",
+        "iters": "100",
+        "int8": "false",
+        "config": "saved_results"
+      }
+    }
+  },
+  "phi_3b_gen_ipex_static": {
+    "working_dir": "huggingface/pytorch/text-generation/quantization",
+    "tune": {
+      "cmd": "bash run_tuning.sh",
+      "params": {
+        "topology": "phi_3b",
+        "task": "generation",
+        "approach": "static",
+        "output_model": "saved_results"
+      }
+    },
+    "benchmark": {
+      "cmd": "bash run_benchmark.sh",
+      "params": {
+        "topology": "phi_3b",
+        "task": "generation",
+        "approach": "static",
+        "backend": "ipex",
+        "mode": "benchmark",
+        "batch_size": "112",
+        "iters": "100",
+        "int8": "false",
+        "config": "saved_results"
+      }
+    }
+  },
   "flan-t5-large_gen_ipex_static": {
     "working_dir": "huggingface/pytorch/text2text-generation",
     "tune": {
diff --git a/examples/huggingface/pytorch/text-generation/quantization/run_benchmark.sh b/examples/huggingface/pytorch/text-generation/quantization/run_benchmark.sh
index c92e733e9fe..61cd923588b 100644
--- a/examples/huggingface/pytorch/text-generation/quantization/run_benchmark.sh
+++ b/examples/huggingface/pytorch/text-generation/quantization/run_benchmark.sh
@@ -166,13 +166,18 @@ function run_benchmark {
         model_name_or_path="Intel/neural-chat-7b-v3"
         script="run_generation_sq.py"
     elif [ "${topology}" = "phi_1b" ]; then
-        model_name_or_path="susnato/phi-1_dev"
-	    pip install transformers==4.36.1
+        model_name_or_path="microsoft/phi-1"
         script="run_generation_sq.py"
     elif [ "${topology}" = "phi_1_5b" ]; then
-        model_name_or_path="susnato/phi-1_5_dev"
-	    pip install transformers==4.36.1
+        model_name_or_path="microsoft/phi-1_5"
         script="run_generation_sq.py"
+    elif [ "${topology}" = "phi_2b" ]; then
+        model_name_or_path="microsoft/phi-2"
+        script="run_generation_sq.py"
+    elif [ "${topology}" = "phi_3b" ]; then
+        model_name_or_path="microsoft/Phi-3-mini-4k-instruct"
+        script="run_generation_sq.py"
+        extra_cmd=$extra_cmd" --trust_remote_code"
     elif [ "${topology}" = "llama2_7b_gptq" ] && [ "$model_source" != "huggingface" ]; then
         model_name_or_path="/tf_dataset2/models/nlp_toolkit/llama-2-7b-chat/Llama-2-7b-chat-hf"
         script="run_generation_cpu_woq.py"
diff --git a/examples/huggingface/pytorch/text-generation/quantization/run_tuning.sh b/examples/huggingface/pytorch/text-generation/quantization/run_tuning.sh
index 7c3919a132a..7dfa912f90e 100644
--- a/examples/huggingface/pytorch/text-generation/quantization/run_tuning.sh
+++ b/examples/huggingface/pytorch/text-generation/quantization/run_tuning.sh
@@ -216,14 +216,25 @@ function run_tuning {
         script="run_generation_sq.py"
     elif [ "${topology}" = "phi_1b" ]; then
         alpha=0.5
-        model_name_or_path="susnato/phi-1_dev"
+        model_name_or_path="microsoft/phi-1"
         extra_cmd=$extra_cmd" --sq --alpha ${alpha}"
         extra_cmd=$extra_cmd" --output_dir ${tuned_checkpoint}"
-        extra_cmd=$extra_cmd" --trust_remote_code"
         script="run_generation_sq.py"
     elif [ "${topology}" = "phi_1_5b" ]; then
         alpha=0.5
-        model_name_or_path="susnato/phi-1_5_dev"
+        model_name_or_path="microsoft/phi-1_5"
+        extra_cmd=$extra_cmd" --sq --alpha ${alpha}"
+        extra_cmd=$extra_cmd" --output_dir ${tuned_checkpoint}"
+        script="run_generation_sq.py"
+    elif [ "${topology}" = "phi_2b" ]; then
+        alpha=0.5
+        model_name_or_path="microsoft/phi-2"
+        extra_cmd=$extra_cmd" --sq --alpha ${alpha}"
+        extra_cmd=$extra_cmd" --output_dir ${tuned_checkpoint}"
+        script="run_generation_sq.py"
+    elif [ "${topology}" = "phi_3b" ]; then
+        alpha=0.5
+        model_name_or_path="microsoft/Phi-3-mini-4k-instruct"
         extra_cmd=$extra_cmd" --sq --alpha ${alpha}"
         extra_cmd=$extra_cmd" --output_dir ${tuned_checkpoint}"
         extra_cmd=$extra_cmd" --trust_remote_code"