Support for Qwen2.5, Llama 3.1, Gemma-2, Phi-3 (#52)

* Support for qwen2.5; rename template name qwen1.5 to qwen. Llama 3.1 for llama3, gemma-2 for gemma, phi-3 for phi3. Update transformers version.
* [pre-commit.ci] auto fixes from pre-commit.com hooks; for more information, see https://pre-commit.ci
* Add version limit
* Add support name limit
* Update template.py
* Delete moe
* Update requirements.txt

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Nick W <[email protected]>
FLock-io · Sep 26, 2024 · 4ec132a · 4ec132a
1 parent df97f44
commit 4ec132a
Show file tree

Hide file tree

Showing 3 changed files with 42 additions and 1 deletion.
diff --git a/requirements.txt b/requirements.txt
@@ -1,6 +1,6 @@
 torch>=1.13.1
 huggingface-hub>=0.24.7,<0.25
-transformers>=4.37.2
+transformers>=4.43.0,<=4.45.0
 datasets>=2.14.3
 accelerate>=0.27.2
 loguru==0.7.0

diff --git a/src/core/constant.py b/src/core/constant.py
@@ -23,6 +23,21 @@
     "Qwen/Qwen2-7B-Instruct",
     "Qwen/Qwen2-72B",
     "Qwen/Qwen2-72B-Instruct",
+    # qwen2.5
+    "Qwen/Qwen2.5-0.5B",
+    "Qwen/Qwen2.5-0.5B-Instruct",
+    "Qwen/Qwen2.5-1.5B",
+    "Qwen/Qwen2.5-1.5B-Instruct",
+    "Qwen/Qwen2.5-3B",
+    "Qwen/Qwen2.5-3B-Instruct",
+    "Qwen/Qwen2.5-7B",
+    "Qwen/Qwen2.5-7B-Instruct",
+    "Qwen/Qwen2.5-14B",
+    "Qwen/Qwen2.5-14B-Instruct",
+    "Qwen/Qwen2.5-32B",
+    "Qwen/Qwen2.5-32B-Instruct",
+    "Qwen/Qwen2.5-72B",
+    "Qwen/Qwen2.5-72B-Instruct",
     # Yi
     "01-ai/Yi-6B",
     "01-ai/Yi-6B-Chat",
@@ -51,6 +66,13 @@
     "google/gemma-7b",
     "google/gemma-2b-it",
     "google/gemma-7b-it",
+    # gemma2
+    "google/gemma-2-2b",
+    "google/gemma-2-9b",
+    "google/gemma-2-27b",
+    "google/gemma-2-2b-it",
+    "google/gemma-2-9b-it",
+    "google/gemma-2-27b-it",
     # zephyr
     "HuggingFaceH4/zephyr-7b-alpha",
     "HuggingFaceH4/zephyr-7b-beta",
@@ -66,4 +88,14 @@
     "meta-llama/Meta-Llama-3-8B-Instruct",
     "meta-llama/Meta-Llama-3-70B",
     "meta-llama/Meta-Llama-3-70B-Instruct",
+    # llama3.1
+    "meta-llama/Meta-Llama-3.1-8B",
+    "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "meta-llama/Meta-Llama-3.1-70B",
+    "meta-llama/Meta-Llama-3.1-70B-Instruct",
+    # phi3
+    "microsoft/Phi-3.5-mini-instruct",
+    "microsoft/Phi-3-mini-4k-instruct",
+    "microsoft/Phi-3-small-8k-instruct",
+    "microsoft/Phi-3-medium-4k-instruct",
 ]
diff --git a/src/core/template.py b/src/core/template.py
@@ -117,3 +117,12 @@ def register_template(
     system=None,
     stop_word="<|eot_id|>",
 )
+
+register_template(
+    template_name="phi3",
+    system_format=None,
+    user_format="<|user|>\n{content}<|end|>\n<|assistant|>",
+    assistant_format="{content}<|end|>\n",
+    system=None,
+    stop_word="<|end|>",
+)