InternLM · lvhan028 · Nov 29, 2024 · Nov 21, 2024 · Nov 21, 2024 · Nov 21, 2024
diff --git a/autotest/config-v100.yaml b/autotest/config-v100.yaml
@@ -1,4 +1,5 @@
 model_path: /nvme/qa_test_models
+resource_path: /nvme/qa_test_models/resource
 dst_path: /nvme/qa_test_models/autotest_model
 log_path: /nvme/qa_test_models/autotest_model/log
 benchmark_path: /nvme/qa_test_models/benchmark-reports
@@ -100,12 +101,22 @@ turbomind_quatization:
         - meta-llama/Meta-Llama-3-8B-Instruct
         - internlm/internlm-xcomposer2d5-7b
         - OpenGVLab/Mini-InternVL-Chat-2B-V1-5
+        - Qwen/Qwen2-VL-2B-Instruct
+        - Qwen/Qwen2-VL-7B-Instruct
         - mistralai/Mistral-7B-Instruct-v0.3
         - THUDM/glm-4-9b-chat
+        - deepseek-ai/deepseek-coder-1.3b-instruct
+        - codellama/CodeLlama-7b-Instruct-hf
     gptq:
         - internlm/internlm2_5-7b-chat
     no_kvint4:
         - openbmb/MiniCPM-V-2_6
+        - Qwen/Qwen2-7B-Instruct
+        - Qwen/Qwen2-7B-Instruct-AWQ
+        - Qwen/Qwen2-1.5B-Instruct
+        - Qwen/Qwen2.5-0.5B-Instruct
+        - Qwen/Qwen2.5-7B-Instruct
+        - Qwen/Qwen2-7B-Instruct-GPTQ-Int4
     no_kvint8:
         - deepseek-ai/DeepSeek-V2-Lite-Chat
 
@@ -120,6 +131,10 @@ pytorch_quatization:
     no_kvint4:
         - OpenGVLab/InternVL2-1B
         - OpenGVLab/InternVL2-4B
+        - Qwen/Qwen2-7B-Instruct
+        - Qwen/Qwen2-1.5B-Instruct
+        - Qwen/Qwen2-VL-2B-Instruct
+        - Qwen/Qwen2-VL-7B-Instruct
         - deepseek-ai/DeepSeek-V2-Lite-Chat
         - microsoft/Phi-3-mini-4k-instruct
         - microsoft/Phi-3-vision-128k-instruct
@@ -128,7 +143,6 @@ pytorch_quatization:
     no_kvint8:
         - deepseek-ai/DeepSeek-V2-Lite-Chat
 
-
 longtext_model:
     - meta-llama/Meta-Llama-3-1-8B-Instruct
     - meta-llama/Meta-Llama-3-8B-Instruct

diff --git a/autotest/config.yaml b/autotest/config.yaml
@@ -1,4 +1,5 @@
 model_path: /nvme/qa_test_models
+resource_path: /nvme/qa_test_models/resource
 dst_path: /nvme/qa_test_models/autotest_model
 log_path: /nvme/qa_test_models/autotest_model/log
 benchmark_path: /nvme/qa_test_models/benchmark-reports
@@ -18,6 +19,7 @@ tp_config:
     Qwen2-7B-Instruct-GPTQ-Int4: 2
     InternVL2-40B: 2
     MiniCPM-V-2_6: 2
+    Qwen2.5-72B-Instruct: 4
 
 turbomind_chat_model:
     - meta-llama/Llama-3.2-1B-Instruct
@@ -164,14 +166,24 @@ pytorch_base_model:
 
 turbomind_quatization:
     no_awq:
+        - Qwen/Qwen1.5-MoE-A2.7B-Chat
+        - Qwen/Qwen2-VL-2B-Instruct
+        - Qwen/Qwen2-VL-7B-Instruct
         - mistralai/Mistral-7B-Instruct-v0.3
+        - mistralai/Mistral-Nemo-Instruct-2407
         - deepseek-ai/deepseek-coder-1.3b-instruct
         - deepseek-ai/DeepSeek-V2-Lite-Chat
         - codellama/CodeLlama-7b-Instruct-hf
     gptq:
         - internlm/internlm2_5-7b-chat
     no_kvint4:
         - openbmb/MiniCPM-V-2_6
+        - Qwen/Qwen2-7B-Instruct
+        - Qwen/Qwen2-7B-Instruct-AWQ
+        - Qwen/Qwen2-1.5B-Instruct
+        - Qwen/Qwen2.5-0.5B-Instruct
+        - Qwen/Qwen2.5-7B-Instruct
+        - Qwen/Qwen2-7B-Instruct-GPTQ-Int4
     no_kvint8:
         - deepseek-ai/DeepSeek-V2-Lite-Chat
 
@@ -203,6 +215,10 @@ pytorch_quatization:
     no_kvint4:
         - OpenGVLab/InternVL2-1B
         - OpenGVLab/InternVL2-4B
+        - Qwen/Qwen2-7B-Instruct
+        - Qwen/Qwen2-1.5B-Instruct
+        - Qwen/Qwen2-VL-2B-Instruct
+        - Qwen/Qwen2-VL-7B-Instruct
         - deepseek-ai/DeepSeek-V2-Lite-Chat
         - microsoft/Phi-3-mini-4k-instruct
         - microsoft/Phi-3-vision-128k-instruct
@@ -211,7 +227,6 @@ pytorch_quatization:
     no_kvint8:
         - deepseek-ai/DeepSeek-V2-Lite-Chat
 
-
 longtext_model:
     - meta-llama/Meta-Llama-3-1-8B-Instruct
     - meta-llama/Meta-Llama-3-8B-Instruct
@@ -227,7 +242,8 @@ benchmark_model:
     - internlm/internlm2_5-7b-chat
     - internlm/internlm2_5-20b-chat
     - THUDM/glm-4-9b-chat
-    - Qwen/Qwen2-7B-Instruct
+    - Qwen/Qwen2.5-7B-Instruct
+    - Qwen/Qwen2.5-72B-Instruct
     - mistralai/Mistral-7B-Instruct-v0.3
     - mistralai/Mixtral-8x7B-Instruct-v0.1
     - deepseek-ai/DeepSeek-V2-Lite-Chat
diff --git a/autotest/tools/pipeline/test_pipeline_chat_pytorch_llm.py b/autotest/tools/pipeline/test_pipeline_chat_pytorch_llm.py
@@ -67,8 +67,6 @@ def test_pipeline_chat_pytorch_tp2(config, common_case_config, model,
                                               exclude_dup=True))
 def test_pipeline_chat_kvint4_tp1(config, common_case_config, model,
                                   worker_id):
-    if 'Qwen2' in model:
-        return  # kvint4 for qwen2 is not support
     if 'gw' in worker_id:
         os.environ['CUDA_VISIBLE_DEVICES'] = get_cuda_id_by_workerid(worker_id)
     spawn_context = get_context('spawn')

diff --git a/autotest/tools/pipeline/test_pipeline_chat_pytorch_mllm.py b/autotest/tools/pipeline/test_pipeline_chat_pytorch_mllm.py
@@ -50,8 +50,6 @@ def test_pipeline_chat_tp2(config, model, worker_id):
                                               quant_policy=4,
                                               model_type='vl_model'))
 def test_pipeline_chat_kvint4_tp1(config, model, worker_id):
-    if 'Qwen2' in model:
-        return  # kvint4 for qwen2 is not support
     if 'gw' in worker_id:
         os.environ['CUDA_VISIBLE_DEVICES'] = get_cuda_id_by_workerid(worker_id)
     spawn_context = get_context('spawn')
@@ -70,8 +68,6 @@ def test_pipeline_chat_kvint4_tp1(config, model, worker_id):
                                               quant_policy=4,
                                               model_type='vl_model'))
 def test_pipeline_chat_kvint4_tp2(config, model, worker_id):
-    if 'Qwen2' in model:
-        return  # kvint4 for qwen2 is not support
     if 'gw' in worker_id:
         os.environ['CUDA_VISIBLE_DEVICES'] = get_cuda_id_by_workerid(worker_id,
                                                                      tp_num=2)

diff --git a/autotest/tools/pipeline/test_pipeline_chat_turbomind_llm.py b/autotest/tools/pipeline/test_pipeline_chat_turbomind_llm.py
@@ -56,8 +56,6 @@ def test_pipeline_chat_tp2(config, common_case_config, model, worker_id):
 @pytest.mark.parametrize('model', get_all_model_list(tp_num=1, quant_policy=4))
 def test_pipeline_chat_kvint4_tp1(config, common_case_config, model,
                                   worker_id):
-    if 'Qwen2' in model:
-        return  # kvint4 for qwen2 is not support
     if 'gw' in worker_id:
         os.environ['CUDA_VISIBLE_DEVICES'] = get_cuda_id_by_workerid(worker_id)
     spawn_context = get_context('spawn')

diff --git a/autotest/tools/pipeline/test_pipeline_chat_turbomind_mllm.py b/autotest/tools/pipeline/test_pipeline_chat_turbomind_mllm.py
@@ -50,8 +50,6 @@ def test_pipeline_chat_tp2(config, model, worker_id):
                                             quant_policy=4,
                                             model_type='vl_model'))
 def test_pipeline_chat_kvint4_tp1(config, model, worker_id):
-    if 'Qwen2' in model:
-        return  # kvint4 for qwen2 is not support
     if 'gw' in worker_id:
         os.environ['CUDA_VISIBLE_DEVICES'] = get_cuda_id_by_workerid(worker_id)
     spawn_context = get_context('spawn')
@@ -70,8 +68,6 @@ def test_pipeline_chat_kvint4_tp1(config, model, worker_id):
                                             quant_policy=4,
                                             model_type='vl_model'))
 def test_pipeline_chat_kvint4_tp2(config, model, worker_id):
-    if 'Qwen2' in model:
-        return  # kvint4 for qwen2 is not support
     if 'gw' in worker_id:
         os.environ['CUDA_VISIBLE_DEVICES'] = get_cuda_id_by_workerid(worker_id,
                                                                      tp_num=2)

diff --git a/autotest/tools/restful/test_restful_chat_hf_pytorch_llm.py b/autotest/tools/restful/test_restful_chat_hf_pytorch_llm.py
@@ -67,8 +67,7 @@ def getKvintModelList(tp_num, quant_policy):
         'tp_num': tp_num,
         'extra': f'--quant-policy {quant_policy}'
     } for item in get_torch_model_list(
-        tp_num, quant_policy=quant_policy, exclude_dup=True)
-            if 'qwen2' not in item.lower() or quant_policy == 8]
+        tp_num, quant_policy=quant_policy, exclude_dup=True)]
 
 
 @pytest.mark.order(7)

diff --git a/autotest/tools/restful/test_restful_chat_hf_pytorch_mllm.py b/autotest/tools/restful/test_restful_chat_hf_pytorch_mllm.py
@@ -60,8 +60,7 @@ def getKvintModelList(tp_num, quant_policy: int = None):
         'tp_num': tp_num,
         'extra': f'--quant-policy {quant_policy}'
     } for item in get_torch_model_list(
-        tp_num, quant_policy=quant_policy, model_type='vl_model')
-            if 'qwen2' not in item.lower() or quant_policy == 8]
+        tp_num, quant_policy=quant_policy, model_type='vl_model')]
 
 
 @pytest.mark.order(7)

diff --git a/autotest/tools/restful/test_restful_chat_hf_turbomind_llm.py b/autotest/tools/restful/test_restful_chat_hf_turbomind_llm.py
@@ -66,8 +66,7 @@ def getKvintModelList(tp_num, quant_policy):
         'cuda_prefix': None,
         'tp_num': tp_num,
         'extra': f'--quant-policy {quant_policy}'
-    } for item in get_all_model_list(tp_num, quant_policy=quant_policy)
-            if 'qwen2' not in item.lower() or quant_policy == 8]
+    } for item in get_all_model_list(tp_num, quant_policy=quant_policy)]
 
 
 @pytest.mark.order(7)

diff --git a/autotest/tools/restful/test_restful_chat_hf_turbomind_mllm.py b/autotest/tools/restful/test_restful_chat_hf_turbomind_mllm.py
@@ -60,8 +60,7 @@ def getKvintModelList(tp_num, quant_policy: int = None):
         'tp_num': tp_num,
         'extra': f'--quant-policy {quant_policy}'
     } for item in get_all_model_list(
-        tp_num, quant_policy=quant_policy, model_type='vl_model')
-            if 'qwen2' not in item.lower() or quant_policy == 8]
+        tp_num, quant_policy=quant_policy, model_type='vl_model')]
 
 
 @pytest.mark.order(7)