From 34d32a5761e532cfd018018ade7be03a8f8add2b Mon Sep 17 00:00:00 2001 From: zhulin1 Date: Thu, 21 Nov 2024 16:52:01 +0800 Subject: [PATCH 1/9] update --- autotest/config-v100.yaml | 26 +- autotest/config.yaml | 13 +- .../test_pipeline_chat_pytorch_llm.py | 2 - .../test_pipeline_chat_pytorch_mllm.py | 4 - .../test_pipeline_chat_turbomind_llm.py | 2 - .../test_pipeline_chat_turbomind_mllm.py | 4 - .../test_restful_chat_hf_pytorch_llm.py | 3 +- .../test_restful_chat_hf_pytorch_mllm.py | 3 +- .../test_restful_chat_hf_turbomind_llm.py | 3 +- .../test_restful_chat_hf_turbomind_mllm.py | 3 +- autotest/utils/pipeline_chat.py | 323 ++++++++++++++++++ autotest/utils/run_restful_chat.py | 15 +- docs/en/supported_models/supported_models.md | 4 +- .../supported_models/supported_models.md | 4 +- 14 files changed, 375 insertions(+), 34 deletions(-) diff --git a/autotest/config-v100.yaml b/autotest/config-v100.yaml index 41216cb730..adfca51ab8 100644 --- a/autotest/config-v100.yaml +++ b/autotest/config-v100.yaml @@ -96,16 +96,21 @@ pytorch_vl_model: turbomind_quatization: no_awq: - - meta-llama/Meta-Llama-3-1-8B-Instruct - - meta-llama/Meta-Llama-3-8B-Instruct - - internlm/internlm-xcomposer2d5-7b - - OpenGVLab/Mini-InternVL-Chat-2B-V1-5 + - Qwen/Qwen2-VL-2B-Instruct + - Qwen/Qwen2-VL-7B-Instruct - mistralai/Mistral-7B-Instruct-v0.3 - - THUDM/glm-4-9b-chat + - deepseek-ai/deepseek-coder-1.3b-instruct + - codellama/CodeLlama-7b-Instruct-hf gptq: - internlm/internlm2_5-7b-chat no_kvint4: - openbmb/MiniCPM-V-2_6 + - Qwen/Qwen2-7B-Instruct + - Qwen/Qwen2-7B-Instruct-AWQ + - Qwen/Qwen2-1.5B-Instruct + - Qwen/Qwen2.5-0.5B-Instruct + - Qwen/Qwen2.5-7B-Instruct + - Qwen/Qwen2-7B-Instruct-GPTQ-Int4 no_kvint8: - deepseek-ai/DeepSeek-V2-Lite-Chat @@ -115,11 +120,21 @@ pytorch_quatization: - internlm/internlm2_5-20b-chat - Qwen/Qwen2-1.5B-Instruct w8a8: + - meta-llama/Meta-Llama-3-8B-Instruct + - meta-llama/Llama-2-7b-chat-hf + - internlm/internlm2-chat-20b - internlm/internlm2_5-7b-chat + - internlm/internlm2_5-20b-chat + - 01-ai/Yi-6B-Chat + - internlm/internlm2_5-20b - internlm/internlm2_5-7b no_kvint4: - OpenGVLab/InternVL2-1B - OpenGVLab/InternVL2-4B + - Qwen/Qwen2-7B-Instruct + - Qwen/Qwen2-1.5B-Instruct + - Qwen/Qwen2-VL-2B-Instruct + - Qwen/Qwen2-VL-7B-Instruct - deepseek-ai/DeepSeek-V2-Lite-Chat - microsoft/Phi-3-mini-4k-instruct - microsoft/Phi-3-vision-128k-instruct @@ -128,7 +143,6 @@ pytorch_quatization: no_kvint8: - deepseek-ai/DeepSeek-V2-Lite-Chat - longtext_model: - meta-llama/Meta-Llama-3-1-8B-Instruct - meta-llama/Meta-Llama-3-8B-Instruct diff --git a/autotest/config.yaml b/autotest/config.yaml index 6c92d2cf0b..2ba60e3ed2 100644 --- a/autotest/config.yaml +++ b/autotest/config.yaml @@ -163,15 +163,23 @@ pytorch_base_model: turbomind_quatization: no_awq: + - Qwen/Qwen1.5-MoE-A2.7B-Chat - Qwen/Qwen2-VL-2B-Instruct - Qwen/Qwen2-VL-7B-Instruct - mistralai/Mistral-7B-Instruct-v0.3 + - mistralai/Mistral-Nemo-Instruct-2407 - deepseek-ai/deepseek-coder-1.3b-instruct - codellama/CodeLlama-7b-Instruct-hf gptq: - internlm/internlm2_5-7b-chat no_kvint4: - openbmb/MiniCPM-V-2_6 + - Qwen/Qwen2-7B-Instruct + - Qwen/Qwen2-7B-Instruct-AWQ + - Qwen/Qwen2-1.5B-Instruct + - Qwen/Qwen2.5-0.5B-Instruct + - Qwen/Qwen2.5-7B-Instruct + - Qwen/Qwen2-7B-Instruct-GPTQ-Int4 no_kvint8: - deepseek-ai/DeepSeek-V2-Lite-Chat @@ -201,6 +209,10 @@ pytorch_quatization: no_kvint4: - OpenGVLab/InternVL2-1B - OpenGVLab/InternVL2-4B + - Qwen/Qwen2-7B-Instruct + - Qwen/Qwen2-1.5B-Instruct + - Qwen/Qwen2-VL-2B-Instruct + - Qwen/Qwen2-VL-7B-Instruct - deepseek-ai/DeepSeek-V2-Lite-Chat - microsoft/Phi-3-mini-4k-instruct - microsoft/Phi-3-vision-128k-instruct @@ -209,7 +221,6 @@ pytorch_quatization: no_kvint8: - deepseek-ai/DeepSeek-V2-Lite-Chat - longtext_model: - meta-llama/Meta-Llama-3-1-8B-Instruct - meta-llama/Meta-Llama-3-8B-Instruct diff --git a/autotest/tools/pipeline/test_pipeline_chat_pytorch_llm.py b/autotest/tools/pipeline/test_pipeline_chat_pytorch_llm.py index a828e17a09..58674fa173 100644 --- a/autotest/tools/pipeline/test_pipeline_chat_pytorch_llm.py +++ b/autotest/tools/pipeline/test_pipeline_chat_pytorch_llm.py @@ -67,8 +67,6 @@ def test_pipeline_chat_pytorch_tp2(config, common_case_config, model, exclude_dup=True)) def test_pipeline_chat_kvint4_tp1(config, common_case_config, model, worker_id): - if 'Qwen2' in model: - return # kvint4 for qwen2 is not support if 'gw' in worker_id: os.environ['CUDA_VISIBLE_DEVICES'] = get_cuda_id_by_workerid(worker_id) spawn_context = get_context('spawn') diff --git a/autotest/tools/pipeline/test_pipeline_chat_pytorch_mllm.py b/autotest/tools/pipeline/test_pipeline_chat_pytorch_mllm.py index 276ced5bcb..8403ced94f 100644 --- a/autotest/tools/pipeline/test_pipeline_chat_pytorch_mllm.py +++ b/autotest/tools/pipeline/test_pipeline_chat_pytorch_mllm.py @@ -50,8 +50,6 @@ def test_pipeline_chat_tp2(config, model, worker_id): quant_policy=4, model_type='vl_model')) def test_pipeline_chat_kvint4_tp1(config, model, worker_id): - if 'Qwen2' in model: - return # kvint4 for qwen2 is not support if 'gw' in worker_id: os.environ['CUDA_VISIBLE_DEVICES'] = get_cuda_id_by_workerid(worker_id) spawn_context = get_context('spawn') @@ -70,8 +68,6 @@ def test_pipeline_chat_kvint4_tp1(config, model, worker_id): quant_policy=4, model_type='vl_model')) def test_pipeline_chat_kvint4_tp2(config, model, worker_id): - if 'Qwen2' in model: - return # kvint4 for qwen2 is not support if 'gw' in worker_id: os.environ['CUDA_VISIBLE_DEVICES'] = get_cuda_id_by_workerid(worker_id, tp_num=2) diff --git a/autotest/tools/pipeline/test_pipeline_chat_turbomind_llm.py b/autotest/tools/pipeline/test_pipeline_chat_turbomind_llm.py index 17560e754d..d1865175cf 100644 --- a/autotest/tools/pipeline/test_pipeline_chat_turbomind_llm.py +++ b/autotest/tools/pipeline/test_pipeline_chat_turbomind_llm.py @@ -56,8 +56,6 @@ def test_pipeline_chat_tp2(config, common_case_config, model, worker_id): @pytest.mark.parametrize('model', get_all_model_list(tp_num=1, quant_policy=4)) def test_pipeline_chat_kvint4_tp1(config, common_case_config, model, worker_id): - if 'Qwen2' in model: - return # kvint4 for qwen2 is not support if 'gw' in worker_id: os.environ['CUDA_VISIBLE_DEVICES'] = get_cuda_id_by_workerid(worker_id) spawn_context = get_context('spawn') diff --git a/autotest/tools/pipeline/test_pipeline_chat_turbomind_mllm.py b/autotest/tools/pipeline/test_pipeline_chat_turbomind_mllm.py index 8f1bc7d8b1..8c845fa77a 100644 --- a/autotest/tools/pipeline/test_pipeline_chat_turbomind_mllm.py +++ b/autotest/tools/pipeline/test_pipeline_chat_turbomind_mllm.py @@ -50,8 +50,6 @@ def test_pipeline_chat_tp2(config, model, worker_id): quant_policy=4, model_type='vl_model')) def test_pipeline_chat_kvint4_tp1(config, model, worker_id): - if 'Qwen2' in model: - return # kvint4 for qwen2 is not support if 'gw' in worker_id: os.environ['CUDA_VISIBLE_DEVICES'] = get_cuda_id_by_workerid(worker_id) spawn_context = get_context('spawn') @@ -70,8 +68,6 @@ def test_pipeline_chat_kvint4_tp1(config, model, worker_id): quant_policy=4, model_type='vl_model')) def test_pipeline_chat_kvint4_tp2(config, model, worker_id): - if 'Qwen2' in model: - return # kvint4 for qwen2 is not support if 'gw' in worker_id: os.environ['CUDA_VISIBLE_DEVICES'] = get_cuda_id_by_workerid(worker_id, tp_num=2) diff --git a/autotest/tools/restful/test_restful_chat_hf_pytorch_llm.py b/autotest/tools/restful/test_restful_chat_hf_pytorch_llm.py index ab1f5595ae..fc95e288ca 100644 --- a/autotest/tools/restful/test_restful_chat_hf_pytorch_llm.py +++ b/autotest/tools/restful/test_restful_chat_hf_pytorch_llm.py @@ -67,8 +67,7 @@ def getKvintModelList(tp_num, quant_policy): 'tp_num': tp_num, 'extra': f'--quant-policy {quant_policy}' } for item in get_torch_model_list( - tp_num, quant_policy=quant_policy, exclude_dup=True) - if 'qwen2' not in item.lower() or quant_policy == 8] + tp_num, quant_policy=quant_policy, exclude_dup=True)] @pytest.mark.order(7) diff --git a/autotest/tools/restful/test_restful_chat_hf_pytorch_mllm.py b/autotest/tools/restful/test_restful_chat_hf_pytorch_mllm.py index b210733db4..bf20c45e6e 100644 --- a/autotest/tools/restful/test_restful_chat_hf_pytorch_mllm.py +++ b/autotest/tools/restful/test_restful_chat_hf_pytorch_mllm.py @@ -60,8 +60,7 @@ def getKvintModelList(tp_num, quant_policy: int = None): 'tp_num': tp_num, 'extra': f'--quant-policy {quant_policy}' } for item in get_torch_model_list( - tp_num, quant_policy=quant_policy, model_type='vl_model') - if 'qwen2' not in item.lower() or quant_policy == 8] + tp_num, quant_policy=quant_policy, model_type='vl_model')] @pytest.mark.order(7) diff --git a/autotest/tools/restful/test_restful_chat_hf_turbomind_llm.py b/autotest/tools/restful/test_restful_chat_hf_turbomind_llm.py index 91e65ee51a..1c9131b32e 100644 --- a/autotest/tools/restful/test_restful_chat_hf_turbomind_llm.py +++ b/autotest/tools/restful/test_restful_chat_hf_turbomind_llm.py @@ -66,8 +66,7 @@ def getKvintModelList(tp_num, quant_policy): 'cuda_prefix': None, 'tp_num': tp_num, 'extra': f'--quant-policy {quant_policy}' - } for item in get_all_model_list(tp_num, quant_policy=quant_policy) - if 'qwen2' not in item.lower() or quant_policy == 8] + } for item in get_all_model_list(tp_num, quant_policy=quant_policy)] @pytest.mark.order(7) diff --git a/autotest/tools/restful/test_restful_chat_hf_turbomind_mllm.py b/autotest/tools/restful/test_restful_chat_hf_turbomind_mllm.py index 091e18e6e3..641f2f760f 100644 --- a/autotest/tools/restful/test_restful_chat_hf_turbomind_mllm.py +++ b/autotest/tools/restful/test_restful_chat_hf_turbomind_mllm.py @@ -60,8 +60,7 @@ def getKvintModelList(tp_num, quant_policy: int = None): 'tp_num': tp_num, 'extra': f'--quant-policy {quant_policy}' } for item in get_all_model_list( - tp_num, quant_policy=quant_policy, model_type='vl_model') - if 'qwen2' not in item.lower() or quant_policy == 8] + tp_num, quant_policy=quant_policy, model_type='vl_model')] @pytest.mark.order(7) diff --git a/autotest/utils/pipeline_chat.py b/autotest/utils/pipeline_chat.py index 562a707efe..c09e46a42b 100644 --- a/autotest/utils/pipeline_chat.py +++ b/autotest/utils/pipeline_chat.py @@ -3,7 +3,10 @@ from subprocess import PIPE import allure +import numpy as np import torch +from decord import VideoReader, cpu +from PIL import Image from pytest_assume.plugin import assume from utils.get_run_config import get_model_name, get_tp_num from utils.rule_condition_assert import assert_result @@ -13,6 +16,7 @@ from lmdeploy.utils import is_bf16_supported from lmdeploy.vl import load_image from lmdeploy.vl.constants import IMAGE_TOKEN +from lmdeploy.vl.utils import encode_image_base64 def run_pipeline_chat_test(config, @@ -275,6 +279,11 @@ def assert_pipeline_single_element(output, PIC1 = 'https://raw.githubusercontent.com/open-mmlab/mmdeploy/main/tests/data/tiger.jpeg' # noqa E501 PIC2 = 'https://raw.githubusercontent.com/open-mmlab/mmdeploy/main/demo/resources/human-pose.jpg' # noqa E501 +PIC_BEIJING = 'https://raw.githubusercontent.com/QwenLM/Qwen-VL/master/assets/mm_tutorial/Beijing_Small.jpeg' # noqa E501 +PIC_CHONGQING = 'https://raw.githubusercontent.com/QwenLM/Qwen-VL/master/assets/mm_tutorial/Chongqing_Small.jpeg' # noqa E501 +PIC_RACCON = 'https://raw.githubusercontent.com/OpenGVLab/InternVL/main/internvl_chat/examples/image1.jpg' # noqa E501 +PIC_PANDA = 'https://raw.githubusercontent.com/OpenGVLab/InternVL/main/internvl_chat/examples/image2.jpg' # noqa E501 +DESC = 'What are the similarities and differences between these two images.' # noqa E501 def run_pipeline_vl_chat_test(config, @@ -386,12 +395,326 @@ def run_pipeline_vl_chat_test(config, ', reason: Multi-turn example: ski not in ' + sess.response.text + '\n') + if 'internvl' in model_case.lower(): + internvl_vl_testcase(config, pipe, file) + if 'llava' in model_case.lower(): + llava_vl_testcase(config, pipe, file) + if 'minicpm' in model_case.lower(): + MiniCPM_vl_testcase(config, pipe, file) + if 'qwen' in model_case.lower(): + Qwen_vl_testcase(config, pipe, file) + file.close() del pipe torch.cuda.empty_cache() +def internvl_vl_testcase(config, pipe, file): + # multi-image multi-round conversation, combined images + messages = [ + dict( + role='user', + content=[ + dict( + type='text', + text=f'{IMAGE_TOKEN}{IMAGE_TOKEN}\n{DESC}' # noqa E251,E501 + ), + dict(type='image_url', + image_url=dict(max_dynamic_patch=12, url=PIC_RACCON)), + dict(type='image_url', + image_url=dict(max_dynamic_patch=12, url=PIC_PANDA)) + ]) + ] + response = pipe(messages) + result = 'ski' in response.text.lower() or '滑雪' in response.text.lower() + file.writelines('result:' + str(result) + + ', reason: Multi-turn example: ski not in ' + + response.text + '\n') + + messages.append(dict(role='assistant', content=response.text)) + messages.append(dict(role='user', content=DESC)) + response = pipe(messages) + result = 'ski' in response.text.lower() or '滑雪' in response.text.lower() + file.writelines('result:' + str(result) + + ', reason: Multi-turn example: ski not in ' + + response.text + '\n') + + # multi-image multi-round conversation, separate images + messages = [ + dict( + role='user', + content=[ + dict( + type='text', + text=f'Image-1: {IMAGE_TOKEN}\nImage-2: {IMAGE_TOKEN}\n' + + # noqa E251,E501 + DESC), + dict(type='image_url', + image_url=dict(max_dynamic_patch=12, url=PIC_RACCON)), + dict(type='image_url', + image_url=dict(max_dynamic_patch=12, url=PIC_PANDA)) + ]) + ] + response = pipe(messages) + result = 'ski' in response.text.lower() or '滑雪' in response.text.lower() + file.writelines('result:' + str(result) + + ', reason: Multi-turn example: ski not in ' + + response.text + '\n') + + messages.append(dict(role='assistant', content=response.text)) + messages.append(dict(role='user', content=DESC)) + response = pipe(messages) + result = 'ski' in response.text.lower() or '滑雪' in response.text.lower() + file.writelines('result:' + str(result) + + ', reason: Multi-turn example: ski not in ' + + response.text + '\n') + + # video multi-round conversation + def get_index(bound, fps, max_frame, first_idx=0, num_segments=32): + if bound: + start, end = bound[0], bound[1] + else: + start, end = -100000, 100000 + start_idx = max(first_idx, round(start * fps)) + end_idx = min(round(end * fps), max_frame) + seg_size = float(end_idx - start_idx) / num_segments + frame_indices = np.array([ + int(start_idx + (seg_size / 2) + np.round(seg_size * idx)) + for idx in range(num_segments) + ]) + return frame_indices + + def load_video(video_path, bound=None, num_segments=32): + vr = VideoReader(video_path, ctx=cpu(0), num_threads=1) + max_frame = len(vr) - 1 + fps = float(vr.get_avg_fps()) + frame_indices = get_index(bound, + fps, + max_frame, + first_idx=0, + num_segments=num_segments) + imgs = [] + for frame_index in frame_indices: + img = Image.fromarray(vr[frame_index].asnumpy()).convert('RGB') + imgs.append(img) + return imgs + + resource_path = config.get('resource_path') + video_path = resource_path + '/red-panda.mp4' + imgs = load_video(video_path, num_segments=8) + + question = '' + for i in range(len(imgs)): + question = question + f'Frame{i+1}: {IMAGE_TOKEN}\n' + + question += 'What is the red panda doing?' + + content = [{'type': 'text', 'text': question}] + for img in imgs: + content.append({ + 'type': 'image_url', + 'image_url': { + 'max_dynamic_patch': 1, + 'url': f'data:image/jpeg;base64,{encode_image_base64(img)}' + } + }) + + messages = [dict(role='user', content=content)] + response = pipe(messages) + result = 'ski' in response.text.lower() or '滑雪' in response.text.lower() + file.writelines('result:' + str(result) + + ', reason: Multi-turn example: ski not in ' + + response.text + '\n') + + messages.append(dict(role='assistant', content=response.text)) + messages.append( + dict(role='user', + content='Describe this video in detail. Don\'t repeat.')) + response = pipe(messages) + result = 'ski' in response.text.lower() or '滑雪' in response.text.lower() + file.writelines('result:' + str(result) + + ', reason: Multi-turn example: ski not in ' + + response.text + '\n') + + +def llava_vl_testcase(config, pipe, file): + # multi-image multi-round conversation, combined images + messages = [ + dict(role='user', + content=[ + dict(type='text', text='Describe the two images in detail.'), + dict(type='image_url', image_url=dict(url=PIC_BEIJING)), + dict(type='image_url', image_url=dict(url=PIC_CHONGQING)) + ]) + ] + response = pipe(messages) + result = 'ski' in response.text.lower() or '滑雪' in response.text.lower() + file.writelines('result:' + str(result) + + ', reason: Multi-turn example: ski not in ' + + response.text + '\n') + + messages.append(dict(role='assistant', content=response.text)) + messages.append(dict(role='user', content=DESC)) + response = pipe(messages) + result = 'ski' in response.text.lower() or '滑雪' in response.text.lower() + file.writelines('result:' + str(result) + + ', reason: Multi-turn example: ski not in ' + + response.text + '\n') + + +def MiniCPM_vl_testcase(config, pipe, file): + # Chat with multiple images + messages = [ + dict(role='user', + content=[ + dict(type='text', text='Describe the two images in detail.'), + dict(type='image_url', + image_url=dict(max_slice_nums=9, url=PIC_RACCON)), + dict(type='image_url', + image_url=dict(max_slice_nums=9, url=PIC_PANDA)) + ]) + ] + response = pipe(messages) + result = 'ski' in response.text.lower() or '滑雪' in response.text.lower() + file.writelines('result:' + str(result) + + ', reason: Multi-turn example: ski not in ' + + response.text + '\n') + + messages.append(dict(role='assistant', content=response.text)) + messages.append(dict(role='user', content=DESC)) + response = pipe(messages) + result = 'ski' in response.text.lower() or '滑雪' in response.text.lower() + file.writelines('result:' + str(result) + + ', reason: Multi-turn example: ski not in ' + + response.text + '\n') + + # In-context few-shot learning + question = 'production date' + messages = [ + dict(role='user', + content=[ + dict(type='text', text=question), + dict(type='image_url', image_url=dict(url='example1.jpg')), + ]), + dict(role='assistant', content='2023.08.04'), + dict(role='user', + content=[ + dict(type='text', text=question), + dict(type='image_url', image_url=dict(url='example2.jpg')), + ]), + dict(role='assistant', content='2007.04.24'), + dict(role='user', + content=[ + dict(type='text', text=question), + dict(type='image_url', image_url=dict(url='test.jpg')), + ]) + ] + response = pipe(messages) + result = 'ski' in response.text.lower() or '滑雪' in response.text.lower() + file.writelines('result:' + str(result) + + ', reason: Multi-turn example: ski not in ' + + response.text + '\n') + + # Chat with video + MAX_NUM_FRAMES = 64 # if cuda OOM set a smaller number + + def encode_video(video_path): + + def uniform_sample(length, n): + gap = len(length) / n + idxs = [int(i * gap + gap / 2) for i in range(n)] + return [length[i] for i in idxs] + + vr = VideoReader(video_path, ctx=cpu(0)) + sample_fps = round(vr.get_avg_fps() / 1) # FPS + frame_idx = [i for i in range(0, len(vr), sample_fps)] + if len(frame_idx) > MAX_NUM_FRAMES: + frame_idx = uniform_sample(frame_idx, MAX_NUM_FRAMES) + frames = vr.get_batch(frame_idx).asnumpy() + frames = [Image.fromarray(v.astype('uint8')) for v in frames] + print('num frames:', len(frames)) + return frames + + resource_path = config.get('resource_path') + video_path = resource_path + '/video_test.mp4' + frames = encode_video(video_path) + question = 'Describe the video' + + content = [dict(type='text', text=question)] + for frame in frames: + content.append( + dict(type='image_url', + image_url=dict( + use_image_id=False, + max_slice_nums=2, + url=f'data:image/jpeg;base64,{encode_image_base64(frame)}' + ))) + + messages = [dict(role='user', content=content)] + response = pipe(messages) + result = 'ski' in response.text.lower() or '滑雪' in response.text.lower() + file.writelines('result:' + str(result) + + ', reason: Multi-turn example: ski not in ' + + response.text + '\n') + + +def Qwen_vl_testcase(config, pipe, file): + # multi-image multi-round conversation, combined images + messages = [ + dict(role='user', + content=[ + dict(type='text', text='Describe the two images in detail.'), + dict(type='image_url', image_url=dict(url=PIC_BEIJING)), + dict(type='image_url', image_url=dict(url=PIC_CHONGQING)) + ]) + ] + response = pipe(messages) + result = 'ski' in response.text.lower() or '滑雪' in response.text.lower() + file.writelines('result:' + str(result) + + ', reason: Multi-turn example: ski not in ' + + response.text + '\n') + + messages.append(dict(role='assistant', content=response.text)) + messages.append(dict(role='user', content=DESC)) + response = pipe(messages) + result = 'ski' in response.text.lower() or '滑雪' in response.text.lower() + file.writelines('result:' + str(result) + + ', reason: Multi-turn example: ski not in ' + + response.text + '\n') + + # image resolution for performance boost + min_pixels = 64 * 28 * 28 + max_pixels = 64 * 28 * 28 + messages = [ + dict(role='user', + content=[ + dict(type='text', text='Describe the two images in detail.'), + dict(type='image_url', + image_url=dict(min_pixels=min_pixels, + max_pixels=max_pixels, + url=PIC_BEIJING)), + dict(type='image_url', + image_url=dict(min_pixels=min_pixels, + max_pixels=max_pixels, + url=PIC_CHONGQING)) + ]) + ] + response = pipe(messages) + result = 'ski' in response.text.lower() or '滑雪' in response.text.lower() + file.writelines('result:' + str(result) + + ', reason: Multi-turn example: ski not in ' + + response.text + '\n') + + messages.append(dict(role='assistant', content=response.text)) + messages.append(dict(role='user', content=DESC)) + response = pipe(messages) + result = 'ski' in response.text.lower() or '滑雪' in response.text.lower() + file.writelines('result:' + str(result) + + ', reason: Multi-turn example: ski not in ' + + response.text + '\n') + + def assert_pipeline_vl_chat_log(config, model_case, worker_id): log_path = config.get('log_path') diff --git a/autotest/utils/run_restful_chat.py b/autotest/utils/run_restful_chat.py index 77af1975be..082a61bcda 100644 --- a/autotest/utils/run_restful_chat.py +++ b/autotest/utils/run_restful_chat.py @@ -282,6 +282,7 @@ def get_model(url): PIC = 'https://raw.githubusercontent.com/open-mmlab/mmdeploy/main/tests/data/tiger.jpeg' # noqa E501 +PIC2 = 'https://raw.githubusercontent.com/open-mmlab/mmdeploy/main/demo/resources/human-pose.jpg' # noqa E501 def run_vl_testcase(config, port: int = DEFAULT_PORT): @@ -307,6 +308,11 @@ def run_vl_testcase(config, port: int = DEFAULT_PORT): 'image_url': { 'url': PIC, }, + }, { + 'type': 'image_url', + 'image_url': { + 'url': PIC2, + }, }], }] @@ -315,8 +321,6 @@ def run_vl_testcase(config, port: int = DEFAULT_PORT): temperature=0.8, top_p=0.8) file.writelines(str(response).lower() + '\n') - assert 'tiger' in str(response).lower() or '虎' in str( - response).lower(), response api_client = APIClient(http_url) model_name = api_client.available_models[0] @@ -324,7 +328,12 @@ def run_vl_testcase(config, port: int = DEFAULT_PORT): messages=prompt_messages): continue file.writelines(str(item) + '\n') - assert 'tiger' in str(item).lower() or '虎' in str(item).lower(), item allure.attach.file(restful_log, attachment_type=allure.attachment_type.TEXT) + + assert 'tiger' in str(response).lower() or '虎' in str( + response).lower() or 'ski' in str(response).lower() or '滑雪' in str( + response).lower(), response + assert 'tiger' in str(item).lower() or '虎' in str(item).lower( + ) or 'ski' in str(item).lower() or '滑雪' in str(item).lower(), item diff --git a/docs/en/supported_models/supported_models.md b/docs/en/supported_models/supported_models.md index 283ce596f6..d685e869cd 100644 --- a/docs/en/supported_models/supported_models.md +++ b/docs/en/supported_models/supported_models.md @@ -19,7 +19,7 @@ The following tables detail the models supported by LMDeploy's TurboMind engine | Qwen | 1.8B - 72B | LLM | Yes | Yes | Yes | Yes | | Qwen1.5 | 1.8B - 110B | LLM | Yes | Yes | Yes | Yes | | Qwen2 | 0.5B - 72B | LLM | Yes | Yes | Yes | Yes | -| Mistral | 7B | LLM | Yes | Yes | Yes | Yes | +| Mistral | 7B | LLM | Yes | Yes | Yes | No | | Mixtral | 8x7B, 8x22B | LLM | Yes | Yes | Yes | Yes | | Qwen-VL | 7B | MLLM | Yes | Yes | Yes | Yes | | DeepSeek-VL | 7B | MLLM | Yes | Yes | Yes | Yes | @@ -36,7 +36,7 @@ The following tables detail the models supported by LMDeploy's TurboMind engine | MiniGeminiLlama | 7B | MLLM | Yes | - | - | Yes | | GLM4 | 9B | LLM | Yes | Yes | Yes | Yes | | CodeGeeX4 | 9B | LLM | Yes | Yes | Yes | - | -| Molmo | 7B-D,72B | MLLM | Yes | Yes | Yes | NO | +| Molmo | 7B-D,72B | MLLM | Yes | Yes | Yes | No | "-" means not verified yet. diff --git a/docs/zh_cn/supported_models/supported_models.md b/docs/zh_cn/supported_models/supported_models.md index 908f9a17f5..8918423115 100644 --- a/docs/zh_cn/supported_models/supported_models.md +++ b/docs/zh_cn/supported_models/supported_models.md @@ -19,7 +19,7 @@ | Qwen | 1.8B - 72B | LLM | Yes | Yes | Yes | Yes | | Qwen1.5 | 1.8B - 110B | LLM | Yes | Yes | Yes | Yes | | Qwen2 | 0.5B - 72B | LLM | Yes | Yes | Yes | Yes | -| Mistral | 7B | LLM | Yes | Yes | Yes | Yes | +| Mistral | 7B | LLM | Yes | Yes | Yes | No | | Mixtral | 8x7B, 8x22B | LLM | Yes | Yes | Yes | Yes | | Qwen-VL | 7B | MLLM | Yes | Yes | Yes | Yes | | DeepSeek-VL | 7B | MLLM | Yes | Yes | Yes | Yes | @@ -36,7 +36,7 @@ | MiniGeminiLlama | 7B | MLLM | Yes | - | - | Yes | | GLM4 | 9B | LLM | Yes | Yes | Yes | Yes | | CodeGeeX4 | 9B | LLM | Yes | Yes | Yes | - | -| Molmo | 7B-D,72B | MLLM | Yes | Yes | Yes | NO | +| Molmo | 7B-D,72B | MLLM | Yes | Yes | Yes | No | “-” 表示还没有验证。 From e7150ce304360a00aa75183ac67969f13887ab1c Mon Sep 17 00:00:00 2001 From: zhulin1 Date: Thu, 21 Nov 2024 18:40:08 +0800 Subject: [PATCH 2/9] update --- autotest/config-v100.yaml | 7 +------ autotest/config.yaml | 1 + 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/autotest/config-v100.yaml b/autotest/config-v100.yaml index adfca51ab8..b2714087f4 100644 --- a/autotest/config-v100.yaml +++ b/autotest/config-v100.yaml @@ -1,4 +1,5 @@ model_path: /nvme/qa_test_models +resource_path: /nvme/qa_test_models/resource dst_path: /nvme/qa_test_models/autotest_model log_path: /nvme/qa_test_models/autotest_model/log benchmark_path: /nvme/qa_test_models/benchmark-reports @@ -120,13 +121,7 @@ pytorch_quatization: - internlm/internlm2_5-20b-chat - Qwen/Qwen2-1.5B-Instruct w8a8: - - meta-llama/Meta-Llama-3-8B-Instruct - - meta-llama/Llama-2-7b-chat-hf - - internlm/internlm2-chat-20b - internlm/internlm2_5-7b-chat - - internlm/internlm2_5-20b-chat - - 01-ai/Yi-6B-Chat - - internlm/internlm2_5-20b - internlm/internlm2_5-7b no_kvint4: - OpenGVLab/InternVL2-1B diff --git a/autotest/config.yaml b/autotest/config.yaml index 2ba60e3ed2..b11a21523c 100644 --- a/autotest/config.yaml +++ b/autotest/config.yaml @@ -1,4 +1,5 @@ model_path: /nvme/qa_test_models +resource_path: /nvme/qa_test_models/resource dst_path: /nvme/qa_test_models/autotest_model log_path: /nvme/qa_test_models/autotest_model/log benchmark_path: /nvme/qa_test_models/benchmark-reports From 4dd83fda6aee0b9480ae64f345d68372ef519176 Mon Sep 17 00:00:00 2001 From: zhulin1 Date: Thu, 21 Nov 2024 20:47:48 +0800 Subject: [PATCH 3/9] update --- autotest/utils/pipeline_chat.py | 94 +++++++++++++++++++-------------- 1 file changed, 53 insertions(+), 41 deletions(-) diff --git a/autotest/utils/pipeline_chat.py b/autotest/utils/pipeline_chat.py index c09e46a42b..a198d8e29d 100644 --- a/autotest/utils/pipeline_chat.py +++ b/autotest/utils/pipeline_chat.py @@ -281,7 +281,7 @@ def assert_pipeline_single_element(output, PIC2 = 'https://raw.githubusercontent.com/open-mmlab/mmdeploy/main/demo/resources/human-pose.jpg' # noqa E501 PIC_BEIJING = 'https://raw.githubusercontent.com/QwenLM/Qwen-VL/master/assets/mm_tutorial/Beijing_Small.jpeg' # noqa E501 PIC_CHONGQING = 'https://raw.githubusercontent.com/QwenLM/Qwen-VL/master/assets/mm_tutorial/Chongqing_Small.jpeg' # noqa E501 -PIC_RACCON = 'https://raw.githubusercontent.com/OpenGVLab/InternVL/main/internvl_chat/examples/image1.jpg' # noqa E501 +PIC_REDPANDA = 'https://raw.githubusercontent.com/OpenGVLab/InternVL/main/internvl_chat/examples/image1.jpg' # noqa E501 PIC_PANDA = 'https://raw.githubusercontent.com/OpenGVLab/InternVL/main/internvl_chat/examples/image2.jpg' # noqa E501 DESC = 'What are the similarities and differences between these two images.' # noqa E501 @@ -421,23 +421,23 @@ def internvl_vl_testcase(config, pipe, file): text=f'{IMAGE_TOKEN}{IMAGE_TOKEN}\n{DESC}' # noqa E251,E501 ), dict(type='image_url', - image_url=dict(max_dynamic_patch=12, url=PIC_RACCON)), + image_url=dict(max_dynamic_patch=12, url=PIC_REDPANDA)), dict(type='image_url', image_url=dict(max_dynamic_patch=12, url=PIC_PANDA)) ]) ] response = pipe(messages) - result = 'ski' in response.text.lower() or '滑雪' in response.text.lower() + result = 'pandas' in response.text.lower() or '熊猫' in response.text.lower() file.writelines('result:' + str(result) + - ', reason: Multi-turn example: ski not in ' + + ', reason: combined images: panda not in ' + response.text + '\n') messages.append(dict(role='assistant', content=response.text)) messages.append(dict(role='user', content=DESC)) response = pipe(messages) - result = 'ski' in response.text.lower() or '滑雪' in response.text.lower() + result = 'pandas' in response.text.lower() or '熊猫' in response.text.lower() file.writelines('result:' + str(result) + - ', reason: Multi-turn example: ski not in ' + + ', reason: combined images second: panda not in ' + response.text + '\n') # multi-image multi-round conversation, separate images @@ -451,23 +451,23 @@ def internvl_vl_testcase(config, pipe, file): + # noqa E251,E501 DESC), dict(type='image_url', - image_url=dict(max_dynamic_patch=12, url=PIC_RACCON)), + image_url=dict(max_dynamic_patch=12, url=PIC_REDPANDA)), dict(type='image_url', image_url=dict(max_dynamic_patch=12, url=PIC_PANDA)) ]) ] response = pipe(messages) - result = 'ski' in response.text.lower() or '滑雪' in response.text.lower() + result = 'pandas' in response.text.lower() or '熊猫' in response.text.lower() file.writelines('result:' + str(result) + - ', reason: Multi-turn example: ski not in ' + + ', reason: separate images: panda not in ' + response.text + '\n') messages.append(dict(role='assistant', content=response.text)) messages.append(dict(role='user', content=DESC)) response = pipe(messages) - result = 'ski' in response.text.lower() or '滑雪' in response.text.lower() + result = 'pandas' in response.text.lower() or '熊猫' in response.text.lower() file.writelines('result:' + str(result) + - ', reason: Multi-turn example: ski not in ' + + ', reason: separate images second: panda not in ' + response.text + '\n') # video multi-round conversation @@ -522,9 +522,9 @@ def load_video(video_path, bound=None, num_segments=32): messages = [dict(role='user', content=content)] response = pipe(messages) - result = 'ski' in response.text.lower() or '滑雪' in response.text.lower() + result = 'pandas' in response.text.lower() or '熊猫' in response.text.lower() file.writelines('result:' + str(result) + - ', reason: Multi-turn example: ski not in ' + + ', reason: video images: red panda not in ' + response.text + '\n') messages.append(dict(role='assistant', content=response.text)) @@ -532,9 +532,10 @@ def load_video(video_path, bound=None, num_segments=32): dict(role='user', content='Describe this video in detail. Don\'t repeat.')) response = pipe(messages) - result = 'ski' in response.text.lower() or '滑雪' in response.text.lower() + result = 'red pandas' in response.text.lower( + ) or '熊猫' in response.text.lower() file.writelines('result:' + str(result) + - ', reason: Multi-turn example: ski not in ' + + ', reason: video images: red panda not in ' + response.text + '\n') @@ -549,17 +550,19 @@ def llava_vl_testcase(config, pipe, file): ]) ] response = pipe(messages) - result = 'ski' in response.text.lower() or '滑雪' in response.text.lower() + result = 'buildings' in response.text.lower( + ) or '楼' in response.text.lower() file.writelines('result:' + str(result) + - ', reason: Multi-turn example: ski not in ' + + ', reason: combined images: buildings not in ' + response.text + '\n') messages.append(dict(role='assistant', content=response.text)) messages.append(dict(role='user', content=DESC)) response = pipe(messages) - result = 'ski' in response.text.lower() or '滑雪' in response.text.lower() + result = 'buildings' in response.text.lower( + ) or '楼' in response.text.lower() file.writelines('result:' + str(result) + - ', reason: Multi-turn example: ski not in ' + + ', reason: combined images second: buildings not in ' + response.text + '\n') @@ -570,50 +573,53 @@ def MiniCPM_vl_testcase(config, pipe, file): content=[ dict(type='text', text='Describe the two images in detail.'), dict(type='image_url', - image_url=dict(max_slice_nums=9, url=PIC_RACCON)), + image_url=dict(max_slice_nums=9, url=PIC_REDPANDA)), dict(type='image_url', image_url=dict(max_slice_nums=9, url=PIC_PANDA)) ]) ] response = pipe(messages) - result = 'ski' in response.text.lower() or '滑雪' in response.text.lower() + result = 'pandas' in response.text.lower() or '熊猫' in response.text.lower() file.writelines('result:' + str(result) + - ', reason: Multi-turn example: ski not in ' + + ', reason: multiple images: panda not in ' + response.text + '\n') messages.append(dict(role='assistant', content=response.text)) messages.append(dict(role='user', content=DESC)) response = pipe(messages) - result = 'ski' in response.text.lower() or '滑雪' in response.text.lower() + result = 'pandas' in response.text.lower() or '熊猫' in response.text.lower() file.writelines('result:' + str(result) + - ', reason: Multi-turn example: ski not in ' + + ', reason: multiple images second: panda not in ' + response.text + '\n') # In-context few-shot learning + EXAMPLE1 = 'https://github.com/user-attachments/assets/405d9147-95f6-4f78-8879-606a0aed6707' # noqa E251,E501 + EXAMPLE2 = 'https://github.com/user-attachments/assets/9f2c6ed9-2aa5-4189-9c4f-0b9753024ba1' # noqa E251,E501 + EXAMPLE3 = 'https://github.com/user-attachments/assets/f335b507-1957-4c22-84ae-ed69ff79df38' # noqa E251,E501 question = 'production date' messages = [ dict(role='user', content=[ dict(type='text', text=question), - dict(type='image_url', image_url=dict(url='example1.jpg')), + dict(type='image_url', image_url=dict(url=EXAMPLE1)), ]), - dict(role='assistant', content='2023.08.04'), + dict(role='assistant', content='2021.08.29'), dict(role='user', content=[ dict(type='text', text=question), - dict(type='image_url', image_url=dict(url='example2.jpg')), + dict(type='image_url', image_url=dict(url=EXAMPLE2)), ]), - dict(role='assistant', content='2007.04.24'), + dict(role='assistant', content='1999.05.15'), dict(role='user', content=[ dict(type='text', text=question), - dict(type='image_url', image_url=dict(url='test.jpg')), + dict(type='image_url', image_url=dict(url=EXAMPLE3)), ]) ] response = pipe(messages) - result = 'ski' in response.text.lower() or '滑雪' in response.text.lower() + result = '2021' in response.text.lower() or '14' in response.text.lower() file.writelines('result:' + str(result) + - ', reason: Multi-turn example: ski not in ' + + ', reason: in context learning: 2021 not in ' + response.text + '\n') # Chat with video @@ -653,10 +659,11 @@ def uniform_sample(length, n): messages = [dict(role='user', content=content)] response = pipe(messages) - result = 'ski' in response.text.lower() or '滑雪' in response.text.lower() + result = 'red panda' in response.text.lower( + ) or '熊猫' in response.text.lower() file.writelines('result:' + str(result) + - ', reason: Multi-turn example: ski not in ' + - response.text + '\n') + ', reason: video example: panda not in ' + response.text + + '\n') def Qwen_vl_testcase(config, pipe, file): @@ -670,17 +677,19 @@ def Qwen_vl_testcase(config, pipe, file): ]) ] response = pipe(messages) - result = 'ski' in response.text.lower() or '滑雪' in response.text.lower() + result = 'buildings' in response.text.lower( + ) or '楼' in response.text.lower() file.writelines('result:' + str(result) + - ', reason: Multi-turn example: ski not in ' + + ', reason: combined images: buildings not in ' + response.text + '\n') messages.append(dict(role='assistant', content=response.text)) messages.append(dict(role='user', content=DESC)) response = pipe(messages) - result = 'ski' in response.text.lower() or '滑雪' in response.text.lower() + result = 'buildings' in response.text.lower( + ) or '楼' in response.text.lower() file.writelines('result:' + str(result) + - ', reason: Multi-turn example: ski not in ' + + ', reason: combined images second: buildings not in ' + response.text + '\n') # image resolution for performance boost @@ -702,16 +711,19 @@ def Qwen_vl_testcase(config, pipe, file): ] response = pipe(messages) result = 'ski' in response.text.lower() or '滑雪' in response.text.lower() + result = 'buildings' in response.text.lower( + ) or '楼' in response.text.lower() file.writelines('result:' + str(result) + - ', reason: Multi-turn example: ski not in ' + + ', reason: performance boost: buildings not in ' + response.text + '\n') messages.append(dict(role='assistant', content=response.text)) messages.append(dict(role='user', content=DESC)) response = pipe(messages) - result = 'ski' in response.text.lower() or '滑雪' in response.text.lower() + result = 'buildings' in response.text.lower( + ) or '楼' in response.text.lower() file.writelines('result:' + str(result) + - ', reason: Multi-turn example: ski not in ' + + ', reason: performance boost second: buildings not in ' + response.text + '\n') From d970ad3f380689dcdb8ab7ee23c08d247ce77249 Mon Sep 17 00:00:00 2001 From: zhulin1 Date: Fri, 22 Nov 2024 09:41:08 +0800 Subject: [PATCH 4/9] update --- autotest/utils/pipeline_chat.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/autotest/utils/pipeline_chat.py b/autotest/utils/pipeline_chat.py index a198d8e29d..004be0d695 100644 --- a/autotest/utils/pipeline_chat.py +++ b/autotest/utils/pipeline_chat.py @@ -427,7 +427,7 @@ def internvl_vl_testcase(config, pipe, file): ]) ] response = pipe(messages) - result = 'pandas' in response.text.lower() or '熊猫' in response.text.lower() + result = 'panda' in response.text.lower() or '熊猫' in response.text.lower() file.writelines('result:' + str(result) + ', reason: combined images: panda not in ' + response.text + '\n') @@ -435,7 +435,7 @@ def internvl_vl_testcase(config, pipe, file): messages.append(dict(role='assistant', content=response.text)) messages.append(dict(role='user', content=DESC)) response = pipe(messages) - result = 'pandas' in response.text.lower() or '熊猫' in response.text.lower() + result = 'panda' in response.text.lower() or '熊猫' in response.text.lower() file.writelines('result:' + str(result) + ', reason: combined images second: panda not in ' + response.text + '\n') @@ -457,7 +457,7 @@ def internvl_vl_testcase(config, pipe, file): ]) ] response = pipe(messages) - result = 'pandas' in response.text.lower() or '熊猫' in response.text.lower() + result = 'panda' in response.text.lower() or '熊猫' in response.text.lower() file.writelines('result:' + str(result) + ', reason: separate images: panda not in ' + response.text + '\n') @@ -465,7 +465,7 @@ def internvl_vl_testcase(config, pipe, file): messages.append(dict(role='assistant', content=response.text)) messages.append(dict(role='user', content=DESC)) response = pipe(messages) - result = 'pandas' in response.text.lower() or '熊猫' in response.text.lower() + result = 'panda' in response.text.lower() or '熊猫' in response.text.lower() file.writelines('result:' + str(result) + ', reason: separate images second: panda not in ' + response.text + '\n') @@ -522,7 +522,7 @@ def load_video(video_path, bound=None, num_segments=32): messages = [dict(role='user', content=content)] response = pipe(messages) - result = 'pandas' in response.text.lower() or '熊猫' in response.text.lower() + result = 'panda' in response.text.lower() or '熊猫' in response.text.lower() file.writelines('result:' + str(result) + ', reason: video images: red panda not in ' + response.text + '\n') @@ -551,7 +551,7 @@ def llava_vl_testcase(config, pipe, file): ] response = pipe(messages) result = 'buildings' in response.text.lower( - ) or '楼' in response.text.lower() + ) or '楼' in response.text.lower() or 'skyline' in response.text.lower() file.writelines('result:' + str(result) + ', reason: combined images: buildings not in ' + response.text + '\n') @@ -560,7 +560,7 @@ def llava_vl_testcase(config, pipe, file): messages.append(dict(role='user', content=DESC)) response = pipe(messages) result = 'buildings' in response.text.lower( - ) or '楼' in response.text.lower() + ) or '楼' in response.text.lower() or 'skyline' in response.text.lower() file.writelines('result:' + str(result) + ', reason: combined images second: buildings not in ' + response.text + '\n') @@ -579,7 +579,7 @@ def MiniCPM_vl_testcase(config, pipe, file): ]) ] response = pipe(messages) - result = 'pandas' in response.text.lower() or '熊猫' in response.text.lower() + result = 'panda' in response.text.lower() or '熊猫' in response.text.lower() file.writelines('result:' + str(result) + ', reason: multiple images: panda not in ' + response.text + '\n') @@ -587,7 +587,7 @@ def MiniCPM_vl_testcase(config, pipe, file): messages.append(dict(role='assistant', content=response.text)) messages.append(dict(role='user', content=DESC)) response = pipe(messages) - result = 'pandas' in response.text.lower() or '熊猫' in response.text.lower() + result = 'panda' in response.text.lower() or '熊猫' in response.text.lower() file.writelines('result:' + str(result) + ', reason: multiple images second: panda not in ' + response.text + '\n') @@ -678,7 +678,7 @@ def Qwen_vl_testcase(config, pipe, file): ] response = pipe(messages) result = 'buildings' in response.text.lower( - ) or '楼' in response.text.lower() + ) or '楼' in response.text.lower() or 'skyline' in response.text.lower() file.writelines('result:' + str(result) + ', reason: combined images: buildings not in ' + response.text + '\n') @@ -687,7 +687,7 @@ def Qwen_vl_testcase(config, pipe, file): messages.append(dict(role='user', content=DESC)) response = pipe(messages) result = 'buildings' in response.text.lower( - ) or '楼' in response.text.lower() + ) or '楼' in response.text.lower() or 'skyline' in response.text.lower() file.writelines('result:' + str(result) + ', reason: combined images second: buildings not in ' + response.text + '\n') @@ -712,7 +712,7 @@ def Qwen_vl_testcase(config, pipe, file): response = pipe(messages) result = 'ski' in response.text.lower() or '滑雪' in response.text.lower() result = 'buildings' in response.text.lower( - ) or '楼' in response.text.lower() + ) or '楼' in response.text.lower() or 'skyline' in response.text.lower() file.writelines('result:' + str(result) + ', reason: performance boost: buildings not in ' + response.text + '\n') @@ -721,7 +721,7 @@ def Qwen_vl_testcase(config, pipe, file): messages.append(dict(role='user', content=DESC)) response = pipe(messages) result = 'buildings' in response.text.lower( - ) or '楼' in response.text.lower() + ) or '楼' in response.text.lower() or 'skyline' in response.text.lower() file.writelines('result:' + str(result) + ', reason: performance boost second: buildings not in ' + response.text + '\n') From accc42b9e0b635d44fb941732005bfbd9fd2949b Mon Sep 17 00:00:00 2001 From: zhulin1 Date: Fri, 22 Nov 2024 10:00:50 +0800 Subject: [PATCH 5/9] update --- autotest/utils/pipeline_chat.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/autotest/utils/pipeline_chat.py b/autotest/utils/pipeline_chat.py index 004be0d695..a1a873d431 100644 --- a/autotest/utils/pipeline_chat.py +++ b/autotest/utils/pipeline_chat.py @@ -619,7 +619,7 @@ def MiniCPM_vl_testcase(config, pipe, file): response = pipe(messages) result = '2021' in response.text.lower() or '14' in response.text.lower() file.writelines('result:' + str(result) + - ', reason: in context learning: 2021 not in ' + + ', reason: in context learning: 2021 or 14 not in ' + response.text + '\n') # Chat with video @@ -643,7 +643,7 @@ def uniform_sample(length, n): return frames resource_path = config.get('resource_path') - video_path = resource_path + '/video_test.mp4' + video_path = resource_path + '/red-panda.mp4' frames = encode_video(video_path) question = 'Describe the video' From d68d45f16b83273e68c46f5736cb5b7aa8be643d Mon Sep 17 00:00:00 2001 From: zhulin1 Date: Fri, 22 Nov 2024 10:21:42 +0800 Subject: [PATCH 6/9] update --- autotest/utils/pipeline_chat.py | 49 +++++++++++++++++++-------------- 1 file changed, 29 insertions(+), 20 deletions(-) diff --git a/autotest/utils/pipeline_chat.py b/autotest/utils/pipeline_chat.py index a1a873d431..4318eaca16 100644 --- a/autotest/utils/pipeline_chat.py +++ b/autotest/utils/pipeline_chat.py @@ -284,6 +284,7 @@ def assert_pipeline_single_element(output, PIC_REDPANDA = 'https://raw.githubusercontent.com/OpenGVLab/InternVL/main/internvl_chat/examples/image1.jpg' # noqa E501 PIC_PANDA = 'https://raw.githubusercontent.com/OpenGVLab/InternVL/main/internvl_chat/examples/image2.jpg' # noqa E501 DESC = 'What are the similarities and differences between these two images.' # noqa E501 +DESC_ZH = '两张图有什么相同和不同的地方.' # noqa E501 def run_pipeline_vl_chat_test(config, @@ -397,6 +398,7 @@ def run_pipeline_vl_chat_test(config, if 'internvl' in model_case.lower(): internvl_vl_testcase(config, pipe, file) + internvl_vl_testcase(config, pipe, file, 'cn') if 'llava' in model_case.lower(): llava_vl_testcase(config, pipe, file) if 'minicpm' in model_case.lower(): @@ -410,21 +412,22 @@ def run_pipeline_vl_chat_test(config, torch.cuda.empty_cache() -def internvl_vl_testcase(config, pipe, file): +def internvl_vl_testcase(config, pipe, file, lang='en'): + if lang == 'cn': + description = DESC_ZH + else: + description = DESC # multi-image multi-round conversation, combined images messages = [ - dict( - role='user', - content=[ - dict( - type='text', - text=f'{IMAGE_TOKEN}{IMAGE_TOKEN}\n{DESC}' # noqa E251,E501 - ), - dict(type='image_url', - image_url=dict(max_dynamic_patch=12, url=PIC_REDPANDA)), - dict(type='image_url', - image_url=dict(max_dynamic_patch=12, url=PIC_PANDA)) - ]) + dict(role='user', + content=[ + dict(type='text', + text=f'{IMAGE_TOKEN}{IMAGE_TOKEN}\n{description}'), + dict(type='image_url', + image_url=dict(max_dynamic_patch=12, url=PIC_REDPANDA)), + dict(type='image_url', + image_url=dict(max_dynamic_patch=12, url=PIC_PANDA)) + ]) ] response = pipe(messages) result = 'panda' in response.text.lower() or '熊猫' in response.text.lower() @@ -433,7 +436,7 @@ def internvl_vl_testcase(config, pipe, file): response.text + '\n') messages.append(dict(role='assistant', content=response.text)) - messages.append(dict(role='user', content=DESC)) + messages.append(dict(role='user', content=description)) response = pipe(messages) result = 'panda' in response.text.lower() or '熊猫' in response.text.lower() file.writelines('result:' + str(result) + @@ -449,7 +452,7 @@ def internvl_vl_testcase(config, pipe, file): type='text', text=f'Image-1: {IMAGE_TOKEN}\nImage-2: {IMAGE_TOKEN}\n' + # noqa E251,E501 - DESC), + description), dict(type='image_url', image_url=dict(max_dynamic_patch=12, url=PIC_REDPANDA)), dict(type='image_url', @@ -463,7 +466,7 @@ def internvl_vl_testcase(config, pipe, file): response.text + '\n') messages.append(dict(role='assistant', content=response.text)) - messages.append(dict(role='user', content=DESC)) + messages.append(dict(role='user', content=description)) response = pipe(messages) result = 'panda' in response.text.lower() or '熊猫' in response.text.lower() file.writelines('result:' + str(result) + @@ -508,7 +511,10 @@ def load_video(video_path, bound=None, num_segments=32): for i in range(len(imgs)): question = question + f'Frame{i+1}: {IMAGE_TOKEN}\n' - question += 'What is the red panda doing?' + if lang == 'cn': + question += '小熊猫在做什么?' + else: + question += 'What is the red panda doing?' content = [{'type': 'text', 'text': question}] for img in imgs: @@ -528,9 +534,12 @@ def load_video(video_path, bound=None, num_segments=32): response.text + '\n') messages.append(dict(role='assistant', content=response.text)) - messages.append( - dict(role='user', - content='Describe this video in detail. Don\'t repeat.')) + if lang == 'cn': + messages.append(dict(role='user', content='描述视频详情,不要重复')) + else: + messages.append( + dict(role='user', + content='Describe this video in detail. Don\'t repeat.')) response = pipe(messages) result = 'red pandas' in response.text.lower( ) or '熊猫' in response.text.lower() From 4ee75d94d7350103e83fb3f8e3a52eabef5af903 Mon Sep 17 00:00:00 2001 From: zhulin1 Date: Fri, 22 Nov 2024 12:55:22 +0800 Subject: [PATCH 7/9] update --- autotest/utils/pipeline_chat.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/autotest/utils/pipeline_chat.py b/autotest/utils/pipeline_chat.py index 4318eaca16..023e4ac142 100644 --- a/autotest/utils/pipeline_chat.py +++ b/autotest/utils/pipeline_chat.py @@ -399,8 +399,6 @@ def run_pipeline_vl_chat_test(config, if 'internvl' in model_case.lower(): internvl_vl_testcase(config, pipe, file) internvl_vl_testcase(config, pipe, file, 'cn') - if 'llava' in model_case.lower(): - llava_vl_testcase(config, pipe, file) if 'minicpm' in model_case.lower(): MiniCPM_vl_testcase(config, pipe, file) if 'qwen' in model_case.lower(): @@ -541,7 +539,7 @@ def load_video(video_path, bound=None, num_segments=32): dict(role='user', content='Describe this video in detail. Don\'t repeat.')) response = pipe(messages) - result = 'red pandas' in response.text.lower( + result = 'red panda' in response.text.lower( ) or '熊猫' in response.text.lower() file.writelines('result:' + str(result) + ', reason: video images: red panda not in ' + @@ -560,7 +558,8 @@ def llava_vl_testcase(config, pipe, file): ] response = pipe(messages) result = 'buildings' in response.text.lower( - ) or '楼' in response.text.lower() or 'skyline' in response.text.lower() + ) or '楼' in response.text.lower() or 'skyline' in response.text.lower( + ) or 'cityscape' in response.text.lower() file.writelines('result:' + str(result) + ', reason: combined images: buildings not in ' + response.text + '\n') @@ -569,7 +568,8 @@ def llava_vl_testcase(config, pipe, file): messages.append(dict(role='user', content=DESC)) response = pipe(messages) result = 'buildings' in response.text.lower( - ) or '楼' in response.text.lower() or 'skyline' in response.text.lower() + ) or '楼' in response.text.lower() or 'skyline' in response.text.lower( + ) or 'cityscape' in response.text.lower() file.writelines('result:' + str(result) + ', reason: combined images second: buildings not in ' + response.text + '\n') @@ -687,7 +687,8 @@ def Qwen_vl_testcase(config, pipe, file): ] response = pipe(messages) result = 'buildings' in response.text.lower( - ) or '楼' in response.text.lower() or 'skyline' in response.text.lower() + ) or '楼' in response.text.lower() or 'skyline' in response.text.lower( + ) or 'cityscape' in response.text.lower() file.writelines('result:' + str(result) + ', reason: combined images: buildings not in ' + response.text + '\n') @@ -696,7 +697,8 @@ def Qwen_vl_testcase(config, pipe, file): messages.append(dict(role='user', content=DESC)) response = pipe(messages) result = 'buildings' in response.text.lower( - ) or '楼' in response.text.lower() or 'skyline' in response.text.lower() + ) or '楼' in response.text.lower() or 'skyline' in response.text.lower( + ) or 'cityscape' in response.text.lower() file.writelines('result:' + str(result) + ', reason: combined images second: buildings not in ' + response.text + '\n') @@ -721,7 +723,8 @@ def Qwen_vl_testcase(config, pipe, file): response = pipe(messages) result = 'ski' in response.text.lower() or '滑雪' in response.text.lower() result = 'buildings' in response.text.lower( - ) or '楼' in response.text.lower() or 'skyline' in response.text.lower() + ) or '楼' in response.text.lower() or 'skyline' in response.text.lower( + ) or 'cityscape' in response.text.lower() file.writelines('result:' + str(result) + ', reason: performance boost: buildings not in ' + response.text + '\n') @@ -730,7 +733,8 @@ def Qwen_vl_testcase(config, pipe, file): messages.append(dict(role='user', content=DESC)) response = pipe(messages) result = 'buildings' in response.text.lower( - ) or '楼' in response.text.lower() or 'skyline' in response.text.lower() + ) or '楼' in response.text.lower() or 'skyline' in response.text.lower( + ) or 'cityscape' in response.text.lower() file.writelines('result:' + str(result) + ', reason: performance boost second: buildings not in ' + response.text + '\n') From 10ff0339cb8bd1b2f3c6ddd0c47a903d08d27aaf Mon Sep 17 00:00:00 2001 From: zhulin1 Date: Tue, 26 Nov 2024 13:37:10 +0800 Subject: [PATCH 8/9] update --- autotest/config-v100.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/autotest/config-v100.yaml b/autotest/config-v100.yaml index b2714087f4..507f81ceb6 100644 --- a/autotest/config-v100.yaml +++ b/autotest/config-v100.yaml @@ -97,9 +97,14 @@ pytorch_vl_model: turbomind_quatization: no_awq: + - meta-llama/Meta-Llama-3-1-8B-Instruct + - meta-llama/Meta-Llama-3-8B-Instruct + - internlm/internlm-xcomposer2d5-7b + - OpenGVLab/Mini-InternVL-Chat-2B-V1-5 - Qwen/Qwen2-VL-2B-Instruct - Qwen/Qwen2-VL-7B-Instruct - mistralai/Mistral-7B-Instruct-v0.3 + - THUDM/glm-4-9b-chat - deepseek-ai/deepseek-coder-1.3b-instruct - codellama/CodeLlama-7b-Instruct-hf gptq: From a221cd60969fb518d033cdc485eca79941ff53bd Mon Sep 17 00:00:00 2001 From: zhulin1 Date: Tue, 26 Nov 2024 18:31:13 +0800 Subject: [PATCH 9/9] update --- autotest/config.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/autotest/config.yaml b/autotest/config.yaml index b11a21523c..1f78411ee9 100644 --- a/autotest/config.yaml +++ b/autotest/config.yaml @@ -19,6 +19,7 @@ tp_config: Qwen2-7B-Instruct-GPTQ-Int4: 2 InternVL2-40B: 2 MiniCPM-V-2_6: 2 + Qwen2.5-72B-Instruct: 4 turbomind_chat_model: - meta-llama/Llama-3.2-1B-Instruct @@ -237,7 +238,8 @@ benchmark_model: - internlm/internlm2_5-7b-chat - internlm/internlm2_5-20b-chat - THUDM/glm-4-9b-chat - - Qwen/Qwen2-7B-Instruct + - Qwen/Qwen2.5-7B-Instruct + - Qwen/Qwen2.5-72B-Instruct - mistralai/Mistral-7B-Instruct-v0.3 - mistralai/Mixtral-8x7B-Instruct-v0.1 - deepseek-ai/DeepSeek-V2-Lite-Chat