From 637796268280e867d60b0bedb82cac2dc50fc44c Mon Sep 17 00:00:00 2001 From: kcz358 Date: Wed, 18 Dec 2024 03:28:00 +0000 Subject: [PATCH] Add kino --- lmms_eval/models/__init__.py | 1 + lmms_eval/models/kino.py | 327 +++++++++++++++++++++++++++++++++++ 2 files changed, 328 insertions(+) create mode 100644 lmms_eval/models/kino.py diff --git a/lmms_eval/models/__init__.py b/lmms_eval/models/__init__.py index ddf281f6..4134ef77 100644 --- a/lmms_eval/models/__init__.py +++ b/lmms_eval/models/__init__.py @@ -55,6 +55,7 @@ "videochat2": "VideoChat2", "llama_vision": "LlamaVision", "aria": "Aria", + "kino": "Kino", } diff --git a/lmms_eval/models/kino.py b/lmms_eval/models/kino.py new file mode 100644 index 00000000..be5d072f --- /dev/null +++ b/lmms_eval/models/kino.py @@ -0,0 +1,327 @@ +import warnings +from typing import List, Optional, Tuple, Union + +import numpy as np +import PIL +import torch +from accelerate import Accelerator, DistributedType +from accelerate.state import AcceleratorState +from decord import VideoReader, cpu +from synvo_engine.models.kino import KinoForConditionalGeneration +from synvo_engine.models.kino.processing_kino import KinoProcessor +from tqdm import tqdm +from transformers import AutoConfig, AutoProcessor + +from lmms_eval import utils +from lmms_eval.api.instance import Instance +from lmms_eval.api.model import lmms +from lmms_eval.api.registry import register_model +from lmms_eval.models.model_utils.audio_processing import downsample_audio + +warnings.filterwarnings("ignore") + +from loguru import logger as eval_logger + +DEFAULT_IMAGE_TOKEN = "" +DEFAULT_VIDEO_TOKEN = "