diff --git a/lmms_eval/__init__.py b/lmms_eval/__init__.py index 88a91ce1..317c0291 100755 --- a/lmms_eval/__init__.py +++ b/lmms_eval/__init__.py @@ -1 +1 @@ -from .evaluator import evaluate,simple_evaluate \ No newline at end of file +from .evaluator import evaluate, simple_evaluate diff --git a/lmms_eval/evaluator.py b/lmms_eval/evaluator.py index fff43fb4..ed0e8115 100755 --- a/lmms_eval/evaluator.py +++ b/lmms_eval/evaluator.py @@ -44,8 +44,6 @@ @positional_deprecated def simple_evaluate( - tuned_model, - tuned_model_tokenizer, model, model_args: Optional[Union[str, dict]] = None, tasks: Optional[List[Union[str, dict, object]]] = None, @@ -75,6 +73,7 @@ def simple_evaluate( torch_random_seed: int = 1234, fewshot_random_seed: int = 1234, cli_args=None, + **kwargs, ): """Instantiate and evaluate a model on a list of tasks. @@ -163,15 +162,16 @@ def simple_evaluate( model_args = "" ModelClass = get_model(model) - lm = ModelClass.create_from_arg_string( - tuned_model, - tuned_model_tokenizer, - model_args, - { - "batch_size": batch_size, - "device": device, - }, - ) + additional_config = { + "batch_size": batch_size, + "device": device, + } + + if "tuned_model" in kwargs and "tuned_model_tokenizer" in kwargs: + additional_config["tuned_model"] = kwargs["tuned_model"] + additional_config["tuned_model_tokenizer"] = kwargs["tuned_model_tokenizer"] + + lm = ModelClass.create_from_arg_string(model_args, additional_config) if task_manager is None: task_manager = TaskManager(verbosity, model_name=model) @@ -598,10 +598,9 @@ def evaluate( if os.path.exists(f"{cli_args.output_path}/rank{int(os.environ.get('RANK', 0))}_metric_eval_done.txt"): os.remove(f"{cli_args.output_path}/rank{int(os.environ.get('RANK', 0))}_metric_eval_done.txt") - if not cli_args.output_path.exists(): cli_args.output_path.mkdir(parents=True, exist_ok=True) - + if lm.rank == 0: ### Get task ordering for correct sample-wide aggregation group_to_task = {} @@ -714,20 +713,19 @@ def evaluate( } if log_samples: results_dict["samples"] = dict(samples) - + with open(f"{cli_args.output_path}/rank{int(os.environ.get('RANK', 0))}_metric_eval_done.txt", "w") as f: f.write(f"rank {int(os.environ.get('RANK', 0))} eval done") return results_dict - + else: results_dict = None - with open(f"{cli_args.output_path}/rank{int(os.environ.get('RANK', 0))}_metric_eval_done.txt", "w") as f: f.write(f"rank {int(os.environ.get('RANK', 0))} eval done") while len([file for file in os.listdir(cli_args.output_path) if file.endswith("metric_eval_done.txt")]) < lm._world_size: time.sleep(1) - + else: return None diff --git a/lmms_eval/models/gpt4v.py b/lmms_eval/models/gpt4v.py index 2eebaf3c..aac62a87 100755 --- a/lmms_eval/models/gpt4v.py +++ b/lmms_eval/models/gpt4v.py @@ -109,11 +109,11 @@ def encode_video(self, video_path, for_get_frames_num): vr = VideoReader(video_path, ctx=cpu(0)) total_frame_num = len(vr) uniform_sampled_frames = np.linspace(0, total_frame_num - 1, for_get_frames_num, dtype=int) - + # Ensure the last frame is included if total_frame_num - 1 not in uniform_sampled_frames: uniform_sampled_frames = np.append(uniform_sampled_frames, total_frame_num - 1) - + frame_idx = uniform_sampled_frames.tolist() frames = vr.get_batch(frame_idx).asnumpy() diff --git a/lmms_eval/models/model_utils/load_video.py b/lmms_eval/models/model_utils/load_video.py index 5bf9b56a..0c4ea23d 100644 --- a/lmms_eval/models/model_utils/load_video.py +++ b/lmms_eval/models/model_utils/load_video.py @@ -38,11 +38,11 @@ def read_video_pyav(video_path, num_frm=8): total_frames = container.streams.video[0].frames sampled_frm = min(total_frames, num_frm) indices = np.linspace(0, total_frames - 1, sampled_frm, dtype=int) - + # Append the last frame index if not already included if total_frames - 1 not in indices: indices = np.append(indices, total_frames - 1) - + frames = record_video_length_stream(container, indices) except: container = av.open(video_path) @@ -50,11 +50,11 @@ def read_video_pyav(video_path, num_frm=8): total_frames = len(frames) sampled_frm = min(total_frames, num_frm) indices = np.linspace(0, total_frames - 1, sampled_frm, dtype=int) - + # Append the last frame index if not already included if total_frames - 1 not in indices: indices = np.append(indices, total_frames - 1) - + frames = [frames[i] for i in indices] else: container = av.open(video_path) @@ -62,10 +62,10 @@ def read_video_pyav(video_path, num_frm=8): total_frames = len(frames) sampled_frm = min(total_frames, num_frm) indices = np.linspace(0, total_frames - 1, sampled_frm, dtype=int) - + # Append the last frame index if not already included if total_frames - 1 not in indices: indices = np.append(indices, total_frames - 1) - + frames = [frames[i] for i in indices] return np.stack([x.to_ndarray(format="rgb24") for x in frames]) diff --git a/lmms_eval/utils.py b/lmms_eval/utils.py index e101e2b7..0e940395 100755 --- a/lmms_eval/utils.py +++ b/lmms_eval/utils.py @@ -32,6 +32,7 @@ import gc from itertools import islice + import numpy as np import pytz import torch