Lint

EvolvingLMMs-Lab · Sep 1, 2024 · e973027 · e973027
1 parent ad0f9cb
commit e973027
Show file tree

Hide file tree

Showing 15 changed files with 162 additions and 147 deletions.
diff --git a/lmms_eval/models/gpt4v.py b/lmms_eval/models/gpt4v.py
@@ -109,11 +109,11 @@ def encode_video(self, video_path, for_get_frames_num):
         vr = VideoReader(video_path, ctx=cpu(0))
         total_frame_num = len(vr)
         uniform_sampled_frames = np.linspace(0, total_frame_num - 1, for_get_frames_num, dtype=int)
-        
+
         # Ensure the last frame is included
         if total_frame_num - 1 not in uniform_sampled_frames:
             uniform_sampled_frames = np.append(uniform_sampled_frames, total_frame_num - 1)
-        
+
         frame_idx = uniform_sampled_frames.tolist()
         frames = vr.get_batch(frame_idx).asnumpy()
 

diff --git a/lmms_eval/models/mantis.py b/lmms_eval/models/mantis.py
@@ -4,29 +4,29 @@
 
 
 import copy
-from tqdm import tqdm
+import warnings
 from datetime import timedelta
+from typing import List, Optional, Tuple, Union
+
+from accelerate import Accelerator, DistributedType, InitProcessGroupKwargs
+from accelerate.state import AcceleratorState
+from loguru import logger as eval_logger
+from packaging import version
+from tqdm import tqdm
 
 from lmms_eval import utils
 from lmms_eval.api.instance import Instance
 from lmms_eval.api.model import lmms
 from lmms_eval.api.registry import register_model
 from lmms_eval.utils import stop_sequences_criteria
 
-from accelerate import Accelerator, DistributedType, InitProcessGroupKwargs
-from accelerate.state import AcceleratorState
-from typing import List, Optional, Union, Tuple
-from packaging import version
-import warnings
-
-from loguru import logger as eval_logger
-
 warnings.filterwarnings("ignore")
 
 try:
-    from mantis.models.mllava import LlavaForConditionalGeneration, MLlavaProcessor
+    from mantis.models.conversation import conv_mllava_v1 as default_conv
+    from mantis.models.conversation import conv_templates
     from mantis.models.mfuyu import MFuyuForCausalLM, MFuyuProcessor
-    from mantis.models.conversation import conv_mllava_v1 as default_conv, conv_templates
+    from mantis.models.mllava import LlavaForConditionalGeneration, MLlavaProcessor
 
 except Exception as e:
     eval_logger.debug("Mantis is not installed. Please install Mantis to use this model.\nError: %s" % e)

diff --git a/lmms_eval/models/model_utils/load_video.py b/lmms_eval/models/model_utils/load_video.py
@@ -38,34 +38,34 @@ def read_video_pyav(video_path, num_frm=8):
             total_frames = container.streams.video[0].frames
             sampled_frm = min(total_frames, num_frm)
             indices = np.linspace(0, total_frames - 1, sampled_frm, dtype=int)
-            
+
             # Append the last frame index if not already included
             if total_frames - 1 not in indices:
                 indices = np.append(indices, total_frames - 1)
-                
+
             frames = record_video_length_stream(container, indices)
         except:
             container = av.open(video_path)
             frames = record_video_length_packet(container)
             total_frames = len(frames)
             sampled_frm = min(total_frames, num_frm)
             indices = np.linspace(0, total_frames - 1, sampled_frm, dtype=int)
-            
+
             # Append the last frame index if not already included
             if total_frames - 1 not in indices:
                 indices = np.append(indices, total_frames - 1)
-                
+
             frames = [frames[i] for i in indices]
     else:
         container = av.open(video_path)
         frames = record_video_length_packet(container)
         total_frames = len(frames)
         sampled_frm = min(total_frames, num_frm)
         indices = np.linspace(0, total_frames - 1, sampled_frm, dtype=int)
-        
+
         # Append the last frame index if not already included
         if total_frames - 1 not in indices:
             indices = np.append(indices, total_frames - 1)
-            
+
         frames = [frames[i] for i in indices]
     return np.stack([x.to_ndarray(format="rgb24") for x in frames])
diff --git a/lmms_eval/tasks/mirb/utils.py b/lmms_eval/tasks/mirb/utils.py
@@ -1,10 +1,10 @@
-from lmms_eval.filters.extraction import ExtendedRegexFilter
-from lmms_eval.filters.transformation import MapFilter
+import logging
 import re
-import numpy as np
 
+import numpy as np
 
-import logging
+from lmms_eval.filters.extraction import ExtendedRegexFilter
+from lmms_eval.filters.transformation import MapFilter
 
 eval_logger = logging.getLogger("lmms-eval")
 

diff --git a/lmms_eval/tasks/refcoco+/utils_rec.py b/lmms_eval/tasks/refcoco+/utils_rec.py
@@ -1,5 +1,6 @@
-import re 
 import logging
+import re
+
 from datasets import Dataset
 
 eval_logger = logging.getLogger("lmms-eval")
@@ -14,22 +15,17 @@ def refcoco_bbox_rec_preprocess_dataset(dataset: Dataset):
 
     # Original bbox format (top x, top y, width, height)
     # Convert to (top-left x, top-left y, bottom-right x, bottom-right y)
-    # Normalize the bounding box coordinates to be between 0 and 1 
+    # Normalize the bounding box coordinates to be between 0 and 1
     # using the image width and height
-    dataset = dataset.map(
-        lambda x: {"bbox": [x["bbox"][0] / x["image_width"], 
-                            x["bbox"][1] / x["image_height"],
-                           (x["bbox"][0] + x["bbox"][2]) / x["image_width"],
-                           (x["bbox"][1] + x["bbox"][3]) / x["image_height"]]}
-    )
+    dataset = dataset.map(lambda x: {"bbox": [x["bbox"][0] / x["image_width"], x["bbox"][1] / x["image_height"], (x["bbox"][0] + x["bbox"][2]) / x["image_width"], (x["bbox"][1] + x["bbox"][3]) / x["image_height"]]})
 
     # currently, the dataset has `answer` as a list of strings
     # each answer should be its own row
     # we will explode the dataset to have one row per answer
     # duplicate the other columns
     def explode_answers(example):
-        answers = example.pop('answer')
-        return [{'answer': answer, **example} for answer in answers]
+        answers = example.pop("answer")
+        return [{"answer": answer, **example} for answer in answers]
 
     # Apply the function to each element, collecting the results
     exploded_rows = []
@@ -50,8 +46,11 @@ def refcoco_bbox_rec_doc_to_visual(doc):
 
 
 def refcoco_bbox_rec_doc_to_text(doc):
-    assert isinstance(doc['answer'], str), "Answer must be a string"
-    return "Bounding box coordinates are specified in the format (top-left x, top-left y, bottom-right x, bottom-right y). All values are floating point numbers bounded between 0 and 1. Please provide the bounding box coordinate of the region this sentence describes: " + doc['answer']
+    assert isinstance(doc["answer"], str), "Answer must be a string"
+    return (
+        "Bounding box coordinates are specified in the format (top-left x, top-left y, bottom-right x, bottom-right y). All values are floating point numbers bounded between 0 and 1. Please provide the bounding box coordinate of the region this sentence describes: "
+        + doc["answer"]
+    )
 
 
 def parse_float_sequence_within(input_str):
@@ -65,15 +64,15 @@ def parse_float_sequence_within(input_str):
     list: A list of four floats if the pattern is found, or a list of four zeros if the pattern is not found.
     """
     # Define the regex pattern to find the first instance of four floats within square brackets
-    pattern = r'\[\s*(-?\d+(?:\.\d+)?),\s*(-?\d+(?:\.\d+)?),\s*(-?\d+(?:\.\d+)?),\s*(-?\d+(?:\.\d+)?)\s*\]'
-    
+    pattern = r"\[\s*(-?\d+(?:\.\d+)?),\s*(-?\d+(?:\.\d+)?),\s*(-?\d+(?:\.\d+)?),\s*(-?\d+(?:\.\d+)?)\s*\]"
+
     # Use re.search to find the first match of the pattern in the input string
     match = re.search(pattern, input_str)
-    
+
     # If a match is found, convert the captured groups into a list of floats
     if match:
         return [float(match.group(i)) for i in range(1, 5)]
-    
+
     # If the input does not contain the pattern, return the null float sequence
     return [0, 0, 0, 0]
 
@@ -89,7 +88,7 @@ def refcoco_bbox_rec_process_result(doc, result):
     pred = result[0] if len(result) > 0 else ""
     pred = parse_float_sequence_within(pred)
     ann_id = doc["question_id"]
-    data_dict = {"answer": doc["answer"], "pred": pred, "ann_id": ann_id, 'bbox': doc['bbox']}
+    data_dict = {"answer": doc["answer"], "pred": pred, "ann_id": ann_id, "bbox": doc["bbox"]}
     return {f"refcoco_{metric}": data_dict for metric in COCO_REC_METRICS}
 
 
@@ -173,19 +172,19 @@ def refcoco_bbox_rec_aggregation_result(results, metric):
     - dict: Dictionary containing the aggregated results for the specified metric.
     """
     scorers = {
-        'IoU': compute_iou,
-        'ACC@0.1': lambda x, y: compute_accuracy(x, y, 0.1),
-        'ACC@0.3': lambda x, y: compute_accuracy(x, y, 0.3),
-        'ACC@0.5': lambda x, y: compute_accuracy(x, y, 0.5),
-        'ACC@0.7': lambda x, y: compute_accuracy(x, y, 0.7),
-        'ACC@0.9': lambda x, y: compute_accuracy(x, y, 0.9),
-        'Center_ACC': compute_center_accuracy
+        "IoU": compute_iou,
+        "ACC@0.1": lambda x, y: compute_accuracy(x, y, 0.1),
+        "ACC@0.3": lambda x, y: compute_accuracy(x, y, 0.3),
+        "ACC@0.5": lambda x, y: compute_accuracy(x, y, 0.5),
+        "ACC@0.7": lambda x, y: compute_accuracy(x, y, 0.7),
+        "ACC@0.9": lambda x, y: compute_accuracy(x, y, 0.9),
+        "Center_ACC": compute_center_accuracy,
     }
     results_dict = {metric: []}
     for result in results:
         # Extract the ground truth and predicted bounding boxes
-        gt_bbox = result['bbox']
-        pred_bbox = result['pred']
+        gt_bbox = result["bbox"]
+        pred_bbox = result["pred"]
         # Compute the specified metric between the ground truth and predicted bounding boxes
         score = scorers[metric](gt_bbox, pred_bbox)
         results_dict[metric].append(score)
@@ -201,6 +200,7 @@ def refcoco_bbox_rec_iou(results):
 def refcoco_bbox_rec_acc01(results):
     return refcoco_bbox_rec_aggregation_result(results, "ACC@0.1")
 
+
 def refcoco_bbox_rec_acc03(results):
     return refcoco_bbox_rec_aggregation_result(results, "ACC@0.3")
 

diff --git a/lmms_eval/tasks/refcoco/utils_rec.py b/lmms_eval/tasks/refcoco/utils_rec.py
@@ -1,5 +1,6 @@
-import re 
 import logging
+import re
+
 from datasets import Dataset
 
 eval_logger = logging.getLogger("lmms-eval")
@@ -14,22 +15,17 @@ def refcoco_bbox_rec_preprocess_dataset(dataset: Dataset):
 
     # Original bbox format (top x, top y, width, height)
     # Convert to (top-left x, top-left y, bottom-right x, bottom-right y)
-    # Normalize the bounding box coordinates to be between 0 and 1 
+    # Normalize the bounding box coordinates to be between 0 and 1
     # using the image width and height
-    dataset = dataset.map(
-        lambda x: {"bbox": [x["bbox"][0] / x["image_width"], 
-                            x["bbox"][1] / x["image_height"],
-                           (x["bbox"][0] + x["bbox"][2]) / x["image_width"],
-                           (x["bbox"][1] + x["bbox"][3]) / x["image_height"]]}
-    )
+    dataset = dataset.map(lambda x: {"bbox": [x["bbox"][0] / x["image_width"], x["bbox"][1] / x["image_height"], (x["bbox"][0] + x["bbox"][2]) / x["image_width"], (x["bbox"][1] + x["bbox"][3]) / x["image_height"]]})
 
     # currently, the dataset has `answer` as a list of strings
     # each answer should be its own row
     # we will explode the dataset to have one row per answer
     # duplicate the other columns
     def explode_answers(example):
-        answers = example.pop('answer')
-        return [{'answer': answer, **example} for answer in answers]
+        answers = example.pop("answer")
+        return [{"answer": answer, **example} for answer in answers]
 
     # Apply the function to each element, collecting the results
     exploded_rows = []
@@ -50,8 +46,11 @@ def refcoco_bbox_rec_doc_to_visual(doc):
 
 
 def refcoco_bbox_rec_doc_to_text(doc):
-    assert isinstance(doc['answer'], str), "Answer must be a string"
-    return "Bounding box coordinates are specified in the format (top-left x, top-left y, bottom-right x, bottom-right y). All values are floating point numbers bounded between 0 and 1. Please provide the bounding box coordinate of the region this sentence describes: " + doc['answer']
+    assert isinstance(doc["answer"], str), "Answer must be a string"
+    return (
+        "Bounding box coordinates are specified in the format (top-left x, top-left y, bottom-right x, bottom-right y). All values are floating point numbers bounded between 0 and 1. Please provide the bounding box coordinate of the region this sentence describes: "
+        + doc["answer"]
+    )
 
 
 def parse_float_sequence_within(input_str):
@@ -65,15 +64,15 @@ def parse_float_sequence_within(input_str):
     list: A list of four floats if the pattern is found, or a list of four zeros if the pattern is not found.
     """
     # Define the regex pattern to find the first instance of four floats within square brackets
-    pattern = r'\[\s*(-?\d+(?:\.\d+)?),\s*(-?\d+(?:\.\d+)?),\s*(-?\d+(?:\.\d+)?),\s*(-?\d+(?:\.\d+)?)\s*\]'
-    
+    pattern = r"\[\s*(-?\d+(?:\.\d+)?),\s*(-?\d+(?:\.\d+)?),\s*(-?\d+(?:\.\d+)?),\s*(-?\d+(?:\.\d+)?)\s*\]"
+
     # Use re.search to find the first match of the pattern in the input string
     match = re.search(pattern, input_str)
-    
+
     # If a match is found, convert the captured groups into a list of floats
     if match:
         return [float(match.group(i)) for i in range(1, 5)]
-    
+
     # If the input does not contain the pattern, return the null float sequence
     return [0, 0, 0, 0]
 
@@ -89,7 +88,7 @@ def refcoco_bbox_rec_process_result(doc, result):
     pred = result[0] if len(result) > 0 else ""
     pred = parse_float_sequence_within(pred)
     ann_id = doc["question_id"]
-    data_dict = {"answer": doc["answer"], "pred": pred, "ann_id": ann_id, 'bbox': doc['bbox']}
+    data_dict = {"answer": doc["answer"], "pred": pred, "ann_id": ann_id, "bbox": doc["bbox"]}
     return {f"refcoco_{metric}": data_dict for metric in COCO_REC_METRICS}
 
 
@@ -173,19 +172,19 @@ def refcoco_bbox_rec_aggregation_result(results, metric):
     - dict: Dictionary containing the aggregated results for the specified metric.
     """
     scorers = {
-        'IoU': compute_iou,
-        'ACC@0.1': lambda x, y: compute_accuracy(x, y, 0.1),
-        'ACC@0.3': lambda x, y: compute_accuracy(x, y, 0.3),
-        'ACC@0.5': lambda x, y: compute_accuracy(x, y, 0.5),
-        'ACC@0.7': lambda x, y: compute_accuracy(x, y, 0.7),
-        'ACC@0.9': lambda x, y: compute_accuracy(x, y, 0.9),
-        'Center_ACC': compute_center_accuracy
+        "IoU": compute_iou,
+        "ACC@0.1": lambda x, y: compute_accuracy(x, y, 0.1),
+        "ACC@0.3": lambda x, y: compute_accuracy(x, y, 0.3),
+        "ACC@0.5": lambda x, y: compute_accuracy(x, y, 0.5),
+        "ACC@0.7": lambda x, y: compute_accuracy(x, y, 0.7),
+        "ACC@0.9": lambda x, y: compute_accuracy(x, y, 0.9),
+        "Center_ACC": compute_center_accuracy,
     }
     results_dict = {metric: []}
     for result in results:
         # Extract the ground truth and predicted bounding boxes
-        gt_bbox = result['bbox']
-        pred_bbox = result['pred']
+        gt_bbox = result["bbox"]
+        pred_bbox = result["pred"]
         # Compute the specified metric between the ground truth and predicted bounding boxes
         score = scorers[metric](gt_bbox, pred_bbox)
         results_dict[metric].append(score)
@@ -201,6 +200,7 @@ def refcoco_bbox_rec_iou(results):
 def refcoco_bbox_rec_acc01(results):
     return refcoco_bbox_rec_aggregation_result(results, "ACC@0.1")
 
+
 def refcoco_bbox_rec_acc03(results):
     return refcoco_bbox_rec_aggregation_result(results, "ACC@0.3")