
Commit

Lint
kcz358 committed Sep 1, 2024
1 parent ad0f9cb commit e973027
Showing 15 changed files with 162 additions and 147 deletions.
4 changes: 2 additions & 2 deletions lmms_eval/models/gpt4v.py
@@ -109,11 +109,11 @@ def encode_video(self, video_path, for_get_frames_num):
vr = VideoReader(video_path, ctx=cpu(0))
total_frame_num = len(vr)
uniform_sampled_frames = np.linspace(0, total_frame_num - 1, for_get_frames_num, dtype=int)

# Ensure the last frame is included
if total_frame_num - 1 not in uniform_sampled_frames:
uniform_sampled_frames = np.append(uniform_sampled_frames, total_frame_num - 1)

frame_idx = uniform_sampled_frames.tolist()
frames = vr.get_batch(frame_idx).asnumpy()
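
For reference outside the diff, here is a minimal standalone sketch of the sampling pattern this hunk touches, assuming decord and numpy are installed; the function name and default frame count are illustrative, not part of the commit:

import numpy as np
from decord import VideoReader, cpu

def sample_uniform_frames(video_path, num_frames=8):
    # Decode on CPU and spread num_frames indices evenly across the video
    vr = VideoReader(video_path, ctx=cpu(0))
    total_frame_num = len(vr)
    indices = np.linspace(0, total_frame_num - 1, num_frames, dtype=int)
    # Guard against the final frame being dropped by integer truncation
    if total_frame_num - 1 not in indices:
        indices = np.append(indices, total_frame_num - 1)
    # Returns an array of shape (sampled frames, height, width, 3)
    return vr.get_batch(indices.tolist()).asnumpy()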

22 changes: 11 additions & 11 deletions lmms_eval/models/mantis.py
@@ -4,29 +4,29 @@


import copy
from tqdm import tqdm
import warnings
from datetime import timedelta
from typing import List, Optional, Tuple, Union

from accelerate import Accelerator, DistributedType, InitProcessGroupKwargs
from accelerate.state import AcceleratorState
from loguru import logger as eval_logger
from packaging import version
from tqdm import tqdm

from lmms_eval import utils
from lmms_eval.api.instance import Instance
from lmms_eval.api.model import lmms
from lmms_eval.api.registry import register_model
from lmms_eval.utils import stop_sequences_criteria

from accelerate import Accelerator, DistributedType, InitProcessGroupKwargs
from accelerate.state import AcceleratorState
from typing import List, Optional, Union, Tuple
from packaging import version
import warnings

from loguru import logger as eval_logger

warnings.filterwarnings("ignore")

try:
from mantis.models.mllava import LlavaForConditionalGeneration, MLlavaProcessor
from mantis.models.conversation import conv_mllava_v1 as default_conv
from mantis.models.conversation import conv_templates
from mantis.models.mfuyu import MFuyuForCausalLM, MFuyuProcessor
from mantis.models.conversation import conv_mllava_v1 as default_conv, conv_templates
from mantis.models.mllava import LlavaForConditionalGeneration, MLlavaProcessor

except Exception as e:
eval_logger.debug("Mantis is not installed. Please install Mantis to use this model.\nError: %s" % e)
12 changes: 6 additions & 6 deletions lmms_eval/models/model_utils/load_video.py
@@ -38,34 +38,34 @@ def read_video_pyav(video_path, num_frm=8):
total_frames = container.streams.video[0].frames
sampled_frm = min(total_frames, num_frm)
indices = np.linspace(0, total_frames - 1, sampled_frm, dtype=int)

# Append the last frame index if not already included
if total_frames - 1 not in indices:
indices = np.append(indices, total_frames - 1)

frames = record_video_length_stream(container, indices)
except:
container = av.open(video_path)
frames = record_video_length_packet(container)
total_frames = len(frames)
sampled_frm = min(total_frames, num_frm)
indices = np.linspace(0, total_frames - 1, sampled_frm, dtype=int)

# Append the last frame index if not already included
if total_frames - 1 not in indices:
indices = np.append(indices, total_frames - 1)

frames = [frames[i] for i in indices]
else:
container = av.open(video_path)
frames = record_video_length_packet(container)
total_frames = len(frames)
sampled_frm = min(total_frames, num_frm)
indices = np.linspace(0, total_frames - 1, sampled_frm, dtype=int)

# Append the last frame index if not already included
if total_frames - 1 not in indices:
indices = np.append(indices, total_frames - 1)

frames = [frames[i] for i in indices]
return np.stack([x.to_ndarray(format="rgb24") for x in frames])
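
As a rough standalone sketch of the packet-decoding fallback exercised in the except/else branches above (PyAV and numpy assumed; record_video_length_packet is approximated here by decoding every frame up front, so this is not the committed helper):

import av
import numpy as np

def sample_frames_pyav(video_path, num_frm=8):
    # Decode every frame of the first video stream, then subsample uniformly
    container = av.open(video_path)
    frames = [frame for frame in container.decode(video=0)]
    total_frames = len(frames)
    sampled_frm = min(total_frames, num_frm)
    indices = np.linspace(0, total_frames - 1, sampled_frm, dtype=int)
    # Append the last frame index if integer truncation dropped it
    if total_frames - 1 not in indices:
        indices = np.append(indices, total_frames - 1)
    return np.stack([frames[i].to_ndarray(format="rgb24") for i in indices])
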
8 changes: 4 additions & 4 deletions lmms_eval/tasks/mirb/utils.py
@@ -1,10 +1,10 @@
from lmms_eval.filters.extraction import ExtendedRegexFilter
from lmms_eval.filters.transformation import MapFilter
import logging
import re
import numpy as np

import numpy as np

import logging
from lmms_eval.filters.extraction import ExtendedRegexFilter
from lmms_eval.filters.transformation import MapFilter

eval_logger = logging.getLogger("lmms-eval")

52 changes: 26 additions & 26 deletions lmms_eval/tasks/refcoco+/utils_rec.py
@@ -1,5 +1,6 @@
import re
import logging
import re

from datasets import Dataset

eval_logger = logging.getLogger("lmms-eval")
@@ -14,22 +15,17 @@ def refcoco_bbox_rec_preprocess_dataset(dataset: Dataset):

# Original bbox format (top x, top y, width, height)
# Convert to (top-left x, top-left y, bottom-right x, bottom-right y)
# Normalize the bounding box coordinates to be between 0 and 1
# Normalize the bounding box coordinates to be between 0 and 1
# using the image width and height
dataset = dataset.map(
lambda x: {"bbox": [x["bbox"][0] / x["image_width"],
x["bbox"][1] / x["image_height"],
(x["bbox"][0] + x["bbox"][2]) / x["image_width"],
(x["bbox"][1] + x["bbox"][3]) / x["image_height"]]}
)
dataset = dataset.map(lambda x: {"bbox": [x["bbox"][0] / x["image_width"], x["bbox"][1] / x["image_height"], (x["bbox"][0] + x["bbox"][2]) / x["image_width"], (x["bbox"][1] + x["bbox"][3]) / x["image_height"]]})

# currently, the dataset has `answer` as a list of strings
# each answer should be its own row
# we will explode the dataset to have one row per answer
# duplicate the other columns
def explode_answers(example):
answers = example.pop('answer')
return [{'answer': answer, **example} for answer in answers]
answers = example.pop("answer")
return [{"answer": answer, **example} for answer in answers]

# Apply the function to each element, collecting the results
exploded_rows = []
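
The one-line dataset.map above compresses the bbox conversion; a small illustrative helper with the same arithmetic, assuming COCO-style (top-left x, top-left y, width, height) input (the helper name is a placeholder, not committed code):

def normalize_bbox(bbox, image_width, image_height):
    # (x, y, w, h) -> (x1, y1, x2, y2), scaled into [0, 1]
    x, y, w, h = bbox
    return [x / image_width, y / image_height, (x + w) / image_width, (y + h) / image_height]

# Example: normalize_bbox([50, 40, 100, 80], 200, 160) -> [0.25, 0.25, 0.75, 0.75]
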
@@ -50,8 +46,11 @@ def refcoco_bbox_rec_doc_to_visual(doc):


def refcoco_bbox_rec_doc_to_text(doc):
assert isinstance(doc['answer'], str), "Answer must be a string"
return "Bounding box coordinates are specified in the format (top-left x, top-left y, bottom-right x, bottom-right y). All values are floating point numbers bounded between 0 and 1. Please provide the bounding box coordinate of the region this sentence describes: " + doc['answer']
assert isinstance(doc["answer"], str), "Answer must be a string"
return (
"Bounding box coordinates are specified in the format (top-left x, top-left y, bottom-right x, bottom-right y). All values are floating point numbers bounded between 0 and 1. Please provide the bounding box coordinate of the region this sentence describes: "
+ doc["answer"]
)


def parse_float_sequence_within(input_str):
@@ -65,15 +64,15 @@ def parse_float_sequence_within(input_str):
list: A list of four floats if the pattern is found, or a list of four zeros if the pattern is not found.
"""
# Define the regex pattern to find the first instance of four floats within square brackets
pattern = r'\[\s*(-?\d+(?:\.\d+)?),\s*(-?\d+(?:\.\d+)?),\s*(-?\d+(?:\.\d+)?),\s*(-?\d+(?:\.\d+)?)\s*\]'
pattern = r"\[\s*(-?\d+(?:\.\d+)?),\s*(-?\d+(?:\.\d+)?),\s*(-?\d+(?:\.\d+)?),\s*(-?\d+(?:\.\d+)?)\s*\]"

# Use re.search to find the first match of the pattern in the input string
match = re.search(pattern, input_str)

# If a match is found, convert the captured groups into a list of floats
if match:
return [float(match.group(i)) for i in range(1, 5)]

# If the input does not contain the pattern, return the null float sequence
return [0, 0, 0, 0]
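
A quick usage check of the reformatted pattern above (the input string is illustrative):

import re

pattern = r"\[\s*(-?\d+(?:\.\d+)?),\s*(-?\d+(?:\.\d+)?),\s*(-?\d+(?:\.\d+)?),\s*(-?\d+(?:\.\d+)?)\s*\]"
match = re.search(pattern, "Predicted box: [0.12, 0.3, 0.78, 0.9] for the query.")
if match:
    # Groups 1-4 capture the four coordinates
    print([float(match.group(i)) for i in range(1, 5)])  # [0.12, 0.3, 0.78, 0.9]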

@@ -89,7 +88,7 @@ def refcoco_bbox_rec_process_result(doc, result):
pred = result[0] if len(result) > 0 else ""
pred = parse_float_sequence_within(pred)
ann_id = doc["question_id"]
data_dict = {"answer": doc["answer"], "pred": pred, "ann_id": ann_id, 'bbox': doc['bbox']}
data_dict = {"answer": doc["answer"], "pred": pred, "ann_id": ann_id, "bbox": doc["bbox"]}
return {f"refcoco_{metric}": data_dict for metric in COCO_REC_METRICS}


@@ -173,19 +172,19 @@ def refcoco_bbox_rec_aggregation_result(results, metric):
- dict: Dictionary containing the aggregated results for the specified metric.
"""
scorers = {
'IoU': compute_iou,
'Acc@0.1': lambda x, y: compute_accuracy(x, y, 0.1),
'Acc@0.3': lambda x, y: compute_accuracy(x, y, 0.3),
'Acc@0.5': lambda x, y: compute_accuracy(x, y, 0.5),
'Acc@0.7': lambda x, y: compute_accuracy(x, y, 0.7),
'Acc@0.9': lambda x, y: compute_accuracy(x, y, 0.9),
'Center_ACC': compute_center_accuracy
"IoU": compute_iou,
"Acc@0.1": lambda x, y: compute_accuracy(x, y, 0.1),
"Acc@0.3": lambda x, y: compute_accuracy(x, y, 0.3),
"Acc@0.5": lambda x, y: compute_accuracy(x, y, 0.5),
"Acc@0.7": lambda x, y: compute_accuracy(x, y, 0.7),
"Acc@0.9": lambda x, y: compute_accuracy(x, y, 0.9),
"Center_ACC": compute_center_accuracy,
}
results_dict = {metric: []}
for result in results:
# Extract the ground truth and predicted bounding boxes
gt_bbox = result['bbox']
pred_bbox = result['pred']
gt_bbox = result["bbox"]
pred_bbox = result["pred"]
# Compute the specified metric between the ground truth and predicted bounding boxes
score = scorers[metric](gt_bbox, pred_bbox)
results_dict[metric].append(score)
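
For context, the scorers table calls helpers defined elsewhere in this file; the bodies below are only a plausible sketch of their contracts (boxes in (x1, y1, x2, y2) corner format), not the committed implementations:

def compute_iou(box1, box2):
    # Intersection-over-union of two corner-format boxes, in [0, 1]
    ix1, iy1 = max(box1[0], box2[0]), max(box1[1], box2[1])
    ix2, iy2 = min(box1[2], box2[2]), min(box1[3], box2[3])
    inter = max(0.0, ix2 - ix1) * max(0.0, iy2 - iy1)
    area1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
    area2 = (box2[2] - box2[0]) * (box2[3] - box2[1])
    union = area1 + area2 - inter
    return inter / union if union > 0 else 0.0

def compute_accuracy(gt_bbox, pred_bbox, threshold):
    # Hit when IoU clears the threshold (used for the Acc@0.1 .. Acc@0.9 rows)
    return float(compute_iou(gt_bbox, pred_bbox) >= threshold)

def compute_center_accuracy(gt_bbox, pred_bbox):
    # Hit when the predicted box center falls inside the ground-truth box
    cx, cy = (pred_bbox[0] + pred_bbox[2]) / 2, (pred_bbox[1] + pred_bbox[3]) / 2
    return float(gt_bbox[0] <= cx <= gt_bbox[2] and gt_bbox[1] <= cy <= gt_bbox[3])
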
@@ -201,6 +200,7 @@ def refcoco_bbox_rec_iou(results):
def refcoco_bbox_rec_acc01(results):
return refcoco_bbox_rec_aggregation_result(results, "Acc@0.1")


def refcoco_bbox_rec_acc03(results):
return refcoco_bbox_rec_aggregation_result(results, "Acc@0.3")

52 changes: 26 additions & 26 deletions lmms_eval/tasks/refcoco/utils_rec.py
@@ -1,5 +1,6 @@
import re
import logging
import re

from datasets import Dataset

eval_logger = logging.getLogger("lmms-eval")
@@ -14,22 +15,17 @@ def refcoco_bbox_rec_preprocess_dataset(dataset: Dataset):

# Original bbox format (top x, top y, width, height)
# Convert to (top-left x, top-left y, bottom-right x, bottom-right y)
# Normalize the bounding box coordinates to be between 0 and 1
# Normalize the bounding box coordinates to be between 0 and 1
# using the image width and height
dataset = dataset.map(
lambda x: {"bbox": [x["bbox"][0] / x["image_width"],
x["bbox"][1] / x["image_height"],
(x["bbox"][0] + x["bbox"][2]) / x["image_width"],
(x["bbox"][1] + x["bbox"][3]) / x["image_height"]]}
)
dataset = dataset.map(lambda x: {"bbox": [x["bbox"][0] / x["image_width"], x["bbox"][1] / x["image_height"], (x["bbox"][0] + x["bbox"][2]) / x["image_width"], (x["bbox"][1] + x["bbox"][3]) / x["image_height"]]})

# currently, the dataset has `answer` as a list of strings
# each answer should be its own row
# we will explode the dataset to have one row per answer
# duplicate the other columns
def explode_answers(example):
answers = example.pop('answer')
return [{'answer': answer, **example} for answer in answers]
answers = example.pop("answer")
return [{"answer": answer, **example} for answer in answers]

# Apply the function to each element, collecting the results
exploded_rows = []
@@ -50,8 +46,11 @@ def refcoco_bbox_rec_doc_to_visual(doc):


def refcoco_bbox_rec_doc_to_text(doc):
assert isinstance(doc['answer'], str), "Answer must be a string"
return "Bounding box coordinates are specified in the format (top-left x, top-left y, bottom-right x, bottom-right y). All values are floating point numbers bounded between 0 and 1. Please provide the bounding box coordinate of the region this sentence describes: " + doc['answer']
assert isinstance(doc["answer"], str), "Answer must be a string"
return (
"Bounding box coordinates are specified in the format (top-left x, top-left y, bottom-right x, bottom-right y). All values are floating point numbers bounded between 0 and 1. Please provide the bounding box coordinate of the region this sentence describes: "
+ doc["answer"]
)


def parse_float_sequence_within(input_str):
@@ -65,15 +64,15 @@ def parse_float_sequence_within(input_str):
list: A list of four floats if the pattern is found, or a list of four zeros if the pattern is not found.
"""
# Define the regex pattern to find the first instance of four floats within square brackets
pattern = r'\[\s*(-?\d+(?:\.\d+)?),\s*(-?\d+(?:\.\d+)?),\s*(-?\d+(?:\.\d+)?),\s*(-?\d+(?:\.\d+)?)\s*\]'
pattern = r"\[\s*(-?\d+(?:\.\d+)?),\s*(-?\d+(?:\.\d+)?),\s*(-?\d+(?:\.\d+)?),\s*(-?\d+(?:\.\d+)?)\s*\]"

# Use re.search to find the first match of the pattern in the input string
match = re.search(pattern, input_str)

# If a match is found, convert the captured groups into a list of floats
if match:
return [float(match.group(i)) for i in range(1, 5)]

# If the input does not contain the pattern, return the null float sequence
return [0, 0, 0, 0]

@@ -89,7 +88,7 @@ def refcoco_bbox_rec_process_result(doc, result):
pred = result[0] if len(result) > 0 else ""
pred = parse_float_sequence_within(pred)
ann_id = doc["question_id"]
data_dict = {"answer": doc["answer"], "pred": pred, "ann_id": ann_id, 'bbox': doc['bbox']}
data_dict = {"answer": doc["answer"], "pred": pred, "ann_id": ann_id, "bbox": doc["bbox"]}
return {f"refcoco_{metric}": data_dict for metric in COCO_REC_METRICS}


@@ -173,19 +172,19 @@ def refcoco_bbox_rec_aggregation_result(results, metric):
- dict: Dictionary containing the aggregated results for the specified metric.
"""
scorers = {
'IoU': compute_iou,
'Acc@0.1': lambda x, y: compute_accuracy(x, y, 0.1),
'Acc@0.3': lambda x, y: compute_accuracy(x, y, 0.3),
'Acc@0.5': lambda x, y: compute_accuracy(x, y, 0.5),
'Acc@0.7': lambda x, y: compute_accuracy(x, y, 0.7),
'Acc@0.9': lambda x, y: compute_accuracy(x, y, 0.9),
'Center_ACC': compute_center_accuracy
"IoU": compute_iou,
"Acc@0.1": lambda x, y: compute_accuracy(x, y, 0.1),
"Acc@0.3": lambda x, y: compute_accuracy(x, y, 0.3),
"Acc@0.5": lambda x, y: compute_accuracy(x, y, 0.5),
"Acc@0.7": lambda x, y: compute_accuracy(x, y, 0.7),
"Acc@0.9": lambda x, y: compute_accuracy(x, y, 0.9),
"Center_ACC": compute_center_accuracy,
}
results_dict = {metric: []}
for result in results:
# Extract the ground truth and predicted bounding boxes
gt_bbox = result['bbox']
pred_bbox = result['pred']
gt_bbox = result["bbox"]
pred_bbox = result["pred"]
# Compute the specified metric between the ground truth and predicted bounding boxes
score = scorers[metric](gt_bbox, pred_bbox)
results_dict[metric].append(score)
@@ -201,6 +200,7 @@ def refcoco_bbox_rec_iou(results):
def refcoco_bbox_rec_acc01(results):
return refcoco_bbox_rec_aggregation_result(results, "Acc@0.1")


def refcoco_bbox_rec_acc03(results):
return refcoco_bbox_rec_aggregation_result(results, "Acc@0.3")

