Joshua/vizwizvqa refactor (EvolvingLMMs-Lab#42)
* refactor vizwizvqa task

* Merge commit '59c7d67077c315657a02bdee2eace0e64c1ee0d4'

* Fix exact_match accuracy calculation in vizwiz_vqa_process_results

* Update vizwiz_vqa tasks

---------

Co-authored-by: Fanyi Pu <[email protected]>
JvThunder and pufanyi authored Feb 6, 2024
1 parent 3b0e5e9 commit 058a7d4
Showing 15 changed files with 174 additions and 342 deletions.
2 changes: 1 addition & 1 deletion example_eval.yaml
@@ -1,6 +1,6 @@
- model: llava
  model_args: pretrained=liuhaotian/llava-v1.5-7b
  tasks: docvqa_val,docvqa_test,infovqa_val,infovqa_test
  tasks: vizwiz_vqa
  batch_size: 1
  log_samples: true
  log_samples_suffix: debug
3 changes: 2 additions & 1 deletion lmms_eval/__main__.py
@@ -179,7 +179,8 @@ def cli_evaluate(args: Union[argparse.Namespace, None] = None) -> None:
        accelerator.wait_for_everyone()
        if is_main_process:
            wandb_logger.log_eval_result(results)
            wandb_logger.write_to_report(results)
            if wandb_logger.online():
                wandb_logger.write_to_report(results)
            wandb_logger.finish()
        results_list.append(results)

5 changes: 4 additions & 1 deletion lmms_eval/logging_utils.py
@@ -261,7 +261,7 @@ def write_to_report(self, results):
        report = self.wr.Report(
            project=self.run.project,
            entity=self.run.entity,
            title=f"({datetime.now().strftime('%Y-%m-%d %H:%M:%S')}) xxx - Evaluation report",
            title=f"({datetime.now().strftime('%Y-%m-%d %H:%M:%S')}) {self.run.id} - Evaluation report",
            description=f"Evaluation run by: {self.run.entity} logged to {self.run.url}",
)

@@ -301,3 +301,6 @@ def write_to_report(self, results):

    def finish(self):
        self.run.finish()

    def online(self):
        return self.run.offline is False
10 changes: 10 additions & 0 deletions lmms_eval/models/__init__.py
@@ -3,3 +3,13 @@
from .qwen_vl import Qwen_VL
from .fuyu import Fuyu
from .gpt4v import GPT4V

import os

try:
    # enabling faster model download
    import hf_transfer

    os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
except ImportError:
    pass
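For background (not part of this commit): huggingface_hub only switches to the Rust-based hf_transfer downloader when that package is installed and HF_HUB_ENABLE_HF_TRANSFER is set, so the guarded import above is a safe opt-in that silently falls back to the default downloader otherwise. A hypothetical way to confirm the toggle at runtime:

import os

import lmms_eval.models  # importing the package sets the flag when hf_transfer is available

# Hypothetical check, not repository code: "1" means accelerated downloads are enabled.
print(os.environ.get("HF_HUB_ENABLE_HF_TRANSFER", "not set"))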
79 changes: 72 additions & 7 deletions lmms_eval/models/otterhd.py
@@ -1,3 +1,4 @@
from accelerate import Accelerator, DistributedType
from transformers import FuyuForCausalLM, AutoTokenizer, FuyuImageProcessor, FuyuProcessor
from lmms_eval.api.model import lmms
from lmms_eval.api.registry import register_model
@@ -8,6 +9,13 @@
from lmms_eval.api.instance import Instance
from tqdm import tqdm

import warnings
import logging

warnings.filterwarnings("ignore")

eval_logger = logging.getLogger("lmms-eval")


@register_model("otterhd")
class OtterHD(lmms):
@@ -28,29 +36,86 @@ def __init__(
        # Do not use kwargs for now
        assert kwargs == {}, f"Unexpected kwargs: {kwargs}"

        self.device = device if torch.cuda.is_available() else "cpu"
        self.model = FuyuForCausalLM.from_pretrained(pretrained, torch_dtype=torch.bfloat16, device_map=self.device)
        accelerator = Accelerator()
        if accelerator.num_processes > 1:
            self._device = torch.device(f"cuda:{accelerator.local_process_index}")
        else:
            self._device = device

        self._model = FuyuForCausalLM.from_pretrained(pretrained, torch_dtype=torch.bfloat16, device_map=self._device)
        self.model.eval()
        self.tokenizer = AutoTokenizer.from_pretrained(pretrained)
        self.model.tie_weights()
        self._tokenizer = AutoTokenizer.from_pretrained(pretrained)
        self._config = self.model.config

        height, width = map(int, resolution.split("x"))
        self.image_processor = FuyuImageProcessor(size={"height": height, "width": width})
        self.processor = FuyuProcessor(image_processor=self.image_processor, tokenizer=self.tokenizer)
        self.max_new_tokens = max_new_tokens
        self.batch_size_per_gpu = int(batch_size)

        if accelerator.num_processes > 1:
            assert accelerator.distributed_type in [
                DistributedType.FSDP,
                DistributedType.MULTI_GPU,
            ], "Unsupported distributed type provided. Only DDP and FSDP are supported."
            if accelerator.distributed_type == DistributedType.FSDP:
                self._model = accelerator.prepare(self.model)
            else:
                self._model = accelerator.prepare_model(self.model, evaluation_mode=True)
            self.accelerator = accelerator
            if self.accelerator.is_local_main_process:
                eval_logger.info(f"Using {accelerator.num_processes} devices with data parallelism")
            self._rank = self.accelerator.local_process_index
            self._world_size = self.accelerator.num_processes
        else:
            self.model.to(self._device)
            self._rank = 0
            self._world_size = 1

    @property
    def config(self):
        # return the associated transformers.AutoConfig for the given pretrained model.
        return self._config

    @property
    def tokenizer(self):
        return self._tokenizer

    @property
    def model(self):
        # returns the model, unwrapping it if using Accelerate
        if hasattr(self, "accelerator"):
            return self.accelerator.unwrap_model(self._model)
        else:
            return self._model

    @property
    def eot_token_id(self):
        # we use EOT because end of *text* is more accurate for what we're doing than end of *sentence*
        return self.tokenizer.eos_token_id

    @property
    def max_length(self):
        # Assuming max_length is the sum of max context tokens and max new tokens
        return self.tokenizer.model_max_length

    # @property
    # def max_gen_toks(self) -> int:
    #     return self.max_new_tokens

    @property
    def batch_size(self):
        return self.batch_size_per_gpu

    @property
    def device(self):
        return self._device

    @property
    def rank(self):
        return self._rank

    @property
    def world_size(self):
        return self._world_size

    def flatten(self, input, only_get_first=False):
        new_list = []
        for i in input:
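As an aside, a minimal sketch (an assumption, not code from this commit) of how rank and world_size are typically consumed by an evaluation harness to split work across the data-parallel processes set up above:

# Hypothetical illustration: each process evaluates a strided slice of the requests,
# so world_size processes cover the workload without duplicating any item.
def shard_requests(requests, rank, world_size):
    return requests[rank::world_size]

# With world_size=4, the process with rank 1 handles requests 1, 5, 9, ...

Each process then gathers or writes back only its own results, which is why the properties above must report a consistent rank/world_size pair.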
2 changes: 2 additions & 0 deletions lmms_eval/tasks/seedbench/utils.py
@@ -43,10 +43,12 @@ def seed_aggregation_result_all(results):

    return score


def seed_doc_to_text_mc(doc):
    question = doc["question"]
    return f"{question} Answer :"


def seed_doc_to_choice(doc):
    return [doc["choice_a"], doc["choice_b"], doc["choice_c"], doc["choice_d"]]

10 changes: 10 additions & 0 deletions lmms_eval/tasks/vizwiz_vqa/_default_template_vqa_yaml
@@ -0,0 +1,10 @@
dataset_path: lmms-lab/VizWiz-VQA
output_type: generate_until
doc_to_visual: !function utils.vizwiz_vqa_doc_to_visual
doc_to_text: !function utils.vizwiz_vqa_doc_to_text
doc_to_target: "answer"
generation_kwargs:
  until:
    - "ASSISTANT:"
metadata:
  - version: 0.0
25 changes: 25 additions & 0 deletions lmms_eval/tasks/vizwiz_vqa/_generate_config.py
@@ -0,0 +1,25 @@
import os
import yaml

splits = ["val", "test"]
tasks = ["vqa"]

if __name__ == "__main__":
    dump_tasks = []
    for task in tasks:
        for split in splits:
            yaml_dict = {"group": f"vizwiz_{task}", "task": f"vizwiz_{task}_{split}", "include": f"_default_template_{task}_yaml", "test_split": split}
            if split == "train":
                yaml_dict.pop("group")
            else:
                dump_tasks.append(f"vizwiz_{task}_{split}")

            save_path = f"./vizwiz_{task}_{split}.yaml"
            print(f"Saving to {save_path}")
            with open(save_path, "w") as f:
                yaml.dump(yaml_dict, f, default_flow_style=False, sort_keys=False)

    group_dict = {"group": "vizwiz_vqa", "task": dump_tasks}

    with open("./_vizwiz_vqa.yaml", "w") as f:
        yaml.dump(group_dict, f, default_flow_style=False, indent=4)
4 changes: 4 additions & 0 deletions lmms_eval/tasks/vizwiz_vqa/_vizwiz_vqa.yaml
@@ -0,0 +1,4 @@
group: vizwiz_vqa
task:
  - vizwiz_vqa_val
  - vizwiz_vqa_test
lmms_eval/tasks/vizwiz_vqa/utils.py
@@ -9,13 +9,6 @@

eval_logger = logging.getLogger("lmms-eval")

with open(pathlib.Path(__file__).parent / "vizwizvqa_test.yaml", "r") as f:
    raw_data = f.readlines()
    for i in range(len(raw_data)):
        raw_data[i] = raw_data[i].replace("!function", "function")

    config = yaml.safe_load("".join(raw_data))


class EvalAIAnswerProcessor:
    CONTRACTIONS = {
@@ -223,11 +216,11 @@ def __call__(self, item):
        return item


def vizwizvqa_doc_to_visual(doc):
def vizwiz_vqa_doc_to_visual(doc):
    return [doc["image"].convert("RGB")]


def vizwizvqa_process_results(doc, result):
def vizwiz_vqa_process_results(doc, result):
    eval_ai_processor = EvalAIAnswerProcessor()
    assert len(result) == 1, f"The result should be a list of length 1, but got {len(result)}."
    resAns = eval_ai_processor(result[0])
@@ -250,23 +243,33 @@ def vizwizvqa_process_results(doc, result):
        accuracy = 0

    return {
        # "exact_match": accuracy,
        "exact_match": accuracy,
        "submission": {
            "image": f"{doc['question_id']}.jpg",
            "answer": resAns,
        },
    }


def vizwizvqa_doc_to_text(doc):
def vizwiz_vqa_process_results_test(doc, result):
    res = vizwiz_vqa_process_results(doc, result)
    return {"submission": res["submission"]}


def vizwiz_vqa_process_results_val(doc, result):
    res = vizwiz_vqa_process_results(doc, result)
    return {"exact_match": res["exact_match"]}


def vizwiz_vqa_doc_to_text(doc):
    text = f"{doc['question'].capitalize()}\nWhen the provided information is insufficient, respond with 'Unanswerable'.\nAnswer the question using a single word or phrase."
    return text


def vizwizvqa_aggreate_submissions(results):
def vizwiz_vqa_aggreate_submissions(results):
    now_date_time = datetime.datetime.now().strftime("%Y-%m%d-%H%M-%S")
    os.makedirs("./submissions", exist_ok=True)
    submission_file_name = f"./submissions/vizwizvqa-test-submission-{now_date_time}.json"
    submission_file_name = f"./submissions/vizwiz_vqa-test-submission-{now_date_time}.json"
    path = os.path.abspath(submission_file_name)
    with open(path, "w") as f:
        json.dump(results, f)
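For readers tracking the "Fix exact_match accuracy calculation" bullet from the commit message: the hunk above only shows the tail of the computation, so here is a minimal sketch (an assumption based on the standard VQA/VizWiz protocol, not the repository's exact code) of how a per-question accuracy is usually derived from the crowd answers before being returned as exact_match:

def vqa_accuracy(pred, gt_answers):
    # Standard VQA-style scoring: an answer given by at least 3 annotators scores 1.0;
    # otherwise it receives partial credit of matches / 3.
    matching = sum(1 for ans in gt_answers if ans == pred)
    return min(1.0, matching / 3.0)

# Example: vqa_accuracy("unanswerable", ["unanswerable"] * 4 + ["yes"] * 6) -> 1.0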
14 changes: 14 additions & 0 deletions lmms_eval/tasks/vizwiz_vqa/vizwiz_vqa_test.yaml
@@ -0,0 +1,14 @@
group: vizwiz_vqa
task: vizwiz_vqa_test
test_split: test
include: _default_template_vqa_yaml
process_results: !function utils.vizwiz_vqa_process_results_test
metric_list:
  # - metric: exact_match
  #   aggregation: mean
  #   higher_is_better: true
  #   ignore_case: true
  #   ignore_punctuation: true
  - metric: submission
    aggregation: !function utils.vizwiz_vqa_aggreate_submissions
    higher_is_better: true
14 changes: 14 additions & 0 deletions lmms_eval/tasks/vizwiz_vqa/vizwiz_vqa_val.yaml
@@ -0,0 +1,14 @@
group: vizwiz_vqa
task: vizwiz_vqa_val
test_split: val
include: _default_template_vqa_yaml
process_results: !function utils.vizwiz_vqa_process_results_val
metric_list:
  - metric: exact_match
    aggregation: mean
    higher_is_better: true
    ignore_case: true
    ignore_punctuation: true
  # - metric: submission
  #   aggregation: !function utils.vizwiz_vqa_aggreate_submissions
  #   higher_is_better: true
20 changes: 0 additions & 20 deletions lmms_eval/tasks/vizwizvqa_test/vizwizvqa_test.yaml

This file was deleted.
