
Commit

Merge pull request #128 from EvolvingLMMs-Lab/videomme
Update videomme task [w/, w/o subtitle] and modify the prompt for ablations
choiszt authored Jun 16, 2024
2 parents f4eeaa9 + c58271a commit 791e087
Showing 2 changed files with 72 additions and 2 deletions.
31 changes: 29 additions & 2 deletions lmms_eval/tasks/videomme/utils.py
@@ -106,10 +106,37 @@ def videomme_doc_to_visual(doc):


def videomme_doc_to_text(doc, model_specific_prompt_kwargs=None):
    option_prompt = "Select the best answer to the following multiple-choice question based on the video and the subtitles. Respond with only the letter (A, B, C, or D) of the correct option."
    question = doc["question"]
    option = str(doc["options"])
    question = question + "\n" + option
    full_prompt = option_prompt + "\n" + question + "\n" + "The best answer is:"
    return full_prompt
# Frames + Subs
# This video's subtitles are listed below:
# 【subtitles】

# Select the best answer to the following multiple-choice question based on the video and the subtitles. Respond with only the letter (A, B, C, or D) of the correct option.
# 【question】
# The best answer is:
# Frames / Frames + Audio
# Select the best answer to the following multiple-choice question based on the video. Respond with only the letter (A, B, C, or D) of the correct option.
# 【question】
# The best answer is:

def videomme_doc_to_text_subtitle(doc, model_specific_prompt_kwargs=None):
    subtitles_prompt = "This video's subtitles are listed below: \n"
    if doc["Subtitle"] == "":
        subtitle = "No subtitles available"
    else:
        subtitle = doc["Subtitle"]
    option_prompt = "Select the best answer to the following multiple-choice question based on the video and the subtitles. Respond with only the letter (A, B, C, or D) of the correct option."
    question = doc["question"]
    option = str(doc["options"])
    question = question + "\n" + option + model_specific_prompt_kwargs["post_prompt"]
    return question
    # Unreachable alternative kept below: the full frames + subtitles prompt layout.
    question = question + "\n" + option
    full_prompt = subtitles_prompt + subtitle + "\n" + option_prompt + "\n" + question + "\n" + "The best answer is:"
    return full_prompt


def extract_characters_regex(s):
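
For reference, a minimal standalone sketch (illustrative only, not part of the diff) of the two prompt layouts documented in the comments above, assembled for one hypothetical document. The doc keys mirror those used in utils.py; the sample values are invented.

# Illustrative only: hypothetical doc with the fields used by utils.py above.
doc = {
    "question": "What color is the car shown at the start of the video?",
    "options": ["A. Red", "B. Blue", "C. Green", "D. Black"],
    "Subtitle": "Narrator: The red car pulls out of the driveway.",
}
question_block = doc["question"] + "\n" + str(doc["options"])

# Frames / Frames + Audio layout (no subtitles in the context).
frames_prompt = (
    "Select the best answer to the following multiple-choice question based on the video. "
    "Respond with only the letter (A, B, C, or D) of the correct option.\n"
    + question_block + "\n" + "The best answer is:"
)

# Frames + Subs layout (subtitles prepended before the question).
subtitle = doc["Subtitle"] if doc["Subtitle"] else "No subtitles available"
subs_prompt = (
    "This video's subtitles are listed below: \n" + subtitle + "\n"
    + "Select the best answer to the following multiple-choice question based on the video and the subtitles. "
    "Respond with only the letter (A, B, C, or D) of the correct option.\n"
    + question_block + "\n" + "The best answer is:"
)

print(frames_prompt)
print(subs_prompt)
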
43 changes: 43 additions & 0 deletions lmms_eval/tasks/videomme/videomme_subtitle.yaml
@@ -0,0 +1,43 @@
dataset_path: lmms-lab/Video-MME
dataset_kwargs:
  token: True
  cache_dir: videomme
  video: True
  # From_YouTube: True
task: videomme_subtitle
test_split: test
output_type: generate_until
doc_to_visual: !function utils.videomme_doc_to_visual
doc_to_text: !function utils.videomme_doc_to_text_subtitle
doc_to_target: "answer"
generation_kwargs:
  max_new_tokens: 16
  temperature: 0
  top_p: 1.0
  num_beams: 1
  do_sample: false
# The return value of process_results will be used by metrics
process_results: !function utils.videomme_process_results
# Note that the metric name can be either a registered metric function (such as the case for GQA) or a key name returned by process_results
metric_list:
  - metric: videomme_percetion_score
    aggregation: !function utils.videomme_aggregate_results
    higher_is_better: true
model_specific_prompt_kwargs:
  default:
    pre_prompt: ""
    post_prompt: ""
  # gpt4v:
  #   pre_prompt: ""
  #   post_prompt:
  # # qwen_vl:
  # #   pre_prompt: ""
  # #   post_prompt: " Answer:"
  # # otterhd:
  # #   pre_prompt: ""
  # #   post_prompt: " Answer:"
  # xcomposer2_4khd:
  #   pre_prompt: "[UNUSED_TOKEN_146]user\n"
  #   post_prompt: " Answer this question with A, B, C, or D.[UNUSED_TOKEN_145]\n[UNUSED_TOKEN_146]assistant\n"
metadata:
  - version: 0.0
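
For context, a small sketch (illustrative, not part of the diff) of how the model_specific_prompt_kwargs block above reaches the text side: the per-model dict (falling back to default) is passed to videomme_doc_to_text_subtitle, whose post_prompt is appended after the options. The override values are copied from the commented xcomposer2_4khd example; the doc and helper below are invented for the example.

# Illustrative only: how a per-model post_prompt from this YAML would change
# the string built by videomme_doc_to_text_subtitle in utils.py.
prompt_kwargs = {
    "default": {"pre_prompt": "", "post_prompt": ""},
    # Mirrors the commented xcomposer2_4khd block above (only applied if enabled).
    "xcomposer2_4khd": {
        "pre_prompt": "[UNUSED_TOKEN_146]user\n",
        "post_prompt": " Answer this question with A, B, C, or D.[UNUSED_TOKEN_145]\n[UNUSED_TOKEN_146]assistant\n",
    },
}

def build_question(doc, kwargs):
    # Mirrors the value returned by videomme_doc_to_text_subtitle above.
    return doc["question"] + "\n" + str(doc["options"]) + kwargs["post_prompt"]

doc = {"question": "Which city is shown?", "options": ["A. Paris", "B. Rome", "C. Tokyo", "D. Cairo"]}
print(build_question(doc, prompt_kwargs["default"]))
print(build_question(doc, prompt_kwargs["xcomposer2_4khd"]))
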
