add two tasks

EvolvingLMMs-Lab · Sep 19, 2024 · 7746e1d · 7746e1d
1 parent 1618dc7
commit 7746e1d
Show file tree

Hide file tree

Showing 14 changed files with 1,218 additions and 7 deletions.
diff --git a/check_missing.py b/check_missing.py
@@ -0,0 +1,20 @@
+# Drop one row from lmms-lab/VideoSearch and publish the remainder as the
+# 'final_combined_milestone' config.
+from datasets import load_dataset, Dataset
+
+# Load the deduplicated VideoSearch dataset
+videosearch_dataset = load_dataset('lmms-lab/VideoSearch', 'deduplicated_combined_milestone', split='test')
+
+# ID to be removed
+id_to_remove = 'validation_Biology_18'
+
+# Filter on the 'id' column only. Dataset.filter keeps the original features,
+# whereas rebuilding with Dataset.from_list re-infers them from plain dicts.
+filtered_dataset = videosearch_dataset.filter(lambda row_id: row_id != id_to_remove, input_columns=['id'])
+
+# Push the filtered dataset to the Hugging Face hub (nothing is saved locally)
+filtered_dataset.push_to_hub("lmms-lab/VideoSearch", "final_combined_milestone", split="test")
+
+# Print the number of rows before and after filtering
+print(f"Original dataset size: {len(videosearch_dataset)}")
+print(f"Filtered dataset size: {len(filtered_dataset)}")
diff --git a/check_reverse.py b/check_reverse.py
@@ -0,0 +1,33 @@
+import os
+from datasets import load_dataset
+
+# Load the 'final_combined_milestone' config of VideoSearch (split 'test')
+videosearch_dataset = load_dataset('lmms-lab/VideoSearch', 'final_combined_milestone', split='test')
+
+# Path to the videos directory (replace with your actual path)
+videos_directory = '/mnt/sfs-common/krhu/.cache/huggingface/Combined_milestone/videos/'
+
+# IDs present in the dataset; a set keeps the membership tests below cheap
+videosearch_ids = set(videosearch_dataset['id'])
+
+# IDs of .mp4 files that have no matching dataset row
+extra_files = []
+
+# Walk the directory in sorted order so the report is reproducible
+for file in sorted(os.listdir(videos_directory)):
+    if not file.endswith('.mp4'):
+        continue
+    # Strip only the trailing extension: str.replace('.mp4', '') would also
+    # delete '.mp4' occurring in the middle of a name (e.g. 'a.mp4.mp4').
+    file_id = file[:-len('.mp4')]
+    if file_id not in videosearch_ids:
+        extra_files.append(file_id)
+
+# Print the IDs of .mp4 files that are not in the dataset
+if extra_files:
+    print(f"MP4 files not included in the VideoSearch dataset: {extra_files}")
+else:
+    print("All MP4 files have corresponding entries in the VideoSearch dataset.")
+
+# Always print the total number of extra files
+print(f"Total extra MP4 files: {len(extra_files)}")
diff --git a/lmms_eval/models/gpt4v.py b/lmms_eval/models/gpt4v.py
@@ -23,6 +23,7 @@
 from PIL import Image
 
 API_TYPE = os.getenv("API_TYPE", "openai")
+# API_TYPE = "azure"
 NUM_SECONDS_TO_SLEEP = 30
 from loguru import logger as eval_logger
 
@@ -46,9 +47,9 @@
 class GPT4V(lmms):
     def __init__(
         self,
-        model_version: str = "gpt-4-vision-preview",
+        #model_version: str = "gpt-4-vision-preview",
         modality: str = "video",
-        max_frames_num: int = 10,
+        max_frames_num: int = 32,
         timeout: int = 120,
         continual_mode: bool = False,
         response_persistent_folder: str = None,
@@ -58,7 +59,7 @@ def __init__(
         # Manually set a image token for GPT4V so that we can search for it
         # and split the text and image
         # Here we just use the same token as llava for convenient
-        self.model_version = model_version
+        #self.model_version = model_version
         self.modality = modality
         self.max_frames_num = max_frames_num
         self.image_token = "<image>"
@@ -157,8 +158,15 @@ def generate_until(self, requests) -> List[str]:
                     img = self.encode_image(visual)
                     imgs.append(img)
                 elif self.modality == "video":
-                    frames = self.encode_video(visual, self.max_frames_num)
-                    imgs.extend(frames)
+                    # frames = self.encode_video(visual, self.max_frames_num)
+                    # imgs.extend(frames)
+                    try:
+                        frames = self.encode_video(visual, self.max_frames_num)
+                        imgs.extend(frames)
+                    except Exception as e:
+                        # Log the error and skip to the next visual
+                        eval_logger.error(f"Error {e} in encoding video for {visual}")
+                        continue  # Skip this visual and continue with the others
 
             payload = {"messages": []}
             if API_TYPE == "openai":
@@ -185,7 +193,7 @@ def generate_until(self, requests) -> List[str]:
                 payload["messages"][-1]["content"].append({"type": "text", "text": contexts[-1]})
 
             if "max_new_tokens" not in gen_kwargs:
-                gen_kwargs["max_new_tokens"] = 1024
+                gen_kwargs["max_new_tokens"] = 4096
             if gen_kwargs["max_new_tokens"] > 4096:
                 gen_kwargs["max_new_tokens"] = 4096
             if "temperature" not in gen_kwargs:

diff --git a/lmms_eval/tasks/mmmu/mmmu_val.yaml b/lmms_eval/tasks/mmmu/mmmu_val.yaml
@@ -12,5 +12,8 @@ metric_list:
   - metric: mmmu_acc
     aggregation: !function utils.mmmu_aggregate_results
     higher_is_better: true
+  - metric: submission
+    aggregation: !function utils.mmmu_test_aggregate_results_for_submission
+    higher_is_better: true
 
 include: _default_template_yaml
diff --git a/lmms_eval/tasks/mmmu_for_testing/_default_template_yaml b/lmms_eval/tasks/mmmu_for_testing/_default_template_yaml
@@ -0,0 +1,6 @@
+generation_kwargs:
+  max_new_tokens: 4096
+
+metadata:
+  version: 0.0
+  interleaved_format: false
diff --git a/lmms_eval/tasks/mmmu_for_testing/mmmu.yaml b/lmms_eval/tasks/mmmu_for_testing/mmmu.yaml
@@ -0,0 +1,3 @@
+group: mmmu
+task:
+- mmmu_testing_val
diff --git a/lmms_eval/tasks/mmmu_for_testing/mmmu_for_testing.yaml b/lmms_eval/tasks/mmmu_for_testing/mmmu_for_testing.yaml
@@ -0,0 +1,17 @@
+dataset_path: lmms-lab/MMMU_for_testing
+dataset_name: "updated_first_milestone"
+task: "mmmu_testing_val"
+test_split: train
+output_type: generate_until
+doc_to_visual: !function utils.mmmu_doc_to_visual
+doc_to_text: !function utils.mmmu_doc_to_text
+doc_to_target: "answer"
+# The return value of process_results will be used by metrics
+process_results: !function utils.mmmu_process_results
+
+metric_list:
+  - metric: mmmu_acc
+    aggregation: !function utils.mmmu_aggregate_results
+    higher_is_better: true
+
+include: _default_template_yaml
diff --git a/lmms_eval/tasks/mmmu_for_testing/testing_combined_milestone.yaml b/lmms_eval/tasks/mmmu_for_testing/testing_combined_milestone.yaml
@@ -0,0 +1,17 @@
+dataset_path: lmms-lab/MMMU_for_testing
+dataset_name: "combined_milestone"
+task: "mmmu_testing_combined_milestone"
+test_split: test
+output_type: generate_until
+doc_to_visual: !function utils.mmmu_doc_to_visual
+doc_to_text: !function utils.mmmu_doc_to_text
+doc_to_target: "answer"
+# The return value of process_results will be used by metrics
+process_results: !function utils.mmmu_process_results
+
+metric_list:
+  - metric: mmmu_acc
+    aggregation: !function utils.mmmu_aggregate_results
+    higher_is_better: true
+
+include: _default_template_yaml