diff --git a/lmms_eval/models/llava_vid.py b/lmms_eval/models/llava_vid.py
index 1da10083..ebffb6dc 100755
--- a/lmms_eval/models/llava_vid.py
+++ b/lmms_eval/models/llava_vid.py
@@ -32,6 +32,7 @@
 try:
     from llavavid.model.language_model.llava_qwen import LlavaQwenConfig
 
+    AutoConfig.register("llava_qwen", LlavaQwenConfig)
 except:
     eval_logger.debug("No llava vid qwen yet for llavavid")
 
@@ -346,7 +347,7 @@ def generate_until(self, requests) -> List[str]:
             if "max_new_tokens" not in gen_kwargs:
                 gen_kwargs["max_new_tokens"] = 1024
             if "temperature" not in gen_kwargs:
-                gen_kwargs["temperature"] = 0
+                gen_kwargs["temperature"] = 0.2
             if "top_p" not in gen_kwargs:
                 gen_kwargs["top_p"] = None
             if "num_beams" not in gen_kwargs:
diff --git a/lmms_eval/tasks/cvrr/cvrr_fine_grained_action_understanding.yaml b/lmms_eval/tasks/cvrr/cvrr_fine_grained_action_understanding.yaml
new file mode 100755
index 00000000..a1cc069c
--- /dev/null
+++ b/lmms_eval/tasks/cvrr/cvrr_fine_grained_action_understanding.yaml
@@ -0,0 +1,14 @@
+dataset_name: "fine_grained_action_understanding"
+task: "cvrr_fine_grained_action_understanding"
+test_split: test
+output_type: generate_until
+doc_to_visual: !function utils.cvrr_doc_to_visual
+doc_to_text: !function utils.cvrr_doc_to_text
+doc_to_target: !function utils.cvrr_doc_to_answer
+process_results: !function utils.cvrr_process_results
+metric_list:
+  - metric: submission
+    aggregation: !function utils.cvrr_aggregate_results_dim2
+    higher_is_better: true
+include: _default_template_yaml
+
diff --git a/lmms_eval/tasks/cvrr/cvrr_interpretation_of_social_context.yaml b/lmms_eval/tasks/cvrr/cvrr_interpretation_of_social_context.yaml
new file mode 100755
index 00000000..cf8cbe7f
--- /dev/null
+++ b/lmms_eval/tasks/cvrr/cvrr_interpretation_of_social_context.yaml
@@ -0,0 +1,14 @@
+dataset_name: "interpretation_of_social_context"
+task: "cvrr_interpretation_of_social_context"
+test_split: test
+output_type: generate_until
+doc_to_visual: !function utils.cvrr_doc_to_visual
+doc_to_text: !function utils.cvrr_doc_to_text
+doc_to_target: !function utils.cvrr_doc_to_answer
+process_results: !function utils.cvrr_process_results
+metric_list:
+  - metric: submission
+    aggregation: !function utils.cvrr_aggregate_results_dim3
+    higher_is_better: true
+include: _default_template_yaml
+
diff --git a/lmms_eval/tasks/cvrr/cvrr_interpretation_of_visual_context.yaml b/lmms_eval/tasks/cvrr/cvrr_interpretation_of_visual_context.yaml
new file mode 100755
index 00000000..5cc10686
--- /dev/null
+++ b/lmms_eval/tasks/cvrr/cvrr_interpretation_of_visual_context.yaml
@@ -0,0 +1,14 @@
+dataset_name: "interpretation_of_visual_context"
+task: "cvrr_interpretation_of_visual_context"
+test_split: test
+output_type: generate_until
+doc_to_visual: !function utils.cvrr_doc_to_visual
+doc_to_text: !function utils.cvrr_doc_to_text
+doc_to_target: !function utils.cvrr_doc_to_answer
+process_results: !function utils.cvrr_process_results
+metric_list:
+  - metric: submission
+    aggregation: !function utils.cvrr_aggregate_results_dim4
+    higher_is_better: true
+include: _default_template_yaml
+
diff --git a/lmms_eval/tasks/cvrr/cvrr_multiple_actions_in_a_single_video.yaml b/lmms_eval/tasks/cvrr/cvrr_multiple_actions_in_a_single_video.yaml
new file mode 100755
index 00000000..12f861a3
--- /dev/null
+++ b/lmms_eval/tasks/cvrr/cvrr_multiple_actions_in_a_single_video.yaml
@@ -0,0 +1,14 @@
+dataset_name: "multiple_actions_in_a_single_video"
+task: "cvrr_multiple_actions_in_a_single_video"
+test_split: test
+output_type: generate_until
+doc_to_visual: !function utils.cvrr_doc_to_visual
+doc_to_text: !function utils.cvrr_doc_to_text
+doc_to_target: !function utils.cvrr_doc_to_answer
+process_results: !function utils.cvrr_process_results
+metric_list:
+  - metric: submission
+    aggregation: !function utils.cvrr_aggregate_results_dim5
+    higher_is_better: true
+include: _default_template_yaml
+
diff --git a/lmms_eval/tasks/cvrr/cvrr_non_existent_actions_with_existent_scene_depictions.yaml b/lmms_eval/tasks/cvrr/cvrr_non_existent_actions_with_existent_scene_depictions.yaml
new file mode 100755
index 00000000..a2da9410
--- /dev/null
+++ b/lmms_eval/tasks/cvrr/cvrr_non_existent_actions_with_existent_scene_depictions.yaml
@@ -0,0 +1,14 @@
+dataset_name: "non_existent_actions_with_existent_scene_depictions"
+task: "cvrr_non_existent_actions_with_existent_scene_depictions"
+test_split: test
+output_type: generate_until
+doc_to_visual: !function utils.cvrr_doc_to_visual
+doc_to_text: !function utils.cvrr_doc_to_text
+doc_to_target: !function utils.cvrr_doc_to_answer
+process_results: !function utils.cvrr_process_results
+metric_list:
+  - metric: submission
+    aggregation: !function utils.cvrr_aggregate_results_dim6
+    higher_is_better: true
+include: _default_template_yaml
+
diff --git a/lmms_eval/tasks/cvrr/cvrr_non_existent_actions_with_non_existent_scene_depictions.yaml b/lmms_eval/tasks/cvrr/cvrr_non_existent_actions_with_non_existent_scene_depictions.yaml
new file mode 100755
index 00000000..b77dad4e
--- /dev/null
+++ b/lmms_eval/tasks/cvrr/cvrr_non_existent_actions_with_non_existent_scene_depictions.yaml
@@ -0,0 +1,14 @@
+dataset_name: "non_existent_actions_with_non_existent_scene_depictions"
+task: "cvrr_non_existent_actions_with_non_existent_scene_depictions"
+test_split: test
+output_type: generate_until
+doc_to_visual: !function utils.cvrr_doc_to_visual
+doc_to_text: !function utils.cvrr_doc_to_text
+doc_to_target: !function utils.cvrr_doc_to_answer
+process_results: !function utils.cvrr_process_results
+metric_list:
+  - metric: submission
+    aggregation: !function utils.cvrr_aggregate_results_dim7
+    higher_is_better: true
+include: _default_template_yaml
+
diff --git a/lmms_eval/tasks/cvrr/cvrr_object_instance_count.yaml b/lmms_eval/tasks/cvrr/cvrr_object_instance_count.yaml
index fc85014a..d469f619 100755
--- a/lmms_eval/tasks/cvrr/cvrr_object_instance_count.yaml
+++ b/lmms_eval/tasks/cvrr/cvrr_object_instance_count.yaml
@@ -8,7 +8,7 @@ doc_to_target: !function utils.cvrr_doc_to_answer
 process_results: !function utils.cvrr_process_results
 metric_list:
   - metric: submission
-    aggregation: !function utils.cvrr_aggregate_results
+    aggregation: !function utils.cvrr_aggregate_results_dim1
     higher_is_better: true
 include: _default_template_yaml
 
diff --git a/lmms_eval/tasks/cvrr/cvrr_partial_actions.yaml b/lmms_eval/tasks/cvrr/cvrr_partial_actions.yaml
new file mode 100755
index 00000000..6379225e
--- /dev/null
+++ b/lmms_eval/tasks/cvrr/cvrr_partial_actions.yaml
@@ -0,0 +1,14 @@
+dataset_name: "partial_actions"
+task: "cvrr_partial_actions"
+test_split: test
+output_type: generate_until
+doc_to_visual: !function utils.cvrr_doc_to_visual
+doc_to_text: !function utils.cvrr_doc_to_text
+doc_to_target: !function utils.cvrr_doc_to_answer
+process_results: !function utils.cvrr_process_results
+metric_list:
+  - metric: submission
+    aggregation: !function utils.cvrr_aggregate_results_dim8
+    higher_is_better: true
+include: _default_template_yaml
+
diff --git a/lmms_eval/tasks/cvrr/cvrr_time_order_understanding.yaml b/lmms_eval/tasks/cvrr/cvrr_time_order_understanding.yaml
new file mode 100755
index 00000000..7d330fcd
--- /dev/null
+++ b/lmms_eval/tasks/cvrr/cvrr_time_order_understanding.yaml
@@ -0,0 +1,14 @@
+dataset_name: "time_order_understanding"
+task: "cvrr_time_order_understanding"
+test_split: test
+output_type: generate_until
+doc_to_visual: !function utils.cvrr_doc_to_visual
+doc_to_text: !function utils.cvrr_doc_to_text
+doc_to_target: !function utils.cvrr_doc_to_answer
+process_results: !function utils.cvrr_process_results
+metric_list:
+  - metric: submission
+    aggregation: !function utils.cvrr_aggregate_results_dim9
+    higher_is_better: true
+include: _default_template_yaml
+
diff --git a/lmms_eval/tasks/cvrr/cvrr_understanding_emotional_context.yaml b/lmms_eval/tasks/cvrr/cvrr_understanding_emotional_context.yaml
new file mode 100755
index 00000000..f5128acb
--- /dev/null
+++ b/lmms_eval/tasks/cvrr/cvrr_understanding_emotional_context.yaml
@@ -0,0 +1,14 @@
+dataset_name: "understanding_emotional_context"
+task: "cvrr_understanding_emotional_context"
+test_split: test
+output_type: generate_until
+doc_to_visual: !function utils.cvrr_doc_to_visual
+doc_to_text: !function utils.cvrr_doc_to_text
+doc_to_target: !function utils.cvrr_doc_to_answer
+process_results: !function utils.cvrr_process_results
+metric_list:
+  - metric: submission
+    aggregation: !function utils.cvrr_aggregate_results_dim10
+    higher_is_better: true
+include: _default_template_yaml
+
diff --git a/lmms_eval/tasks/cvrr/cvrr_unusual_and_physically_anomalous_activities.yaml b/lmms_eval/tasks/cvrr/cvrr_unusual_and_physically_anomalous_activities.yaml
new file mode 100755
index 00000000..92555a29
--- /dev/null
+++ b/lmms_eval/tasks/cvrr/cvrr_unusual_and_physically_anomalous_activities.yaml
@@ -0,0 +1,14 @@
+dataset_name: "unusual_and_physically_anomalous_activities"
+task: "cvrr_unusual_and_physically_anomalous_activities"
+test_split: test
+output_type: generate_until
+doc_to_visual: !function utils.cvrr_doc_to_visual
+doc_to_text: !function utils.cvrr_doc_to_text
+doc_to_target: !function utils.cvrr_doc_to_answer
+process_results: !function utils.cvrr_process_results
+metric_list:
+  - metric: submission
+    aggregation: !function utils.cvrr_aggregate_results_dim11
+    higher_is_better: true
+include: _default_template_yaml
+
diff --git a/lmms_eval/tasks/cvrr/utils.py b/lmms_eval/tasks/cvrr/utils.py
index 0e300d4f..01dfc54e 100755
--- a/lmms_eval/tasks/cvrr/utils.py
+++ b/lmms_eval/tasks/cvrr/utils.py
@@ -41,25 +41,47 @@
     "Content-Type": "application/json",
 }
 
-# Unzip all the zip files to HF HOME cache dir
-HF_HOME = os.environ["HF_HOME"]
-cache_dir = config["dataset_kwargs"]["cache_dir"]
-cache_dir = os.path.join(HF_HOME, cache_dir)
-cache_dir = os.path.join(cache_dir, "CVRR-ES/continuity_and_object_instance_count")
-
 # Pass in video path here
 # Can only work correctly with video llm
 def cvrr_doc_to_visual(doc):
-    video_path = doc["VideoID"]
+
+    # Unzip all the zip files to HF HOME cache dir
+    HF_HOME = os.environ["HF_HOME"]
+    cache_dir = config["dataset_kwargs"]["cache_dir"]
+    cache_dir = os.path.join(HF_HOME, cache_dir)
+    cache_dir = os.path.join(cache_dir, "CVRR-ES")
     if doc["DimensionName"] == "Continuity and Object Instance Count":
+        cache_dir = os.path.join(cache_dir, "continuity_and_object_instance_count")
+    elif doc["DimensionName"] == "Fine-grained action understanding":
+        cache_dir = os.path.join(cache_dir, "fine_grained_action_understanding")
+    elif doc["DimensionName"] == "Interpretation of social context":
+        cache_dir = os.path.join(cache_dir, "interpretation_of_social_context")
+    elif doc["DimensionName"] == "Interpretation of visual context":
+        cache_dir = os.path.join(cache_dir, "interpretation_of_visual_context")
+    elif doc["DimensionName"] == "Multiple actions in a single video":
+        cache_dir = os.path.join(cache_dir, "multiple_actions_in_a_single_video")
+    elif doc["DimensionName"] == "Non-existent actions with existent scene depictions":
+        cache_dir = os.path.join(cache_dir, "non_existent_actions_with_existent_scene_depictions")
+    elif doc["DimensionName"] == "Non-existent actions with non-existent scene depictions":
+        cache_dir = os.path.join(cache_dir, "non_existent_actions_with_non_existent_scene_depictions")
+    elif doc["DimensionName"] == "Partial actions":
+        cache_dir = os.path.join(cache_dir, "partial_actions")
+    elif doc["DimensionName"] == "Time order understanding":
+        cache_dir = os.path.join(cache_dir, "time_order_understanding")
+    elif doc["DimensionName"] == "Understanding of emotional context":
+        cache_dir = os.path.join(cache_dir, "understanding_emotional_context")
+    elif doc["DimensionName"] == "Unusual and Physically Anomalous activities":
+        cache_dir = os.path.join(cache_dir, "unusual_and_physically_anomalous_activities")
+
+    video_path = doc["VideoID"]
+    video_path = os.path.join(cache_dir, video_path)
+    if os.path.exists(video_path):
+        video_path = video_path
+    else:
+        sys.exit(f"video path:{video_path} does not exist, please check")
-        video_path = os.path.join(cache_dir, video_path)
-        if os.path.exists(video_path):
-            video_path = video_path
-        else:
-            sys.exit(f"video path:{video_path} does not exist, please check")
 
     return [video_path]
 
 
@@ -109,34 +131,31 @@ def cvrr_aggregate_submissions(results, args, task):
     return path
 
 
-def get_eval(question, answer, pred, task, max_tokens: int, retries: int = 5):
+def get_eval(question, answer, pred, max_tokens: int, retries: int = 5):
     global headers
-    if task == "continuity_and_object_instance_count":
-        messages = [
-            {
-                "role": "system",
-                "content": "You are an intelligent chatbot designed for evaluating the correctness of AI assistant predictions for question-answer pairs. "
-                "Your task is to compare the predicted answer with the ground-truth answer and determine if the predicted answer is correct or not. Here's how you can accomplish the task:"
-                "------"
-                "##INSTRUCTIONS: "
-                "- Focus on the correctness and accuracy of the predicted answer with the ground-truth.\n"
-                "- Consider predictions with less specific details as correct evaluation, unless such details are explicitly asked in the question.\n",
-            },
-            {
-                "role": "user",
-                "content": "Please evaluate the following video-based question-answer pair:\n\n"
-                f"Question: {question}\n"
-                f"Ground truth correct Answer: {answer}\n"
-                f"Predicted Answer: {pred}\n\n"
-                "Provide your evaluation as a correct/incorrect prediction along with the score where the score is an integer value between 0 (fully wrong) and 5 (fully correct). The middle score provides the percentage of correctness."
-                "Please generate the response in the form of a Python dictionary string with keys 'pred', 'score' and 'reason', where value of 'pred' is a string of 'correct' or 'incorrect', value of 'score' is in INTEGER, not STRING and value of 'reason' should provide the reason behind the decision."
-                "Only provide the Python dictionary string."
- "For example, your response should look like this: {'pred': 'correct', 'score': 4.8, 'reason': reason}.", - }, - ] - - print(messages) + messages = [ + { + "role": "system", + "content": "You are an intelligent chatbot designed for evaluating the correctness of AI assistant predictions for question-answer pairs. " + "Your task is to compare the predicted answer with the ground-truth answer and determine if the predicted answer is correct or not. Here's how you can accomplish the task:" + "------" + "##INSTRUCTIONS: " + "- Focus on the correctness and accuracy of the predicted answer with the ground-truth.\n" + "- Consider predictions with less specific details as correct evaluation, unless such details are explicitly asked in the question.\n", + }, + { + "role": "user", + "content": "Please evaluate the following video-based question-answer pair:\n\n" + f"Question: {question}\n" + f"Ground truth correct Answer: {answer}\n" + f"Predicted Answer: {pred}\n\n" + "Provide your evaluation as a correct/incorrect prediction along with the score where the score is an integer value between 0 (fully wrong) and 5 (fully correct). The middle score provides the percentage of correctness." + "Please generate the response in the form of a Python dictionary string with keys 'pred', 'score' and 'reason', where value of 'pred' is a string of 'correct' or 'incorrect', value of 'score' is in INTEGER, not STRING and value of 'reason' should provide the reason behind the decision." + "Only provide the Python dictionary string." + 'For example, your response should look like this: {"pred": "correct", "score": 4.8, "reason": reason}.', + }, + ] payload = { "model": GPT_EVAL_MODEL_NAME, @@ -179,6 +198,7 @@ def get_eval(question, answer, pred, task, max_tokens: int, retries: int = 5): def parse_score(review): try: # Convert the string representation of a dictionary to an actual dictionary + # Escape single quotes inside the dictionary string to prevent parsing errors review_dict = ast.literal_eval(review) correctness = review_dict.get("pred", "incorrect") score = review_dict.get("score", 0) @@ -203,38 +223,28 @@ def cvrr_print_scores(eval_file_path, args): score_file_name = "scores.json" path = file_utils.generate_submission_file(score_file_name, args) - # Compute average score and final accuracy - # Initialize counters - yes_count = 0 - no_count = 0 + # Compute average score total_score = 0 - # Iterate over the results to count correctness and sum scores + # Iterate over the results to sum scores for result_list in evaluated_list: eval_dict = result_list[0] total_score += eval_dict["score"] - if eval_dict["Correctness"] == "yes": - yes_count += 1 - else: - no_count += 1 - # Calculate accuracy and average score - accuracy = yes_count / (yes_count + no_count) if (yes_count + no_count) > 0 else 0 average_score = total_score / len(evaluated_list) if evaluated_list else 0 # Print the results - print(f"Accuracy: {accuracy}") print(f"Average Score: {average_score}") # Write the processed data to the scores file with open(path, "w") as f: - json.dump({"accuracy": accuracy, "average_score": average_score}, f, indent=4) + json.dump({"average_score": average_score}, f, indent=4) eval_logger.info(f"Score file saved to {path}") -def cvrr_gpt_eval(result_file_path, args, task): +def cvrr_gpt_eval(result_file_path, args): """ Process the result file containing predictions, score them using GPT, and save the results with added scores and correctness fields to a new file. 
@@ -264,7 +274,7 @@ def cvrr_gpt_eval(result_file_path, args, task):
             pred = data_dict.get("pred", "")
 
             # Assume get_eval returns a review and the model name, and parse_score parses this review
-            review, model_name = get_eval(question, answer, pred, task, 64)
+            review, model_name = get_eval(question, answer, pred, 512)
             correctness, score, reason = parse_score(review)
         except Exception as e:
             eval_logger.error(f"Error for Video Name: {data_dict.get('VideoID', 'Unknown')}: {e}")
@@ -295,7 +305,67 @@
     return eval_file_path
 
 
-def cvrr_aggregate_results(results, args):
+def cvrr_aggregate_results_dim1(results, args):
     result_file_path = cvrr_aggregate_submissions(results, args, "continuity_and_object_instance_count")
-    eval_file_path = cvrr_gpt_eval(result_file_path, args, "continuity_and_object_instance_count")
+    eval_file_path = cvrr_gpt_eval(result_file_path, args)
+    cvrr_print_scores(eval_file_path, args)
+
+
+def cvrr_aggregate_results_dim2(results, args):
+    result_file_path = cvrr_aggregate_submissions(results, args, "fine_grained_action_understanding")
+    eval_file_path = cvrr_gpt_eval(result_file_path, args)
+    cvrr_print_scores(eval_file_path, args)
+
+
+def cvrr_aggregate_results_dim3(results, args):
+    result_file_path = cvrr_aggregate_submissions(results, args, "interpretation_of_social_context")
+    eval_file_path = cvrr_gpt_eval(result_file_path, args)
+    cvrr_print_scores(eval_file_path, args)
+
+
+def cvrr_aggregate_results_dim4(results, args):
+    result_file_path = cvrr_aggregate_submissions(results, args, "interpretation_of_visual_context")
+    eval_file_path = cvrr_gpt_eval(result_file_path, args)
+    cvrr_print_scores(eval_file_path, args)
+
+
+def cvrr_aggregate_results_dim5(results, args):
+    result_file_path = cvrr_aggregate_submissions(results, args, "multiple_actions_in_a_single_video")
+    eval_file_path = cvrr_gpt_eval(result_file_path, args)
+    cvrr_print_scores(eval_file_path, args)
+
+
+def cvrr_aggregate_results_dim6(results, args):
+    result_file_path = cvrr_aggregate_submissions(results, args, "non_existent_actions_with_existent_scene_depictions")
+    eval_file_path = cvrr_gpt_eval(result_file_path, args)
+    cvrr_print_scores(eval_file_path, args)
+
+
+def cvrr_aggregate_results_dim7(results, args):
+    result_file_path = cvrr_aggregate_submissions(results, args, "non_existent_actions_with_non_existent_scene_depictions")
+    eval_file_path = cvrr_gpt_eval(result_file_path, args)
+    cvrr_print_scores(eval_file_path, args)
+
+
+def cvrr_aggregate_results_dim8(results, args):
+    result_file_path = cvrr_aggregate_submissions(results, args, "partial_actions")
+    eval_file_path = cvrr_gpt_eval(result_file_path, args)
+    cvrr_print_scores(eval_file_path, args)
+
+
+def cvrr_aggregate_results_dim9(results, args):
+    result_file_path = cvrr_aggregate_submissions(results, args, "time_order_understanding")
+    eval_file_path = cvrr_gpt_eval(result_file_path, args)
+    cvrr_print_scores(eval_file_path, args)
+
+
+def cvrr_aggregate_results_dim10(results, args):
+    result_file_path = cvrr_aggregate_submissions(results, args, "understanding_emotional_context")
+    eval_file_path = cvrr_gpt_eval(result_file_path, args)
+    cvrr_print_scores(eval_file_path, args)
+
+
+def cvrr_aggregate_results_dim11(results, args):
+    result_file_path = cvrr_aggregate_submissions(results, args, "unusual_and_physically_anomalous_activities")
+    eval_file_path = cvrr_gpt_eval(result_file_path, args)
     cvrr_print_scores(eval_file_path, args)
diff --git a/tools/make_activitynetqa.ipynb b/tools/make_activitynetqa.ipynb
deleted file mode 100755
index efbef00e..00000000
--- a/tools/make_activitynetqa.ipynb
+++ /dev/null
@@ -1,120 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# This notebook will guide you to make correct format of Huggingface dataset, in proper parquet format and visualizable in Huggingface dataset hub.\n",
-    "# We will take the example of the dataset \"Otter-AI/MMVet\" and convert it to the proper format."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from datasets import load_dataset\n",
-    "\n",
-    "data_path = \"Otter-AI/MMVet\"\n",
-    "df = load_dataset(data_path, split=\"test\").to_pandas()\n",
-    "df.head()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from datasets import Dataset, Features, Value, Image\n",
-    "import pandas as pd\n",
-    "\n",
-    "# Define the features for the dataset\n",
-    "features = Features(\n",
-    "    {\n",
-    "        \"question_id\": Value(dtype=\"string\"),\n",
-    "        \"image\": Image(),\n",
-    "        \"question\": Value(dtype=\"string\"),\n",
-    "        \"answer\": Value(dtype=\"string\"),\n",
-    "        \"image_source\": Value(dtype=\"string\"),\n",
-    "        \"capability\": Value(dtype=\"string\"),\n",
-    "        # Add other fields as necessary\n",
-    "    }\n",
-    ")\n",
-    "\n",
-    "df_items = {\n",
-    "    \"question_id\": [],\n",
-    "    \"image\": [],\n",
-    "    \"question\": [],\n",
-    "    \"answer\": [],\n",
-    "    \"image_source\": [],\n",
-    "    \"capability\": [],\n",
-    "}\n",
-    "\n",
-    "for idx, row in df.iterrows():\n",
-    "    df_items[\"question_id\"].append(str(row[\"id\"]))\n",
-    "    image = {\"bytes\": row[\"images\"][0][\"bytes\"], \"path\": \"\"}\n",
-    "    df_items[\"image\"].append(image)\n",
-    "    df_items[\"question\"].append(str(row[\"instruction\"]))\n",
-    "    df_items[\"answer\"].append(str(row[\"answer\"]))\n",
-    "    df_items[\"image_source\"].append(str(row[\"image_source\"]))\n",
-    "    df_items[\"capability\"].append(\",\".join(list(row[\"capability\"])))\n",
-    "    # Add other fields as necessary\n",
-    "\n",
-    "df_items = pd.DataFrame(df_items)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "df_items.head()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "dataset = Dataset.from_pandas(df_items, features=features)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "hub_dataset_path = \"lmms-lab/MMVet\"\n",
-    "dataset.push_to_hub(repo_id=hub_dataset_path, split=\"test\")"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "lmms-eval",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.9.18"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
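
Note on the aggregation plumbing in `utils.py`: the eleven `cvrr_aggregate_results_dimN` functions added above differ only in the sub-folder name passed to `cvrr_aggregate_submissions`. A minimal sketch of how they could be generated from one helper is shown below; this is not part of the patch, it assumes the code sits in the same `lmms_eval/tasks/cvrr/utils.py` module next to the helpers it calls, and the `_DIMENSIONS` / `_make_aggregator` names are hypothetical. Whether the YAML `!function` resolver accepts dynamically created module attributes would need to be verified before adopting it.

```python
# Hypothetical refactor sketch -- NOT part of the patch above.
# Assumes it lives in lmms_eval/tasks/cvrr/utils.py alongside
# cvrr_aggregate_submissions, cvrr_gpt_eval and cvrr_print_scores.
_DIMENSIONS = [
    "continuity_and_object_instance_count",
    "fine_grained_action_understanding",
    "interpretation_of_social_context",
    "interpretation_of_visual_context",
    "multiple_actions_in_a_single_video",
    "non_existent_actions_with_existent_scene_depictions",
    "non_existent_actions_with_non_existent_scene_depictions",
    "partial_actions",
    "time_order_understanding",
    "understanding_emotional_context",
    "unusual_and_physically_anomalous_activities",
]


def _make_aggregator(dimension):
    # Each generated function keeps the (results, args) signature that the
    # YAML hook `aggregation: !function utils.cvrr_aggregate_results_dimN` expects.
    def _aggregate(results, args):
        result_file_path = cvrr_aggregate_submissions(results, args, dimension)
        eval_file_path = cvrr_gpt_eval(result_file_path, args)
        cvrr_print_scores(eval_file_path, args)

    return _aggregate


# Expose cvrr_aggregate_results_dim1 ... cvrr_aggregate_results_dim11 as module attributes.
for _i, _dim in enumerate(_DIMENSIONS, start=1):
    globals()[f"cvrr_aggregate_results_dim{_i}"] = _make_aggregator(_dim)
```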