
Commit

Merge pull request #1 from EvolvingLMMs-Lab/pufanyi/pip
Luodian authored Mar 8, 2024
2 parents 191ed64 + 1a01ebd commit 6670a6b
Showing 13 changed files with 35 additions and 46 deletions.
1 change: 0 additions & 1 deletion lmms_eval/api/metrics.py
@@ -166,7 +166,6 @@ def perplexity_fn(items):  # This is a passthrough function
    return items


-
def levenshtein_distance(s1, s2):
    if len(s1) > len(s2):
        s1, s2 = s2, s1
1 change: 0 additions & 1 deletion lmms_eval/api/model.py
@@ -54,7 +54,6 @@ def loglikelihood(self, requests: List[Instance]) -> List[Tuple[float, bool]]:
        """
        pass

-
    # TODO: Add an optional max length
    @abc.abstractmethod
    def generate_until(self, requests) -> List[str]:
4 changes: 3 additions & 1 deletion lmms_eval/api/samplers.py
@@ -37,7 +37,9 @@ def get_context(self, doc, num_fewshot):
                + (
                    str(self.doc_to_target(doc)[0])
                    if type(self.doc_to_target(doc)) is list
-                   else self.doc_to_target(doc) if (self.config.doc_to_choice is None or type(self.doc_to_target(doc)) is str) else str(self.doc_to_choice(doc)[self.doc_to_target(doc)])
+                   else self.doc_to_target(doc)
+                   if (self.config.doc_to_choice is None or type(self.doc_to_target(doc)) is str)
+                   else str(self.doc_to_choice(doc)[self.doc_to_target(doc)])
                )
                for doc in selected_docs
            ]
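The reflowed expression above is a chained conditional over the task's `doc_to_target` / `doc_to_choice` callables. For readers untangling it, here is the same target-selection logic as an explicit standalone function; this is a hypothetical helper for illustration, not the repository's code:

```python
def resolve_target(doc, doc_to_target, doc_to_choice=None):
    """Sketch of the chained conditional in samplers.py."""
    target = doc_to_target(doc)
    if isinstance(target, list):
        # A list target: take its first entry.
        return str(target[0])
    if doc_to_choice is None or isinstance(target, str):
        # No choice mapping, or the target is already a string.
        return target
    # Otherwise the target is an index into the task's choice list.
    return str(doc_to_choice(doc)[target])
```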
2 changes: 1 addition & 1 deletion lmms_eval/api/task.py
@@ -687,7 +687,7 @@ def download(self, dataset_kwargs=None) -> None:
            download_mode=datasets.DownloadMode.REUSE_DATASET_IF_EXISTS,
            **dataset_kwargs if dataset_kwargs is not None else {},
        )
-            self.dataset_no_image = datasets.load_dataset(
+        self.dataset_no_image = datasets.load_dataset(
            path=self.DATASET_PATH,
            name=self.DATASET_NAME,
            download_mode=datasets.DownloadMode.REUSE_DATASET_IF_EXISTS,
32 changes: 19 additions & 13 deletions lmms_eval/models/__init__.py
@@ -1,15 +1,21 @@
import os

-try:
-    # enabling faster model download
-    from .llava import Llava
-    from .qwen_vl import Qwen_VL
-    from .fuyu import Fuyu
-    from .gpt4v import GPT4V
-    from .instructblip import InstructBLIP
-    from .minicpm_v import MiniCPM_V
-    import hf_transfer
-
-    os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
-except ImportError:
-    pass
+AVAILABLE_MODELS = {
+    "llava": "Llava",
+    "qwen_vl": "Qwen_VL",
+    "fuyu": "Fuyu",
+    "gpt4v": "GPT4V",
+    "instructblip": "InstructBLIP",
+    "minicpm_v": "MiniCPM_V",
+}
+
+for model_name, model_class in AVAILABLE_MODELS.items():
+    try:
+        exec(f"from .{model_name} import {model_class}")
+    except ImportError:
+        pass
+
+
+import hf_transfer
+
+os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
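The new registry drives each import through `exec`. A sketch of the same lazy registration using `importlib` instead, shown only as an alternative for illustration, not what the commit ships:

```python
# Hypothetical variant of the loop in lmms_eval/models/__init__.py.
import importlib

AVAILABLE_MODELS = {
    "llava": "Llava",
    "qwen_vl": "Qwen_VL",
}

for model_name, model_class in AVAILABLE_MODELS.items():
    try:
        # Relative import against the package, mirroring `from .llava import Llava`.
        module = importlib.import_module(f".{model_name}", package="lmms_eval.models")
        globals()[model_class] = getattr(module, model_class)
    except ImportError:
        # Same behavior as the exec loop: skip models with missing dependencies.
        pass
```

One design note: per-model imports stay guarded, so a missing backend only disables that model, but the trailing `import hf_transfer` is now unconditional and the old try/except around it is gone.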
2 changes: 0 additions & 2 deletions lmms_eval/models/fuyu.py
@@ -253,8 +253,6 @@ def loglikelihood(self, requests: List[Instance]) -> List[Tuple[float, bool]]:
        pbar.close()
        return res

-
-
    def tok_encode(self, string: str, left_truncate_len=None, add_special_tokens=None) -> List[int]:
        """ """
        add_special_tokens = False if add_special_tokens is None else add_special_tokens
2 changes: 0 additions & 2 deletions lmms_eval/models/gpt4v.py
@@ -127,5 +127,3 @@ def generate_until(self, requests) -> List[str]:
    def loglikelihood(self, requests: List[Instance]) -> List[Tuple[float, bool]]:
        # TODO
        assert False, "GPT4V not support"
-
-
10 changes: 3 additions & 7 deletions lmms_eval/models/instructblip.py
@@ -44,26 +44,22 @@ def __init__(
            self._device = torch.device(f"cuda:{accelerator.local_process_index}")
        else:
            self._device = device
-        self._model = InstructBlipForConditionalGeneration.from_pretrained(pretrained,device_map=self._device)
+        self._model = InstructBlipForConditionalGeneration.from_pretrained(pretrained, device_map=self._device)
        self._image_processor = InstructBlipProcessor.from_pretrained(pretrained)
        self._tokenizer = self._image_processor.tokenizer
        self._config = self._model.config
        self.model.eval()
        self.model.tie_weights()
        self.batch_size_per_gpu = int(batch_size)
        if accelerator.num_processes > 1:
-            assert accelerator.distributed_type in [
-                DistributedType.FSDP,
-                DistributedType.MULTI_GPU,
-                DistributedType.DEEPSPEED
-            ], "Unsupported distributed type provided. Only DDP and FSDP are supported."
+            assert accelerator.distributed_type in [DistributedType.FSDP, DistributedType.MULTI_GPU, DistributedType.DEEPSPEED], "Unsupported distributed type provided. Only DDP and FSDP are supported."
            # If you want to use DistributedType.DEEPSPEED, you have to run accelerate config before using the model
            # Also, you have to select zero stage 0 (equivalent to DDP) in order to make the prepare model works
            # I tried to set different parameters in the kwargs to let default zero 2 stage works, but it didn't work.
            if accelerator.distributed_type == DistributedType.DEEPSPEED:
                kwargs = {
                    "train_micro_batch_size_per_gpu": self.batch_size_per_gpu,
-                    "train_batch_size" : self.batch_size_per_gpu * accelerator.num_processes,
+                    "train_batch_size": self.batch_size_per_gpu * accelerator.num_processes,
                }
                AcceleratorState().deepspeed_plugin.deepspeed_config_process(must_match=True, **kwargs)
                eval_logger.info("Detected that you are using DistributedType.DEEPSPEED. Make sure you run `accelerate config` and set zero stage to 0")
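This DeepSpeed branch (repeated in llava.py and minicpm_v.py below) passes Accelerate the two batch-size keys that DeepSpeed validates against its config. A minimal sketch of the invariant being satisfied, with assumed example values rather than anything from the repository:

```python
# Assumed example values, not from the repository.
batch_size_per_gpu = 4           # "train_micro_batch_size_per_gpu"
num_processes = 8                # accelerator.num_processes
gradient_accumulation_steps = 1  # zero stage 0 behaves like DDP

# DeepSpeed requires: total = micro-batch * grad-accum * world size.
train_batch_size = batch_size_per_gpu * gradient_accumulation_steps * num_processes
print(train_batch_size)  # 32, the value deepspeed_config_process(must_match=True) checks
```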
8 changes: 2 additions & 6 deletions lmms_eval/models/llava.py
@@ -78,18 +78,14 @@ def __init__(
        self.truncate_context = truncate_context
        # assert self.batch_size_per_gpu == 1, "Llava currently does not support batched generation. See https://github.com/haotian-liu/LLaVA/issues/754. HF Llava also has this issue."
        if accelerator.num_processes > 1:
-            assert accelerator.distributed_type in [
-                DistributedType.FSDP,
-                DistributedType.MULTI_GPU,
-                DistributedType.DEEPSPEED
-            ], "Unsupported distributed type provided. Only DDP and FSDP are supported."
+            assert accelerator.distributed_type in [DistributedType.FSDP, DistributedType.MULTI_GPU, DistributedType.DEEPSPEED], "Unsupported distributed type provided. Only DDP and FSDP are supported."
            # If you want to use DistributedType.DEEPSPEED, you have to run accelerate config before using the model
            # Also, you have to select zero stage 0 (equivalent to DDP) in order to make the prepare model works
            # I tried to set different parameters in the kwargs to let default zero 2 stage works, but it didn't work.
            if accelerator.distributed_type == DistributedType.DEEPSPEED:
                kwargs = {
                    "train_micro_batch_size_per_gpu": self.batch_size_per_gpu,
-                    "train_batch_size" : self.batch_size_per_gpu * accelerator.num_processes,
+                    "train_batch_size": self.batch_size_per_gpu * accelerator.num_processes,
                }
                AcceleratorState().deepspeed_plugin.deepspeed_config_process(must_match=True, **kwargs)
                eval_logger.info("Detected that you are using DistributedType.DEEPSPEED. Make sure you run `accelerate config` and set zero stage to 0")
10 changes: 3 additions & 7 deletions lmms_eval/models/minicpm_v.py
@@ -49,18 +49,14 @@ def __init__(
        self.model.tie_weights()
        self.batch_size_per_gpu = int(batch_size)
        if accelerator.num_processes > 1:
-            assert accelerator.distributed_type in [
-                DistributedType.FSDP,
-                DistributedType.MULTI_GPU,
-                DistributedType.DEEPSPEED
-            ], "Unsupported distributed type provided. Only DDP and FSDP are supported."
+            assert accelerator.distributed_type in [DistributedType.FSDP, DistributedType.MULTI_GPU, DistributedType.DEEPSPEED], "Unsupported distributed type provided. Only DDP and FSDP are supported."
            # If you want to use DistributedType.DEEPSPEED, you have to run accelerate config before using the model
            # Also, you have to select zero stage 0 (equivalent to DDP) in order to make the prepare model works
            # I tried to set different parameters in the kwargs to let default zero 2 stage works, but it didn't work.
            if accelerator.distributed_type == DistributedType.DEEPSPEED:
                kwargs = {
                    "train_micro_batch_size_per_gpu": self.batch_size_per_gpu,
-                    "train_batch_size" : self.batch_size_per_gpu * accelerator.num_processes,
+                    "train_batch_size": self.batch_size_per_gpu * accelerator.num_processes,
                }
                AcceleratorState().deepspeed_plugin.deepspeed_config_process(must_match=True, **kwargs)
                eval_logger.info("Detected that you are using DistributedType.DEEPSPEED. Make sure you run `accelerate config` and set zero stage to 0")
@@ -189,7 +185,7 @@ def _collate(x):
            if "<image>" in context:
                # minicpm does not expect the <image> tag
                context = context.replace("<image>", "")
-           msgs = [{'role': 'user', 'content': context}]
+           msgs = [{"role": "user", "content": context}]

            gen_kwargs["image_sizes"] = [visuals[idx].size for idx in range(len(visuals))]
            if "max_new_tokens" not in gen_kwargs:
2 changes: 0 additions & 2 deletions lmms_eval/models/qwen_vl.py
@@ -174,8 +174,6 @@ def loglikelihood(self, requests: List[Instance]) -> List[Tuple[float, bool]]:
        pbar.close()
        return res

-
-
    def flatten(self, input):
        new_list = []
        for i in input:
2 changes: 1 addition & 1 deletion lmms_eval/tasks/cmmmu/utils.py
@@ -122,7 +122,7 @@ def cmmmu_process_test_results_for_submission(doc, results):

def cmmmu_test_aggregate_results_for_submission(results, args):
    file = generate_submission_file("cmmmu_test_for_submission.jsonl", args)
-    with open(file, "w", encoding='utf8') as f:
+    with open(file, "w", encoding="utf8") as f:
        for result in results:
            json.dump(result, f, ensure_ascii=False)
            f.write("\n")
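`ensure_ascii=False` matters here because CMMMU submissions contain Chinese text; without it, `json.dump` escapes every non-ASCII character. A tiny standalone sketch of the difference, using a made-up record:

```python
import json

record = {"id": 1, "answer": "北京"}

print(json.dumps(record))                      # {"id": 1, "answer": "\u5317\u4eac"}
print(json.dumps(record, ensure_ascii=False))  # {"id": 1, "answer": "北京"}
```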
5 changes: 3 additions & 2 deletions pyproject.toml
@@ -9,7 +9,7 @@ build-backend = "setuptools.build_meta"
name = "lmms_eval"
version = "0.1.0"
authors = [
-    { name = "LMMMs-Lab Evaluation Team", email = "lmms_eval@gmail.com" },
+    { name = "LMMMs-Lab Evaluation Team", email = "lmms_eval@outlook.com" },
]
description = "A framework for evaluating large multi-modality language models"
readme = "README.md"
@@ -53,6 +53,7 @@ dependencies = [
    "transformers-stream-generator",
    "tiktoken",
    "pre-commit",
+   "pydantic",
]

[tool.setuptools.packages.find]
@@ -66,5 +67,5 @@ lmms-eval = "lmms_eval.__main__:cli_evaluate"
lmms_eval = "lmms_eval.__main__:cli_evaluate"

[project.urls]
-Homepage = "https://github.com/EvolvingLMMs-Lab/lmms-eval"
+Homepage = "https://lmms-lab.github.io/lmms-eval-blog/"
Repository = "https://github.com/EvolvingLMMs-Lab/lmms-eval"
