diff --git a/src/lighteval/logging/hierarchical_logger.py b/src/lighteval/logging/hierarchical_logger.py
index 99287f75..1c4c3a11 100644
--- a/src/lighteval/logging/hierarchical_logger.py
+++ b/src/lighteval/logging/hierarchical_logger.py
@@ -34,8 +34,11 @@
     logger = get_logger(__name__, log_level="INFO")
 elif is_accelerate_available():
+    from accelerate import Accelerator, InitProcessGroupKwargs
     from accelerate.logging import get_logger
 
+    # We must init the accelerator before using the logger
+    accelerator = Accelerator(kwargs_handlers=[InitProcessGroupKwargs(timeout=timedelta(seconds=3000))])
     logger = get_logger(__name__, log_level="INFO")
 else:
     logger = Logger(__name__, level="INFO")
 
diff --git a/src/lighteval/models/adapter_model.py b/src/lighteval/models/adapter_model.py
index 18fd6890..dbf762d7 100644
--- a/src/lighteval/models/adapter_model.py
+++ b/src/lighteval/models/adapter_model.py
@@ -41,7 +41,14 @@ class AdapterModel(BaseModel):
     def _create_auto_tokenizer(self, config: AdapterModelConfig, env_config: EnvConfig) -> PreTrainedTokenizer:
         # By default, we look at the model config for the model stored in `base_model`
         # (= the parent model, not the model of interest)
-        return self._create_auto_tokenizer_with_name(config.base_model, config=config, env_config=env_config)
+        return self._create_auto_tokenizer_with_name(
+            model_name=config.base_model,
+            revision=config.revision,
+            env_config=env_config,
+            tokenizer_name=config.tokenizer,
+            subfolder=config.subfolder,
+            trust_remote_code=config.trust_remote_code,
+        )
 
     def _create_auto_model(self, config: AdapterModelConfig, env_config: EnvConfig) -> AutoModelForCausalLM:
         """Returns a PeftModel from a base model and a version fined tuned using PEFT."""
diff --git a/src/lighteval/models/base_model.py b/src/lighteval/models/base_model.py
index debec448..993978d5 100644
--- a/src/lighteval/models/base_model.py
+++ b/src/lighteval/models/base_model.py
@@ -30,6 +30,7 @@
 from torch.utils.data import DataLoader
 from tqdm import tqdm
 from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
+from transformers.models.auto.modeling_auto import MODEL_FOR_CAUSAL_LM_MAPPING_NAMES
 
 from lighteval.data import GenerativeTaskDataset, LoglikelihoodDataset, LoglikelihoodSingleTokenDataset
 from lighteval.logging.hierarchical_logger import hlog, hlog_err, hlog_warn
@@ -57,6 +58,7 @@
 
 if is_accelerate_available():
+    from accelerate import Accelerator
     from accelerate.utils import calculate_maximum_sizes, convert_bytes, get_max_memory
 
 os.environ["TOKENIZERS_PARALLELISM"] = "false"
@@ -67,8 +69,8 @@ class BaseModel(LightevalModel):
 
     def __init__(
         self,
-        config: BaseModelConfig,
         env_config: EnvConfig,
+        config: BaseModelConfig,
     ):
         """Initializes a HuggingFace `AutoModel` and `AutoTokenizer` for evaluation."""
         self._config = config.init_configs(env_config)
@@ -114,6 +116,72 @@ def __init__(
 
         self.pairwise_tokenization = config.pairwise_tokenization
 
+    @classmethod
+    def from_model(
+        cls,
+        model: Union[AutoModelForCausalLM, LightevalModel],
+        env_config: EnvConfig,
+        accelerator: "Accelerator" = None,
+        tokenizer_name: str = None,  # custom tokenizer
+        trust_remote_code: bool = False,
+        use_chat_template: bool = False,
+        add_special_tokens: bool = True,
+        pairwise_tokenization: bool = False,
+        multichoice_continuations_start_space: bool = None,
+    ):
+        # Slightly hackish way to test if the model is an AutoModelForCausalLM, since the instances don't
+        # derive from this class explicitly
+        assert isinstance(model, LightevalModel) or type(model).__name__ in MODEL_FOR_CAUSAL_LM_MAPPING_NAMES.values()
+
+        if isinstance(model, LightevalModel):
+            return model
+
+        # Instantiate the object without using __init__
+        self = cls.__new__(cls)
+        self._config = model.config
+        self._max_length = self._init_max_length(max_length=model.config.max_length)
+        self._tokenizer = self._create_auto_tokenizer_with_name(
+            model_name=model.name_or_path,
+            revision=model.config._commit_hash,
+            env_config=env_config,
+            trust_remote_code=trust_remote_code,
+            tokenizer_name=tokenizer_name,
+        )
+        self.model_name = _simplify_name(model.name_or_path)
+        self.model_sha = model.config._commit_hash
+
+        # If model_parallel is not set we compare the number of processes with the number of GPUs
+        self.model = model
+        self.model.eval()
+        torch.set_grad_enabled(False)
+
+        self.accelerator = accelerator
+        if accelerator is not None:
+            self._device = accelerator.device
+            self.model = self.accelerator.prepare(self.model.to(accelerator.device))
+        else:
+            self._device = "cpu"
+
+        self.use_chat_template = use_chat_template
+        self._add_special_tokens = add_special_tokens if add_special_tokens is not None else False
+        self.pairwise_tokenization = pairwise_tokenization
+        self.multichoice_continuations_start_space = multichoice_continuations_start_space
+
+        self.precision = _get_dtype(model.dtype, config=self._config)
+
+        if is_accelerate_available():
+            model_size, _ = calculate_maximum_sizes(self.model)
+            model_size = convert_bytes(model_size)
+        else:
+            model_size = -1
+        self.model_info = ModelInfo(
+            model_name=self.model_name,
+            model_sha=self.model_sha,
+            model_dtype=self.precision,
+            model_size=model_size,
+        )
+        return self
+
     @property
     def tokenizer(self):
         return self._tokenizer
@@ -207,10 +275,23 @@ def _create_auto_model(self, config: BaseModelConfig, env_config: EnvConfig) ->
     def _create_auto_tokenizer(
         self, config: BaseModelConfig, env_config: EnvConfig
     ) -> transformers.PreTrainedTokenizer:
-        return self._create_auto_tokenizer_with_name(config.pretrained, config=config, env_config=env_config)
+        return self._create_auto_tokenizer_with_name(
+            model_name=config.pretrained,
+            revision=config.revision,
+            env_config=env_config,
+            tokenizer_name=config.tokenizer,
+            subfolder=config.subfolder,
+            trust_remote_code=config.trust_remote_code,
+        )
 
     def _create_auto_tokenizer_with_name(
-        self, model_name: str, config: BaseModelConfig, env_config: EnvConfig
+        self,
+        model_name: str,
+        revision: str,
+        env_config: EnvConfig,
+        tokenizer_name: str = None,
+        subfolder: str = None,
+        trust_remote_code: bool = False,
     ) -> transformers.PreTrainedTokenizer:
         """
         Create a Hugging Face AutoTokenizer for language model.
@@ -231,25 +312,35 @@ def _create_auto_tokenizer_with_name(
         """
         try:
             tokenizer = AutoTokenizer.from_pretrained(
-                model_name if config.tokenizer is None else config.tokenizer,
-                revision=config.revision + (f"/{config.subfolder}" if config.subfolder is not None else ""),
+                model_name if tokenizer_name is None else tokenizer_name,
+                revision=revision + (f"/{subfolder}" if subfolder is not None else ""),
                 cache_dir=env_config.cache_dir,
                 token=env_config.token,
-                trust_remote_code=config.trust_remote_code,
+                trust_remote_code=trust_remote_code,
                 padding_side="left",
                 truncation_side="left",
             )
         except RecursionError:
             tokenizer = AutoTokenizer.from_pretrained(
-                model_name if config.tokenizer is None else config.tokenizer,
-                revision=config.revision + (f"/{config.subfolder}" if config.subfolder is not None else ""),
+                model_name if tokenizer_name is None else tokenizer_name,
+                revision=revision + (f"/{subfolder}" if subfolder is not None else ""),
                 cache_dir=env_config.cache_dir,
                 token=env_config.token,
-                trust_remote_code=config.trust_remote_code,
+                trust_remote_code=trust_remote_code,
                 unk_token="<unk>",
                 padding_side="left",
                 truncation_side="left",
             )
+        except FileNotFoundError:
+            hlog_warn("Problem when loading the tokenizer in the cache - discarding the provided cache path value.")
+            tokenizer = AutoTokenizer.from_pretrained(
+                model_name if tokenizer_name is None else tokenizer_name,
+                revision=revision + (f"/{subfolder}" if subfolder is not None else ""),
+                token=env_config.token,
+                trust_remote_code=trust_remote_code,
+                padding_side="left",
+                truncation_side="left",
+            )
         tokenizer.pad_token = tokenizer.eos_token
         tokenizer.model_max_length = self.max_length
         hlog("Tokenizer truncation and padding size set to the left side.")
diff --git a/src/lighteval/pipeline.py b/src/lighteval/pipeline.py
index db0ede47..da4fb045 100644
--- a/src/lighteval/pipeline.py
+++ b/src/lighteval/pipeline.py
@@ -34,7 +34,7 @@
 from lighteval.logging.evaluation_tracker import EvaluationTracker
 from lighteval.logging.hierarchical_logger import hlog, htrack_block
 from lighteval.metrics.utils.metric_utils import MetricCategory
-from lighteval.models.model_loader import load_model
+from lighteval.models.model_loader import BaseModel, load_model
 from lighteval.models.model_output import ModelResponse
 from lighteval.tasks.lighteval_task import LightevalTask, create_requests_from_tasks
 from lighteval.tasks.registry import Registry, taskinfo_selector
@@ -164,7 +164,15 @@ def _init_model(self, model_config, model):
                     )
                 else:
                     return load_model(config=model_config, env_config=self.pipeline_parameters.env_config)
-            return model
+            if isinstance(model, BaseModel):
+                return model
+            else:
+                return BaseModel.from_model(
+                    model=model,
+                    use_chat_template=self.pipeline_parameters.use_chat_template,
+                    env_config=self.pipeline_parameters.env_config,
+                    accelerator=self.accelerator,
+                )
 
     def _init_tasks_and_requests(self, tasks: str):
         with htrack_block("Tasks loading"):
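
Usage sketch (illustrative, not part of the patch): the new BaseModel.from_model classmethod wraps a model that is already loaded in memory, so the pipeline no longer has to instantiate one from a config. The module path for BaseModel follows the file path in this diff; the EnvConfig import path and its constructor arguments, and the "gpt2" checkpoint, are assumptions for illustration.

    # Sketch only: wrap an in-memory transformers causal LM with BaseModel.from_model.
    # EnvConfig import path and arguments are assumed, not taken from this diff.
    from transformers import AutoModelForCausalLM

    from lighteval.models.base_model import BaseModel  # module path as in this diff
    from lighteval.utils.utils import EnvConfig  # assumed module path

    model = AutoModelForCausalLM.from_pretrained("gpt2")  # any causal LM already in memory

    lighteval_model = BaseModel.from_model(
        model=model,
        env_config=EnvConfig(cache_dir="~/.cache/huggingface"),  # assumed to accept a cache_dir
        use_chat_template=False,
        add_special_tokens=True,
    )

    # Alternatively, the raw `model` can be passed straight to Pipeline(..., model=model):
    # _init_model now calls BaseModel.from_model on anything that is not already a BaseModel.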