Commit

Merge branch 'main' into lcb-v4
plaguss authored Feb 25, 2025
2 parents 4c52030 + 066f84f commit 00949bd
Showing 7 changed files with 8 additions and 6 deletions.
2 changes: 1 addition & 1 deletion README.md
@@ -69,7 +69,7 @@ huggingface-cli login
 
 ## 🚀 Quickstart
 
-Lighteval offers two main entry points for model evaluation:
+Lighteval offers the following entry points for model evaluation:
 
 - `lighteval accelerate` : evaluate models on CPU or one or more GPUs using [🤗
   Accelerate](https://github.com/huggingface/accelerate)
2 changes: 1 addition & 1 deletion src/lighteval/logging/evaluation_tracker.py
@@ -198,7 +198,7 @@ def save(self) -> None:
         details_datasets: dict[str, Dataset] = {}
         for task_name, task_details in self.details_logger.details.items():
             # Create a dataset from the dictionary - we force cast to str to avoid formatting problems for nested objects
-            dataset = Dataset.from_list([{k: str(v) for k, v in asdict(detail).items()} for detail in task_details])
+            dataset = Dataset.from_list([asdict(detail) for detail in task_details])
 
             # We don't keep 'id' around if it's there
             column_names = dataset.column_names
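Note on this hunk: `Dataset.from_list` infers Arrow column types from the Python values it receives, so the two variants differ in schema, not just style. A minimal sketch of the difference, using only the `datasets` library (the dataclass below is an illustrative stand-in, not the logger's real `Detail`):

```python
from dataclasses import asdict, dataclass, field

from datasets import Dataset


@dataclass
class FakeDetail:  # illustrative stand-in for the logger's Detail
    predictions: list = field(default_factory=list)
    prediction_logits: list = field(default_factory=list)


details = [FakeDetail(predictions=["Paris"], prediction_logits=[-0.5, -1.2])]

# Plain asdict keeps native types: prediction_logits stays a float-list column.
native = Dataset.from_list([asdict(d) for d in details])
print(native[0]["prediction_logits"])  # [-0.5, -1.2]

# Force-casting to str flattens every value, including nested lists, to text.
stringly = Dataset.from_list([{k: str(v) for k, v in asdict(d).items()} for d in details])
print(stringly[0]["prediction_logits"])  # "[-0.5, -1.2]"
```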
1 change: 1 addition & 0 deletions src/lighteval/logging/info_loggers.py
@@ -201,6 +201,7 @@ class Detail:
         num_effective_few_shots: int = 0
         num_asked_few_shots: int = 0
         predictions: list = field(default_factory=list)
+        prediction_logits: list = field(default_factory=list)
         input_tokens: list = field(default_factory=list)
         cont_tokens: list = field(default_factory=list)
         truncated: list = field(default_factory=list)
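The new field follows the dataclass rule that mutable defaults must go through `field(default_factory=...)`; a bare `list` default is rejected by `dataclasses` at class-definition time. A quick self-contained illustration:

```python
from dataclasses import dataclass, field


@dataclass
class Detail:  # trimmed to the two fields that matter here
    predictions: list = field(default_factory=list)
    prediction_logits: list = field(default_factory=list)


a, b = Detail(), Detail()
a.prediction_logits.append(-0.7)
print(b.prediction_logits)  # [] - every instance gets its own list

# By contrast, `prediction_logits: list = []` would raise:
# ValueError: mutable default <class 'list'> for field prediction_logits
# is not allowed: use default_factory
```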
2 changes: 1 addition & 1 deletion src/lighteval/models/model_output.py
@@ -63,7 +63,7 @@ class GenerativeResponse(ModelResponse):
     logits: Optional[list[float]] = None  # Generated text logits
 
     def get_result_for_eval(self):
-        return self.result if self.logits is None else (self.result, self.logits)
+        return self.result
 
 
 @dataclass
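The effect of this hunk, paired with the new `prediction_logits` field above: previously the eval result changed shape to a `(result, logits)` tuple whenever logits were recorded; now the method always returns the bare result and logits travel through the logger instead. A rough before/after sketch (a stand-in class, not lighteval's actual `GenerativeResponse`):

```python
from dataclasses import dataclass
from typing import Optional


@dataclass
class Response:  # stand-in for GenerativeResponse
    result: list[str]
    logits: Optional[list[float]] = None

    def get_result_for_eval_old(self):
        # Old behaviour: the return shape depends on whether logits exist.
        return self.result if self.logits is None else (self.result, self.logits)

    def get_result_for_eval_new(self):
        # New behaviour: one shape; logits are logged via prediction_logits.
        return self.result


r = Response(result=["42"], logits=[-0.1, -2.3])
assert r.get_result_for_eval_old() == (["42"], [-0.1, -2.3])
assert r.get_result_for_eval_new() == ["42"]
```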
2 changes: 1 addition & 1 deletion src/lighteval/models/vllm/vllm_model.py
@@ -182,7 +182,7 @@ def _create_auto_model(self, config: VLLMModelConfig, env_config: EnvConfig) ->
             "pipeline_parallel_size": int(config.pipeline_parallel_size),
             "max_model_len": self._max_length,
             "swap_space": 4,
-            "seed": 1234,
+            "seed": config.seed,
         }
         if int(config.data_parallel_size) > 1:
             self.model_args["distributed_executor_backend"] = "ray"
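For reference, the `model_args` dict built here is presumably unpacked into vLLM's engine; `seed`, `max_model_len`, and `swap_space` are real vLLM engine arguments, though the wiring below is a sketch rather than lighteval's actual `_create_auto_model` (the model name and values are placeholders):

```python
# Sketch: how a configurable seed might reach vLLM. Assumes `vllm` is
# installed; the model name and values are placeholders.
from vllm import LLM

model_args = {
    "model": "HuggingFaceTB/SmolLM2-135M-Instruct",
    "max_model_len": 2048,
    "swap_space": 4,  # GiB of CPU swap space per GPU
    "seed": 1234,     # now read from config.seed instead of hard-coded
}

llm = LLM(**model_args)
outputs = llm.generate(["The capital of France is"])
print(outputs[0].outputs[0].text)
```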
1 change: 1 addition & 0 deletions src/lighteval/tasks/templates/utils/translation_literals.py
@@ -950,6 +950,7 @@ def __getattribute__(self, name: str) -> str:
         false="yanlış",
         neither="hiçbiri",
         or_word="veya",
+        and_word="ve",
         full_stop=".",
         comma=",",
         question_mark="?",
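The hunk header points at a `__getattribute__` override, which suggests the literals class raises a descriptive error when a literal such as the newly added `and_word` is missing for a language. A hedged sketch of that guard pattern (not lighteval's exact implementation):

```python
from dataclasses import dataclass
from typing import Optional


@dataclass
class TranslationLiterals:  # simplified sketch of the pattern
    language: str
    or_word: Optional[str] = None
    and_word: Optional[str] = None

    def __getattribute__(self, name: str) -> str:
        value = super().__getattribute__(name)
        if value is None:
            lang = super().__getattribute__("language")
            raise AttributeError(f"'{name}' is not defined for '{lang}'")
        return value


turkish = TranslationLiterals(language="turkish", or_word="veya", and_word="ve")
print(turkish.and_word)  # ve

swahili = TranslationLiterals(language="swahili", or_word="au")
try:
    swahili.and_word
except AttributeError as err:
    print(err)  # 'and_word' is not defined for 'swahili'
```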
4 changes: 2 additions & 2 deletions tests/logging/test_evaluation_tracker.py
@@ -99,8 +99,8 @@ def test_results_logging(mock_evaluation_tracker: EvaluationTracker):
 @pytest.mark.evaluation_tracker(save_details=True)
 def test_details_logging(mock_evaluation_tracker, mock_datetime):
     task_details = {
-        "task1": [DetailsLogger.CompiledDetail(truncated=10, padded=5)],
-        "task2": [DetailsLogger.CompiledDetail(truncated=20, padded=10)],
+        "task1": [DetailsLogger.CompiledDetail(hashes=None, truncated=10, padded=5)],
+        "task2": [DetailsLogger.CompiledDetail(hashes=None, truncated=20, padded=10)],
     }
     mock_evaluation_tracker.details_logger.details = task_details
 
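The `@pytest.mark.evaluation_tracker(save_details=True)` marker implies the `mock_evaluation_tracker` fixture reads its keyword arguments off the test's marker. `request.node.get_closest_marker` is the standard pytest API for that; the fixture body below is an illustrative guess, not the repo's actual fixture:

```python
import pytest

from lighteval.logging.evaluation_tracker import EvaluationTracker


@pytest.fixture
def mock_evaluation_tracker(request, tmp_path):
    # Pull keyword arguments off the closest `evaluation_tracker` marker,
    # defaulting to no extra options when the marker is absent.
    marker = request.node.get_closest_marker("evaluation_tracker")
    kwargs = dict(marker.kwargs) if marker else {}
    return EvaluationTracker(output_dir=str(tmp_path), **kwargs)
```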
