From c3949ab5be15f9fa6986f729389c12f324d5e672 Mon Sep 17 00:00:00 2001
From: Nathan Habib
Date: Wed, 20 Nov 2024 12:52:41 +0000
Subject: [PATCH] fix splits

---
 src/lighteval/data.py | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/src/lighteval/data.py b/src/lighteval/data.py
index 88de9ade0..382ce656f 100644
--- a/src/lighteval/data.py
+++ b/src/lighteval/data.py
@@ -247,11 +247,11 @@ def init_split_limits(self, num_dataset_splits):
                 "You cannot select the number of dataset splits for a generative evaluation at the moment. Automatically inferring."
             )
 
-        all_sorting_criterion = [self._sorting_criteria(self.sorted_data[0])[:2]]
+        all_sorting_criterion = [self._sorting_criteria(self.sorted_data[0])[:-1]]
         splits_indices = [[0, None]]
         for ix, req in enumerate(self.sorted_data):
             current_sorting_criteria = self._sorting_criteria(req)
-            current_key = current_sorting_criteria[:2]
+            current_key = current_sorting_criteria[:-1]
             if current_key not in all_sorting_criterion:
                 all_sorting_criterion.append(current_key)
                 splits_indices[-1][1] = ix
@@ -264,7 +264,7 @@ def init_split_limits(self, num_dataset_splits):
         splits_indices = [tuple(e) for e in splits_indices]
         return num_dataset_splits, splits_indices
 
-    def _sorting_criteria(self, request: GreedyUntilRequest) -> tuple[bool, bool, list, int]:
+    def _sorting_criteria(self, request: GreedyUntilRequest) -> tuple[bool, bool, list, int, int]:
         """
         Collate function for generating batches.
 
@@ -279,7 +279,13 @@ def _sorting_criteria(self, request: GreedyUntilRequest) -> tuple[bool, bool, li
         # The generative task has no limit except the model context
         if gen_length is None:
             gen_length = 0
-        return request.do_sample, request.use_logits, request.stop_sequence, -(len(toks) + gen_length)
+        return (
+            request.do_sample,
+            request.use_logits,
+            tuple(request.stop_sequence),
+            gen_length,
+            -(len(toks) + gen_length),
+        )
 
 
 class GenerativeTaskDatasetNanotron(GenerativeTaskDataset):
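
Note (not part of the patch): a minimal sketch of the effect of this change. The split key built in init_split_limits now takes every element of the sorting criteria except the final negative-length term (hence the switch from [:2] to [:-1]), gen_length becomes part of the key, and stop sequences are converted to tuples, presumably so keys compare and hash cleanly. The FakeRequest class and sorting_criteria helper below are hypothetical stand-ins for lighteval's GreedyUntilRequest and GenerativeTaskDataset._sorting_criteria, used only to illustrate the grouping behaviour.

from dataclasses import dataclass


@dataclass
class FakeRequest:
    # Hypothetical stand-in for GreedyUntilRequest; num_toks stands in for len(toks).
    do_sample: bool
    use_logits: bool
    stop_sequence: list
    gen_length: int
    num_toks: int


def sorting_criteria(req: FakeRequest) -> tuple:
    # Mirrors the patched _sorting_criteria: stop sequences made hashable with
    # tuple(), gen_length included, and the negative total length kept last so
    # it can be sliced off with [:-1] when building split keys.
    return (
        req.do_sample,
        req.use_logits,
        tuple(req.stop_sequence),
        req.gen_length,
        -(req.num_toks + req.gen_length),
    )


requests = [
    FakeRequest(False, False, ["\n"], 32, 10),
    FakeRequest(False, False, ["\n"], 32, 50),   # same key -> same split
    FakeRequest(False, False, ["\n"], 256, 10),  # different gen_length -> new split
]

split_keys = {sorting_criteria(r)[:-1] for r in requests}
print(split_keys)  # two distinct split keys: requests are no longer grouped only by do_sample/use_logits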