Skip to content

Commit

Permalink
added class for few shot back
Browse files Browse the repository at this point in the history
  • Loading branch information
clefourrier committed Nov 18, 2024
1 parent 8b51d61 commit 05217d0
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 5 deletions.
25 changes: 20 additions & 5 deletions src/lighteval/tasks/prompt_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,9 +75,24 @@ def doc_to_target(formatted_doc: Doc) -> str:
Returns:
str: Target of the document, which is the correct answer for a document.
"""
# likely we mostly need one example not all
return as_list(formatted_doc.get_golds())[0]

@staticmethod
def doc_to_fewshot_sorting_class(formatted_doc: Doc) -> str:
    """
    Resolve the class label used to group documents when selecting few-shot samples.

    A document may declare a dedicated `fewshot_sorting_class`, specified separately
    from its target. For example, a document whose gold is a json might want to use
    only one of the json's keys to define its sorting class. When that attribute is
    unset (or otherwise falsy), the gold target is used instead.

    Args:
        formatted_doc (Doc): Formatted document.

    Returns:
        str: The document's `fewshot_sorting_class` when it is truthy, otherwise the
            value returned by `PromptManager.doc_to_target` for this document.
    """
    sorting_class = formatted_doc.fewshot_sorting_class
    if sorting_class:
        return sorting_class
    # No explicit sorting class was provided: fall back to the gold target.
    return PromptManager.doc_to_target(formatted_doc)

def add_context_to_doc(
self,
doc: Doc,
Expand Down Expand Up @@ -352,16 +367,16 @@ def _init_fewshot_sampling_balanced(
):
fewshotpool = self.task.fewshot_docs()

# rnd = random.Random(variance_seed)
random.seed(variance_seed)

# Build up balanced selection based on labels
# Sort by counts of labels
# Build up balanced selection based on fewshot_sorting_class
# (or the gold target, if the class is undefined)
label_to_instances = defaultdict(list)
for instance in fewshotpool:
target = PromptManager.doc_to_target(instance)
target = PromptManager.doc_to_fewshot_sorting_class(instance)
label_to_instances[target].append(instance)

# Sort by counts of class labels
counts_to_labels = defaultdict(list)
for label, instances in sorted(label_to_instances.items()):
counts_to_labels[len(instances)].append(label)
Expand Down
1 change: 1 addition & 0 deletions src/lighteval/tasks/requests.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,7 @@ class Doc:

# For few-shot
instruction: Optional[str] = ""
fewshot_sorting_class: Optional[str] = None # class to use to select balanced few-shot samples

# Filled when parsing and adding the few-shot context
ctx: Optional[str] = ""
Expand Down

0 comments on commit 05217d0

Please sign in to comment.