# Hugging Face dataset repository holding the LiveCodeBench code-generation subsets.
_LCB_REPO = "livecodebench/code_generation_lite"
# Subset that keeps the un-suffixed task name "lcb:codegeneration".
_LCB_DEFAULT_SUBSET = "v4_v5"


def _list_lcb_subsets() -> list[str]:
    """Return the subset (config) names available for the LiveCodeBench dataset.

    The lookup runs while this module is being imported. If it fails for any
    reason (for example the Hub cannot be reached), the failure is logged and
    only the default subset is returned, so that the base task
    ``lcb:codegeneration`` can still be registered instead of the whole module
    failing to import.

    Returns:
        The subset names reported by ``get_dataset_config_names``, or
        ``[_LCB_DEFAULT_SUBSET]`` when the lookup raised.
    """
    try:
        return list(get_dataset_config_names(_LCB_REPO, trust_remote_code=True))
    except Exception as err:  # import-time boundary: degrade instead of crashing
        import logging

        logging.getLogger(__name__).warning(
            "Could not list the subsets of %s (%s); only the default subset %r will be registered.",
            _LCB_REPO,
            err,
            _LCB_DEFAULT_SUBSET,
        )
        return [_LCB_DEFAULT_SUBSET]


def _make_lcb_task(subset: str) -> LightevalTaskConfig:
    """Build the code-generation task config for one dataset subset.

    Args:
        subset: Name of the dataset subset to evaluate on.

    Returns:
        A task named ``lcb:codegeneration`` for the default subset, and
        ``lcb:codegeneration_<subset>`` for every other subset.
    """
    # The default subset keeps the bare task name; every other subset gets
    # its own name appended as a suffix.
    if subset == _LCB_DEFAULT_SUBSET:
        name = "lcb:codegeneration"
    else:
        name = f"lcb:codegeneration_{subset}"

    return LightevalTaskConfig(
        name=name,
        suite=["extended"],
        prompt_function=lcb_codegeneration_prompt_fn,
        hf_repo=_LCB_REPO,
        hf_subset=subset,  # https://github.com/LiveCodeBench/LiveCodeBench/tree/main?tab=readme-ov-file#dataset-versions
        hf_avail_splits=["test"],
        evaluation_splits=["test"],
        generation_size=32768,
        metric=[Metrics.lcb_codegen_metric],
        stop_sequence=[],  # no stop sequence, will use EOS token
        trust_dataset=True,
        version=0,
    )


# One task per available subset; `configs` and `tasks` stay defined at module level.
configs = _list_lcb_subsets()

tasks = [_make_lcb_task(subset) for subset in configs]

TASKS_TABLE = tasks