Skip to content

Commit

Permalink
Sync eval changes in OLMo/ladder-1xC to here (#122)
Browse files Browse the repository at this point in the history
This adds scaling-law eval sets as in-loop evaluations.

Testing of the metrics:
https://legacy.beaker.org/ex/01JF4NNA49YJGC55P3Q5FPEAPA/tasks/01JF4NNA4HM9Q90BQNQ99XSJ9Y/job/01JF4P6XRZVTDXWC3J2559R0K5
```
2024-12-15T08:21:11.301073649Z 2024-12-15 08:21:11.300	d22e6d646321:0	olmo_core.train.callbacks.evaluator_callback:68	INFO	Running downstream evals...
2024-12-15T08:21:14.829675802Z 2024-12-15 08:21:14.829	d22e6d646321:0	olmo_core.train.callbacks.evaluator_callback:111	INFO	[eval=downstream,step=5/75]
2024-12-15T08:21:14.940428448Z 2024-12-15 08:21:14.940	d22e6d646321:0	olmo_core.train.callbacks.evaluator_callback:111	INFO	[eval=downstream,step=10/75]
2024-12-15T08:21:15.049435484Z 2024-12-15 08:21:15.049	d22e6d646321:0	olmo_core.train.callbacks.evaluator_callback:111	INFO	[eval=downstream,step=15/75]
2024-12-15T08:21:15.157967512Z 2024-12-15 08:21:15.157	d22e6d646321:0	olmo_core.train.callbacks.evaluator_callback:111	INFO	[eval=downstream,step=20/75]
2024-12-15T08:21:15.267427337Z 2024-12-15 08:21:15.267	d22e6d646321:0	olmo_core.train.callbacks.evaluator_callback:111	INFO	[eval=downstream,step=25/75]
2024-12-15T08:21:15.375047960Z 2024-12-15 08:21:15.374	d22e6d646321:0	olmo_core.train.callbacks.evaluator_callback:111	INFO	[eval=downstream,step=30/75]
2024-12-15T08:21:15.483513780Z 2024-12-15 08:21:15.483	d22e6d646321:0	olmo_core.train.callbacks.evaluator_callback:111	INFO	[eval=downstream,step=35/75]
2024-12-15T08:21:15.594538312Z 2024-12-15 08:21:15.594	d22e6d646321:0	olmo_core.train.callbacks.evaluator_callback:111	INFO	[eval=downstream,step=40/75]
2024-12-15T08:21:15.702422918Z 2024-12-15 08:21:15.702	d22e6d646321:0	olmo_core.train.callbacks.evaluator_callback:111	INFO	[eval=downstream,step=45/75]
2024-12-15T08:21:15.811504739Z 2024-12-15 08:21:15.811	d22e6d646321:0	olmo_core.train.callbacks.evaluator_callback:111	INFO	[eval=downstream,step=50/75]
2024-12-15T08:21:15.919817749Z 2024-12-15 08:21:15.919	d22e6d646321:0	olmo_core.train.callbacks.evaluator_callback:111	INFO	[eval=downstream,step=55/75]
2024-12-15T08:21:16.026753004Z 2024-12-15 08:21:16.026	d22e6d646321:0	olmo_core.train.callbacks.evaluator_callback:111	INFO	[eval=downstream,step=60/75]
2024-12-15T08:21:16.133501599Z 2024-12-15 08:21:16.133	d22e6d646321:0	olmo_core.train.callbacks.evaluator_callback:111	INFO	[eval=downstream,step=65/75]
2024-12-15T08:21:16.240990822Z 2024-12-15 08:21:16.240	d22e6d646321:0	olmo_core.train.callbacks.evaluator_callback:111	INFO	[eval=downstream,step=70/75]
2024-12-15T08:21:16.348730485Z 2024-12-15 08:21:16.348	d22e6d646321:0	olmo_core.train.callbacks.evaluator_callback:111	INFO	[eval=downstream,step=75/75]
2024-12-15T08:21:17.056109188Z 2024-12-15 08:21:17.055	d22e6d646321:0	olmo_core.train.callbacks.evaluator_callback:104	INFO	Eval metrics:
2024-12-15T08:21:17.056129669Z     arc_challenge_val_rc_5shot (len_norm)=0.2441
2024-12-15T08:21:17.056131828Z     arc_challenge_val_rc_5shot (ce_loss)=2.472
2024-12-15T08:21:17.056133529Z     arc_challenge_val_rc_5shot (bpb)=3.565
2024-12-15T08:21:17.056134965Z     arc_challenge_val_rc_5shot (soft)=0.2539
2024-12-15T08:21:17.056136416Z     arc_challenge_val_rc_5shot (soft_log)=-1.46E+00
```

To see the logged results in Comet:
https://www.comet.com/ai2/olmo-core-1b/7a3614872861484dbc7ad651ad5c9e35
  • Loading branch information
liujch1998 authored Dec 19, 2024
1 parent 0789479 commit ee27348
Show file tree
Hide file tree
Showing 4 changed files with 58 additions and 7 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Added

- Added support for tensor parallelism. See the `TransformerConfig` class for usage.
- Added more downstream tasks from the model ladder.
- Added `io.copy_dir()` function.
- Added new LR schedulers: `LinearWithWarmup`, `InvSqrtWithWarmup`, `ConstantWithWarmup`, `SequentialScheduler`.
- Added option to pre-download checkpoint files from remote storage before trying to load a checkpoint.
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ dependencies = [
"omegaconf",
"safetensors",
"importlib_resources",
"ai2-olmo-eval==0.2.0",
"ai2-olmo-eval==0.5.0",
]

[project.urls]
Expand Down
17 changes: 11 additions & 6 deletions src/olmo_core/train/callbacks/evaluator_callback.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,6 @@ def post_step(self):
metrics = []
with cuda_sync_debug_mode(0):
for name, value in evaluator.compute_metrics().items():
value = value.item()
metrics.append(f" {name}={format_float(value)}")
self.trainer.record_metric(f"eval/{evaluator.name}/{name}", value)
log.info("Eval metrics:\n" + "\n".join(metrics))
Expand Down Expand Up @@ -161,6 +160,8 @@ class DownstreamEvaluator(Evaluator):
"pmi_dc": "PMI-DC accuracy",
"ce_loss": "CE loss",
"bpb": "BPB",
"soft": "soft loss",
"soft_log": "log soft loss",
}

def __init__(
Expand All @@ -184,13 +185,14 @@ def __init__(
if is_distributed():
sampler = DistributedSampler(
self.task, # type: ignore
drop_last=True,
drop_last=False,
shuffle=False,
num_replicas=get_world_size(dp_process_group),
rank=get_rank(dp_process_group),
)

rank_batch_size_instances = max(0, rank_batch_size // self.task.max_sequence_length)

log.info(
f"Using per-rank batch size of {rank_batch_size_instances} instances "
f"for downstream eval task '{task}' with max sequence length {self.task.max_sequence_length:,d} tokens"
Expand All @@ -215,9 +217,12 @@ def update_metrics(
self.metric.update(batch, logits)

def compute_metrics(self) -> Dict[str, torch.Tensor]:
value = self.metric.compute()
label = f"{self.label} ({self.metric_type_to_label[self.task.metric_type]})"
return {label: value}
metric_type_to_value = self.metric.compute()
outputs = {}
for metric_type, value in metric_type_to_value.items():
key = f"{self.label} ({self.metric_type_to_label[metric_type]})"
outputs[key] = value.item()
return outputs

def reset_metrics(self) -> None:
self.metric.reset()
Expand All @@ -227,7 +232,7 @@ def reset_metrics(self) -> None:
class DownstreamEvaluatorCallbackConfig(CallbackConfig):
tasks: List[str]
tokenizer: TokenizerConfig
eval_batch_size: Optional[int] = None
eval_batch_size: Optional[int] = None # NOTE: this counts in number of tokens
eval_interval: int = 1000
eval_duration: Duration = field(default_factory=lambda: Duration.epochs(1))
log_interval: int = 5
Expand Down
45 changes: 45 additions & 0 deletions src/scripts/train/OLMo2-1B.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@
from olmo_core.optim import AdamWConfig, OptimGroupOverride
from olmo_core.train import TrainerConfig
from olmo_core.train.callbacks import CheckpointerCallback, CometCallback, WandBCallback
from olmo_core.train.callbacks.evaluator_callback import (
DownstreamEvaluatorCallbackConfig,
)


def build_model_config(common: CommonComponents) -> TransformerConfig:
Expand Down Expand Up @@ -73,6 +76,48 @@ def build_trainer_config(common: CommonComponents) -> TrainerConfig:
cancel_check_interval=10,
),
)
.with_callback(
"downstream_evaluator",
DownstreamEvaluatorCallbackConfig(
tasks=[
"arc_challenge_val_rc_5shot",
"arc_challenge_val_mc_5shot",
"arc_challenge_test_rc_5shot",
"arc_challenge_test_mc_5shot",
"arc_easy_val_rc_5shot",
"arc_easy_val_mc_5shot",
"arc_easy_test_rc_5shot",
"arc_easy_test_mc_5shot",
"boolq_val_rc_5shot",
"boolq_val_mc_5shot",
"csqa_val_rc_5shot",
"csqa_val_mc_5shot",
"hellaswag_val_rc_5shot",
"hellaswag_val_mc_5shot",
"openbookqa_val_rc_5shot",
"openbookqa_val_mc_5shot",
"openbookqa_test_rc_5shot",
"openbookqa_test_mc_5shot",
"piqa_val_rc_5shot",
"piqa_val_mc_5shot",
"socialiqa_val_rc_5shot",
"socialiqa_val_mc_5shot",
"winogrande_val_rc_5shot",
"winogrande_val_mc_5shot",
"mmlu_stem_val_rc_5shot",
"mmlu_stem_val_mc_5shot",
"mmlu_humanities_val_rc_5shot",
"mmlu_humanities_val_mc_5shot",
"mmlu_social_sciences_val_rc_5shot",
"mmlu_social_sciences_val_mc_5shot",
"mmlu_other_val_rc_5shot",
"mmlu_other_val_mc_5shot",
],
tokenizer=common.tokenizer,
eval_batch_size=1024 * 4096,
eval_interval=1000,
),
)
)


Expand Down

0 comments on commit ee27348

Please sign in to comment.