Skip to content

Commit

Permalink
Clarify ckpt configs
Browse files Browse the repository at this point in the history
  • Loading branch information
Muennighoff authored Jan 28, 2024
1 parent 7e35c87 commit 531c337
Showing 1 changed file with 19 additions and 1 deletion.
20 changes: 19 additions & 1 deletion olmo/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -669,11 +669,29 @@ class ShardedCheckpointerType(StrEnum):

class ActivationCheckpointingStrategy(StrEnum):
whole_layer = "whole_layer"
"""
Checkpoint every transformer layer.
"""

one_in_two = "one_in_two"
"""
Checkpoint one in two transformer layers.
"""

one_in_three = "one_in_three"
"""
Checkpoint one in three transformer layers.
"""

one_in_four = "one_in_four"
"""
Checkpoint one in four transformer layers.
"""

fine_grained = "fine_grained"

"""
Focus checkpointing on where it is cheap to recompute and saves most memory.
"""

@dataclass
class TrainConfig(BaseConfig):
Expand Down

0 comments on commit 531c337

Please sign in to comment.