Mirror of https://github.com/hiyouga/LLaMA-Factory.git, synced 2026-04-27 18:29:08 +08:00.
[v1] support resume training from checkpoint (#10280)
Co-authored-by: frozenleaves <frozen@Mac.local> Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
This commit is contained in:
@@ -85,6 +85,28 @@ class TrainingArguments:
|
||||
default=42,
|
||||
metadata={"help": "Random seed that will be set at the beginning of training."},
|
||||
)
|
||||
resume_from_checkpoint: str | None = field(
|
||||
default=None,
|
||||
metadata={"help": "Path to a checkpoint directory to resume training from, or 'auto' to find the latest."},
|
||||
)
|
||||
save_steps: int | None = field(
|
||||
default=None,
|
||||
metadata={"help": "Save a training checkpoint every N global steps."},
|
||||
)
|
||||
save_epochs: float | None = field(
|
||||
default=None,
|
||||
metadata={"help": "Save a training checkpoint every N epochs."},
|
||||
)
|
||||
save_ckpt_as_hf: bool = field(
|
||||
default=False,
|
||||
metadata={
|
||||
"help": "Save intermediate checkpoints in HuggingFace format instead of distributed format. Warning: doubles memory usage."
|
||||
},
|
||||
)
|
||||
save_total_limit: int | None = field(
|
||||
default=None,
|
||||
metadata={"help": "Maximum number of checkpoints to keep. Oldest checkpoints are deleted."},
|
||||
)
|
||||
# Metric-logging interval in optimizer steps; defaults to logging every step.
# (Reconstructed from a garbled scrape: gutter artifacts removed and the
# closing parenthesis — truncated by the scrape — restored to match the
# sibling field declarations. Help string preserved byte-for-byte.)
logging_steps: int = field(
    default=1,
    metadata={"help": "Log metrics every N optimizer steps."},
)
Reference in New Issue
Block a user