mirror of
https://github.com/hiyouga/LLaMA-Factory.git
synced 2026-04-27 18:29:08 +08:00
[v1] add deepspeed zero3 trigger for low memory usage weight loading (#10300)
This commit is contained in:
@@ -54,7 +54,7 @@ class TrainingArguments:
         metadata={"help": "Maximum gradient norm for training."},
     )
     bf16: bool = field(
-        default=False,
+        default=True,
         metadata={"help": "Use bf16 for training."},
     )
     batching_strategy: BatchingStrategy = field(
@@ -66,7 +66,7 @@ class TrainingArguments:
         metadata={"help": "Number of workers for batching."},
     )
     enable_activation_checkpointing: bool = field(
-        default=False,
+        default=True,
         metadata={"help": "Enable activation checkpointing for training."},
     )
     dist_config: PluginConfig | None = field(
Reference in New Issue
Block a user