mirror of
https://github.com/hiyouga/LLaMA-Factory.git
synced 2026-05-28 10:58:54 +08:00
[feat] support LlamaFactory SFT training by HyperParallel FSDP2 backend (#10289)
This commit is contained in:
@@ -482,6 +482,24 @@ class FinetuningArguments(
|
||||
)
|
||||
},
|
||||
)
|
||||
# Opt-in switch for the HyperParallel distributed training backend; off by default.
# The help text limits it to the 'sft' stage with full fine-tuning. Nothing in this
# declaration enforces that limit -- presumably validated elsewhere; TODO confirm.
use_hyper_parallel: bool = field(
    default=False,
    metadata={
        "help": (
            "Whether or not to use HyperParallel distributed training backend (FSDP/TP). "
            "Only supported for the 'sft' stage with full fine-tuning."
        )
    },
)
|
||||
hyper_parallel_args: str | None = field(
|
||||
default=None,
|
||||
metadata={
|
||||
"help": (
|
||||
"Path to a JSON file containing HyperParallel strategy arguments "
|
||||
"(e.g., tp_size, param_dtype). Used when use_hyper_parallel=True."
|
||||
)
|
||||
},
|
||||
)
|
||||
use_muon: bool = field(
|
||||
default=False,
|
||||
metadata={"help": "Whether or not to use the Muon optimizer."},
|
||||
|
||||
Reference in New Issue
Block a user