[feat] support LlamaFactory SFT training by HyperParallel FSDP2 backend (#10289)

2026-05-28 10:58:54 +08:00 · 2026-03-30 10:47:20 +08:00
parent b5afabe3d2
commit 97433c53b6
5 changed files with 235 additions and 2 deletions
--- a/src/llamafactory/hparams/finetuning_args.py
+++ b/src/llamafactory/hparams/finetuning_args.py
@@ -482,6 +482,24 @@ class FinetuningArguments(
            )
        },
    )
+    use_hyper_parallel: bool = field(  # opt-in switch for the HyperParallel backend
+        default=False,  # off unless explicitly enabled
+        metadata={  # NOTE(review): the 'sft' + full fine-tuning restriction is only stated in the help text here; enforcement is not visible in this hunk
+            "help": (
+                "Whether or not to use HyperParallel distributed training backend (FSDP/TP). "
+                "Only supported for the 'sft' stage with full fine-tuning."
+            )
+        },
+    )
+    hyper_parallel_args: str | None = field(  # optional path to a JSON strategy file
+        default=None,  # None: no strategy file supplied
+        metadata={  # NOTE(review): loading/validation of the JSON file is not part of this hunk — confirm where it is parsed
+            "help": (
+                "Path to a JSON file containing HyperParallel strategy arguments "
+                "(e.g., tp_size, param_dtype). Used when use_hyper_parallel=True."
+            )
+        },
+    )
    use_muon: bool = field(
        default=False,
        metadata={"help": "Whether or not to use the Muon optimizer."},