[feat] fp8 training (#8960)

Co-authored-by: Benjamin Feuer <penfever@gmail.com> Co-authored-by: Yaowei Zheng <hiyouga@buaa.edu.cn>
2026-03-08 04:35:58 +08:00 · 2025-09-30 23:32:53 -07:00
parent e2b1594d31
commit 1c44b60e3e
8 changed files with 322 additions and 3 deletions
--- a/src/llamafactory/hparams/model_args.py
+++ b/src/llamafactory/hparams/model_args.py
@@ -213,6 +213,23 @@ class QuantizationArguments:
        default=None,
        metadata={"help": "Device map used to infer the 4-bit quantized model, needs bitsandbytes>=0.43.0."},
    )
+    fp8: bool = field(
+        default=False,
+        metadata={
+            "help": "Enable FP8 mixed precision training via HuggingFace Accelerate. "
+            "Requires PyTorch 2.7+ and Hopper architecture GPUs."
+        },
+    )
+    fp8_backend: str = field(
+        default="auto",
+        metadata={
+            "help": "FP8 backend to use ('auto', 'torchao', 'te', 'msamp'). 'auto' selects best available backend."
+        },
+    )
+    fp8_enable_fsdp_float8_all_gather: bool = field(
+        default=False,
+        metadata={"help": "Enable FP8 optimizations for FSDP2 all-gather operations."},
+    )


@dataclass