From 8c74dca76a813129c175489c85bf50e2c614091f Mon Sep 17 00:00:00 2001
From: Xunpeng Xiao <124695565+tangefly@users.noreply.github.com>
Date: Thu, 18 Dec 2025 22:54:35 +0800
Subject: [PATCH] [feat] Models trained and inferred with FP8 are dequantized by default (#9627)

---
 src/llamafactory/model/model_utils/quantization.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/llamafactory/model/model_utils/quantization.py b/src/llamafactory/model/model_utils/quantization.py
index ebffbbc7f..59dfc502a 100644
--- a/src/llamafactory/model/model_utils/quantization.py
+++ b/src/llamafactory/model/model_utils/quantization.py
@@ -110,7 +110,7 @@ def configure_quantization(
             check_version("aqlm>=1.1.0", mandatory=True)
             quantization_config["bits"] = 2
 
-        if quant_method == QuantizationMethod.FP8 and is_trainable:
+        if quant_method == QuantizationMethod.FP8:
             quant_config = FineGrainedFP8Config(dequantize=True)
             init_kwargs["quantization_config"] = quant_config
 
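
With the "and is_trainable" guard removed, FP8-quantized checkpoints are now dequantized on load during inference as well as training. The sketch below shows the effect of the new branch; it assumes FineGrainedFP8Config is the transformers class of that name and that init_kwargs is eventually forwarded to from_pretrained, and the checkpoint path is a placeholder, not a real model id.

    # Sketch only: illustrates the config the patched branch now sets, not
    # LLaMA-Factory's full model-loading path.
    from transformers import AutoModelForCausalLM, FineGrainedFP8Config

    # dequantize=True asks transformers to dequantize the packed FP8 weights to a
    # higher-precision dtype at load time; after this patch the config is applied
    # for both training and inference, not only when is_trainable is set.
    quant_config = FineGrainedFP8Config(dequantize=True)
    model = AutoModelForCausalLM.from_pretrained(
        "path/to/fp8-quantized-checkpoint",  # placeholder: any FP8-quantized checkpoint
        quantization_config=quant_config,
    )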