[feat] Dequantize FP8 models by default for both training and inference (#9627)

Author: Xunpeng Xiao
Date: 2025-12-18 22:54:35 +08:00
Committed by: GitHub
Parent: e8deda53a1
Commit: 8c74dca76a


@@ -110,7 +110,7 @@ def configure_quantization(
check_version("aqlm>=1.1.0", mandatory=True) check_version("aqlm>=1.1.0", mandatory=True)
quantization_config["bits"] = 2 quantization_config["bits"] = 2
if quant_method == QuantizationMethod.FP8 and is_trainable: if quant_method == QuantizationMethod.FP8:
quant_config = FineGrainedFP8Config(dequantize=True) quant_config = FineGrainedFP8Config(dequantize=True)
init_kwargs["quantization_config"] = quant_config init_kwargs["quantization_config"] = quant_config