[feat] Models trained and inferred with FP8 are dequantized by default (#9627)
@@ -110,7 +110,7 @@ def configure_quantization(
             check_version("aqlm>=1.1.0", mandatory=True)
             quantization_config["bits"] = 2
 
-        if quant_method == QuantizationMethod.FP8 and is_trainable:
+        if quant_method == QuantizationMethod.FP8:
             quant_config = FineGrainedFP8Config(dequantize=True)
             init_kwargs["quantization_config"] = quant_config
 
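For context, a minimal sketch of what the new default amounts to when loading a model, assuming a hypothetical fine-grained FP8 checkpoint named "your-org/fp8-model" (placeholder, not from the source): with the `is_trainable` guard removed, the dequantization config is applied for inference as well as training.

# Sketch only, not LLaMA-Factory's full loader. After this change the
# FineGrainedFP8Config(dequantize=True) config is attached whenever the
# checkpoint's quant_method is FP8, so FP8 weights are dequantized at load
# time by default for both training and inference.
from transformers import AutoModelForCausalLM, FineGrainedFP8Config

# dequantize=True asks transformers to upcast the FP8 weights when loading
quant_config = FineGrainedFP8Config(dequantize=True)

model = AutoModelForCausalLM.from_pretrained(
    "your-org/fp8-model",  # placeholder: any fine-grained FP8-quantized checkpoint
    quantization_config=quant_config,
)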