Mirror of https://github.com/hiyouga/LLaMA-Factory.git, synced 2025-12-23 23:30:36 +08:00
[feat] Models trained and inferred with FP8 are dequantized by default (#9627)
@@ -110,7 +110,7 @@ def configure_quantization(
             check_version("aqlm>=1.1.0", mandatory=True)
             quantization_config["bits"] = 2

-        if quant_method == QuantizationMethod.FP8 and is_trainable:
+        if quant_method == QuantizationMethod.FP8:
             quant_config = FineGrainedFP8Config(dequantize=True)
             init_kwargs["quantization_config"] = quant_config

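For context, a minimal sketch of what this change amounts to when a model is loaded: with the `and is_trainable` condition dropped, an FP8-quantized checkpoint is dequantized on load for inference as well as for training. The checkpoint id below is a placeholder, and the from_pretrained call is an assumption about how the resulting quantization_config ends up being consumed, not the repository's own loading code.

    # Minimal sketch, assuming transformers provides FineGrainedFP8Config with a
    # dequantize flag (as used in the diff above); the model id is hypothetical.
    from transformers import AutoModelForCausalLM, FineGrainedFP8Config

    # After this commit, LLaMA-Factory builds this config for FP8 checkpoints
    # unconditionally, not only when is_trainable is set, so inference also
    # runs on dequantized (higher-precision) weights.
    quant_config = FineGrainedFP8Config(dequantize=True)

    model = AutoModelForCausalLM.from_pretrained(
        "org/some-fp8-model",  # placeholder FP8-quantized checkpoint
        quantization_config=quant_config,
    )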