Mirror of https://github.com/hiyouga/LLaMA-Factory.git, synced 2025-08-23 14:22:51 +08:00
[model] fix gemma3 export (#7786)
Co-authored-by: hoshi-hiyouga <hiyouga@buaa.edu.cn>
parent ec7257e70f
commit b8cddbc7d7
@@ -122,9 +122,22 @@ def configure_quantization(
         if getattr(config, "model_type", None) == "chatglm":
             raise ValueError("ChatGLM model is not supported yet.")
 
+        try:
+            from optimum.gptq import utils as gq_utils
+            if "language_model.model.layers" not in gq_utils.BLOCK_PATTERNS:
+                gq_utils.BLOCK_PATTERNS.insert(0, "language_model.model.layers")
+        except ImportError:
+            pass
+
+        block_name_to_quantize = None
+        if getattr(config, "model_type", None) in ["gemma3", "paligemma"]:
+            block_name_to_quantize = "language_model.model.layers"
+
         init_kwargs["quantization_config"] = GPTQConfig(
             bits=model_args.export_quantization_bit,
+            tokenizer=tokenizer,
             dataset=_get_quantization_dataset(tokenizer, model_args),
+            block_name_to_quantize=block_name_to_quantize,
         )
         init_kwargs["device_map"] = "auto"
         init_kwargs["max_memory"] = get_max_memory()
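For context: optimum's GPTQ quantizer locates the transformer blocks to calibrate by matching attribute paths against its BLOCK_PATTERNS list, and multimodal Gemma 3 / PaliGemma checkpoints nest their decoder under language_model.model.layers, which the stock list does not cover. The hunk above therefore registers that path (guarded by try/except so it is a no-op when optimum is absent) and also passes it explicitly via GPTQConfig's block_name_to_quantize. Below is a minimal, self-contained sketch of what the patched export boils down to when driven through transformers directly; the model id, bit width, and calibration dataset are illustrative, not taken from the commit.

from transformers import AutoModelForCausalLM, AutoTokenizer, GPTQConfig

# Illustrative model id; any gemma3/paligemma checkpoint with a nested
# language_model would exercise the same code path.
model_id = "google/gemma-3-4b-it"

tokenizer = AutoTokenizer.from_pretrained(model_id)
quant_config = GPTQConfig(
    bits=4,        # mirrors export_quantization_bit in the diff
    tokenizer=tokenizer,
    dataset="c4",  # calibration data; LLaMA-Factory builds its own sample list
    block_name_to_quantize="language_model.model.layers",  # the gemma3 fix
)

# Loading with a GPTQConfig triggers on-the-fly quantization via optimum.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=quant_config,
    device_map="auto",
)
model.save_pretrained("gemma3-4b-gptq-int4")

Within LLaMA-Factory itself the same path is reached through the export entry point with export_quantization_bit set; passing block_name_to_quantize=None for all other model types preserves the previous auto-detection behavior.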