[model] fix gemma3 export (#7786)

Co-authored-by: hoshi-hiyouga <hiyouga@buaa.edu.cn>
Authored by ddddng on 2025-04-21 23:07:11 +08:00; committed via GitHub.
parent ec7257e70f
commit b8cddbc7d7
Signature: no known key found for this signature in the database (GPG Key ID: B5690EEEBB952194).

View File

@ -122,9 +122,22 @@ def configure_quantization(
if getattr(config, "model_type", None) == "chatglm":
raise ValueError("ChatGLM model is not supported yet.")
try:
from optimum.gptq import utils as gq_utils
if "language_model.model.layers" not in gq_utils.BLOCK_PATTERNS:
gq_utils.BLOCK_PATTERNS.insert(0, "language_model.model.layers")
except ImportError:
pass
block_name_to_quantize = None
if getattr(config, "model_type", None) in ["gemma3", "paligemma"]:
block_name_to_quantize = "language_model.model.layers"
init_kwargs["quantization_config"] = GPTQConfig(
bits=model_args.export_quantization_bit,
tokenizer=tokenizer,
dataset=_get_quantization_dataset(tokenizer, model_args),
block_name_to_quantize=block_name_to_quantize,
)
init_kwargs["device_map"] = "auto"
init_kwargs["max_memory"] = get_max_memory()