From 8efa5c9694b557adf5b7fbb3008ac9fac8239032 Mon Sep 17 00:00:00 2001
From: Kingsley
Date: Mon, 11 Aug 2025 21:45:14 +0800
Subject: [PATCH] [model] support GLM4.5V (#8876)

---
Reviewer notes (free text between the three-dash line and the first
"diff --git" is not part of the commit message):

* What the patch does: registers a "glm45v" chat template, renames the
  "GLM-4.5-Air-Chat" / "GLM-4.5-Chat" model entries to "...-Thinking",
  adds a multimodal "GLM-4.5V-Air-Thinking" model group bound to the
  "glm45v" template, passes Glm4vMoeTextMoE to _set_z3_leaf_modules for
  model_type "glm4v_moe", and registers a composite model for the same
  model_type.
* This copy was rebuilt from a version whose line breaks and indentation
  had been collapsed. Indentation was reconstructed from Python
  conventions, and blank context/added lines were placed so that every
  hunk matches the line counts in its "@@" header.
* NOTE(review): in the collapsed copy, format_prefix read "[gMASK]" and
  the last stop word was an empty string. Both look like "<...>" spans
  lost in extraction; they are restored here as "[gMASK]<sop>" and
  "</answer>" by analogy with the glm4/glm4v templates. Confirm against
  commit 8efa5c9 before applying.
* NOTE(review): the From: header carries no e-mail address (presumably
  stripped the same way); restore it before feeding this file to git am.
* NOTE(review): '"GLM-4.5V-Air-Thinking":{' lacks a space after the
  colon; left as recorded in the commit, a formatter pass will fix it.

 src/llamafactory/data/template.py            | 16 ++++++++++++++++
 src/llamafactory/extras/constants.py         | 16 ++++++++++++++--
 src/llamafactory/model/model_utils/moe.py    |  5 +++++
 src/llamafactory/model/model_utils/visual.py |  9 +++++++++
 4 files changed, 44 insertions(+), 2 deletions(-)

diff --git a/src/llamafactory/data/template.py b/src/llamafactory/data/template.py
index e28d462b..ef6c7c15 100644
--- a/src/llamafactory/data/template.py
+++ b/src/llamafactory/data/template.py
@@ -1047,6 +1047,22 @@ register_template(
 )
 
 
+register_template(
+    name="glm45v",
+    format_user=StringFormatter(slots=["<|user|>\n{{content}}<|assistant|>"]),
+    format_assistant=StringFormatter(slots=["\n{{content}}"]),
+    format_system=StringFormatter(slots=["<|system|>\n{{content}}"]),
+    format_function=FunctionFormatter(slots=["{{content}}"], tool_format="glm4_moe"),
+    format_observation=StringFormatter(slots=["<|observation|>\n{{content}}<|assistant|>"]),
+    format_tools=ToolFormatter(tool_format="glm4_moe"),
+    format_prefix=EmptyFormatter(slots=["[gMASK]<sop>"]),
+    stop_words=["<|user|>", "<|observation|>", "</answer>"],
+    efficient_eos=True,
+    mm_plugin=get_mm_plugin(name="glm4v", image_token="<|image|>", video_token="<|video|>"),
+    template_class=ReasoningTemplate,
+)
+
+
 # copied from glm4 template
 register_template(
     name="glmz1",
diff --git a/src/llamafactory/extras/constants.py b/src/llamafactory/extras/constants.py
index 472ba4c5..8364e96c 100644
--- a/src/llamafactory/extras/constants.py
+++ b/src/llamafactory/extras/constants.py
@@ -896,11 +896,11 @@ register_model_group(
             DownloadSource.DEFAULT: "zai-org/GLM-4.5-Base",
             DownloadSource.MODELSCOPE: "ZhipuAI/GLM-4.5-Base",
         },
-        "GLM-4.5-Air-Chat": {
+        "GLM-4.5-Air-Thinking": {
             DownloadSource.DEFAULT: "zai-org/GLM-4.5-Air",
             DownloadSource.MODELSCOPE: "ZhipuAI/GLM-4.5-Air",
         },
-        "GLM-4.5-Chat": {
+        "GLM-4.5-Thinking": {
             DownloadSource.DEFAULT: "zai-org/GLM-4.5",
             DownloadSource.MODELSCOPE: "ZhipuAI/GLM-4.5",
         },
@@ -909,6 +909,18 @@ register_model_group(
 )
 
 
+register_model_group(
+    models={
+        "GLM-4.5V-Air-Thinking":{
+            DownloadSource.DEFAULT: "zai-org/GLM-4.5V",
+            DownloadSource.MODELSCOPE: "ZhipuAI/GLM-4.5V",
+        }
+    },
+    template="glm45v",
+    multimodal=True,
+)
+
+
 register_model_group(
     models={
         "GLM-Z1-0414-9B-Chat": {
diff --git a/src/llamafactory/model/model_utils/moe.py b/src/llamafactory/model/model_utils/moe.py
index db7adbee..95990f2f 100644
--- a/src/llamafactory/model/model_utils/moe.py
+++ b/src/llamafactory/model/model_utils/moe.py
@@ -62,6 +62,11 @@ def add_z3_leaf_module(model: "PreTrainedModel") -> None:
 
         _set_z3_leaf_modules(model, [Glm4MoeMoE])
 
+    if model_type == "glm4v_moe":
+        from transformers.models.glm4v_moe.modeling_glm4v_moe import Glm4vMoeTextMoE
+
+        _set_z3_leaf_modules(model, [Glm4vMoeTextMoE])
+
     if model_type == "jamba":
         from transformers.models.jamba.modeling_jamba import JambaSparseMoeBlock
 
diff --git a/src/llamafactory/model/model_utils/visual.py b/src/llamafactory/model/model_utils/visual.py
index 1e228855..c3f1e9c1 100644
--- a/src/llamafactory/model/model_utils/visual.py
+++ b/src/llamafactory/model/model_utils/visual.py
@@ -221,6 +221,15 @@ _register_composite_model(
 )
 
 
+_register_composite_model(
+    model_type="glm4v_moe",
+    projector_key="visual.merger",
+    vision_model_keys=["visual.patch_embed", "visual.blocks"],
+    language_model_keys=["language_model", "lm_head"],
+    lora_conflict_keys=["patch_embed"],
+)
+
+
 _register_composite_model(
     model_type="internvl",
 )