mirror of
https://github.com/hiyouga/LLaMA-Factory.git
synced 2026-02-26 07:45:59 +08:00
[model] support GLM-4.7-Flash SFT (#10173)
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -459,6 +459,18 @@ class ReasoningTemplate(Template):
|
|||||||
return [(encoded_messages[i], encoded_messages[i + 1]) for i in range(0, len(encoded_messages), 2)]
|
return [(encoded_messages[i], encoded_messages[i + 1]) for i in range(0, len(encoded_messages), 2)]
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class Glm47ReasoningTemplate(ReasoningTemplate):
    r"""Reasoning template for GLM-4.7: an empty thought is rendered as the closing tag alone."""

    @override
    def add_thought(self, content: str = "") -> str:
        r"""Wrap ``content`` in the thought markers.

        Args:
            content: The reasoning text. Defaults to an empty string.

        Returns:
            ``thought_words[0] + content + thought_words[1]`` when ``content`` is
            non-empty; otherwise only ``thought_words[1]`` (the closing marker).
        """
        closing_tag = self.thought_words[1]
        if content:
            return self.thought_words[0] + content + closing_tag

        # Empty thinking block: GLM-4.7 emits only the closing tag.
        return closing_tag
|
||||||
|
|
||||||
|
|
||||||
TEMPLATES: dict[str, "Template"] = {}
|
TEMPLATES: dict[str, "Template"] = {}
|
||||||
|
|
||||||
|
|
||||||
@@ -1049,6 +1061,23 @@ register_template(
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# copied from glm4_moe template; rendered through Glm47ReasoningTemplate so that
# an empty thought yields only the closing thought word.
register_template(
    name="glm4_7",
    template_class=Glm47ReasoningTemplate,
    # Conversation-turn formatters.
    format_prefix=EmptyFormatter(slots=["[gMASK]<sop>"]),
    format_system=StringFormatter(slots=["<|system|>\n{{content}}"]),
    format_user=StringFormatter(slots=["<|user|>\n{{content}}<|assistant|>"]),
    format_assistant=StringFormatter(slots=["\n{{content}}"]),
    # Tool-calling formatters reuse the glm4_moe tool format.
    format_tools=ToolFormatter(tool_format="glm4_moe"),
    format_function=FunctionFormatter(slots=["{{content}}"], tool_format="glm4_moe"),
    format_observation=StringFormatter(slots=["<|observation|>\n{{content}}<|assistant|>"]),
    # Generation and reasoning settings.
    stop_words=["<|user|>", "<|observation|>"],
    thought_words=("<think>", "</think>"),
    efficient_eos=True,
)
|
||||||
|
|
||||||
|
|
||||||
# copied from glm4 template
|
# copied from glm4 template
|
||||||
register_template(
|
register_template(
|
||||||
name="glmz1",
|
name="glmz1",
|
||||||
|
|||||||
@@ -939,6 +939,17 @@ register_model_group(
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# GLM-4.7-Flash checkpoints, bound to the "glm4_7" chat template.
register_model_group(
    template="glm4_7",
    models={
        "GLM-4.7-Flash": {
            DownloadSource.DEFAULT: "zai-org/GLM-4.7-Flash",
            DownloadSource.MODELSCOPE: "ZhipuAI/GLM-4.7-Flash",
        },
    },
)
|
||||||
|
|
||||||
|
|
||||||
register_model_group(
|
register_model_group(
|
||||||
models={
|
models={
|
||||||
"GLM-Z1-0414-9B-Chat": {
|
"GLM-Z1-0414-9B-Chat": {
|
||||||
|
|||||||
@@ -77,6 +77,11 @@ def add_z3_leaf_module(model: "PreTrainedModel") -> None:
|
|||||||
|
|
||||||
_set_z3_leaf_modules(model, [Glm4MoeMoE])
|
_set_z3_leaf_modules(model, [Glm4MoeMoE])
|
||||||
|
|
||||||
|
if model_type == "glm4_moe_lite":
|
||||||
|
from transformers.models.glm4_moe_lite.modeling_glm4_moe_lite import Glm4MoeLiteMoE
|
||||||
|
|
||||||
|
_set_z3_leaf_modules(model, [Glm4MoeLiteMoE])
|
||||||
|
|
||||||
if model_type == "glm4v_moe":
|
if model_type == "glm4v_moe":
|
||||||
from transformers.models.glm4v_moe.modeling_glm4v_moe import Glm4vMoeTextMoE
|
from transformers.models.glm4v_moe.modeling_glm4v_moe import Glm4vMoeTextMoE
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user