[model] support GLM-OCR SFT (#10183)

2026-05-29 03:18:56 +08:00 · 2026-02-10 21:41:01 +08:00
parent 1d5e8ebcd0
commit d3ebd5678d
4 changed files with 38 additions and 0 deletions
--- a/src/llamafactory/data/collator.py
+++ b/src/llamafactory/data/collator.py
@@ -213,6 +213,7 @@ class MultiModalDataCollatorForSeq2Seq(DataCollatorForSeq2Seq):
            and getattr(self.model.config, "model_type", None)
            in [
                "glm4v",
+                "glm_ocr",
                "Keye",
                "qwen2_vl",
                "qwen2_5_vl",
--- a/src/llamafactory/data/template.py
+++ b/src/llamafactory/data/template.py
@@ -1061,6 +1061,22 @@ register_template(
 )


+# copied from glm4 template; additionally attaches the glm4v multimodal plugin (see mm_plugin)
+register_template(
+    name="glm_ocr",
+    format_user=StringFormatter(slots=["<|user|>\n{{content}}<|assistant|>"]),
+    format_assistant=StringFormatter(slots=["\n{{content}}"]),
+    format_system=StringFormatter(slots=["<|system|>\n{{content}}"]),
+    format_function=FunctionFormatter(slots=["{{content}}"], tool_format="glm4"),
+    format_observation=StringFormatter(slots=["<|observation|>\n{{content}}<|assistant|>"]),
+    format_tools=ToolFormatter(tool_format="glm4"),
+    format_prefix=EmptyFormatter(slots=["[gMASK]<sop>"]),
+    stop_words=["<|user|>", "<|observation|>"],
+    efficient_eos=True,
+    mm_plugin=get_mm_plugin(name="glm4v", image_token="<|image|>", video_token="<|video|>"),
+)
+
+
 # copied from glm4_moe template
 register_template(
    name="glm4_7",