From c1f5f8fff65d54805fc9dbc408ca69ae59cced24 Mon Sep 17 00:00:00 2001
From: Hertz <2267379130@qq.com>
Date: Tue, 9 Dec 2025 11:06:42 +0800
Subject: [PATCH] [model] support GLM4.6v (#9586)

---
 src/llamafactory/data/template.py      | 9 ++++++---
 src/llamafactory/extras/constants.py   | 8 ++++++++
 src/llamafactory/train/sft/workflow.py | 7 +++++--
 3 files changed, 19 insertions(+), 5 deletions(-)

diff --git a/src/llamafactory/data/template.py b/src/llamafactory/data/template.py
index 604c4301..b511fe14 100644
--- a/src/llamafactory/data/template.py
+++ b/src/llamafactory/data/template.py
@@ -199,9 +199,12 @@ class Template:
             logger.info_rank0(f"Add pad token: {tokenizer.pad_token}")
 
         if stop_words:
-            num_added_tokens = tokenizer.add_special_tokens(
-                dict(additional_special_tokens=stop_words), replace_additional_special_tokens=False
-            )
+            try:
+                num_added_tokens = tokenizer.add_special_tokens(
+                    dict(additional_special_tokens=stop_words), replace_additional_special_tokens=False
+                )
+            except TypeError:
+                num_added_tokens = tokenizer.add_special_tokens(dict(additional_special_tokens=stop_words))
             logger.info_rank0("Add {} to stop words.".format(",".join(stop_words)))
             if num_added_tokens > 0:
                 logger.warning_rank0("New tokens have been added, make sure `resize_vocab` is True.")
diff --git a/src/llamafactory/extras/constants.py b/src/llamafactory/extras/constants.py
index e03740a1..6f763153 100644
--- a/src/llamafactory/extras/constants.py
+++ b/src/llamafactory/extras/constants.py
@@ -969,6 +969,14 @@ register_model_group(
             DownloadSource.DEFAULT: "zai-org/GLM-4.1V-9B-Thinking",
             DownloadSource.MODELSCOPE: "ZhipuAI/GLM-4.1V-9B-Thinking",
         },
+        "GLM-4.6V": {
+            DownloadSource.DEFAULT: "zai-org/GLM-4.6V",
+            DownloadSource.MODELSCOPE: "ZhipuAI/GLM-4.6V",
+        },
+        "GLM-4.6V-Flash": {
+            DownloadSource.DEFAULT: "zai-org/GLM-4.6V-Flash",
+            DownloadSource.MODELSCOPE: "ZhipuAI/GLM-4.6V-Flash",
+        },
     },
     template="glm4v",
     multimodal=True,
diff --git a/src/llamafactory/train/sft/workflow.py b/src/llamafactory/train/sft/workflow.py
index 7acf7595..c5470258 100644
--- a/src/llamafactory/train/sft/workflow.py
+++ b/src/llamafactory/train/sft/workflow.py
@@ -78,8 +78,11 @@ def run_sft(
 
     # Compatible with Transformers v4 and Transformers v5
     if is_transformers_version_greater_than("5.0.0RC0"):
-        extra_special_tokens = getattr(tokenizer, "_extra_special_tokens", [])
-        extra_ids = tokenizer.convert_tokens_to_ids(extra_special_tokens)
+        extra_ids = getattr(tokenizer, "additional_special_tokens_ids", None)
+        if not isinstance(extra_ids, list):
+            extra_special_tokens = getattr(tokenizer, "_extra_special_tokens", [])
+            string_tokens = [str(t) for t in extra_special_tokens]
+            extra_ids = tokenizer.convert_tokens_to_ids(string_tokens)
         all_eos_ids = [tokenizer.eos_token_id] + [i for i in extra_ids if i != -1]
         unique_eos_ids = list(dict.fromkeys(all_eos_ids))
         gen_kwargs["eos_token_id"] = unique_eos_ids