[model] adds semantic initialization support for special tokens (#9267)

Co-authored-by: ximingxing <ximingxing@tencent.com>
2026-03-03 10:15:58 +08:00 · 2025-10-14 17:00:48 +08:00
parent 3dbca4b533
commit c867e28093
4 changed files with 264 additions and 7 deletions
--- a/src/llamafactory/model/patcher.py
+++ b/src/llamafactory/model/patcher.py
@@ -175,7 +175,12 @@ def patch_model(
        prepare_valuehead_model(model)

    if model_args.resize_vocab:
-        resize_embedding_layer(model, tokenizer)
+        resize_embedding_layer(
+            model,
+            tokenizer,
+            new_special_tokens_config=getattr(model_args, "_special_token_descriptions", None),
+            init_special_tokens=model_args.init_special_tokens,
+        )

    if is_trainable:
        if getattr(model.config, "model_type", None) == "gemma3n":