2 Commits

Author SHA1 Message Date
Shanay Mehta
184304b5b4 [model] add liger kernel support for Qwen3-Next (#10176)
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-10 21:47:48 +08:00
Xue Yadong
d3ebd5678d [model] support GLM-OCR SFT (#10183) 2026-02-10 21:41:01 +08:00
6 changed files with 41 additions and 1 deletion

View File

@@ -1 +1 @@
-liger-kernel>=0.5.5
+liger-kernel>=0.6.3

View File

@@ -213,6 +213,7 @@ class MultiModalDataCollatorForSeq2Seq(DataCollatorForSeq2Seq):
and getattr(self.model.config, "model_type", None)
in [
"glm4v",
"glm_ocr",
"Keye",
"qwen2_vl",
"qwen2_5_vl",

View File

@@ -1061,6 +1061,22 @@ register_template(
)
# GLM-OCR chat template (copied from glm4 template).
# Tool calls use the "glm4" tool format; image/video inputs are handled by the
# "glm4v" multimodal plugin with the <|image|> and <|video|> placeholder tokens.
register_template(
    name="glm_ocr",
    # Per-role slot layouts; {{content}} marks where the message text is inserted.
    format_user=StringFormatter(slots=["<|user|>\n{{content}}<|assistant|>"]),
    format_assistant=StringFormatter(slots=["\n{{content}}"]),
    format_system=StringFormatter(slots=["<|system|>\n{{content}}"]),
    # Function calls, tool results, and tool declarations.
    format_function=FunctionFormatter(slots=["{{content}}"], tool_format="glm4"),
    format_observation=StringFormatter(slots=["<|observation|>\n{{content}}<|assistant|>"]),
    format_tools=ToolFormatter(tool_format="glm4"),
    # Fixed prefix slot for the prompt.
    format_prefix=EmptyFormatter(slots=["[gMASK]<sop>"]),
    stop_words=["<|user|>", "<|observation|>"],
    efficient_eos=True,
    mm_plugin=get_mm_plugin(name="glm4v", image_token="<|image|>", video_token="<|video|>"),
)
# copied from glm4_moe template
register_template(
name="glm4_7",

View File

@@ -950,6 +950,18 @@ register_model_group(
)
# GLM-OCR: one checkpoint, listed per download source, bound to the "glm_ocr"
# template and flagged as multimodal.
register_model_group(
    models={
        "GLM-OCR": {
            DownloadSource.DEFAULT: "zai-org/GLM-OCR",
            DownloadSource.MODELSCOPE: "ZhipuAI/GLM-OCR",
        },
    },
    template="glm_ocr",
    multimodal=True,
)
register_model_group(
models={
"GLM-Z1-0414-9B-Chat": {

View File

@@ -77,6 +77,8 @@ def apply_liger_kernel(
from liger_kernel.transformers import apply_liger_kernel_to_qwen3 as apply_liger_kernel
elif model_type == "qwen3_moe":
from liger_kernel.transformers import apply_liger_kernel_to_qwen3_moe as apply_liger_kernel
elif model_type == "qwen3_next":
from liger_kernel.transformers import apply_liger_kernel_to_qwen3_next as apply_liger_kernel
elif model_type == "gpt_oss":
try:
from liger_kernel.transformers import apply_liger_kernel_to_gpt_oss as apply_liger_kernel

View File

@@ -239,6 +239,15 @@ _register_composite_model(
)
# Composite-model layout for model_type "glm_ocr": names the projector,
# vision-model, and language-model keys.
_register_composite_model(
    model_type="glm_ocr",
    projector_key="visual.merger",
    vision_model_keys=["visual.patch_embed", "visual.blocks"],
    language_model_keys=["language_model", "lm_head"],
    # NOTE(review): presumably modules to keep out of LoRA targeting -- confirm
    # against the _register_composite_model definition.
    lora_conflict_keys=["patch_embed"],
)
# "internvl" is registered with only its model_type; no other arguments are passed.
_register_composite_model(model_type="internvl")