diff --git a/src/llamafactory/data/template.py b/src/llamafactory/data/template.py
index 8afc42c16..cb90eb3ec 100644
--- a/src/llamafactory/data/template.py
+++ b/src/llamafactory/data/template.py
@@ -833,6 +833,19 @@ register_template(
 )
 
 
+register_template(
+    name="hy3",
+    format_user=StringFormatter(slots=["<|hy_User|>{{content}}<|hy_Assistant|>"]),
+    format_assistant=StringFormatter(slots=["{{content}}<|hy_eos|>"]),
+    format_system=StringFormatter(slots=["{{content}}"]),
+    format_prefix=EmptyFormatter(slots=[{"bos_token"}]),
+    stop_words=["<|hy_eos|>"],
+    replace_eos=True,
+    thought_words=("", ""),
+    template_class=ReasoningTemplate,
+)
+
+
 register_template(
     name="deepseekcoder",
     format_user=StringFormatter(slots=["### Instruction:\n{{content}}\n### Response:"]),
diff --git a/src/llamafactory/extras/constants.py b/src/llamafactory/extras/constants.py
index 5c30ffd4b..c90fcfc8b 100644
--- a/src/llamafactory/extras/constants.py
+++ b/src/llamafactory/extras/constants.py
@@ -1257,6 +1257,17 @@ register_model_group(
 )
 
 
+register_model_group(
+    models={
+        "Hy3-Preview": {
+            DownloadSource.DEFAULT: "tencent/Hy3-preview",
+            DownloadSource.MODELSCOPE: "tencent/Hy3-preview",
+        },
+    },
+    template="hy3",
+)
+
+
 register_model_group(
     models={
         "Index-1.9B-Base": {
diff --git a/src/llamafactory/model/model_utils/moe.py b/src/llamafactory/model/model_utils/moe.py
index 592e7e397..478dbf9e8 100644
--- a/src/llamafactory/model/model_utils/moe.py
+++ b/src/llamafactory/model/model_utils/moe.py
@@ -62,6 +62,10 @@ def add_z3_leaf_module(model: "PreTrainedModel") -> None:
         # deepseek v3 and kimi vl use custom code
         _set_z3_leaf_modules(model, ["DeepseekV3MoE"])
 
+    if model_type == "hy_v3":
+        # hy3 uses custom code
+        _set_z3_leaf_modules(model, ["HYV3MoE"])
+
     if model_type == "ernie4_5_moe":
         from transformers.models.ernie4_5_moe.modeling_ernie4_5_moe import Ernie4_5_MoeSparseMoeBlock
 