mirror of
https://github.com/hiyouga/LLaMA-Factory.git
synced 2026-05-05 07:38:55 +08:00
[model] support Hy3-Preview (#10432)
This commit is contained in:
@@ -833,6 +833,19 @@ register_template(
|
||||
)
|
||||
|
||||
|
||||
# Chat template registered under the name "hy3"; it is built with ReasoningTemplate.
register_template(
    name="hy3",
    # A user turn is wrapped as <|hy_User|>...<|hy_Assistant|>, so the assistant
    # tag directly follows the user content.
    format_user=StringFormatter(slots=["<|hy_User|>{{content}}<|hy_Assistant|>"]),
    # Assistant content is terminated by <|hy_eos|>, which is also listed in stop_words.
    format_assistant=StringFormatter(slots=["{{content}}<|hy_eos|>"]),
    # The system content is emitted as-is, with no surrounding tags.
    format_system=StringFormatter(slots=["{{content}}"]),
    # NOTE: the slot is a one-element set (not a string) holding "bos_token".
    format_prefix=EmptyFormatter(slots=[{"bos_token"}]),
    stop_words=["<|hy_eos|>"],
    replace_eos=True,
    thought_words=("<think>", "</think>"),
    template_class=ReasoningTemplate,
)
|
||||
|
||||
|
||||
register_template(
|
||||
name="deepseekcoder",
|
||||
format_user=StringFormatter(slots=["### Instruction:\n{{content}}\n### Response:"]),
|
||||
|
||||
@@ -1257,6 +1257,17 @@ register_model_group(
|
||||
)
|
||||
|
||||
|
||||
# Register the "Hy3-Preview" model entry and associate it with the "hy3" template.
register_model_group(
    models={
        "Hy3-Preview": {
            # Both download sources use the same repository id.
            DownloadSource.DEFAULT: "tencent/Hy3-preview",
            DownloadSource.MODELSCOPE: "tencent/Hy3-preview",
        },
    },
    template="hy3",
)
|
||||
|
||||
|
||||
register_model_group(
|
||||
models={
|
||||
"Index-1.9B-Base": {
|
||||
|
||||
@@ -62,6 +62,10 @@ def add_z3_leaf_module(model: "PreTrainedModel") -> None:
|
||||
# deepseek v3 and kimi vl use custom code
|
||||
_set_z3_leaf_modules(model, ["DeepseekV3MoE"])
|
||||
|
||||
if model_type == "hy_v3":
|
||||
# hy3 uses custom code
|
||||
_set_z3_leaf_modules(model, ["HYV3MoE"])
|
||||
|
||||
if model_type == "ernie4_5_moe":
|
||||
from transformers.models.ernie4_5_moe.modeling_ernie4_5_moe import Ernie4_5_MoeSparseMoeBlock
|
||||
|
||||
|
||||
Reference in New Issue
Block a user