[model] update constants (#10220)

2026-06-19 13:48:55 +08:00 · 2026-02-26 21:13:56 +08:00
parent 2b8b871475
commit 122cd46084
12 changed files with 69 additions and 40 deletions
--- a/src/llamafactory/extras/constants.py
+++ b/src/llamafactory/extras/constants.py
@@ -2810,6 +2810,29 @@ register_model_group(
 )


+register_model_group(
+    models={
+        "Qwen3.5-27B": {
+            DownloadSource.DEFAULT: "Qwen/Qwen3.5-27B",
+            DownloadSource.MODELSCOPE: "Qwen/Qwen3.5-27B",
+        },
+        "Qwen3.5-35B-A3B": {
+            DownloadSource.DEFAULT: "Qwen/Qwen3.5-35B-A3B",
+            DownloadSource.MODELSCOPE: "Qwen/Qwen3.5-35B-A3B",
+        },
+        "Qwen3.5-122B-A10B": {
+            DownloadSource.DEFAULT: "Qwen/Qwen3.5-122B-A10B",
+            DownloadSource.MODELSCOPE: "Qwen/Qwen3.5-122B-A10B",
+        },
+        "Qwen3.5-397B-A17B": {
+            DownloadSource.DEFAULT: "Qwen/Qwen3.5-397B-A17B",
+            DownloadSource.MODELSCOPE: "Qwen/Qwen3.5-397B-A17B",
+        },
+    },
+    template="qwen3_5",
+)
+
+
 register_model_group(
    models={
        "Qwen2-Audio-7B": {
--- a/src/llamafactory/model/model_utils/moe.py
+++ b/src/llamafactory/model/model_utils/moe.py
@@ -147,6 +147,7 @@ def add_z3_leaf_module(model: "PreTrainedModel") -> None:

        _set_z3_leaf_modules(model, [Qwen3NextSparseMoeBlock])

+
 def configure_moe(config: "PretrainedConfig", model_args: "ModelArguments", is_trainable: bool) -> None:
    if not is_trainable or not model_args.moe_aux_loss_coef:
        return
--- a/src/llamafactory/train/mca/workflow.py
+++ b/src/llamafactory/train/mca/workflow.py
@@ -110,6 +110,7 @@ def _freeze_model_parameters(model: Any, finetuning_args: "FinetuningArguments")
            if any(name.startswith(k) for k in params_to_freeze):
                p.requires_grad_(False)

+
 def run_pt(
    model_args: "ModelArguments",
    data_args: "DataArguments",