diff --git a/src/llamafactory/model/model_utils/moe.py b/src/llamafactory/model/model_utils/moe.py index d7c19c44c..fdff829dc 100644 --- a/src/llamafactory/model/model_utils/moe.py +++ b/src/llamafactory/model/model_utils/moe.py @@ -142,6 +142,10 @@ def add_z3_leaf_module(model: "PreTrainedModel") -> None: _set_z3_leaf_modules(model, [Qwen3OmniMoeThinkerTextSparseMoeBlock]) + if model_type == "qwen3_next": + from transformers.models.qwen3_next.modeling_qwen3_next import Qwen3NextSparseMoeBlock + + _set_z3_leaf_modules(model, [Qwen3NextSparseMoeBlock]) def configure_moe(config: "PretrainedConfig", model_args: "ModelArguments", is_trainable: bool) -> None: if not is_trainable or not model_args.moe_aux_loss_coef: