diff --git a/src/llamafactory/model/model_utils/visual.py b/src/llamafactory/model/model_utils/visual.py
index e5c39280..778b5f3c 100644
--- a/src/llamafactory/model/model_utils/visual.py
+++ b/src/llamafactory/model/model_utils/visual.py
@@ -350,6 +350,32 @@ _register_composite_model(
     lora_conflict_keys=["patch_embed"],
 )
 
+_register_composite_model(
+    model_type="qwen3_vl",
+    projector_key="visual.merger",
+    vision_model_keys=["visual.patch_embed", "visual.blocks"],
+    language_model_keys=["language_model", "lm_head"],
+    lora_conflict_keys=["patch_embed"],
+)
+
+
+_register_composite_model(
+    model_type="qwen3_vl_moe",
+    projector_key="visual.merger",
+    vision_model_keys=["visual.patch_embed", "visual.blocks"],
+    language_model_keys=["language_model", "lm_head"],
+    lora_conflict_keys=["patch_embed"],
+)
+
+
+_register_composite_model(
+    model_type="qwen3_omni_moe_thinker",
+    projector_key="visual.merger",
+    vision_model_keys=["visual.patch_embed", "visual.blocks", "audio_tower"],
+    language_model_keys=["model", "lm_head"],
+    lora_conflict_keys=["patch_embed"],
+)
+
 
 _register_composite_model(
     model_type="video_llava",
diff --git a/tests/data/test_mm_plugin.py b/tests/data/test_mm_plugin.py
index 92238b35..b4c47e55 100644
--- a/tests/data/test_mm_plugin.py
+++ b/tests/data/test_mm_plugin.py
@@ -332,9 +332,14 @@ def test_qwen2_omni_plugin():
     image_seqlen, audio_seqlen = 4, 2
     tokenizer_module = _load_tokenizer_module(model_name_or_path="Qwen/Qwen2.5-Omni-7B")
     qwen2_omni_plugin = get_mm_plugin(
-        name="qwen2_omni", audio_token="<|AUDIO|>", image_token="<|IMAGE|>", video_token="<|VIDEO|>",
-        vision_bos_token="<|vision_bos|>", vision_eos_token="<|vision_eos|>",
-        audio_bos_token="<|audio_bos|>", audio_eos_token="<|audio_eos|>"
+        name="qwen2_omni",
+        audio_token="<|AUDIO|>",
+        image_token="<|IMAGE|>",
+        video_token="<|VIDEO|>",
+        vision_bos_token="<|vision_bos|>",
+        vision_eos_token="<|vision_eos|>",
+        audio_bos_token="<|audio_bos|>",
+        audio_eos_token="<|audio_eos|>",
     )
     check_inputs = {"plugin": qwen2_omni_plugin, **tokenizer_module}
     check_inputs["expected_mm_messages"] = [