[model] support audio (#6701)

* support qwen2_audio
* improve code
* lint
* fix
* fix
* fix

---------

Co-authored-by: hiyouga <hiyouga@buaa.edu.cn>
2026-03-12 06:55:59 +08:00 · 2025-02-05 04:59:09 +08:00
parent a5e943f7bc
commit 24c7842948
37 changed files with 736 additions and 213 deletions
--- a/src/llamafactory/model/model_utils/visual.py
+++ b/src/llamafactory/model/model_utils/visual.py
@@ -280,6 +280,12 @@ _register_composite_model(
 )


+_register_composite_model(
+    model_type="qwen2_audio",
+    vision_model_keys=["audio_tower"],
+)
+
+
 _register_composite_model(
    model_type="qwen2_vl",
    projector_key="visual.merger",