diff --git a/src/llamafactory/model/model_utils/visual.py b/src/llamafactory/model/model_utils/visual.py index a8010579..ba2bf5c9 100644 --- a/src/llamafactory/model/model_utils/visual.py +++ b/src/llamafactory/model/model_utils/visual.py @@ -76,7 +76,7 @@ def _register_composite_model( model_type=model_type, projector_key=projector_key or "multi_modal_projector", vision_model_keys=vision_model_keys or ["vision_tower"], - language_model_keys=language_model_keys or ["language_model"], + language_model_keys=language_model_keys or ["language_model", "lm_head"], lora_conflict_keys=lora_conflict_keys or [], ) @@ -200,12 +200,12 @@ def patch_target_modules( _register_composite_model( - model_type="internvl", + model_type="gemma3", ) _register_composite_model( - model_type="gemma3", + model_type="internvl", ) @@ -246,14 +246,8 @@ _register_composite_model( lora_conflict_keys=["audio_projection_layer"], ) - _register_composite_model( - model_type="paligemma", -) - - -_register_composite_model( - model_type="video_llava", + model_type="mistral3", ) @@ -264,7 +258,7 @@ _register_composite_model( _register_composite_model( - model_type="mistral3", + model_type="paligemma", ) @@ -303,3 +297,8 @@ _register_composite_model( else ["model", "lm_head"], lora_conflict_keys=["patch_embed"], ) + + +_register_composite_model( + model_type="video_llava", +)