From 35862d19ec68924db9bf6dd34146146d869491a3 Mon Sep 17 00:00:00 2001
From: hiyouga <467089858@qq.com>
Date: Sun, 6 Oct 2024 10:33:11 +0800
Subject: [PATCH] fix #5611

Former-commit-id: 76c813d37c1d945a8bb6d3e4168e15fbe97c7a87
---
 src/llamafactory/model/patcher.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/llamafactory/model/patcher.py b/src/llamafactory/model/patcher.py
index 06d41af5..126e9723 100644
--- a/src/llamafactory/model/patcher.py
+++ b/src/llamafactory/model/patcher.py
@@ -110,6 +110,9 @@ def patch_config(
     if getattr(config, "model_type", None) == "qwen2" and is_trainable and model_args.flash_attn == "fa2":
         setattr(config, "use_cache", False)  # qwen2 does not support use_cache when using flash attn
 
+    if "LlavaLlamaForCausalLM" in getattr(config, "architectures", []):
+        raise ValueError("Please download llava models with hf-compatible format: https://huggingface.co/llava-hf")
+
     # deepspeed zero3 is not compatible with low_cpu_mem_usage
     init_kwargs["low_cpu_mem_usage"] = model_args.low_cpu_mem_usage and (not is_deepspeed_zero3_enabled())
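
For reference, a minimal standalone sketch of what the new guard does, outside of
LLaMA-Factory. `check_llava_format` is a hypothetical helper name, and the
`PretrainedConfig` objects are hand-built stand-ins for real configs: original-format
LLaVA checkpoints list "LlavaLlamaForCausalLM" under "architectures" in config.json,
while the converted checkpoints under https://huggingface.co/llava-hf use
"LlavaForConditionalGeneration", so the membership test rejects only the former.

from transformers import PretrainedConfig


def check_llava_format(config: PretrainedConfig) -> None:
    # Hypothetical helper mirroring the hunk above: reject original-format
    # LLaVA weights and point the user at the HF-converted checkpoints.
    # Configs lacking an "architectures" attribute fall back to the empty list.
    if "LlavaLlamaForCausalLM" in getattr(config, "architectures", []):
        raise ValueError("Please download llava models with hf-compatible format: https://huggingface.co/llava-hf")


# HF-converted checkpoint: passes silently.
check_llava_format(PretrainedConfig(architectures=["LlavaForConditionalGeneration"]))

# Original-format checkpoint: raises with the download hint.
try:
    check_llava_format(PretrainedConfig(architectures=["LlavaLlamaForCausalLM"]))
except ValueError as err:
    print(err)

Checking "architectures" rather than "model_type" is presumably deliberate: both
checkpoint formats report model_type "llava", so the architectures list is the
field that actually distinguishes them.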