Former-commit-id: eb2db2af8ec061205d7370baa8f15bdaf34ed31c
This commit is contained in:
hiyouga 2024-10-06 10:33:11 +08:00
parent 4bae540cd6
commit 2b3173e5d2

View File

@@ -110,6 +110,9 @@ def patch_config(
if getattr(config, "model_type", None) == "qwen2" and is_trainable and model_args.flash_attn == "fa2":
setattr(config, "use_cache", False) # qwen2 does not support use_cache when using flash attn
if "LlavaLlamaForCausalLM" in getattr(config, "architectures", []):
raise ValueError("Please download llava models with hf-compatible format: https://huggingface.co/llava-hf")
# deepspeed zero3 is not compatible with low_cpu_mem_usage
init_kwargs["low_cpu_mem_usage"] = model_args.low_cpu_mem_usage and (not is_deepspeed_zero3_enabled())