Former-commit-id: dac2f617bda9470ac8d85c7e9def09cc04970506
This commit is contained in:
hiyouga 2024-04-02 13:58:39 +08:00
parent 0c4a1381a4
commit 03e20bb5c6

View File

@@ -336,7 +336,7 @@ def patch_model(
if is_trainable and getattr(model.config, "model_type", None) == "qwen2" and model_args.flash_attn: if is_trainable and getattr(model.config, "model_type", None) == "qwen2" and model_args.flash_attn:
setattr(model.config, "use_cache", False) # qwen2 does not support use_cache when using flashattn setattr(model.config, "use_cache", False) # qwen2 does not support use_cache when using flashattn
if is_trainable and model_args.resize_vocab: if model_args.resize_vocab:
_resize_embedding_layer(model, tokenizer) _resize_embedding_layer(model, tokenizer)
if is_trainable: if is_trainable: