Former-commit-id: 9ddbe2866a4a4433d7635659a5635d16c59800b1
hiyouga 2024-04-02 13:58:39 +08:00
parent bc3ced05b8
commit c7104f8fab


@@ -336,7 +336,7 @@ def patch_model(
     if is_trainable and getattr(model.config, "model_type", None) == "qwen2" and model_args.flash_attn:
         setattr(model.config, "use_cache", False)  # qwen2 does not support use_cache when using flashattn
 
-    if is_trainable and model_args.resize_vocab:
+    if model_args.resize_vocab:
         _resize_embedding_layer(model, tokenizer)
 
     if is_trainable:
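
For context, _resize_embedding_layer grows the model's token embeddings to match the tokenizer's vocabulary; by dropping the is_trainable check, the change lets that adjustment run even when the model is loaded for inference, not only for training. A minimal sketch of such a helper, assuming the Hugging Face Transformers API (illustrative only, not necessarily the repository's exact implementation):

# Sketch of an embedding-resize helper, assuming Hugging Face Transformers.
from transformers import PreTrainedModel, PreTrainedTokenizer


def _resize_embedding_layer(model: PreTrainedModel, tokenizer: PreTrainedTokenizer) -> None:
    current_size = model.get_input_embeddings().weight.size(0)
    if len(tokenizer) > current_size:
        # Grow the input embeddings (and any tied output head) so every tokenizer id
        # has a row; padding to a multiple of 64 keeps tensor shapes GPU-friendly.
        model.resize_token_embeddings(len(tokenizer), pad_to_multiple_of=64)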