mirror of
https://github.com/hiyouga/LLaMA-Factory.git
synced 2025-10-15 16:18:10 +08:00
parent
0c4a1381a4
commit
03e20bb5c6
@@ -336,7 +336,7 @@ def patch_model(
|
|||||||
if is_trainable and getattr(model.config, "model_type", None) == "qwen2" and model_args.flash_attn:
|
if is_trainable and getattr(model.config, "model_type", None) == "qwen2" and model_args.flash_attn:
|
||||||
setattr(model.config, "use_cache", False) # qwen2 does not support use_cache when using flashattn
|
setattr(model.config, "use_cache", False) # qwen2 does not support use_cache when using flashattn
|
||||||
|
|
||||||
if is_trainable and model_args.resize_vocab:
|
if model_args.resize_vocab:
|
||||||
_resize_embedding_layer(model, tokenizer)
|
_resize_embedding_layer(model, tokenizer)
|
||||||
|
|
||||||
if is_trainable:
|
if is_trainable:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user