Mirror of https://github.com/hiyouga/LLaMA-Factory.git (synced 2025-08-04 12:42:51 +08:00)

Parent: bc3ced05b8
Commit: c7104f8fab
@@ -336,7 +336,7 @@ def patch_model(
     if is_trainable and getattr(model.config, "model_type", None) == "qwen2" and model_args.flash_attn:
         setattr(model.config, "use_cache", False)  # qwen2 does not support use_cache when using flashattn
 
-    if is_trainable and model_args.resize_vocab:
+    if model_args.resize_vocab:
         _resize_embedding_layer(model, tokenizer)
 
     if is_trainable:
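The only functional change is the dropped is_trainable guard: previously a tokenizer extended with new tokens triggered an embedding resize only during training, so loading the same checkpoint for inference left the embedding matrix smaller than the vocabulary. After this commit, _resize_embedding_layer runs whenever model_args.resize_vocab is set. As a rough sketch of what such a helper does, assuming a standard transformers model (the actual LLaMA-Factory implementation may differ, e.g. in how the new embedding rows are initialized):

import torch
from transformers import PreTrainedModel, PreTrainedTokenizer


def _resize_embedding_layer(model: PreTrainedModel, tokenizer: PreTrainedTokenizer) -> None:
    # Sketch only: grow the token embedding (and output head) so every
    # tokenizer id maps to a row in the embedding matrix.
    current_size = model.get_input_embeddings().weight.size(0)
    if len(tokenizer) > current_size:
        if not isinstance(model.get_output_embeddings(), torch.nn.Linear):
            return  # model does not expose a resizable output projection
        # pad_to_multiple_of=64 rounds the new vocab size up, which tends to
        # be friendlier to tensor cores (an assumption, not confirmed upstream)
        model.resize_token_embeddings(len(tokenizer), pad_to_multiple_of=64)
        new_size = model.get_input_embeddings().weight.size(0)
        print(f"Resized token embeddings from {current_size} to {new_size}.")

Note that transformers' resize_token_embeddings also resizes the output head and re-ties shared weights, so the helper itself only needs to pick the target size.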