Mirror of https://github.com/hiyouga/LLaMA-Factory.git (synced 2025-08-23 14:22:51 +08:00)
parent dbe26e7cdf
commit 37c6a0c6dc
@@ -89,9 +89,6 @@ def patch_config(
     if getattr(config, "model_type", None) == "qwen2" and is_trainable and model_args.flash_attn == "fa2":
         setattr(config, "use_cache", False)  # qwen2 does not support use_cache when using flash attn
 
-    if getattr(config, "model_type", None) == "chatglm":
-        require_version("transformers==4.41.2", "To fix: pip install transformers==4.41.2")
-
     # deepspeed zero3 is not compatible with low_cpu_mem_usage
     init_kwargs["low_cpu_mem_usage"] = model_args.low_cpu_mem_usage and (not is_deepspeed_zero3_enabled())
 
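For readers who want to see the surviving logic in isolation, below is a minimal, self-contained sketch of what this hunk leaves in patch_config once the chatglm pin is dropped. The four-argument signature and the SimpleNamespace stand-ins are assumptions made for illustration; only the two patched statements come from the diff itself.

from types import SimpleNamespace

from transformers.integrations import is_deepspeed_zero3_enabled


def patch_config(config, model_args, init_kwargs, is_trainable):
    # qwen2 does not support use_cache when using flash attn, so it is
    # forced off for trainable qwen2 models configured with fa2.
    if getattr(config, "model_type", None) == "qwen2" and is_trainable and model_args.flash_attn == "fa2":
        setattr(config, "use_cache", False)

    # deepspeed zero3 is not compatible with low_cpu_mem_usage: ZeRO-3 shards
    # parameters at init time, which conflicts with meta-device loading.
    init_kwargs["low_cpu_mem_usage"] = model_args.low_cpu_mem_usage and (not is_deepspeed_zero3_enabled())


# Hypothetical usage with stand-in objects (not the repository's real types):
config = SimpleNamespace(model_type="qwen2", use_cache=True)
model_args = SimpleNamespace(flash_attn="fa2", low_cpu_mem_usage=True)
init_kwargs = {}
patch_config(config, model_args, init_kwargs, is_trainable=True)
assert config.use_cache is False
# Outside a DeepSpeed ZeRO-3 run, is_deepspeed_zero3_enabled() is False,
# so low_cpu_mem_usage passes through unchanged.
assert init_kwargs["low_cpu_mem_usage"] is True

The removed branch had pinned transformers to 4.41.2 via require_version whenever a chatglm config was loaded; with it gone, chatglm models no longer force a specific transformers release at config-patch time.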
|