From e194efab10dc8058d3057e5aff904978c6b1a974 Mon Sep 17 00:00:00 2001
From: hiyouga
Date: Fri, 15 Mar 2024 19:18:42 +0800
Subject: [PATCH] fix patcher

Former-commit-id: 6a5ad99c8cbf6b7def0a130306d49e7d1eb4e5a5
---
 src/llmtuner/model/patcher.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/llmtuner/model/patcher.py b/src/llmtuner/model/patcher.py
index 210044f2..949e9e16 100644
--- a/src/llmtuner/model/patcher.py
+++ b/src/llmtuner/model/patcher.py
@@ -279,13 +279,11 @@ def patch_config(
         model_args.compute_dtype = infer_optim_dtype(model_dtype=getattr(config, "torch_dtype", None))

     if getattr(config, "model_type", None) == "qwen":
+        setattr(config, "use_flash_attn", model_args.flash_attn)
         for dtype_name, dtype in [("fp16", torch.float16), ("bf16", torch.bfloat16), ("fp32", torch.float32)]:
             setattr(config, dtype_name, model_args.compute_dtype == dtype)

     _configure_attn_implementation(model_args, init_kwargs)
-    if getattr(config, "model_type", None) == "qwen" and init_kwargs["attn_implementation"] != "flash_attention_2":
-        config.use_flash_attn = False
-
     _configure_rope(config, model_args, is_trainable)
     _configure_longlora(config, model_args, is_trainable)
     _configure_quantization(config, tokenizer, model_args, init_kwargs)
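
Note for readers of this patch: the net effect is that the Qwen-specific
config flag use_flash_attn now tracks model_args.flash_attn directly at
config-patch time, instead of being force-disabled after
_configure_attn_implementation() whenever the resolved backend was not
"flash_attention_2". Below is a minimal, runnable sketch of the resulting
Qwen branch under stated assumptions: SimpleNamespace objects stand in for
the real PretrainedConfig and ModelArguments, and patch_qwen_config is a
hypothetical helper written for illustration, not a function in llmtuner.

from types import SimpleNamespace

import torch


def patch_qwen_config(config, model_args):
    """Sketch of the Qwen branch of patch_config after this fix (hypothetical helper)."""
    if getattr(config, "model_type", None) == "qwen":
        # Drive the Qwen-specific flag directly from the user's flash_attn setting,
        # rather than disabling it after the attention backend is resolved.
        setattr(config, "use_flash_attn", model_args.flash_attn)
        # Exactly one dtype flag ends up True: the one matching the resolved compute dtype.
        for dtype_name, dtype in [("fp16", torch.float16), ("bf16", torch.bfloat16), ("fp32", torch.float32)]:
            setattr(config, dtype_name, model_args.compute_dtype == dtype)


# Usage with stand-in objects:
config = SimpleNamespace(model_type="qwen")
model_args = SimpleNamespace(flash_attn=True, compute_dtype=torch.bfloat16)
patch_qwen_config(config, model_args)
assert config.use_flash_attn is True and config.bf16 is True and config.fp16 is False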