From a5537f3ee8f1ed7bfaad308e6b6ae6e46584ed58 Mon Sep 17 00:00:00 2001
From: hiyouga <hiyouga@buaa.edu.cn>
Date: Fri, 15 Mar 2024 19:18:42 +0800
Subject: [PATCH] fix patcher: set qwen use_flash_attn from model_args.flash_attn

Former-commit-id: 85c376fc1e0bcc854ed6e70e6455a0b00b341655
---
 src/llmtuner/model/patcher.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/llmtuner/model/patcher.py b/src/llmtuner/model/patcher.py
index 210044f2..949e9e16 100644
--- a/src/llmtuner/model/patcher.py
+++ b/src/llmtuner/model/patcher.py
@@ -279,13 +279,11 @@ def patch_config(
         model_args.compute_dtype = infer_optim_dtype(model_dtype=getattr(config, "torch_dtype", None))
 
     if getattr(config, "model_type", None) == "qwen":
+        setattr(config, "use_flash_attn", model_args.flash_attn)
         for dtype_name, dtype in [("fp16", torch.float16), ("bf16", torch.bfloat16), ("fp32", torch.float32)]:
             setattr(config, dtype_name, model_args.compute_dtype == dtype)
 
     _configure_attn_implementation(model_args, init_kwargs)
-    if getattr(config, "model_type", None) == "qwen" and init_kwargs["attn_implementation"] != "flash_attention_2":
-        config.use_flash_attn = False
-
     _configure_rope(config, model_args, is_trainable)
     _configure_longlora(config, model_args, is_trainable)
     _configure_quantization(config, tokenizer, model_args, init_kwargs)