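Fix inference in llamaboard: pass flash_attn as a string ("fa2" when the booster is "flashattn2", otherwise "auto") instead of a boolean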

Former-commit-id: 5e631915157083b61e2d5a183e0c91f2d11f416e
2026-02-07 14:32:23 +08:00 · 2024-04-24 13:53:39 +08:00
parent a5eabbe933
commit 7cfcd69c64
1 changed files with 1 additions and 1 deletions
--- a/src/llmtuner/webui/runner.py
+++ b/src/llmtuner/webui/runner.py
@@ -222,7 +222,7 @@ class Runner:
            quantization_bit=int(get("top.quantization_bit")) if get("top.quantization_bit") in ["8", "4"] else None,
            template=get("top.template"),
            rope_scaling=get("top.rope_scaling") if get("top.rope_scaling") in ["linear", "dynamic"] else None,
-            flash_attn=(get("top.booster") == "flashattn"),
+            flash_attn="fa2" if get("top.booster") == "flashattn2" else "auto",
            use_unsloth=(get("top.booster") == "unsloth"),
            dataset_dir=get("eval.dataset_dir"),
            dataset=",".join(get("eval.dataset")),