From 7cfcd69c646ffc5b53144554ac24f1a0e025ec2e Mon Sep 17 00:00:00 2001 From: hoshi-hiyouga Date: Wed, 24 Apr 2024 13:53:39 +0800 Subject: [PATCH] fix inference in llamaboard Former-commit-id: 5e631915157083b61e2d5a183e0c91f2d11f416e --- src/llmtuner/webui/runner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llmtuner/webui/runner.py b/src/llmtuner/webui/runner.py index b64a015c..77d5ea98 100644 --- a/src/llmtuner/webui/runner.py +++ b/src/llmtuner/webui/runner.py @@ -222,7 +222,7 @@ class Runner: quantization_bit=int(get("top.quantization_bit")) if get("top.quantization_bit") in ["8", "4"] else None, template=get("top.template"), rope_scaling=get("top.rope_scaling") if get("top.rope_scaling") in ["linear", "dynamic"] else None, - flash_attn=(get("top.booster") == "flashattn"), + flash_attn="fa2" if get("top.booster") == "flashattn2" else "auto", use_unsloth=(get("top.booster") == "unsloth"), dataset_dir=get("eval.dataset_dir"), dataset=",".join(get("eval.dataset")),