From 4a854dfe273ef3519e926e084f66bbf20b3e7e2c Mon Sep 17 00:00:00 2001
From: hoshi-hiyouga
Date: Wed, 24 Apr 2024 13:53:39 +0800
Subject: [PATCH] fix inference in llamaboard

Former-commit-id: f36057ea0300ab089ded568fa170682e9e19c4ee
---
 src/llmtuner/webui/runner.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/llmtuner/webui/runner.py b/src/llmtuner/webui/runner.py
index b64a015c..77d5ea98 100644
--- a/src/llmtuner/webui/runner.py
+++ b/src/llmtuner/webui/runner.py
@@ -222,7 +222,7 @@ class Runner:
             quantization_bit=int(get("top.quantization_bit")) if get("top.quantization_bit") in ["8", "4"] else None,
             template=get("top.template"),
             rope_scaling=get("top.rope_scaling") if get("top.rope_scaling") in ["linear", "dynamic"] else None,
-            flash_attn=(get("top.booster") == "flashattn"),
+            flash_attn="fa2" if get("top.booster") == "flashattn2" else "auto",
             use_unsloth=(get("top.booster") == "unsloth"),
             dataset_dir=get("eval.dataset_dir"),
             dataset=",".join(get("eval.dataset")),
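
The patch replaces the old boolean `flash_attn` flag with a string-valued option: the Web UI booster choice `flashattn2` now maps to `"fa2"`, and everything else falls back to `"auto"`. Below is a minimal, hypothetical sketch of that mapping for illustration only; the helper name `resolve_flash_attn` does not exist in the repository.

```python
def resolve_flash_attn(booster: str) -> str:
    """Map the LlamaBoard booster selection to the flash_attn argument (sketch)."""
    # "flashattn2" selects FlashAttention-2 ("fa2"); any other choice uses "auto".
    return "fa2" if booster == "flashattn2" else "auto"


# Example usage (assumed values for the booster dropdown):
assert resolve_flash_attn("flashattn2") == "fa2"
assert resolve_flash_attn("unsloth") == "auto"  # unsloth is handled separately via use_unsloth
```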