From 8792f06161c2e0240a9762eab61e41cfb3ea5580 Mon Sep 17 00:00:00 2001 From: summernight Date: Wed, 17 Jun 2026 15:36:40 +0800 Subject: [PATCH] [webui] Fix WebUI training hang from subprocess log pipe (#10584) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: 凉夜 --- src/llamafactory/webui/runner.py | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/src/llamafactory/webui/runner.py b/src/llamafactory/webui/runner.py index 9c772f5dc..6de99719b 100644 --- a/src/llamafactory/webui/runner.py +++ b/src/llamafactory/webui/runner.py @@ -16,7 +16,7 @@ import json import os from collections.abc import Generator from copy import deepcopy -from subprocess import PIPE, Popen, TimeoutExpired +from subprocess import Popen, TimeoutExpired from typing import TYPE_CHECKING, Any from transformers.utils import is_torch_npu_available @@ -375,7 +375,16 @@ class Runner: env["FORCE_TORCHRUN"] = "1" # NOTE: DO NOT USE shell=True to avoid security risk - self.trainer = Popen(["llamafactory-cli", "train", save_cmd(args)], env=env, stderr=PIPE, text=True) + webui_log_path = os.path.join(args["output_dir"], "webui_subprocess.log") + webui_log = open(webui_log_path, "a", encoding="utf-8") + self.trainer = Popen( + ["llamafactory-cli", "train", save_cmd(args)], + env=env, + stdout=webui_log, + stderr=webui_log, + text=True, + ) + webui_log.close() yield from self.monitor() def _build_config_dict(self, data: dict["Component", Any]) -> dict[str, Any]: @@ -451,6 +460,16 @@ class Runner: else: finish_log = load_eval_results(os.path.join(output_path, "all_results.json")) + "\n\n" + running_log else: + if stderr is None: + webui_log_path = os.path.join(output_path, "webui_subprocess.log") + if os.path.exists(webui_log_path): + with open(webui_log_path, "rb") as f: + f.seek(0, os.SEEK_END) + f.seek(max(f.tell() - 20000, 0)) + stderr = f.read().decode("utf-8", errors="replace") + else: + stderr = "No subprocess log file found." + print(stderr) finish_info = ALERTS["err_failed"][lang] finish_log = ALERTS["err_failed"][lang] + f" Exit code: {return_code}\n\n```\n{stderr}\n```\n"