Mirror of https://github.com/hiyouga/LLaMA-Factory.git (synced 2026-06-25 08:38:55 +08:00)
[webui] Fix WebUI training hang from subprocess log pipe (#10584)
Co-authored-by: 凉夜 <liangye@liangyedeMacBook-Air.local>
This commit is contained in:
@@ -16,7 +16,7 @@ import json
 import os
 from collections.abc import Generator
 from copy import deepcopy
-from subprocess import PIPE, Popen, TimeoutExpired
+from subprocess import Popen, TimeoutExpired
 from typing import TYPE_CHECKING, Any
 
 from transformers.utils import is_torch_npu_available
@@ -375,7 +375,16 @@ class Runner:
 env["FORCE_TORCHRUN"] = "1"
 
 # NOTE: DO NOT USE shell=True to avoid security risk
-self.trainer = Popen(["llamafactory-cli", "train", save_cmd(args)], env=env, stderr=PIPE, text=True)
+webui_log_path = os.path.join(args["output_dir"], "webui_subprocess.log")
+webui_log = open(webui_log_path, "a", encoding="utf-8")
+self.trainer = Popen(
+["llamafactory-cli", "train", save_cmd(args)],
+env=env,
+stdout=webui_log,
+stderr=webui_log,
+text=True,
+)
+webui_log.close()
 yield from self.monitor()
 
 def _build_config_dict(self, data: dict["Component", Any]) -> dict[str, Any]:
@@ -451,6 +460,16 @@ class Runner:
 else:
 finish_log = load_eval_results(os.path.join(output_path, "all_results.json")) + "\n\n" + running_log
 else:
+if stderr is None:
+webui_log_path = os.path.join(output_path, "webui_subprocess.log")
+if os.path.exists(webui_log_path):
+with open(webui_log_path, "rb") as f:
+f.seek(0, os.SEEK_END)
+f.seek(max(f.tell() - 20000, 0))
+stderr = f.read().decode("utf-8", errors="replace")
+else:
+stderr = "No subprocess log file found."
+
 print(stderr)
 finish_info = ALERTS["err_failed"][lang]
 finish_log = ALERTS["err_failed"][lang] + f" Exit code: {return_code}\n\n```\n{stderr}\n```\n"
Reference in New Issue
Block a user