support DDP in webui

Former-commit-id: 7c016b22aa
hiyouga
2024-05-28 19:24:22 +08:00
parent 2a473f36fb
commit 3ea8f5e6b9
19 changed files with 78 additions and 166 deletions

View File

@@ -1469,11 +1469,6 @@ ALERTS = {
         "ru": "Обучение недоступно в демонстрационном режиме, сначала скопируйте пространство в частное.",
         "zh": "展示模式不支持训练,请先复制到私人空间。",
     },
-    "err_device_count": {
-        "en": "Multiple GPUs are not supported yet.",
-        "ru": "Пока не поддерживается множественные GPU.",
-        "zh": "尚不支持多 GPU 训练。",
-    },
     "err_tool_name": {
         "en": "Tool name not found.",
         "ru": "Имя инструмента не найдено.",

View File

@@ -6,10 +6,9 @@ from typing import TYPE_CHECKING, Any, Dict, Generator, Optional
 import psutil
 from transformers.trainer import TRAINING_ARGS_NAME
-from transformers.utils import is_torch_cuda_available
 from ..extras.constants import TRAINING_STAGES
-from ..extras.misc import get_device_count, torch_gc
+from ..extras.misc import is_gpu_or_npu_available, torch_gc
 from ..extras.packages import is_gradio_available
 from .common import get_module, get_save_dir, load_args, load_config, save_args
 from .locales import ALERTS
@@ -64,16 +63,13 @@ class Runner:
         if not from_preview and self.demo_mode:
             return ALERTS["err_demo"][lang]
-        if not from_preview and get_device_count() > 1:
-            return ALERTS["err_device_count"][lang]
         if do_train:
             stage = TRAINING_STAGES[get("train.training_stage")]
             reward_model = get("train.reward_model")
             if stage == "ppo" and not reward_model:
                 return ALERTS["err_no_reward_model"][lang]
-        if not from_preview and not is_torch_cuda_available():
+        if not from_preview and not is_gpu_or_npu_available():
             gr.Warning(ALERTS["warn_no_cuda"][lang])
         return ""
@@ -273,7 +269,6 @@ class Runner:
         self.do_train, self.running_data = do_train, data
         args = self._parse_train_args(data) if do_train else self._parse_eval_args(data)
         env = deepcopy(os.environ)
-        env["CUDA_VISIBLE_DEVICES"] = os.environ.get("CUDA_VISIBLE_DEVICES", "0")
         env["LLAMABOARD_ENABLED"] = "1"
         self.trainer = Popen("llamafactory-cli train {}".format(save_cmd(args)), env=env, shell=True)
         yield from self.monitor()
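
Because the environment no longer pins CUDA_VISIBLE_DEVICES to "0", the spawned llamafactory-cli train process inherits every device the shell exposes, and the decision to run DDP is delegated to the CLI rather than made in the webui. A hedged sketch of that idea, assuming the CLI falls back to torchrun when several devices are visible; the script name train.py and the dispatch below are illustrative placeholders, not the project's actual entry point:

# Illustrative sketch only; the real dispatch lives inside llamafactory-cli.
import subprocess
import sys

import torch


def launch_train(cli_args: list) -> None:
    """Run single-process training, or delegate to torchrun when several GPUs are visible."""
    device_count = torch.cuda.device_count()
    if device_count > 1:
        # torchrun spawns one worker per device and sets up the DDP process group.
        cmd = ["torchrun", "--nproc_per_node", str(device_count), "train.py", *cli_args]
    else:
        cmd = [sys.executable, "train.py", *cli_args]
    subprocess.run(cmd, check=True)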

View File

@@ -42,8 +42,7 @@ def clean_cmd(args: Dict[str, Any]) -> Dict[str, Any]:
 def gen_cmd(args: Dict[str, Any]) -> str:
-    current_devices = os.environ.get("CUDA_VISIBLE_DEVICES", "0")
-    cmd_lines = ["CUDA_VISIBLE_DEVICES={} llamafactory-cli train ".format(current_devices)]
+    cmd_lines = ["llamafactory-cli train "]
     for k, v in clean_cmd(args).items():
         cmd_lines.append(" --{} {} ".format(k, str(v)))
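
gen_cmd builds the command preview shown in the webui; it no longer prefixes the command with CUDA_VISIBLE_DEVICES=0, so the preview matches the multi-device launch performed by the runner above. A hypothetical usage example, assuming the import path llamafactory.webui.utils and placeholder argument values (the exact joining of cmd_lines is not shown in this hunk):

# Hypothetical usage of gen_cmd after this change; argument values are placeholders.
from llamafactory.webui.utils import gen_cmd  # assumed import path

preview_args = {"stage": "sft", "model_name_or_path": "path/to/model", "output_dir": "saves/demo"}
print(gen_cmd(preview_args))
# Before: CUDA_VISIBLE_DEVICES=0 llamafactory-cli train --stage sft ...
# After:  llamafactory-cli train --stage sft ...
# Users who still want a single device can export CUDA_VISIBLE_DEVICES themselves before launching the webui.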