diff --git a/src/llamafactory/webui/components/top.py b/src/llamafactory/webui/components/top.py index 2cec4f75..bec6c507 100644 --- a/src/llamafactory/webui/components/top.py +++ b/src/llamafactory/webui/components/top.py @@ -41,13 +41,12 @@ def create_top() -> Dict[str, "Component"]: finetuning_type = gr.Dropdown(choices=METHODS, value="lora", scale=1) checkpoint_path = gr.Dropdown(multiselect=True, allow_custom_value=True, scale=6) - with gr.Accordion(open=False) as advanced_tab: - with gr.Row(): - quantization_bit = gr.Dropdown(choices=["none", "8", "4"], value="none", allow_custom_value=True, scale=2) - quantization_method = gr.Dropdown(choices=["bitsandbytes", "hqq", "eetq"], value="bitsandbytes", scale=2) - template = gr.Dropdown(choices=list(TEMPLATES.keys()), value="default", scale=2) - rope_scaling = gr.Radio(choices=["none", "linear", "dynamic"], value="none", scale=3) - booster = gr.Radio(choices=["auto", "flashattn2", "unsloth", "liger_kernel"], value="auto", scale=5) + with gr.Row(): + quantization_bit = gr.Dropdown(choices=["none", "8", "4"], value="none", allow_custom_value=True, scale=2) + quantization_method = gr.Dropdown(choices=["bitsandbytes", "hqq", "eetq"], value="bitsandbytes", scale=2) + template = gr.Dropdown(choices=list(TEMPLATES.keys()), value="default", scale=2) + rope_scaling = gr.Radio(choices=["none", "linear", "dynamic"], value="none", scale=3) + booster = gr.Radio(choices=["auto", "flashattn2", "unsloth", "liger_kernel"], value="auto", scale=5) model_name.change(get_model_info, [model_name], [model_path, template], queue=False).then( list_checkpoints, [model_name, finetuning_type], [checkpoint_path], queue=False @@ -66,7 +65,6 @@ def create_top() -> Dict[str, "Component"]: model_path=model_path, finetuning_type=finetuning_type, checkpoint_path=checkpoint_path, - advanced_tab=advanced_tab, quantization_bit=quantization_bit, quantization_method=quantization_method, template=template, diff --git a/src/llamafactory/webui/components/train.py b/src/llamafactory/webui/components/train.py index a167fdeb..6766cbb0 100644 --- a/src/llamafactory/webui/components/train.py +++ b/src/llamafactory/webui/components/train.py @@ -91,7 +91,7 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]: save_steps = gr.Slider(minimum=10, maximum=5000, value=100, step=10) warmup_steps = gr.Slider(minimum=0, maximum=5000, value=0, step=1) neftune_alpha = gr.Slider(minimum=0, maximum=10, value=0, step=0.1) - optim = gr.Textbox(value="adamw_torch") + extra_args = gr.Textbox(value='{"optim": "adamw_torch"}') with gr.Row(): with gr.Column(): @@ -116,7 +116,7 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]: save_steps, warmup_steps, neftune_alpha, - optim, + extra_args, packing, neat_packing, train_on_prompt, @@ -134,7 +134,7 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]: save_steps=save_steps, warmup_steps=warmup_steps, neftune_alpha=neftune_alpha, - optim=optim, + extra_args=extra_args, packing=packing, neat_packing=neat_packing, train_on_prompt=train_on_prompt, diff --git a/src/llamafactory/webui/locales.py b/src/llamafactory/webui/locales.py index 5fc9dda9..7e2d5bb9 100644 --- a/src/llamafactory/webui/locales.py +++ b/src/llamafactory/webui/locales.py @@ -87,20 +87,6 @@ LOCALES = { "label": "체크포인트 경로", }, }, - "advanced_tab": { - "en": { - "label": "Advanced configurations", - }, - "ru": { - "label": "Расширенные конфигурации", - }, - "zh": { - "label": "高级设置", - }, - "ko": { - "label": "고급 설정", - }, - }, "quantization_bit": { "en": { "label": "Quantization bit", @@ -581,11 +567,11 @@ LOCALES = { }, "neftune_alpha": { "en": { - "label": "NEFTune Alpha", + "label": "NEFTune alpha", "info": "Magnitude of noise adding to embedding vectors.", }, "ru": { - "label": "NEFTune Alpha", + "label": "NEFTune alpha", "info": "Величина шума, добавляемого к векторам вложений.", }, "zh": { @@ -597,22 +583,22 @@ LOCALES = { "info": "임베딩 벡터에 추가되는 노이즈의 크기.", }, }, - "optim": { + "extra_args": { "en": { - "label": "Optimizer", - "info": "The optimizer to use: adamw_torch, adamw_8bit or adafactor.", + "label": "Extra arguments", + "info": "Extra arguments passed to the trainer in JSON format.", }, "ru": { - "label": "Оптимизатор", - "info": "Оптимизатор для использования: adamw_torch, adamw_8bit или adafactor.", + "label": "Дополнительные аргументы", + "info": "Дополнительные аргументы, которые передаются тренеру в формате JSON.", }, "zh": { - "label": "优化器", - "info": "使用的优化器:adamw_torch、adamw_8bit 或 adafactor。", + "label": "额外参数", + "info": "以 JSON 格式传递给训练器的额外参数。", }, "ko": { - "label": "옵티마이저", - "info": "사용할 옵티마이저: adamw_torch, adamw_8bit 또는 adafactor 등.", + "label": "추가 인수", + "info": "JSON 형식으로 트레이너에게 전달할 추가 인수입니다.", }, }, "packing": { diff --git a/src/llamafactory/webui/runner.py b/src/llamafactory/webui/runner.py index 41de62fb..2703553d 100644 --- a/src/llamafactory/webui/runner.py +++ b/src/llamafactory/webui/runner.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import json import os from copy import deepcopy from subprocess import Popen, TimeoutExpired @@ -78,6 +79,11 @@ class Runner: if not get("train.output_dir"): return ALERTS["err_no_output_dir"][lang] + try: + json.loads(get("train.extra_args")) + except json.JSONDecodeError: + return ALERTS["err_json_schema"][lang] + stage = TRAINING_STAGES[get("train.training_stage")] if stage == "ppo" and not get("train.reward_model"): return ALERTS["err_no_reward_model"][lang] @@ -130,7 +136,6 @@ class Runner: save_steps=get("train.save_steps"), warmup_steps=get("train.warmup_steps"), neftune_noise_alpha=get("train.neftune_alpha") or None, - optim=get("train.optim"), packing=get("train.packing") or get("train.neat_packing"), neat_packing=get("train.neat_packing"), train_on_prompt=get("train.train_on_prompt"), @@ -148,6 +153,7 @@ class Runner: plot_loss=True, ddp_timeout=180000000, include_num_input_tokens_seen=True, + **json.loads(get("train.extra_args")), ) # checkpoints