Mirror of https://github.com/hiyouga/LLaMA-Factory.git
support extra args in llamaboard
Former-commit-id: 86e5affc0d5f5682a20b28d27f80e0015b36fdd9
Parent: 5142faca8f
Commit: aba4268607
@@ -41,13 +41,12 @@ def create_top() -> Dict[str, "Component"]:
         finetuning_type = gr.Dropdown(choices=METHODS, value="lora", scale=1)
         checkpoint_path = gr.Dropdown(multiselect=True, allow_custom_value=True, scale=6)
 
-    with gr.Accordion(open=False) as advanced_tab:
-        with gr.Row():
-            quantization_bit = gr.Dropdown(choices=["none", "8", "4"], value="none", allow_custom_value=True, scale=2)
-            quantization_method = gr.Dropdown(choices=["bitsandbytes", "hqq", "eetq"], value="bitsandbytes", scale=2)
-            template = gr.Dropdown(choices=list(TEMPLATES.keys()), value="default", scale=2)
-            rope_scaling = gr.Radio(choices=["none", "linear", "dynamic"], value="none", scale=3)
-            booster = gr.Radio(choices=["auto", "flashattn2", "unsloth", "liger_kernel"], value="auto", scale=5)
+    with gr.Row():
+        quantization_bit = gr.Dropdown(choices=["none", "8", "4"], value="none", allow_custom_value=True, scale=2)
+        quantization_method = gr.Dropdown(choices=["bitsandbytes", "hqq", "eetq"], value="bitsandbytes", scale=2)
+        template = gr.Dropdown(choices=list(TEMPLATES.keys()), value="default", scale=2)
+        rope_scaling = gr.Radio(choices=["none", "linear", "dynamic"], value="none", scale=3)
+        booster = gr.Radio(choices=["auto", "flashattn2", "unsloth", "liger_kernel"], value="auto", scale=5)
 
     model_name.change(get_model_info, [model_name], [model_path, template], queue=False).then(
         list_checkpoints, [model_name, finetuning_type], [checkpoint_path], queue=False
@@ -66,7 +65,6 @@ def create_top() -> Dict[str, "Component"]:
         model_path=model_path,
         finetuning_type=finetuning_type,
         checkpoint_path=checkpoint_path,
-        advanced_tab=advanced_tab,
         quantization_bit=quantization_bit,
         quantization_method=quantization_method,
         template=template,
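The two hunks above drop the collapsed advanced-settings accordion, so the quantization, template, RoPE-scaling and booster controls sit directly in the top panel, and advanced_tab disappears from the returned component dict. Below is a minimal, self-contained Gradio sketch of the flattened layout; the labels are illustrative stand-ins, since the real UI takes its strings from LOCALES.

import gradio as gr

# Sketch of the flattened top-panel layout after this change (illustrative only).
with gr.Blocks() as demo:
    with gr.Row():  # previously these components were nested inside gr.Accordion(open=False)
        quantization_bit = gr.Dropdown(choices=["none", "8", "4"], value="none", label="Quantization bit")
        quantization_method = gr.Dropdown(choices=["bitsandbytes", "hqq", "eetq"], value="bitsandbytes", label="Quantization method")
        rope_scaling = gr.Radio(choices=["none", "linear", "dynamic"], value="none", label="RoPE scaling")

if __name__ == "__main__":
    demo.launch()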
@@ -91,7 +91,7 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]:
             save_steps = gr.Slider(minimum=10, maximum=5000, value=100, step=10)
             warmup_steps = gr.Slider(minimum=0, maximum=5000, value=0, step=1)
             neftune_alpha = gr.Slider(minimum=0, maximum=10, value=0, step=0.1)
-            optim = gr.Textbox(value="adamw_torch")
+            extra_args = gr.Textbox(value='{"optim": "adamw_torch"}')
 
         with gr.Row():
             with gr.Column():
@@ -116,7 +116,7 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]:
             save_steps,
             warmup_steps,
             neftune_alpha,
-            optim,
+            extra_args,
             packing,
             neat_packing,
             train_on_prompt,
@@ -134,7 +134,7 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]:
             save_steps=save_steps,
             warmup_steps=warmup_steps,
             neftune_alpha=neftune_alpha,
-            optim=optim,
+            extra_args=extra_args,
             packing=packing,
             neat_packing=neat_packing,
             train_on_prompt=train_on_prompt,
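With these hunks the dedicated optimizer textbox becomes a free-form Extra arguments textbox, and its default value '{"optim": "adamw_torch"}' reproduces the old behaviour. A hedged sketch of what a user might type into that box and how it round-trips through json.loads; the specific keys below are ordinary HuggingFace training arguments chosen for illustration, not values mandated by this commit.

import json

# Example contents for the "Extra arguments" textbox (illustrative keys).
# The value must be a single JSON object: double quotes, lowercase true/false, no trailing commas.
extra_args = '{"optim": "adamw_8bit", "logging_first_step": true, "weight_decay": 0.01}'

parsed = json.loads(extra_args)  # the runner validates the textbox with the same call
print(parsed["optim"])           # -> adamw_8bit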
@@ -87,20 +87,6 @@ LOCALES = {
             "label": "체크포인트 경로",
         },
     },
-    "advanced_tab": {
-        "en": {
-            "label": "Advanced configurations",
-        },
-        "ru": {
-            "label": "Расширенные конфигурации",
-        },
-        "zh": {
-            "label": "高级设置",
-        },
-        "ko": {
-            "label": "고급 설정",
-        },
-    },
     "quantization_bit": {
         "en": {
             "label": "Quantization bit",
@@ -581,11 +567,11 @@ LOCALES = {
     },
     "neftune_alpha": {
         "en": {
-            "label": "NEFTune Alpha",
+            "label": "NEFTune alpha",
             "info": "Magnitude of noise adding to embedding vectors.",
         },
         "ru": {
-            "label": "NEFTune Alpha",
+            "label": "NEFTune alpha",
             "info": "Величина шума, добавляемого к векторам вложений.",
         },
         "zh": {
@@ -597,22 +583,22 @@ LOCALES = {
             "info": "임베딩 벡터에 추가되는 노이즈의 크기.",
         },
     },
-    "optim": {
+    "extra_args": {
         "en": {
-            "label": "Optimizer",
-            "info": "The optimizer to use: adamw_torch, adamw_8bit or adafactor.",
+            "label": "Extra arguments",
+            "info": "Extra arguments passed to the trainer in JSON format.",
         },
         "ru": {
-            "label": "Оптимизатор",
-            "info": "Оптимизатор для использования: adamw_torch, adamw_8bit или adafactor.",
+            "label": "Дополнительные аргументы",
+            "info": "Дополнительные аргументы, которые передаются тренеру в формате JSON.",
         },
         "zh": {
-            "label": "优化器",
-            "info": "使用的优化器：adamw_torch、adamw_8bit 或 adafactor。",
+            "label": "额外参数",
+            "info": "以 JSON 格式传递给训练器的额外参数。",
         },
         "ko": {
-            "label": "옵티마이저",
-            "info": "사용할 옵티마이저: adamw_torch, adamw_8bit 또는 adafactor 등.",
+            "label": "추가 인수",
+            "info": "JSON 형식으로 트레이너에게 전달할 추가 인수입니다.",
         },
     },
     "packing": {
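The locale table swaps the old optim entry for an extra_args entry with translated label/info pairs for each UI language. A minimal sketch of how such a table is typically consumed; the lookup helper below is hypothetical and only illustrates the LOCALES[element][language] structure, it is not a function from this repository.

# Hypothetical helper illustrating the LOCALES structure.
LOCALES = {
    "extra_args": {
        "en": {"label": "Extra arguments", "info": "Extra arguments passed to the trainer in JSON format."},
        "zh": {"label": "额外参数", "info": "以 JSON 格式传递给训练器的额外参数。"},
    },
}

def get_text(elem_name: str, lang: str, key: str = "label") -> str:
    # Each component name maps to per-language dicts of display strings.
    return LOCALES[elem_name][lang][key]

print(get_text("extra_args", "zh"))  # -> 额外参数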
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import json
 import os
 from copy import deepcopy
 from subprocess import Popen, TimeoutExpired
@@ -78,6 +79,11 @@ class Runner:
         if not get("train.output_dir"):
             return ALERTS["err_no_output_dir"][lang]
 
+        try:
+            json.loads(get("train.extra_args"))
+        except json.JSONDecodeError:
+            return ALERTS["err_json_schema"][lang]
+
         stage = TRAINING_STAGES[get("train.training_stage")]
         if stage == "ppo" and not get("train.reward_model"):
             return ALERTS["err_no_reward_model"][lang]
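The new pre-flight check rejects a run before launch whenever the Extra arguments textbox does not hold valid JSON. A self-contained sketch of that behaviour; the ALERTS table here is a stand-in for the project's real localized alert messages, only the "err_json_schema" key is taken from the hunk above.

import json

# Stand-in alert table (the real project keys alert messages by language).
ALERTS = {"err_json_schema": {"en": "Extra arguments is not a valid JSON object."}}

def check_extra_args(extra_args: str, lang: str = "en") -> str:
    """Return an error message for malformed JSON, or an empty string if it parses."""
    try:
        json.loads(extra_args)
    except json.JSONDecodeError:
        return ALERTS["err_json_schema"][lang]
    return ""

print(check_extra_args('{"optim": "adamw_torch"}'))  # -> "" (valid)
print(check_extra_args("{'optim': adamw}"))          # -> error message (single quotes are not JSON)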
@@ -130,7 +136,6 @@ class Runner:
             save_steps=get("train.save_steps"),
             warmup_steps=get("train.warmup_steps"),
             neftune_noise_alpha=get("train.neftune_alpha") or None,
-            optim=get("train.optim"),
             packing=get("train.packing") or get("train.neat_packing"),
             neat_packing=get("train.neat_packing"),
             train_on_prompt=get("train.train_on_prompt"),
@@ -148,6 +153,7 @@ class Runner:
             plot_loss=True,
             ddp_timeout=180000000,
             include_num_input_tokens_seen=True,
+            **json.loads(get("train.extra_args")),
         )
 
         # checkpoints
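Finally, the parsed JSON is splatted into the argument dict, so every key the user supplies becomes a top-level training argument. A small sketch of the merge semantics, assuming the surrounding call is a plain dict(...) as the hunk suggests; note that a key which also appears as an explicit keyword raises TypeError rather than silently overriding it.

import json

extra_args = '{"optim": "adamw_8bit", "weight_decay": 0.01}'

# Same pattern as the hunk: explicit keywords first, then the user's JSON splatted in.
args = dict(
    plot_loss=True,
    ddp_timeout=180000000,
    include_num_input_tokens_seen=True,
    **json.loads(extra_args),
)
print(args["optim"])  # -> adamw_8bit

# Caveat: dict(optim="adamw_torch", **{"optim": "adamw_8bit"}) raises
# TypeError ("got multiple values for keyword argument 'optim'"), which is
# presumably why the explicit optim=... keyword is removed in the hunk above.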