support extra args in llamaboard

Former-commit-id: 86e5affc0d5f5682a20b28d27f80e0015b36fdd9
hiyouga 2024-10-30 08:55:54 +00:00
parent 5142faca8f
commit aba4268607
4 changed files with 27 additions and 37 deletions

View File

@@ -41,13 +41,12 @@ def create_top() -> Dict[str, "Component"]:
         finetuning_type = gr.Dropdown(choices=METHODS, value="lora", scale=1)
         checkpoint_path = gr.Dropdown(multiselect=True, allow_custom_value=True, scale=6)

-    with gr.Accordion(open=False) as advanced_tab:
-        with gr.Row():
-            quantization_bit = gr.Dropdown(choices=["none", "8", "4"], value="none", allow_custom_value=True, scale=2)
-            quantization_method = gr.Dropdown(choices=["bitsandbytes", "hqq", "eetq"], value="bitsandbytes", scale=2)
-            template = gr.Dropdown(choices=list(TEMPLATES.keys()), value="default", scale=2)
-            rope_scaling = gr.Radio(choices=["none", "linear", "dynamic"], value="none", scale=3)
-            booster = gr.Radio(choices=["auto", "flashattn2", "unsloth", "liger_kernel"], value="auto", scale=5)
+    with gr.Row():
+        quantization_bit = gr.Dropdown(choices=["none", "8", "4"], value="none", allow_custom_value=True, scale=2)
+        quantization_method = gr.Dropdown(choices=["bitsandbytes", "hqq", "eetq"], value="bitsandbytes", scale=2)
+        template = gr.Dropdown(choices=list(TEMPLATES.keys()), value="default", scale=2)
+        rope_scaling = gr.Radio(choices=["none", "linear", "dynamic"], value="none", scale=3)
+        booster = gr.Radio(choices=["auto", "flashattn2", "unsloth", "liger_kernel"], value="auto", scale=5)

     model_name.change(get_model_info, [model_name], [model_path, template], queue=False).then(
         list_checkpoints, [model_name, finetuning_type], [checkpoint_path], queue=False
@@ -66,7 +65,6 @@ def create_top() -> Dict[str, "Component"]:
         model_path=model_path,
         finetuning_type=finetuning_type,
         checkpoint_path=checkpoint_path,
-        advanced_tab=advanced_tab,
         quantization_bit=quantization_bit,
         quantization_method=quantization_method,
         template=template,

View File

@@ -91,7 +91,7 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]:
             save_steps = gr.Slider(minimum=10, maximum=5000, value=100, step=10)
             warmup_steps = gr.Slider(minimum=0, maximum=5000, value=0, step=1)
             neftune_alpha = gr.Slider(minimum=0, maximum=10, value=0, step=0.1)
-            optim = gr.Textbox(value="adamw_torch")
+            extra_args = gr.Textbox(value='{"optim": "adamw_torch"}')

         with gr.Row():
             with gr.Column():
@@ -116,7 +116,7 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]:
             save_steps,
             warmup_steps,
             neftune_alpha,
-            optim,
+            extra_args,
             packing,
             neat_packing,
             train_on_prompt,
@@ -134,7 +134,7 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]:
             save_steps=save_steps,
             warmup_steps=warmup_steps,
             neftune_alpha=neftune_alpha,
-            optim=optim,
+            extra_args=extra_args,
             packing=packing,
             neat_packing=neat_packing,
             train_on_prompt=train_on_prompt,
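Note on the new field: extra_args replaces the old optim textbox and expects a JSON object whose key/value pairs are passed on to the trainer, so the old default becomes '{"optim": "adamw_torch"}'. A hedged illustration of what a user might type into the textbox; "adamw_8bit" comes from the old optim hint, while "weight_decay" is only an assumed example of an additional trainer argument, not something this commit defines:

    extra_args = '{"optim": "adamw_8bit", "weight_decay": 0.01}'
    json.loads(extra_args)  # must parse as JSON, otherwise the Runner reports ALERTS["err_json_schema"]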

View File

@@ -87,20 +87,6 @@ LOCALES = {
             "label": "체크포인트 경로",
         },
     },
-    "advanced_tab": {
-        "en": {
-            "label": "Advanced configurations",
-        },
-        "ru": {
-            "label": "Расширенные конфигурации",
-        },
-        "zh": {
-            "label": "高级设置",
-        },
-        "ko": {
-            "label": "고급 설정",
-        },
-    },
     "quantization_bit": {
         "en": {
             "label": "Quantization bit",
@@ -581,11 +567,11 @@ LOCALES = {
     },
     "neftune_alpha": {
         "en": {
-            "label": "NEFTune Alpha",
+            "label": "NEFTune alpha",
             "info": "Magnitude of noise adding to embedding vectors.",
         },
         "ru": {
-            "label": "NEFTune Alpha",
+            "label": "NEFTune alpha",
             "info": "Величина шума, добавляемого к векторам вложений.",
         },
         "zh": {
@@ -597,22 +583,22 @@ LOCALES = {
             "info": "임베딩 벡터에 추가되는 노이즈의 크기.",
         },
     },
-    "optim": {
+    "extra_args": {
         "en": {
-            "label": "Optimizer",
-            "info": "The optimizer to use: adamw_torch, adamw_8bit or adafactor.",
+            "label": "Extra arguments",
+            "info": "Extra arguments passed to the trainer in JSON format.",
         },
         "ru": {
-            "label": "Оптимизатор",
-            "info": "Оптимизатор для использования: adamw_torch, adamw_8bit или adafactor.",
+            "label": "Дополнительные аргументы",
+            "info": "Дополнительные аргументы, которые передаются тренеру в формате JSON.",
         },
         "zh": {
-            "label": "优化器",
-            "info": "使用的优化器：adamw_torch、adamw_8bit 或 adafactor。",
+            "label": "额外参数",
+            "info": "以 JSON 格式传递给训练器的额外参数。",
         },
         "ko": {
-            "label": "옵티마이저",
-            "info": "사용할 옵티마이저: adamw_torch, adamw_8bit 또는 adafactor 등.",
+            "label": "추가 인수",
+            "info": "JSON 형식으로 트레이너에게 전달할 추가 인수입니다.",
         },
     },
     "packing": {

View File

@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+import json
 import os
 from copy import deepcopy
 from subprocess import Popen, TimeoutExpired
@@ -78,6 +79,11 @@ class Runner:
         if not get("train.output_dir"):
             return ALERTS["err_no_output_dir"][lang]

+        try:
+            json.loads(get("train.extra_args"))
+        except json.JSONDecodeError:
+            return ALERTS["err_json_schema"][lang]
+
         stage = TRAINING_STAGES[get("train.training_stage")]
         if stage == "ppo" and not get("train.reward_model"):
             return ALERTS["err_no_reward_model"][lang]
@@ -130,7 +136,6 @@ class Runner:
             save_steps=get("train.save_steps"),
             warmup_steps=get("train.warmup_steps"),
             neftune_noise_alpha=get("train.neftune_alpha") or None,
-            optim=get("train.optim"),
             packing=get("train.packing") or get("train.neat_packing"),
             neat_packing=get("train.neat_packing"),
             train_on_prompt=get("train.train_on_prompt"),
@@ -148,6 +153,7 @@ class Runner:
             plot_loss=True,
             ddp_timeout=180000000,
             include_num_input_tokens_seen=True,
+            **json.loads(get("train.extra_args")),
         )

         # checkpoints
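Taken together, the Runner change is a validate-then-merge step: the textbox value is parsed with json.loads up front (returning the err_json_schema alert on failure), and the parsed object is later unpacked into the keyword arguments of the run config, which is also why the explicit optim= entry could be dropped. A minimal, self-contained sketch of that flow under the assumption that the value is a JSON object; the function and variable names are illustrative, not from the repository:

    import json

    def build_args(extra_args_text: str) -> dict:
        # Validate first: invalid JSON is rejected before any training job is launched.
        try:
            extra = json.loads(extra_args_text)
        except json.JSONDecodeError:
            raise ValueError("extra arguments must be valid JSON")  # the UI shows ALERTS["err_json_schema"] instead
        # Merge: unpack the user-supplied pairs alongside the fixed defaults.
        return dict(
            plot_loss=True,
            include_num_input_tokens_seen=True,
            **extra,  # e.g. {"optim": "adamw_torch"} typed into the textbox
        )

    print(build_args('{"optim": "adamw_8bit"}'))
    # {'plot_loss': True, 'include_num_input_tokens_seen': True, 'optim': 'adamw_8bit'}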