support extra args in llamaboard

Former-commit-id: 86e5affc0d5f5682a20b28d27f80e0015b36fdd9
hiyouga 2024-10-30 08:55:54 +00:00
parent 5142faca8f
commit aba4268607
4 changed files with 27 additions and 37 deletions

src/llamafactory/webui/components/top.py

@@ -41,13 +41,12 @@ def create_top() -> Dict[str, "Component"]:
         finetuning_type = gr.Dropdown(choices=METHODS, value="lora", scale=1)
         checkpoint_path = gr.Dropdown(multiselect=True, allow_custom_value=True, scale=6)
 
-    with gr.Accordion(open=False) as advanced_tab:
-        with gr.Row():
-            quantization_bit = gr.Dropdown(choices=["none", "8", "4"], value="none", allow_custom_value=True, scale=2)
-            quantization_method = gr.Dropdown(choices=["bitsandbytes", "hqq", "eetq"], value="bitsandbytes", scale=2)
-            template = gr.Dropdown(choices=list(TEMPLATES.keys()), value="default", scale=2)
-            rope_scaling = gr.Radio(choices=["none", "linear", "dynamic"], value="none", scale=3)
-            booster = gr.Radio(choices=["auto", "flashattn2", "unsloth", "liger_kernel"], value="auto", scale=5)
+    with gr.Row():
+        quantization_bit = gr.Dropdown(choices=["none", "8", "4"], value="none", allow_custom_value=True, scale=2)
+        quantization_method = gr.Dropdown(choices=["bitsandbytes", "hqq", "eetq"], value="bitsandbytes", scale=2)
+        template = gr.Dropdown(choices=list(TEMPLATES.keys()), value="default", scale=2)
+        rope_scaling = gr.Radio(choices=["none", "linear", "dynamic"], value="none", scale=3)
+        booster = gr.Radio(choices=["auto", "flashattn2", "unsloth", "liger_kernel"], value="auto", scale=5)
 
     model_name.change(get_model_info, [model_name], [model_path, template], queue=False).then(
         list_checkpoints, [model_name, finetuning_type], [checkpoint_path], queue=False
@@ -66,7 +65,6 @@ def create_top() -> Dict[str, "Component"]:
         model_path=model_path,
         finetuning_type=finetuning_type,
         checkpoint_path=checkpoint_path,
-        advanced_tab=advanced_tab,
         quantization_bit=quantization_bit,
         quantization_method=quantization_method,
         template=template,

src/llamafactory/webui/components/train.py

@@ -91,7 +91,7 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]:
             save_steps = gr.Slider(minimum=10, maximum=5000, value=100, step=10)
             warmup_steps = gr.Slider(minimum=0, maximum=5000, value=0, step=1)
             neftune_alpha = gr.Slider(minimum=0, maximum=10, value=0, step=0.1)
-            optim = gr.Textbox(value="adamw_torch")
+            extra_args = gr.Textbox(value='{"optim": "adamw_torch"}')
 
         with gr.Row():
             with gr.Column():
@@ -116,7 +116,7 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]:
             save_steps,
             warmup_steps,
             neftune_alpha,
-            optim,
+            extra_args,
             packing,
             neat_packing,
             train_on_prompt,
@@ -134,7 +134,7 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]:
             save_steps=save_steps,
             warmup_steps=warmup_steps,
             neftune_alpha=neftune_alpha,
-            optim=optim,
+            extra_args=extra_args,
             packing=packing,
             neat_packing=neat_packing,
             train_on_prompt=train_on_prompt,

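The dedicated optimizer textbox becomes a free-form JSON field. Its default value, '{"optim": "adamw_torch"}', reproduces the previous behavior, while any other trainer keyword can now be supplied from the UI. A minimal sketch of how such a value decodes into keyword arguments; the "weight_decay" key is an illustrative extra, not part of this diff:

    import json

    # What a user might type into the new "Extra arguments" box; "optim"
    # matches the old default, "weight_decay" is an illustrative extra.
    extra_args = '{"optim": "adamw_torch", "weight_decay": 0.01}'

    parsed = json.loads(extra_args)  # plain dict of trainer keyword arguments
    print(parsed["optim"])           # -> adamw_torch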
src/llamafactory/webui/locales.py

@@ -87,20 +87,6 @@ LOCALES = {
             "label": "체크포인트 경로",
         },
     },
-    "advanced_tab": {
-        "en": {
-            "label": "Advanced configurations",
-        },
-        "ru": {
-            "label": "Расширенные конфигурации",
-        },
-        "zh": {
-            "label": "高级设置",
-        },
-        "ko": {
-            "label": "고급 설정",
-        },
-    },
     "quantization_bit": {
         "en": {
             "label": "Quantization bit",
@@ -581,11 +567,11 @@ LOCALES = {
     },
     "neftune_alpha": {
         "en": {
-            "label": "NEFTune Alpha",
+            "label": "NEFTune alpha",
             "info": "Magnitude of noise adding to embedding vectors.",
         },
         "ru": {
-            "label": "NEFTune Alpha",
+            "label": "NEFTune alpha",
             "info": "Величина шума, добавляемого к векторам вложений.",
         },
         "zh": {
@@ -597,22 +583,22 @@ LOCALES = {
             "info": "임베딩 벡터에 추가되는 노이즈의 크기.",
         },
     },
-    "optim": {
+    "extra_args": {
         "en": {
-            "label": "Optimizer",
-            "info": "The optimizer to use: adamw_torch, adamw_8bit or adafactor.",
+            "label": "Extra arguments",
+            "info": "Extra arguments passed to the trainer in JSON format.",
         },
         "ru": {
-            "label": "Оптимизатор",
-            "info": "Оптимизатор для использования: adamw_torch, adamw_8bit или adafactor.",
+            "label": "Дополнительные аргументы",
+            "info": "Дополнительные аргументы, которые передаются тренеру в формате JSON.",
         },
         "zh": {
-            "label": "优化器",
-            "info": "使用的优化器：adamw_torch、adamw_8bit 或 adafactor。",
+            "label": "额外参数",
+            "info": "以 JSON 格式传递给训练器的额外参数。",
         },
         "ko": {
-            "label": "옵티마이저",
-            "info": "사용할 옵티마이저: adamw_torch, adamw_8bit 또는 adafactor 등.",
+            "label": "추가 인수",
+            "info": "JSON 형식으로 트레이너에게 전달할 추가 인수입니다.",
         },
     },
     "packing": {

src/llamafactory/webui/runner.py

@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import json
 import os
 from copy import deepcopy
 from subprocess import Popen, TimeoutExpired
@@ -78,6 +79,11 @@ class Runner:
         if not get("train.output_dir"):
             return ALERTS["err_no_output_dir"][lang]
 
+        try:
+            json.loads(get("train.extra_args"))
+        except json.JSONDecodeError:
+            return ALERTS["err_json_schema"][lang]
+
         stage = TRAINING_STAGES[get("train.training_stage")]
         if stage == "ppo" and not get("train.reward_model"):
             return ALERTS["err_no_reward_model"][lang]
@@ -130,7 +136,6 @@ class Runner:
             save_steps=get("train.save_steps"),
             warmup_steps=get("train.warmup_steps"),
             neftune_noise_alpha=get("train.neftune_alpha") or None,
-            optim=get("train.optim"),
             packing=get("train.packing") or get("train.neat_packing"),
             neat_packing=get("train.neat_packing"),
             train_on_prompt=get("train.train_on_prompt"),
@@ -148,6 +153,7 @@ class Runner:
             plot_loss=True,
             ddp_timeout=180000000,
             include_num_input_tokens_seen=True,
+            **json.loads(get("train.extra_args")),
         )
 
         # checkpoints
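
Taken together, the runner hunks form a validate-then-splat pattern: the pre-flight check only confirms that the string parses, and the final **json.loads(...) merges the decoded dict into the dict(...) call that builds the training arguments. Note that dict() raises a TypeError if the JSON repeats a keyword that is already passed explicitly. A minimal sketch of the pattern under those assumptions; build_args is an illustrative name, not part of the commit:

    import json

    def build_args(extra_args: str) -> dict:
        # Same shape as the diff above: fixed keywords first, then the
        # user-supplied JSON splatted in. dict() raises TypeError if the
        # JSON repeats an explicit keyword such as "plot_loss".
        return dict(
            plot_loss=True,
            include_num_input_tokens_seen=True,
            **json.loads(extra_args),
        )

    # The pre-flight check mirrors the one added above: reject malformed
    # JSON in the UI instead of failing deep inside the trainer.
    try:
        json.loads('{"optim": adamw_torch}')  # unquoted value -> invalid JSON
    except json.JSONDecodeError:
        print("invalid JSON, surface the err_json_schema alert")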