Mirror of https://github.com/hiyouga/LLaMA-Factory.git, synced 2025-08-03 20:22:49 +08:00

Commit: 59fd3155a1
Parent: ce112dc744
Message: add new options in webui
Former-commit-id: f793ca0a2c9e7abab360658d0e65506c1f97e1ae
Binary file changed (not shown): 146 KiB before, 138 KiB after.
@@ -3,12 +3,16 @@ from types import MethodType
 from typing import TYPE_CHECKING, List, Optional
 
 from llmtuner.extras.constants import LAYERNORM_NAMES
+from llmtuner.extras.logging import get_logger
 
 if TYPE_CHECKING:
     from transformers.modeling_utils import PreTrainedModel
     from llmtuner.hparams import FinetuningArguments
 
 
+logger = get_logger(__name__)
+
+
 def find_all_linear_modules(
     model: "PreTrainedModel",
     quantization_bit: Optional[int] = None,
@@ -49,6 +53,7 @@ def prepare_model_for_training(
         for name, param in model.named_parameters():
             if param.ndim == 1 and any(ln_name in name for ln_name in layernorm_names):
                 param.data = param.data.to(torch.float32)
+        logger.info("Upcasting weights in layernorm in float32.")
 
     if finetuning_args.neft_alpha > 1e-6:
         input_embed: torch.nn.Embedding = model.get_input_embeddings()
@@ -62,6 +67,7 @@ def prepare_model_for_training(
                 return embeddings
 
             input_embed.forward = MethodType(noisy_forward, input_embed)
+            logger.info("Using noisy embedding with alpha={:.2f}".format(finetuning_args.neft_alpha))
 
     if use_gradient_checkpointing:
         if hasattr(model, "enable_input_require_grads"):
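For context on the hunk above: NEFTune perturbs the input embeddings with uniform noise during training only, and the patched noisy_forward is bound onto the embedding layer via MethodType, with the new log line reporting the active alpha. A minimal standalone sketch of that style of patch follows; the helper name add_neftune_noise_ and the noise-scaling line (NEFTune's alpha / sqrt(seq_len * hidden_dim) rule) are illustrative, not code taken from the repository.

import torch
from types import MethodType


def add_neftune_noise_(embed_layer: torch.nn.Embedding, neft_alpha: float) -> None:
    # Patch forward() in place so uniform noise is injected only in training mode.
    base_forward = embed_layer.__class__.forward

    def noisy_forward(self: torch.nn.Embedding, x: torch.Tensor) -> torch.Tensor:
        embeddings = base_forward(self, x)
        if self.training:
            # NEFTune: scale Uniform(-1, 1) noise by alpha / sqrt(seq_len * hidden_dim)
            dims = embeddings.size(-2) * embeddings.size(-1)
            mag_norm = neft_alpha / (dims ** 0.5)
            embeddings = embeddings + torch.zeros_like(embeddings).uniform_(-mag_norm, mag_norm)
        return embeddings

    embed_layer.forward = MethodType(noisy_forward, embed_layer)


# usage: add_neftune_noise_(model.get_input_embeddings(), neft_alpha=5.0)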
@@ -73,6 +79,7 @@ def prepare_model_for_training(
 
         model.gradient_checkpointing_enable()
         model.config.use_cache = False # turn off when gradient checkpointing is enabled
+        logger.info("Gradient checkpointing enabled.")
 
     if finetuning_args.finetuning_type != "full" and hasattr(model, output_layer_name):
         output_layer: torch.nn.Linear = getattr(model, output_layer_name)
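The hunk above only adds a log line, but the surrounding context is worth spelling out: gradient checkpointing recomputes activations in the backward pass and is incompatible with the generation KV cache, which is why use_cache is switched off right next to gradient_checkpointing_enable(). Outside this codebase, the same pairing with plain transformers looks roughly like this (the model name is just an example):

from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained("gpt2")  # any causal LM works here

if hasattr(model, "enable_input_require_grads"):
    model.enable_input_require_grads()  # keep gradients flowing through the input embeddings
model.gradient_checkpointing_enable()
model.config.use_cache = False  # KV cache must stay off while gradient checkpointing is active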
@@ -31,9 +31,10 @@ def create_top() -> Dict[str, "Component"]:
 
     with gr.Accordion(label="Model config (LLaMA only)", open=False) as llama_tab:
         with gr.Row():
-            flash_attn = gr.Checkbox(value=False)
-            shift_attn = gr.Checkbox(value=False)
-            rope_scaling = gr.Dropdown(choices=["none", "linear", "dynamic"], value="none")
+            with gr.Column():
+                flash_attn = gr.Checkbox(value=False)
+                shift_attn = gr.Checkbox(value=False)
+            rope_scaling = gr.Radio(choices=["none", "linear", "dynamic"], value="none")
 
     model_name.change(
         list_checkpoint, [model_name, finetuning_type], [checkpoints], queue=False
@@ -79,26 +79,30 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]:
             logging_steps = gr.Slider(value=5, minimum=5, maximum=1000, step=5)
             save_steps = gr.Slider(value=100, minimum=10, maximum=5000, step=10)
             warmup_steps = gr.Slider(value=0, minimum=0, maximum=5000, step=1)
+            neft_alpha = gr.Slider(value=0, minimum=0, maximum=10, step=0.1)
 
-    input_elems.update({logging_steps, save_steps, warmup_steps})
+            with gr.Column():
+                train_on_prompt = gr.Checkbox(value=False)
+                upcast_layernorm = gr.Checkbox(value=False)
+
+    input_elems.update({logging_steps, save_steps, warmup_steps, neft_alpha, train_on_prompt, upcast_layernorm})
     elem_dict.update(dict(
-        advanced_tab=advanced_tab, logging_steps=logging_steps, save_steps=save_steps, warmup_steps=warmup_steps
+        advanced_tab=advanced_tab, logging_steps=logging_steps, save_steps=save_steps, warmup_steps=warmup_steps,
+        neft_alpha=neft_alpha, train_on_prompt=train_on_prompt, upcast_layernorm=upcast_layernorm
     ))
 
     with gr.Accordion(label="LoRA config", open=False) as lora_tab:
         with gr.Row():
             lora_rank = gr.Slider(value=8, minimum=1, maximum=1024, step=1, scale=1)
             lora_dropout = gr.Slider(value=0.1, minimum=0, maximum=1, step=0.01, scale=1)
-            lora_target = gr.Textbox(scale=2)
+            lora_target = gr.Textbox(scale=1)
+            additional_target = gr.Textbox(scale=1)
             resume_lora_training = gr.Checkbox(value=True, scale=1)
 
-    input_elems.update({lora_rank, lora_dropout, lora_target, resume_lora_training})
+    input_elems.update({lora_rank, lora_dropout, lora_target, additional_target, resume_lora_training})
     elem_dict.update(dict(
-        lora_tab=lora_tab,
-        lora_rank=lora_rank,
-        lora_dropout=lora_dropout,
-        lora_target=lora_target,
-        resume_lora_training=resume_lora_training,
+        lora_tab=lora_tab, lora_rank=lora_rank, lora_dropout=lora_dropout, lora_target=lora_target,
+        additional_target=additional_target, resume_lora_training=resume_lora_training,
     ))
 
     with gr.Accordion(label="RLHF config", open=False) as rlhf_tab:
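The train-tab hunk follows the file's usual pattern for exposing a new option: build the Gradio component, add it to input_elems so its value is collected when training launches, and register it in elem_dict so the LOCALES table below can attach a label and tooltip. A self-contained sketch of that wiring in plain Gradio follows; the set and dict names mirror the diff, but the snippet is an assumption-level illustration, not the repository's code.

import gradio as gr

input_elems, elem_dict = set(), {}

with gr.Blocks() as demo:
    with gr.Accordion(open=False) as advanced_tab:
        with gr.Row():
            neft_alpha = gr.Slider(value=0, minimum=0, maximum=10, step=0.1)
            with gr.Column():
                train_on_prompt = gr.Checkbox(value=False)
                upcast_layernorm = gr.Checkbox(value=False)

    # values gathered into the training command at launch time
    input_elems.update({neft_alpha, train_on_prompt, upcast_layernorm})
    # named registry used later to apply localized labels/info
    elem_dict.update(dict(
        advanced_tab=advanced_tab, neft_alpha=neft_alpha,
        train_on_prompt=train_on_prompt, upcast_layernorm=upcast_layernorm
    ))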
@@ -309,6 +309,36 @@ LOCALES = {
             "info": "学习率预热采用的步数。"
         }
     },
+    "neft_alpha": {
+        "en": {
+            "label": "NEFTune Alpha",
+            "info": "Magnitude of noise adding to embedding vectors."
+        },
+        "zh": {
+            "label": "NEFTune 噪声参数",
+            "info": "嵌入向量所添加的噪声大小。"
+        }
+    },
+    "train_on_prompt": {
+        "en": {
+            "label": "Train on prompt",
+            "info": "Compute loss on the prompt tokens in supervised fine-tuning."
+        },
+        "zh": {
+            "label": "计算输入损失",
+            "info": "在监督微调时候计算输入序列的损失。"
+        }
+    },
+    "upcast_layernorm": {
+        "en": {
+            "label": "Upcast LayerNorm",
+            "info": "Upcast weights of layernorm in float32."
+        },
+        "zh": {
+            "label": "缩放归一化层",
+            "info": "将归一化层权重缩放至 32 位浮点数。"
+        }
+    },
     "lora_tab": {
         "en": {
             "label": "LoRA configurations"
@@ -340,11 +370,21 @@ LOCALES = {
     "lora_target": {
         "en": {
             "label": "LoRA modules (optional)",
-            "info": "The name(s) of target modules to apply LoRA. Use commas to separate multiple modules."
+            "info": "Name(s) of target modules to apply LoRA. Use commas to separate multiple modules."
         },
         "zh": {
-            "label": "LoRA 作用层(非必填)",
-            "info": "应用 LoRA 的线性层名称。使用英文逗号分隔多个名称。"
+            "label": "LoRA 作用模块(非必填)",
+            "info": "应用 LoRA 的目标模块名称。使用英文逗号分隔多个名称。"
+        }
+    },
+    "additional_target": {
+        "en": {
+            "label": "Additional modules (optional)",
+            "info": "Name(s) of modules apart from LoRA layers to be set as trainable. Use commas to separate multiple modules."
+        },
+        "zh": {
+            "label": "附加模块(非必填)",
+            "info": "除 LoRA 层以外的可训练模块名称。使用英文逗号分隔多个名称。"
         }
     },
     "resume_lora_training": {
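Each LOCALES entry maps a component key to per-language label/info strings, which is all the webui needs in order to retitle the components registered in elem_dict. A minimal lookup helper in that spirit is sketched below; localize is a hypothetical name, not the project's API.

from typing import Dict

LOCALES: Dict[str, Dict[str, Dict[str, str]]] = {
    "neft_alpha": {
        "en": {"label": "NEFTune Alpha", "info": "Magnitude of noise adding to embedding vectors."},
        "zh": {"label": "NEFTune 噪声参数", "info": "嵌入向量所添加的噪声大小。"},
    },
}


def localize(key: str, lang: str) -> Dict[str, str]:
    # fall back to English if the requested language is missing (assumed behavior)
    entry = LOCALES.get(key, {})
    return entry.get(lang, entry.get("en", {}))


assert localize("neft_alpha", "zh")["label"] == "NEFTune 噪声参数"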
@@ -89,7 +89,6 @@ class Runner:
             stage=TRAINING_STAGES[get("train.training_stage")],
             model_name_or_path=get("top.model_path"),
             do_train=True,
-            overwrite_cache=False,
             cache_dir=user_config.get("cache_dir", None),
             checkpoint_dir=checkpoint_dir,
             finetuning_type=get("top.finetuning_type"),
@@ -112,9 +111,13 @@ class Runner:
             logging_steps=get("train.logging_steps"),
             save_steps=get("train.save_steps"),
             warmup_steps=get("train.warmup_steps"),
+            neft_alpha=get("train.neft_alpha"),
+            train_on_prompt=get("train.train_on_prompt"),
+            upcast_layernorm=get("train.upcast_layernorm"),
             lora_rank=get("train.lora_rank"),
             lora_dropout=get("train.lora_dropout"),
             lora_target=get("train.lora_target") or get_module(get("top.model_name")),
+            additional_target=get("train.additional_target"),
             resume_lora_training=get("train.resume_lora_training"),
             output_dir=output_dir
         )
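This hunk completes the chain: values collected from the new components are read back with get("train.<name>") and dropped into the flat keyword-argument dict that the runner turns into a training invocation. Schematically, the flattening step could look like the sketch below; build_cli_args, its defaults, and the boolean-to-bare-flag convention are assumptions for illustration, not the runner's actual logic.

def build_cli_args(ui_values: dict) -> list:
    # flatten UI state into "--key value" pairs; booleans become bare flags (assumed convention)
    args = dict(
        neft_alpha=ui_values.get("train.neft_alpha", 0),
        train_on_prompt=ui_values.get("train.train_on_prompt", False),
        upcast_layernorm=ui_values.get("train.upcast_layernorm", False),
        additional_target=ui_values.get("train.additional_target", ""),
    )
    cmd = []
    for key, value in args.items():
        if isinstance(value, bool):
            if value:
                cmd.append("--{}".format(key))
        elif value not in (None, ""):
            cmd.extend(["--{}".format(key), str(value)])
    return cmd


print(build_cli_args({"train.neft_alpha": 5, "train.upcast_layernorm": True}))
# -> ['--neft_alpha', '5', '--upcast_layernorm']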
@@ -160,7 +163,6 @@ class Runner:
             stage="sft",
             model_name_or_path=get("top.model_path"),
             do_eval=True,
-            overwrite_cache=False,
             predict_with_generate=True,
             cache_dir=user_config.get("cache_dir", None),
             checkpoint_dir=checkpoint_dir,