release v0.5.3

Former-commit-id: fa5ab21ebc0ab738178c0c57578db3bda995ae06
hiyouga 2024-02-29 00:34:19 +08:00
parent 845e750abd
commit 8e7d50dae4
10 changed files with 116 additions and 67 deletions

View File

@@ -42,9 +42,9 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/6ba60acc-e2e2-4bec-b846
 - **Various models**: LLaMA, Mistral, Mixtral-MoE, Qwen, Yi, Gemma, Baichuan, ChatGLM, Phi, etc.
 - **Integrated methods**: (Continuous) pre-training, supervised fine-tuning, reward modeling, PPO and DPO.
-- **Scalable resources**: 32-bit full-tuning, 16-bit freeze tuning, 16-bit LoRA tuning, 2/4/8-bit QLoRA with AQLM/AWQ/GPTQ/LLM.int8.
+- **Scalable resources**: 32-bit full-tuning, 16-bit freeze-tuning, 16-bit LoRA, 2/4/8-bit QLoRA via AQLM/AWQ/GPTQ/LLM.int8.
 - **Advanced algorithms**: DoRA, LongLoRA, LLaMA Pro, LoftQ, agent tuning.
-- **Intriguing tricks**: FlashAttention-2, Unsloth, RoPE scaling, NEFTune, rsLoRA.
+- **Practical tricks**: FlashAttention-2, Unsloth, RoPE scaling, NEFTune, rsLoRA.
 - **Experiment monitors**: LlamaBoard, TensorBoard, Wandb, MLflow, etc.

 ## Benchmark
@@ -140,7 +140,7 @@ Please refer to [constants.py](src/llmtuner/extras/constants.py) for a full list
 ## Supported Training Approaches

-| Approach               | Full-parameter     | Partial-parameter  | LoRA               | QLoRA              |
+| Approach               | Full-tuning        | Freeze-tuning      | LoRA               | QLoRA              |
 | ---------------------- | ------------------ | ------------------ | ------------------ | ------------------ |
 | Pre-Training           | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: |
 | Supervised Fine-Tuning | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: |
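As a point of reference for the "Scalable resources" bullet above, a minimal sketch of the 4-bit quantized loading path via bitsandbytes; the model id is an illustrative placeholder, not taken from this commit, and LLaMA-Factory wires this up internally:

```python
# Minimal sketch of 4-bit quantized loading (bitsandbytes backend).
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-2-7b-hf",  # placeholder model id
    quantization_config=BitsAndBytesConfig(load_in_4bit=True),
    device_map="auto",  # requires accelerate
)
```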

View File

@@ -41,10 +41,10 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/6ba60acc-e2e2-4bec-b846
 ## 项目特色

 - **多种模型**：LLaMA、Mistral、Mixtral-MoE、Qwen、Yi、Gemma、Baichuan、ChatGLM、Phi 等等。
 - **集成方法**：增量预训练、指令监督微调、奖励模型训练、PPO 训练和 DPO 训练。
-- **多种精度**：32 比特全参数训练、16 比特部分参数训练、16 比特 LoRA 训练、基于 AQLM/AWQ/GPTQ/LLM.int8 的 2/4/8 比特 LoRA 训练。
-- **先进算法**: DoRA、LongLoRA、LLaMA Pro、LoftQ、agent tuning。
-- **新鲜技巧**：FlashAttention-2、Unsloth、RoPE scaling、NEFTune、rsLoRA。
+- **多种精度**：32 比特全参数微调、16 比特冻结微调、16 比特 LoRA 微调和基于 AQLM/AWQ/GPTQ/LLM.int8 的 2/4/8 比特 QLoRA 微调。
+- **先进算法**：DoRA、LongLoRA、LLaMA Pro、LoftQ 和 Agent 微调。
+- **实用技巧**：FlashAttention-2、Unsloth、RoPE scaling、NEFTune 和 rsLoRA。
 - **实验监控**：LlamaBoard、TensorBoard、Wandb、MLflow 等等。

 ## 性能指标

View File

@@ -7,5 +7,5 @@ from .train import export_model, run_exp
 from .webui import create_ui, create_web_demo

-__version__ = "0.5.2"
+__version__ = "0.5.3"

 __all__ = ["create_app", "ChatModel", "Evaluator", "export_model", "run_exp", "create_ui", "create_web_demo"]

View File

@@ -3,7 +3,6 @@ import os
 import sys
 from typing import Any, Dict, Optional, Tuple

-import datasets
 import torch
 import transformers
 from transformers import HfArgumentParser, Seq2SeqTrainingArguments
@@ -62,7 +61,6 @@ def _parse_args(parser: "HfArgumentParser", args: Optional[Dict[str, Any]] = None
 def _set_transformers_logging(log_level: Optional[int] = logging.INFO) -> None:
-    datasets.utils.logging.set_verbosity(log_level)
     transformers.utils.logging.set_verbosity(log_level)
     transformers.utils.logging.enable_default_handler()
     transformers.utils.logging.enable_explicit_format()
@@ -243,7 +241,6 @@ def get_train_args(args: Optional[Dict[str, Any]] = None) -> _TRAIN_CLS:
             str(model_args.compute_dtype),
         )
     )
-    logger.info(f"Training/evaluation parameters {training_args}")

     transformers.set_seed(training_args.seed)

View File

@@ -51,7 +51,7 @@ def load_model_and_tokenizer(
     patch_tokenizer(tokenizer)

     config = AutoConfig.from_pretrained(model_args.model_name_or_path, **config_kwargs)
-    patch_config(config, tokenizer, model_args, finetuning_args, config_kwargs, is_trainable)
+    patch_config(config, tokenizer, model_args, config_kwargs, is_trainable)

     model = None
     if is_trainable and model_args.use_unsloth:

View File

@@ -24,7 +24,7 @@ if TYPE_CHECKING:
     from transformers import PretrainedConfig, PreTrainedTokenizer
     from trl import AutoModelForCausalLMWithValueHead

-    from ..hparams import FinetuningArguments, ModelArguments
+    from ..hparams import ModelArguments

 logger = get_logger(__name__)
@@ -157,7 +157,7 @@ def _configure_quantization(
     config_kwargs: Dict[str, Any],
 ) -> None:
     r"""
-    Priority: GPTQ-quantized (training) > AutoGPTQ (export) > Bitsandbytes (training)
+    Priority: PTQ-quantized (training) > AutoGPTQ (export) > Bitsandbytes (training)
     """
     if getattr(config, "quantization_config", None):  # gptq
         if is_deepspeed_zero3_enabled():
@@ -167,7 +167,15 @@ def _configure_quantization(
         quantization_config: Dict[str, Any] = getattr(config, "quantization_config", None)
         if quantization_config.get("quant_method", None) == "gptq" and quantization_config.get("bits", -1) == 4:
             quantization_config["use_exllama"] = False  # disable exllama

-        logger.info("Loading {}-bit GPTQ-quantized model.".format(quantization_config.get("bits", -1)))
+        if quantization_config.get("quant_method", None) == "aqlm":
+            quantization_config["bits"] = 2
+
+        logger.info(
+            "Loading {}-bit {}-quantized model.".format(
+                quantization_config.get("bits", "?"), quantization_config.get("quant_method", None)
+            )
+        )

     elif model_args.export_quantization_bit is not None:  # auto-gptq
         require_version("optimum>=1.16.0", "To fix: pip install optimum>=1.16.0")
@@ -253,7 +261,6 @@ def patch_config(
     config: "PretrainedConfig",
     tokenizer: "PreTrainedTokenizer",
     model_args: "ModelArguments",
-    finetuning_args: "FinetuningArguments",
     config_kwargs: Dict[str, Any],
     is_trainable: bool,
 ) -> None:
@@ -274,9 +281,6 @@ def patch_config(
     _configure_quantization(config, tokenizer, model_args, config_kwargs)

-    if finetuning_args.use_dora:
-        config_kwargs["device_map"] = {"": get_current_device()}

 def patch_model(
     model: "PreTrainedModel", tokenizer: "PreTrainedTokenizer", model_args: "ModelArguments", is_trainable: bool

View File

@@ -34,7 +34,7 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]:
     elem_dict.update(dict(training_stage=training_stage, dataset_dir=dataset_dir, dataset=dataset, **preview_elems))

     with gr.Row():
-        cutoff_len = gr.Slider(value=1024, minimum=4, maximum=8192, step=1)
+        cutoff_len = gr.Slider(value=1024, minimum=4, maximum=16384, step=1)
         learning_rate = gr.Textbox(value="5e-5")
         num_train_epochs = gr.Textbox(value="3.0")
         max_samples = gr.Textbox(value="100000")
@ -52,8 +52,8 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]:
) )
with gr.Row(): with gr.Row():
batch_size = gr.Slider(value=4, minimum=1, maximum=1024, step=1) batch_size = gr.Slider(value=2, minimum=1, maximum=1024, step=1)
gradient_accumulation_steps = gr.Slider(value=4, minimum=1, maximum=1024, step=1) gradient_accumulation_steps = gr.Slider(value=8, minimum=1, maximum=1024, step=1)
lr_scheduler_type = gr.Dropdown(choices=[scheduler.value for scheduler in SchedulerType], value="cosine") lr_scheduler_type = gr.Dropdown(choices=[scheduler.value for scheduler in SchedulerType], value="cosine")
max_grad_norm = gr.Textbox(value="1.0") max_grad_norm = gr.Textbox(value="1.0")
val_size = gr.Slider(value=0, minimum=0, maximum=1, step=0.001) val_size = gr.Slider(value=0, minimum=0, maximum=1, step=0.001)
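Worth noting: the two new defaults offset each other, so the effective batch size per optimizer step is unchanged:

```python
# Old defaults: 4 * 4 = 16 samples per step; new defaults: 2 * 8 = 16.
per_device_batch_size, grad_accum_steps = 2, 8
assert per_device_batch_size * grad_accum_steps == 4 * 4 == 16
```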
@@ -122,25 +122,31 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]:
     with gr.Accordion(label="LoRA config", open=False) as lora_tab:
         with gr.Row():
-            lora_rank = gr.Slider(value=8, minimum=1, maximum=1024, step=1)
-            lora_dropout = gr.Slider(value=0.1, minimum=0, maximum=1, step=0.01)
-            lora_target = gr.Textbox()
-            additional_target = gr.Textbox()
-        with gr.Column():
-            use_rslora = gr.Checkbox()
-            create_new_adapter = gr.Checkbox()
+            lora_rank = gr.Slider(value=8, minimum=1, maximum=1024, step=1, scale=1)
+            lora_alpha = gr.Slider(value=16, minimum=1, maximum=2048, step=0.1, scale=1)
+            lora_dropout = gr.Slider(value=0.1, minimum=0, maximum=1, step=0.01, scale=1)
+            lora_target = gr.Textbox(scale=2)
+        with gr.Row():
+            use_rslora = gr.Checkbox(scale=1)
+            use_dora = gr.Checkbox(scale=1)
+            create_new_adapter = gr.Checkbox(scale=1)
+            additional_target = gr.Textbox(scale=2)

-        input_elems.update({lora_rank, lora_dropout, lora_target, additional_target, use_rslora, create_new_adapter})
+        input_elems.update(
+            {lora_rank, lora_alpha, lora_dropout, lora_target, use_rslora, use_dora, create_new_adapter, additional_target}
+        )
         elem_dict.update(
             dict(
                 lora_tab=lora_tab,
                 lora_rank=lora_rank,
+                lora_alpha=lora_alpha,
                 lora_dropout=lora_dropout,
                 lora_target=lora_target,
-                additional_target=additional_target,
                 use_rslora=use_rslora,
+                use_dora=use_dora,
                 create_new_adapter=create_new_adapter,
+                additional_target=additional_target,
             )
         )
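The new `lora_alpha`, `use_rslora`, and `use_dora` fields correspond to standard PEFT adapter options. A minimal sketch, not LLaMA-Factory's exact wiring, of how these values typically land in a `LoraConfig`; `target_modules` is a model-dependent assumption:

```python
from peft import LoraConfig

lora_config = LoraConfig(
    r=8,                                  # lora_rank slider default
    lora_alpha=16,                        # new lora_alpha slider default
    lora_dropout=0.1,                     # lora_dropout slider default
    target_modules=["q_proj", "v_proj"],  # assumption; depends on the model
    use_rslora=True,                      # scale by alpha / sqrt(r) instead of alpha / r
    use_dora=True,                        # weight-decomposed LoRA (needs a recent peft)
)
```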

View File

@@ -572,6 +572,20 @@ LOCALES = {
             "info": "LoRA 矩阵的秩。",
         },
     },
+    "lora_alpha": {
+        "en": {
+            "label": "LoRA Alpha",
+            "info": "Lora scaling coefficient.",
+        },
+        "ru": {
+            "label": "LoRA Alpha",
+            "info": "Коэффициент масштабирования LoRA.",
+        },
+        "zh": {
+            "label": "LoRA 缩放系数",
+            "info": "LoRA 缩放系数大小。",
+        },
+    },
     "lora_dropout": {
         "en": {
             "label": "LoRA Dropout",
@@ -600,6 +614,48 @@ LOCALES = {
             "info": "应用 LoRA 的目标模块名称。使用英文逗号分隔多个名称。",
         },
     },
+    "use_rslora": {
+        "en": {
+            "label": "Use rslora",
+            "info": "Use the rank stabilization scaling factor for LoRA layer.",
+        },
+        "ru": {
+            "label": "Использовать rslora",
+            "info": "Использовать коэффициент масштабирования стабилизации ранга для слоя LoRA.",
+        },
+        "zh": {
+            "label": "使用 rslora",
+            "info": "对 LoRA 层使用秩稳定缩放方法。",
+        },
+    },
+    "use_dora": {
+        "en": {
+            "label": "Use DoRA",
+            "info": "Use weight-decomposed LoRA.",
+        },
+        "ru": {
+            "label": "Используйте DoRA",
+            "info": "Используйте LoRA с декомпозицией весов.",
+        },
+        "zh": {
+            "label": "使用 DoRA",
+            "info": "使用权重分解的 LoRA。",
+        },
+    },
+    "create_new_adapter": {
+        "en": {
+            "label": "Create new adapter",
+            "info": "Create a new adapter with randomly initialized weight upon the existing one.",
+        },
+        "ru": {
+            "label": "Создать новый адаптер",
+            "info": "Создать новый адаптер с случайной инициализацией веса на основе существующего.",
+        },
+        "zh": {
+            "label": "新建适配器",
+            "info": "在现有的适配器上创建一个随机初始化后的新适配器。",
+        },
+    },
     "additional_target": {
         "en": {
             "label": "Additional modules (optional)",
@@ -617,34 +673,6 @@ LOCALES = {
             "info": "除 LoRA 层以外的可训练模块名称。使用英文逗号分隔多个名称。",
         },
     },
-    "use_rslora": {
-        "en": {
-            "label": "Use rslora",
-            "info": "Use the rank stabilization scaling factor for LoRA layer.",
-        },
-        "ru": {
-            "label": "Использовать rslora",
-            "info": "Использовать коэффициент масштабирования стабилизации ранга для слоя LoRA.",
-        },
-        "zh": {
-            "label": "使用 rslora",
-            "info": "对 LoRA 层使用秩稳定缩放方法。",
-        },
-    },
-    "create_new_adapter": {
-        "en": {
-            "label": "Create new adapter",
-            "info": "Create a new adapter with randomly initialized weight upon the existing one.",
-        },
-        "ru": {
-            "label": "Создать новый адаптер",
-            "info": "Создать новый адаптер с случайной инициализацией веса на основе существующего.",
-        },
-        "zh": {
-            "label": "新建适配器",
-            "info": "在现有的适配器上创建一个随机初始化后的新适配器。",
-        },
-    },
     "rlhf_tab": {
         "en": {
             "label": "RLHF configurations",
@@ -1055,6 +1083,11 @@ ALERTS = {
         "ru": "Неверная схема JSON.",
         "zh": "Json 格式错误。",
     },
+    "warn_no_cuda": {
+        "en": "CUDA environment was not detected.",
+        "ru": "Среда CUDA не обнаружена.",
+        "zh": "未检测到 CUDA 环境。",
+    },
     "info_aborting": {
         "en": "Aborted, wait for terminating...",
         "ru": "Прервано, ожидание завершения...",

View File

@@ -8,6 +8,7 @@ import gradio as gr
 import transformers
 from gradio.components import Component  # cannot use TYPE_CHECKING here
 from transformers.trainer import TRAINING_ARGS_NAME
+from transformers.utils import is_torch_cuda_available

 from ..extras.callbacks import LogCallback
 from ..extras.constants import TRAINING_STAGES
@@ -64,12 +65,15 @@ class Runner:
         if len(dataset) == 0:
             return ALERTS["err_no_dataset"][lang]

-        if self.demo_mode and (not from_preview):
+        if not from_preview and self.demo_mode:
             return ALERTS["err_demo"][lang]

         if not from_preview and get_device_count() > 1:
             return ALERTS["err_device_count"][lang]

+        if not from_preview and not is_torch_cuda_available():
+            gr.Warning(ALERTS["warn_no_cuda"][lang])
+
         self.aborted = False
         self.logger_handler.reset()
         self.trainer_callback = LogCallback(self)
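The new guard only warns rather than returning an error, so CPU-only sessions can still proceed. A standalone sketch of the same check, with `gr.Warning` replaced by `print` so it runs outside a Gradio event handler:

```python
# Standalone equivalent of the new CUDA guard.
from transformers.utils import is_torch_cuda_available

if not is_torch_cuda_available():
    print("CUDA environment was not detected.")  # ALERTS["warn_no_cuda"]["en"]
```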
@@ -139,11 +143,13 @@ class Runner:
             args["num_layer_trainable"] = int(get("train.num_layer_trainable"))
             args["name_module_trainable"] = get("train.name_module_trainable")
         elif args["finetuning_type"] == "lora":
-            args["lora_rank"] = get("train.lora_rank")
-            args["lora_dropout"] = get("train.lora_dropout")
+            args["lora_rank"] = int(get("train.lora_rank"))
+            args["lora_alpha"] = float(get("train.lora_alpha"))
+            args["lora_dropout"] = float(get("train.lora_dropout"))
             args["lora_target"] = get("train.lora_target") or get_module(get("top.model_name"))
-            args["additional_target"] = get("train.additional_target") or None
             args["use_rslora"] = get("train.use_rslora")
+            args["use_dora"] = get("train.use_dora")
+            args["additional_target"] = get("train.additional_target") or None
             if args["stage"] in ["rm", "ppo", "dpo"]:
                 args["create_new_adapter"] = args["quantization_bit"] is None
             else:
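The `int(...)`/`float(...)` casts normalize whatever the Gradio components return before the values reach the argument parser. A hedged illustration; the payload shape is an assumption, not this class's actual internals:

```python
# Hypothetical UI payload: sliders may surface values as floats (or strings),
# so numeric hyperparameters are coerced to their expected types up front.
raw = {"train.lora_rank": 8.0, "train.lora_alpha": 16.0}

args = {
    "lora_rank": int(raw["train.lora_rank"]),      # 8
    "lora_alpha": float(raw["train.lora_alpha"]),  # 16.0
}
```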

View File

@@ -44,11 +44,14 @@ def can_quantize(finetuning_type: str) -> Dict[str, Any]:
 def check_json_schema(text: str, lang: str) -> None:
     try:
         tools = json.loads(text)
-        for tool in tools:
-            assert "name" in tool
-    except AssertionError:
+        if tools:
+            assert isinstance(tools, list)
+            for tool in tools:
+                if "name" not in tool:
+                    raise ValueError("Name not found.")
+    except ValueError:
         gr.Warning(ALERTS["err_tool_name"][lang])
-    except json.JSONDecodeError:
+    except Exception:
         gr.Warning(ALERTS["err_json_schema"][lang])