diff --git a/src/llmtuner/chat/stream_chat.py b/src/llmtuner/chat/stream_chat.py index 8220c0b3..e207ee0b 100644 --- a/src/llmtuner/chat/stream_chat.py +++ b/src/llmtuner/chat/stream_chat.py @@ -1,8 +1,7 @@ import torch -from types import MethodType from typing import Any, Dict, Generator, List, Optional, Tuple from threading import Thread -from transformers import PreTrainedModel, TextIteratorStreamer +from transformers import TextIteratorStreamer from llmtuner.extras.misc import dispatch_model, get_logits_processor, get_stopping_criteria from llmtuner.extras.template import get_template_and_fix_tokenizer @@ -15,10 +14,9 @@ class ChatModel: model_args, data_args, finetuning_args, self.generating_args = get_infer_args(args) self.model, self.tokenizer = load_model_and_tokenizer(model_args, finetuning_args) self.model = dispatch_model(self.model) - self.model = self.model.eval() # change to eval mode + self.model = self.model.eval() # enable evaluation mode self.template = get_template_and_fix_tokenizer(data_args.template, self.tokenizer) self.source_prefix = data_args.source_prefix - self.model.generate = MethodType(PreTrainedModel.generate, self.model) # disable custom method (for Qwen) def process_args( self, diff --git a/src/llmtuner/extras/constants.py b/src/llmtuner/extras/constants.py index 4e7101f7..8ee997bb 100644 --- a/src/llmtuner/extras/constants.py +++ b/src/llmtuner/extras/constants.py @@ -1,7 +1,5 @@ IGNORE_INDEX = -100 -SFT_SCRIPT_PREFIX = "CUDA_VISIBLE_DEVICES=0 python " - LOG_FILE_NAME = "trainer_log.jsonl" VALUE_HEAD_FILE_NAME = "value_head.bin" diff --git a/src/llmtuner/extras/logging.py b/src/llmtuner/extras/logging.py index 0b1a68f6..d6f185e6 100644 --- a/src/llmtuner/extras/logging.py +++ b/src/llmtuner/extras/logging.py @@ -8,6 +8,9 @@ class LoggerHandler(logging.Handler): super().__init__() self.log = "" + def reset(self): + self.log = "" + def emit(self, record): if record.name == "httpx": return diff --git a/src/llmtuner/hparams/model_args.py b/src/llmtuner/hparams/model_args.py index 44454f45..dc515f51 100644 --- a/src/llmtuner/hparams/model_args.py +++ b/src/llmtuner/hparams/model_args.py @@ -43,9 +43,9 @@ class ModelArguments: default=True, metadata={"help": "Whether to use double quantization in int4 training or not."} ) - compute_dtype: Optional[torch.dtype] = field( + rope_scaling: Optional[Literal["linear", "dynamic"]] = field( default=None, - metadata={"help": "Used in quantization configs. Do not specify this argument manually."} + metadata={"help": "Adopt scaled rotary positional embeddings."} ) checkpoint_dir: Optional[str] = field( default=None, @@ -63,8 +63,19 @@ class ModelArguments: default=None, metadata={"help": "Auth token to log in with Hugging Face Hub."} ) + compute_dtype: Optional[torch.dtype] = field( + default=None, + metadata={"help": "Used in quantization configs. Do not specify this argument manually."} + ) + model_max_length: Optional[int] = field( + default=None, + metadata={"help": "Used in rope scaling. 
Do not specify this argument manually."}
+    )

     def __post_init__(self):
+        if self.compute_dtype is not None or self.model_max_length is not None:
+            raise ValueError("These arguments cannot be specified.")
+
         if self.checkpoint_dir is not None: # support merging multiple lora weights
             self.checkpoint_dir = [cd.strip() for cd in self.checkpoint_dir.split(",")]
diff --git a/src/llmtuner/tuner/core/loader.py b/src/llmtuner/tuner/core/loader.py
index 74b4b59f..c03bdf63 100644
--- a/src/llmtuner/tuner/core/loader.py
+++ b/src/llmtuner/tuner/core/loader.py
@@ -1,5 +1,7 @@
 import os
+import math
 import torch
+from types import MethodType
 from typing import TYPE_CHECKING, Literal, Optional, Tuple

 from transformers import (
@@ -66,15 +68,56 @@ def load_model_and_tokenizer(
         **config_kwargs
     )

-    if model_args.checkpoint_dir is not None and finetuning_args.finetuning_type == "full":
+    if finetuning_args.finetuning_type == "full" and model_args.checkpoint_dir is not None:
         model_to_load = model_args.checkpoint_dir[0]
     else:
         model_to_load = model_args.model_name_or_path

     config = AutoConfig.from_pretrained(model_to_load, **config_kwargs)
-    is_mergeable = True
+
+    if hasattr(config, "fp16") and hasattr(config, "bf16"): # fix Qwen config
+        if model_args.compute_dtype == torch.bfloat16:
+            setattr(config, "bf16", True)
+        else:
+            setattr(config, "fp16", True)
+
+    # Set RoPE scaling
+    if model_args.rope_scaling is not None:
+        if hasattr(config, "use_dynamic_ntk"): # for Qwen models
+            if is_trainable:
+                logger.warning("Qwen model does not support RoPE scaling in training.")
+            else:
+                setattr(config, "use_dynamic_ntk", True)
+                setattr(config, "use_logn_attn", True)
+                logger.info("Using dynamic NTK scaling.")
+
+        elif hasattr(config, "rope_scaling"): # for LLaMA models
+            if is_trainable:
+                if model_args.rope_scaling == "dynamic":
+                    logger.warning(
+                        "Dynamic NTK may not work well with fine-tuning. "
+                        "See: https://github.com/huggingface/transformers/pull/24653"
+                    )
+
+                current_max_length = getattr(config, "max_position_embeddings", None)
+                if current_max_length and model_args.model_max_length <= current_max_length:
+                    logger.warning("Input length is smaller than max length. Consider increasing input length.")
+                    scaling_factor = 1.0
+                else:
+                    scaling_factor = float(math.ceil(model_args.model_max_length / current_max_length))
+            else:
+                scaling_factor = 2.0
+
+            setattr(config, "rope_scaling", {"type": model_args.rope_scaling, "factor": scaling_factor})
+            logger.info("Using {} scaling strategy and setting scaling factor to {}".format(
+                model_args.rope_scaling, scaling_factor
+            ))
+
+        else:
+            logger.warning("Current model does not support RoPE scaling.")

     # Quantization configurations (using bitsandbytes library).
+    is_mergeable = True
     if model_args.quantization_bit is not None:
         if model_args.quantization_bit == 8:
             require_version("bitsandbytes>=0.37.0", "To fix: pip install bitsandbytes>=0.37.0")
@@ -95,7 +138,7 @@ def load_model_and_tokenizer(
         config_kwargs["device_map"] = {"": int(os.environ.get("LOCAL_RANK", "0"))} if is_trainable else "auto"
         logger.info("Quantizing model to {} bit.".format(model_args.quantization_bit))

-    # Load and prepare pretrained models (without valuehead).
+    # Load and prepare pre-trained models (without valuehead).
model = AutoModelForCausalLM.from_pretrained( model_to_load, config=config, @@ -104,6 +147,10 @@ def load_model_and_tokenizer( **config_kwargs ) + # Disable custom generate method (for Qwen) + if "GenerationMixin" not in str(model.generate.__func__): + model.generate = MethodType(PreTrainedModel.generate, model) + # Register auto class to save the custom code files. if isinstance(config, PretrainedConfig) and "AutoConfig" in getattr(config, "auto_map", {}): config.__class__.register_for_auto_class() @@ -116,10 +163,10 @@ def load_model_and_tokenizer( model = prepare_model_for_training(model, finetuning_args.finetuning_type) if is_trainable else model model = init_adapter(model, model_args, finetuning_args, is_trainable, is_mergeable) - if stage == "rm" or stage == "ppo": # add value head - model: "AutoModelForCausalLMWithValueHead" = AutoModelForCausalLMWithValueHead.from_pretrained(model) + # Prepare model with valuehead for RLHF + if stage == "rm" or stage == "ppo": + model: AutoModelForCausalLMWithValueHead = AutoModelForCausalLMWithValueHead.from_pretrained(model) reset_logging() - if stage == "rm" and model_args.checkpoint_dir is not None: # load valuehead weights to evaluate reward model logger.warning("Only the last checkpoint containing valuehead will be loaded as the valuehead.") if load_valuehead_params(model, model_args.checkpoint_dir[-1]): @@ -133,9 +180,11 @@ def load_model_and_tokenizer( model.pretrained_model.load_adapter(model_args.reward_model, "reward", is_trainable=False) assert load_valuehead_params(model, model_args.reward_model), "Reward model is not correctly loaded." + # Prepare model for inference if not is_trainable: model.requires_grad_(False) # fix all model params - model = model.half() if model_args.quantization_bit is None else model # cast from fp32 to fp16 + infer_dtype = torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float16 # detect cuda capability + model = model.to(infer_dtype) if model_args.quantization_bit is None else model trainable_params, all_param = count_parameters(model) logger.info("trainable params: {:d} || all params: {:d} || trainable%: {:.4f}".format( diff --git a/src/llmtuner/tuner/core/parser.py b/src/llmtuner/tuner/core/parser.py index c5bdbe16..7dda0a5c 100644 --- a/src/llmtuner/tuner/core/parser.py +++ b/src/llmtuner/tuner/core/parser.py @@ -151,13 +151,16 @@ def get_train_args( training_args.optim = "adamw_torch" if training_args.optim == "adamw_hf" else training_args.optim # suppress warning - if model_args.quantization_bit is not None: - if training_args.fp16: - model_args.compute_dtype = torch.float16 - elif training_args.bf16: - model_args.compute_dtype = torch.bfloat16 - else: - model_args.compute_dtype = torch.float32 + if training_args.fp16: + model_args.compute_dtype = torch.float16 + elif training_args.bf16: + if not torch.cuda.is_bf16_supported(): + raise ValueError("Current device does not support bf16 training.") + model_args.compute_dtype = torch.bfloat16 + else: + model_args.compute_dtype = torch.float32 + + model_args.model_max_length = data_args.max_source_length + data_args.max_target_length # Log on each process the small summary: logger.info("Process rank: {}, device: {}, n_gpu: {}\n distributed training: {}, 16-bits training: {}".format( diff --git a/src/llmtuner/tuner/tune.py b/src/llmtuner/tuner/tune.py index 7b9446e9..a4a4c2a1 100644 --- a/src/llmtuner/tuner/tune.py +++ b/src/llmtuner/tuner/tune.py @@ -35,7 +35,7 @@ def run_exp(args: Optional[Dict[str, Any]] = None, callbacks: Optional[List["Tra 
def export_model(args: Optional[Dict[str, Any]] = None, max_shard_size: Optional[str] = "10GB"): - model_args, _, training_args, finetuning_args, _ = get_train_args(args) + model_args, _, training_args, finetuning_args, _, _ = get_train_args(args) model, tokenizer = load_model_and_tokenizer(model_args, finetuning_args) model.save_pretrained(training_args.output_dir, max_shard_size=max_shard_size) try: diff --git a/src/llmtuner/webui/components/eval.py b/src/llmtuner/webui/components/eval.py index ce6eddae..72d08a1f 100644 --- a/src/llmtuner/webui/components/eval.py +++ b/src/llmtuner/webui/components/eval.py @@ -14,13 +14,13 @@ def create_eval_tab(top_elems: Dict[str, "Component"], runner: "Runner") -> Dict with gr.Row(): dataset_dir = gr.Textbox(value=DEFAULT_DATA_DIR, scale=2) dataset = gr.Dropdown(multiselect=True, scale=4) - preview_btn = gr.Button(interactive=False, scale=1) + data_preview_btn = gr.Button(interactive=False, scale=1) preview_box, preview_count, preview_samples, close_btn = create_preview_box() dataset_dir.change(list_dataset, [dataset_dir], [dataset]) - dataset.change(can_preview, [dataset_dir, dataset], [preview_btn]) - preview_btn.click( + dataset.change(can_preview, [dataset_dir, dataset], [data_preview_btn]) + data_preview_btn.click( get_preview, [dataset_dir, dataset], [preview_count, preview_samples, preview_box], @@ -35,6 +35,7 @@ def create_eval_tab(top_elems: Dict[str, "Component"], runner: "Runner") -> Dict predict = gr.Checkbox(value=True) with gr.Row(): + cmd_preview_btn = gr.Button() start_btn = gr.Button() stop_btn = gr.Button() @@ -44,35 +45,36 @@ def create_eval_tab(top_elems: Dict[str, "Component"], runner: "Runner") -> Dict with gr.Box(): output_box = gr.Markdown() - start_btn.click( - runner.run_eval, - [ - top_elems["lang"], - top_elems["model_name"], - top_elems["checkpoints"], - top_elems["finetuning_type"], - top_elems["quantization_bit"], - top_elems["template"], - top_elems["source_prefix"], - dataset_dir, - dataset, - max_source_length, - max_target_length, - max_samples, - batch_size, - predict - ], - [ - output_box, - process_bar - ] - ) + input_list = [ + top_elems["lang"], + top_elems["model_name"], + top_elems["checkpoints"], + top_elems["finetuning_type"], + top_elems["quantization_bit"], + top_elems["template"], + top_elems["source_prefix"], + dataset_dir, + dataset, + max_source_length, + max_target_length, + max_samples, + batch_size, + predict + ] + + output_list = [ + output_box, + process_bar + ] + + cmd_preview_btn.click(runner.preview_eval, input_list, output_list) + start_btn.click(runner.run_eval, input_list, output_list) stop_btn.click(runner.set_abort, queue=False) return dict( dataset_dir=dataset_dir, dataset=dataset, - preview_btn=preview_btn, + data_preview_btn=data_preview_btn, preview_count=preview_count, preview_samples=preview_samples, close_btn=close_btn, @@ -81,6 +83,7 @@ def create_eval_tab(top_elems: Dict[str, "Component"], runner: "Runner") -> Dict max_samples=max_samples, batch_size=batch_size, predict=predict, + cmd_preview_btn=cmd_preview_btn, start_btn=start_btn, stop_btn=stop_btn, output_box=output_box diff --git a/src/llmtuner/webui/components/sft.py b/src/llmtuner/webui/components/sft.py index e74ef5cf..a4101d15 100644 --- a/src/llmtuner/webui/components/sft.py +++ b/src/llmtuner/webui/components/sft.py @@ -16,13 +16,13 @@ def create_sft_tab(top_elems: Dict[str, "Component"], runner: "Runner") -> Dict[ with gr.Row(): dataset_dir = gr.Textbox(value=DEFAULT_DATA_DIR, scale=2) dataset = 
gr.Dropdown(multiselect=True, scale=4) - preview_btn = gr.Button(interactive=False, scale=1) + data_preview_btn = gr.Button(interactive=False, scale=1) preview_box, preview_count, preview_samples, close_btn = create_preview_box() dataset_dir.change(list_dataset, [dataset_dir], [dataset]) - dataset.change(can_preview, [dataset_dir, dataset], [preview_btn]) - preview_btn.click( + dataset.change(can_preview, [dataset_dir, dataset], [data_preview_btn]) + data_preview_btn.click( get_preview, [dataset_dir, dataset], [preview_count, preview_samples, preview_box], @@ -61,15 +61,12 @@ def create_sft_tab(top_elems: Dict[str, "Component"], runner: "Runner") -> Dict[ resume_lora_training = gr.Checkbox(value=True, scale=1) with gr.Row(): - preview_script_btn = gr.Button() + cmd_preview_btn = gr.Button() start_btn = gr.Button() stop_btn = gr.Button() with gr.Row(): with gr.Column(scale=3): - with gr.Box(): - preview_script_box = gr.Textbox() - with gr.Row(): output_dir = gr.Textbox() @@ -82,82 +79,45 @@ def create_sft_tab(top_elems: Dict[str, "Component"], runner: "Runner") -> Dict[ with gr.Column(scale=1): loss_viewer = gr.Plot() - preview_script_btn.click( - runner.preview_sft_script, - [ - top_elems["lang"], - top_elems["model_name"], - top_elems["checkpoints"], - top_elems["finetuning_type"], - top_elems["quantization_bit"], - top_elems["template"], - top_elems["source_prefix"], - dataset_dir, - dataset, - max_source_length, - max_target_length, - learning_rate, - num_train_epochs, - max_samples, - batch_size, - gradient_accumulation_steps, - lr_scheduler_type, - max_grad_norm, - val_size, - logging_steps, - save_steps, - warmup_steps, - compute_type, - padding_side, - lora_rank, - lora_dropout, - lora_target, - resume_lora_training, - output_dir - ], - [ - preview_script_box - ] - ) + input_list = [ + top_elems["lang"], + top_elems["model_name"], + top_elems["checkpoints"], + top_elems["finetuning_type"], + top_elems["quantization_bit"], + top_elems["template"], + top_elems["source_prefix"], + dataset_dir, + dataset, + max_source_length, + max_target_length, + learning_rate, + num_train_epochs, + max_samples, + batch_size, + gradient_accumulation_steps, + lr_scheduler_type, + max_grad_norm, + val_size, + logging_steps, + save_steps, + warmup_steps, + compute_type, + padding_side, + lora_rank, + lora_dropout, + lora_target, + resume_lora_training, + output_dir + ] - start_btn.click( - runner.run_train, - [ - top_elems["lang"], - top_elems["model_name"], - top_elems["checkpoints"], - top_elems["finetuning_type"], - top_elems["quantization_bit"], - top_elems["template"], - top_elems["source_prefix"], - dataset_dir, - dataset, - max_source_length, - max_target_length, - learning_rate, - num_train_epochs, - max_samples, - batch_size, - gradient_accumulation_steps, - lr_scheduler_type, - max_grad_norm, - val_size, - logging_steps, - save_steps, - warmup_steps, - compute_type, - padding_side, - lora_rank, - lora_dropout, - lora_target, - resume_lora_training, - output_dir - ], - [ - output_box, - process_bar - ] - ) + output_list = [ + output_box, + process_bar + ] + + cmd_preview_btn.click(runner.preview_train, input_list, output_list) + start_btn.click(runner.run_train, input_list, output_list) stop_btn.click(runner.set_abort, queue=False) process_bar.change( @@ -167,7 +127,7 @@ def create_sft_tab(top_elems: Dict[str, "Component"], runner: "Runner") -> Dict[ return dict( dataset_dir=dataset_dir, dataset=dataset, - preview_btn=preview_btn, + data_preview_btn=data_preview_btn, preview_count=preview_count, 
preview_samples=preview_samples, close_btn=close_btn, @@ -192,11 +152,10 @@ def create_sft_tab(top_elems: Dict[str, "Component"], runner: "Runner") -> Dict[ lora_dropout=lora_dropout, lora_target=lora_target, resume_lora_training=resume_lora_training, + cmd_preview_btn=cmd_preview_btn, start_btn=start_btn, stop_btn=stop_btn, output_dir=output_dir, output_box=output_box, - loss_viewer=loss_viewer, - preview_script_btn=preview_script_btn, - preview_script_box=preview_script_box + loss_viewer=loss_viewer ) diff --git a/src/llmtuner/webui/locales.py b/src/llmtuner/webui/locales.py index c4845735..aba4acae 100644 --- a/src/llmtuner/webui/locales.py +++ b/src/llmtuner/webui/locales.py @@ -105,12 +105,12 @@ LOCALES = { "label": "数据集" } }, - "preview_btn": { + "data_preview_btn": { "en": { - "value": "Preview" + "value": "Preview dataset" }, "zh": { - "value": "预览" + "value": "预览数据集" } }, "preview_count": { @@ -335,6 +335,14 @@ LOCALES = { "info": "接着上次的 LoRA 权重训练或创建一个新的 LoRA 权重。" } }, + "cmd_preview_btn": { + "en": { + "value": "Preview command" + }, + "zh": { + "value": "预览命令" + } + }, "start_btn": { "en": { "value": "Start" @@ -500,22 +508,6 @@ LOCALES = { "zh": { "value": "开始导出" } - }, - "preview_script_btn": { - "en": { - "value": "preview train script" - }, - "zh": { - "value": "预览训练脚本命令" - } - }, - "preview_script_box": { - "en": { - "label": "SFT Script Preview", - }, - "zh": { - "label": "训练命令预览", - } } } diff --git a/src/llmtuner/webui/runner.py b/src/llmtuner/webui/runner.py index d0d12d14..aab31349 100644 --- a/src/llmtuner/webui/runner.py +++ b/src/llmtuner/webui/runner.py @@ -5,16 +5,16 @@ import threading import time import transformers from transformers.trainer import TRAINING_ARGS_NAME -from typing import Generator, List, Tuple +from typing import Any, Dict, Generator, List, Tuple from llmtuner.extras.callbacks import LogCallback -from llmtuner.extras.constants import DEFAULT_MODULE, SFT_SCRIPT_PREFIX +from llmtuner.extras.constants import DEFAULT_MODULE from llmtuner.extras.logging import LoggerHandler from llmtuner.extras.misc import torch_gc from llmtuner.tuner import run_exp from llmtuner.webui.common import get_model_path, get_save_dir from llmtuner.webui.locales import ALERTS -from llmtuner.webui.utils import get_eval_results, update_process_bar +from llmtuner.webui.utils import gen_cmd, get_eval_results, update_process_bar class Runner: @@ -22,39 +22,36 @@ class Runner: def __init__(self): self.aborted = False self.running = False + self.logger_handler = LoggerHandler() + self.logger_handler.setLevel(logging.INFO) + logging.root.addHandler(self.logger_handler) + transformers.logging.add_handler(self.logger_handler) def set_abort(self): self.aborted = True self.running = False - def initialize( + def _initialize( self, lang: str, model_name: str, dataset: List[str] - ) -> Tuple[str, str, LoggerHandler, LogCallback]: + ) -> str: if self.running: - return None, ALERTS["err_conflict"][lang], None, None + return ALERTS["err_conflict"][lang] if not model_name: - return None, ALERTS["err_no_model"][lang], None, None + return ALERTS["err_no_model"][lang] - model_name_or_path = get_model_path(model_name) - if not model_name_or_path: - return None, ALERTS["err_no_path"][lang], None, None + if not get_model_path(model_name): + return ALERTS["err_no_path"][lang] if len(dataset) == 0: - return None, ALERTS["err_no_dataset"][lang], None, None + return ALERTS["err_no_dataset"][lang] self.aborted = False - self.running = True + self.logger_handler.reset() + self.trainer_callback = 
LogCallback(self) + return "" - logger_handler = LoggerHandler() - logger_handler.setLevel(logging.INFO) - logging.root.addHandler(logger_handler) - transformers.logging.add_handler(logger_handler) - trainer_callback = LogCallback(self) - - return model_name_or_path, "", logger_handler, trainer_callback - - def finalize( + def _finalize( self, lang: str, finish_info: str ) -> str: self.running = False @@ -64,7 +61,7 @@ class Runner: else: return finish_info - def run_train( + def _parse_train_args( self, lang: str, model_name: str, @@ -95,52 +92,19 @@ class Runner: lora_target: str, resume_lora_training: bool, output_dir: str - ) -> Generator[str, None, None]: - model_name_or_path, error, logger_handler, trainer_callback = self.initialize(lang, model_name, dataset) - if error: - yield error, gr.update(visible=False) - return - output_dir = os.path.join(get_save_dir(model_name), finetuning_type, output_dir) - - args = self._build_args(batch_size, checkpoints, compute_type, dataset, dataset_dir, finetuning_type, - gradient_accumulation_steps, learning_rate, logging_steps, lora_dropout, lora_rank, - lora_target, lr_scheduler_type, max_grad_norm, max_samples, max_source_length, - max_target_length, model_name, model_name_or_path, num_train_epochs, output_dir, - padding_side, quantization_bit, resume_lora_training, save_steps, source_prefix, - template, val_size, warmup_steps) - - run_kwargs = dict(args=args, callbacks=[trainer_callback]) - thread = threading.Thread(target=run_exp, kwargs=run_kwargs) - thread.start() - - while thread.is_alive(): - time.sleep(2) - if self.aborted: - yield ALERTS["info_aborting"][lang], gr.update(visible=False) - else: - yield logger_handler.log, update_process_bar(trainer_callback) - - if os.path.exists(os.path.join(output_dir, TRAINING_ARGS_NAME)): - finish_info = ALERTS["info_finished"][lang] - else: - finish_info = ALERTS["err_failed"][lang] - - yield self.finalize(lang, finish_info), gr.update(visible=False) - - def _build_args(self, batch_size, checkpoints, compute_type, dataset, dataset_dir, finetuning_type, - gradient_accumulation_steps, learning_rate, logging_steps, lora_dropout, lora_rank, lora_target, - lr_scheduler_type, max_grad_norm, max_samples, max_source_length, max_target_length, model_name, - model_name_or_path, num_train_epochs, output_dir, padding_side, quantization_bit, - resume_lora_training, save_steps, source_prefix, template, val_size, warmup_steps): + ) -> Tuple[str, str, List[str], str, Dict[str, Any]]: if checkpoints: checkpoint_dir = ",".join( - [os.path.join(get_save_dir(model_name), finetuning_type, checkpoint) for checkpoint in checkpoints] + [os.path.join(get_save_dir(model_name), finetuning_type, ckpt) for ckpt in checkpoints] ) else: checkpoint_dir = None + + output_dir = os.path.join(get_save_dir(model_name), finetuning_type, output_dir) + args = dict( stage="sft", - model_name_or_path=model_name_or_path, + model_name_or_path=get_model_path(model_name), do_train=True, overwrite_cache=True, checkpoint_dir=checkpoint_dir, @@ -171,14 +135,16 @@ class Runner: resume_lora_training=resume_lora_training, output_dir=output_dir ) + if val_size > 1e-6: args["val_size"] = val_size args["evaluation_strategy"] = "steps" args["eval_steps"] = save_steps args["load_best_model_at_end"] = True - return args - def run_eval( + return lang, model_name, dataset, output_dir, args + + def _parse_eval_args( self, lang: str, model_name: str, @@ -194,12 +160,7 @@ class Runner: max_samples: str, batch_size: int, predict: bool - ) -> Generator[str, None, 
None]: - model_name_or_path, error, logger_handler, trainer_callback = self.initialize(lang, model_name, dataset) - if error: - yield error, gr.update(visible=False) - return - + ) -> Tuple[str, str, List[str], str, Dict[str, Any]]: if checkpoints: checkpoint_dir = ",".join( [os.path.join(get_save_dir(model_name), finetuning_type, checkpoint) for checkpoint in checkpoints] @@ -211,7 +172,7 @@ class Runner: args = dict( stage="sft", - model_name_or_path=model_name_or_path, + model_name_or_path=get_model_path(model_name), do_eval=True, overwrite_cache=True, predict_with_generate=True, @@ -233,7 +194,33 @@ class Runner: args.pop("do_eval", None) args["do_predict"] = True - run_kwargs = dict(args=args, callbacks=[trainer_callback]) + return lang, model_name, dataset, output_dir, args + + def preview_train(self, *args) -> Generator[Tuple[str, Dict[str, Any]], None, None]: + lang, model_name, dataset, _, args = self._parse_train_args(*args) + error = self._initialize(lang, model_name, dataset) + if error: + yield error, gr.update(visible=False) + else: + yield gen_cmd(args), gr.update(visible=False) + + def preview_eval(self, *args) -> Generator[Tuple[str, Dict[str, Any]], None, None]: + lang, model_name, dataset, _, args = self._parse_eval_args(*args) + error = self._initialize(lang, model_name, dataset) + if error: + yield error, gr.update(visible=False) + else: + yield gen_cmd(args), gr.update(visible=False) + + def run_train(self, *args) -> Generator[Tuple[str, Dict[str, Any]], None, None]: + lang, model_name, dataset, output_dir, args = self._parse_train_args(*args) + error = self._initialize(lang, model_name, dataset) + if error: + yield error, gr.update(visible=False) + return + + self.running = True + run_kwargs = dict(args=args, callbacks=[self.trainer_callback]) thread = threading.Thread(target=run_exp, kwargs=run_kwargs) thread.start() @@ -242,60 +229,37 @@ class Runner: if self.aborted: yield ALERTS["info_aborting"][lang], gr.update(visible=False) else: - yield logger_handler.log, update_process_bar(trainer_callback) + yield self.logger_handler.log, update_process_bar(self.trainer_callback) + + if os.path.exists(os.path.join(output_dir, TRAINING_ARGS_NAME)): + finish_info = ALERTS["info_finished"][lang] + else: + finish_info = ALERTS["err_failed"][lang] + + yield self._finalize(lang, finish_info), gr.update(visible=False) + + def run_eval(self, *args) -> Generator[str, None, None]: + lang, model_name, dataset, output_dir, args = self._parse_eval_args(*args) + error = self._initialize(lang, model_name, dataset) + if error: + yield error, gr.update(visible=False) + return + + self.running = True + run_kwargs = dict(args=args, callbacks=[self.trainer_callback]) + thread = threading.Thread(target=run_exp, kwargs=run_kwargs) + thread.start() + + while thread.is_alive(): + time.sleep(2) + if self.aborted: + yield ALERTS["info_aborting"][lang], gr.update(visible=False) + else: + yield self.logger_handler.log, update_process_bar(self.trainer_callback) if os.path.exists(os.path.join(output_dir, "all_results.json")): finish_info = get_eval_results(os.path.join(output_dir, "all_results.json")) else: finish_info = ALERTS["err_failed"][lang] - yield self.finalize(lang, finish_info), gr.update(visible=False) - - def preview_sft_script( - self, - lang: str, - model_name: str, - checkpoints: List[str], - finetuning_type: str, - quantization_bit: str, - template: str, - source_prefix: str, - dataset_dir: str, - dataset: List[str], - max_source_length: int, - max_target_length: int, - learning_rate: 
str, - num_train_epochs: str, - max_samples: str, - batch_size: int, - gradient_accumulation_steps: int, - lr_scheduler_type: str, - max_grad_norm: str, - val_size: float, - logging_steps: int, - save_steps: int, - warmup_steps: int, - compute_type: str, - padding_side: str, - lora_rank: int, - lora_dropout: float, - lora_target: str, - resume_lora_training: bool, - output_dir: str - ): - model_name_or_path, error, logger_handler, trainer_callback = self.initialize(lang, model_name, dataset) - output_dir = os.path.join(get_save_dir(model_name), finetuning_type, output_dir) - - args = self._build_args(batch_size, checkpoints, compute_type, dataset, dataset_dir, finetuning_type, - gradient_accumulation_steps, learning_rate, logging_steps, lora_dropout, lora_rank, - lora_target, lr_scheduler_type, max_grad_norm, max_samples, max_source_length, - max_target_length, model_name, model_name_or_path, num_train_epochs, output_dir, - padding_side, quantization_bit, resume_lora_training, save_steps, source_prefix, - template, val_size, warmup_steps) - script_lines = [SFT_SCRIPT_PREFIX] - for param_key, param_value in args.items(): - # filter None - if param_value: - script_lines.append(" --" + param_key + " " + str(param_value) + " ") - script_str = "\\\n".join(script_lines) - return gr.update(value=script_str) + yield self._finalize(lang, finish_info), gr.update(visible=False) diff --git a/src/llmtuner/webui/utils.py b/src/llmtuner/webui/utils.py index fb22bd0c..6ca55e14 100644 --- a/src/llmtuner/webui/utils.py +++ b/src/llmtuner/webui/utils.py @@ -62,6 +62,16 @@ def can_quantize(finetuning_type: str) -> Dict[str, Any]: return gr.update(interactive=True) +def gen_cmd(args: Dict[str, Any]) -> str: + cmd_lines = ["CUDA_VISIBLE_DEVICES=0 python "] + for k, v in args.items(): + if v is not None and v is not False and v != "": + cmd_lines.append(" --{} {} ".format(k, str(v))) + cmd_text = "\\\n".join(cmd_lines) + cmd_text = "```bash\n{}\n```".format(cmd_text) + return cmd_text + + def get_eval_results(path: os.PathLike) -> str: with open(path, "r", encoding="utf-8") as f: result = json.dumps(json.load(f), indent=4)
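Two short illustrative sketches follow; they are not part of the diff. The first is a minimal example of the linear RoPE scaling factor chosen in loader.py during training: parser.py sets model_max_length to max_source_length + max_target_length, and the factor is that length divided by the model's original max_position_embeddings, rounded up to a whole number. The concrete values below are hypothetical.

```python
import math

# Hypothetical values: a LLaMA-style model with 2048 original positions,
# fine-tuned with max_source_length + max_target_length = 3072.
max_position_embeddings = 2048   # config.max_position_embeddings
model_max_length = 3072          # model_args.model_max_length

if model_max_length <= max_position_embeddings:
    scaling_factor = 1.0  # inputs already fit; loader.py only logs a warning in this case
else:
    # same rule as the diff: round the length ratio up to a whole factor
    scaling_factor = float(math.ceil(model_max_length / max_position_embeddings))

rope_scaling = {"type": "linear", "factor": scaling_factor}
print(rope_scaling)  # {'type': 'linear', 'factor': 2.0}
```

The second is a standalone sketch mirroring the behavior of the new `gen_cmd` helper in `src/llmtuner/webui/utils.py`, with a hypothetical argument dict showing how None, False, and empty values are dropped from the previewed command.

```python
from typing import Any, Dict


def gen_cmd(args: Dict[str, Any]) -> str:
    # mirrors the helper added to src/llmtuner/webui/utils.py
    cmd_lines = ["CUDA_VISIBLE_DEVICES=0 python "]
    for k, v in args.items():
        if v is not None and v is not False and v != "":
            cmd_lines.append("    --{} {} ".format(k, str(v)))
    fence = "`" * 3  # avoid a literal code fence inside this example
    return "{0}bash\n{1}\n{0}".format(fence, "\\\n".join(cmd_lines))


# Hypothetical arguments, for illustration only.
print(gen_cmd(dict(
    stage="sft",
    model_name_or_path="path/to/model",
    do_train=True,
    finetuning_type="lora",
    checkpoint_dir=None,          # dropped from the preview
    predict_with_generate=False,  # dropped as well
    output_dir="saves/test-run"
)))
```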