From a8318723a4a01a70c7c32acd0008fb598507b99f Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sat, 8 Jun 2024 00:22:16 +0800 Subject: [PATCH 01/15] add resume args in webui Former-commit-id: 06e5d136a4916413d1c116e341ba7d5136d7748a --- src/llamafactory/extras/constants.py | 6 ++- src/llamafactory/model/adapter.py | 4 +- src/llamafactory/webui/common.py | 17 ++++---- src/llamafactory/webui/components/top.py | 3 +- src/llamafactory/webui/components/train.py | 10 ++++- src/llamafactory/webui/runner.py | 48 ++++++++++++++++------ src/llamafactory/webui/utils.py | 29 ++++--------- 7 files changed, 68 insertions(+), 49 deletions(-) diff --git a/src/llamafactory/extras/constants.py b/src/llamafactory/extras/constants.py index 4099fe56..7d96fb5f 100644 --- a/src/llamafactory/extras/constants.py +++ b/src/llamafactory/extras/constants.py @@ -35,6 +35,8 @@ IGNORE_INDEX = -100 LAYERNORM_NAMES = {"norm", "ln"} +LLAMABOARD_CONFIG = "llamaboard_config.yaml" + METHODS = ["full", "freeze", "lora"] MOD_SUPPORTED_MODELS = {"bloom", "falcon", "gemma", "llama", "mistral", "mixtral", "phi", "starcoder2"} @@ -47,10 +49,10 @@ SUBJECTS = ["Average", "STEM", "Social Sciences", "Humanities", "Other"] SUPPORTED_MODELS = OrderedDict() -TRAINER_CONFIG = "trainer_config.yaml" - TRAINER_LOG = "trainer_log.jsonl" +TRAINING_ARGS = "training_args.yaml" + TRAINING_STAGES = { "Supervised Fine-Tuning": "sft", "Reward Modeling": "rm", diff --git a/src/llamafactory/model/adapter.py b/src/llamafactory/model/adapter.py index 1a77d613..d17873f7 100644 --- a/src/llamafactory/model/adapter.py +++ b/src/llamafactory/model/adapter.py @@ -50,7 +50,7 @@ def init_adapter( logger.info("Upcasting trainable params to float32.") cast_trainable_params_to_fp32 = True - if finetuning_args.finetuning_type == "full" and is_trainable: + if is_trainable and finetuning_args.finetuning_type == "full": logger.info("Fine-tuning method: Full") forbidden_modules = set() @@ -67,7 +67,7 @@ def init_adapter( else: param.requires_grad_(False) - if finetuning_args.finetuning_type == "freeze" and is_trainable: + if is_trainable and finetuning_args.finetuning_type == "freeze": logger.info("Fine-tuning method: Freeze") if model_args.visual_inputs: diff --git a/src/llamafactory/webui/common.py b/src/llamafactory/webui/common.py index 304b56a5..37b38df0 100644 --- a/src/llamafactory/webui/common.py +++ b/src/llamafactory/webui/common.py @@ -50,13 +50,6 @@ def get_config_path() -> os.PathLike: return os.path.join(DEFAULT_CACHE_DIR, USER_CONFIG) -def get_arg_save_path(config_path: str) -> os.PathLike: - r""" - Gets the path to saved arguments. - """ - return os.path.join(DEFAULT_CONFIG_DIR, config_path) - - def load_config() -> Dict[str, Any]: r""" Loads user config if exists. @@ -77,24 +70,28 @@ def save_config(lang: str, model_name: Optional[str] = None, model_path: Optiona user_config["lang"] = lang or user_config["lang"] if model_name: user_config["last_model"] = model_name + + if model_name and model_path: user_config["path_dict"][model_name] = model_path + with open(get_config_path(), "w", encoding="utf-8") as f: safe_dump(user_config, f) -def get_model_path(model_name: str) -> Optional[str]: +def get_model_path(model_name: str) -> str: r""" Gets the model path according to the model name. 
""" user_config = load_config() - path_dict: Dict[DownloadSource, str] = SUPPORTED_MODELS.get(model_name, defaultdict(str)) - model_path = user_config["path_dict"].get(model_name, None) or path_dict.get(DownloadSource.DEFAULT, None) + path_dict: Dict["DownloadSource", str] = SUPPORTED_MODELS.get(model_name, defaultdict(str)) + model_path = user_config["path_dict"].get(model_name, "") or path_dict.get(DownloadSource.DEFAULT, "") if ( use_modelscope() and path_dict.get(DownloadSource.MODELSCOPE) and model_path == path_dict.get(DownloadSource.DEFAULT) ): # replace path model_path = path_dict.get(DownloadSource.MODELSCOPE) + return model_path diff --git a/src/llamafactory/webui/components/top.py b/src/llamafactory/webui/components/top.py index c794d0aa..fd0ead3d 100644 --- a/src/llamafactory/webui/components/top.py +++ b/src/llamafactory/webui/components/top.py @@ -36,7 +36,8 @@ def create_top() -> Dict[str, "Component"]: visual_inputs = gr.Checkbox(scale=1) model_name.change(get_model_info, [model_name], [model_path, template, visual_inputs], queue=False) - model_path.change(save_config, inputs=[lang, model_name, model_path], queue=False) + model_name.input(save_config, inputs=[lang, model_name], queue=False) + model_path.input(save_config, inputs=[lang, model_name, model_path], queue=False) finetuning_type.change(can_quantize, [finetuning_type], [quantization_bit], queue=False) checkpoint_path.focus(list_checkpoints, [model_name, finetuning_type], [checkpoint_path], queue=False) diff --git a/src/llamafactory/webui/components/train.py b/src/llamafactory/webui/components/train.py index 74f8ef2a..72dfc858 100644 --- a/src/llamafactory/webui/components/train.py +++ b/src/llamafactory/webui/components/train.py @@ -6,7 +6,7 @@ from ...extras.constants import TRAINING_STAGES from ...extras.misc import get_device_count from ...extras.packages import is_gradio_available from ..common import DEFAULT_DATA_DIR, list_checkpoints, list_datasets -from ..utils import change_stage, check_output_dir, list_config_paths, list_output_dirs +from ..utils import change_stage, list_config_paths, list_output_dirs from .data import create_preview_box @@ -319,7 +319,13 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]: finetuning_type.change(list_output_dirs, [model_name, finetuning_type, current_time], [output_dir], queue=False) output_dir.change( list_output_dirs, [model_name, finetuning_type, current_time], [output_dir], concurrency_limit=None - ).then(check_output_dir, inputs=[lang, model_name, finetuning_type, output_dir], concurrency_limit=None) + ) + output_dir.input( + engine.runner.check_output_dir, + [lang, model_name, finetuning_type, output_dir], + list(input_elems) + [output_box], + concurrency_limit=None, + ) config_path.change(list_config_paths, [current_time], [config_path], queue=False) return elem_dict diff --git a/src/llamafactory/webui/runner.py b/src/llamafactory/webui/runner.py index c046152c..35014628 100644 --- a/src/llamafactory/webui/runner.py +++ b/src/llamafactory/webui/runner.py @@ -5,11 +5,11 @@ from typing import TYPE_CHECKING, Any, Dict, Generator, Optional from transformers.trainer import TRAINING_ARGS_NAME -from ..extras.constants import PEFT_METHODS, TRAINING_STAGES +from ..extras.constants import LLAMABOARD_CONFIG, PEFT_METHODS, TRAINING_STAGES from ..extras.misc import is_gpu_or_npu_available, torch_gc from ..extras.packages import is_gradio_available -from .common import DEFAULT_CACHE_DIR, get_save_dir, load_config -from .locales import ALERTS +from .common import 
DEFAULT_CACHE_DIR, DEFAULT_CONFIG_DIR, get_save_dir, load_config +from .locales import ALERTS, LOCALES from .utils import abort_leaf_process, gen_cmd, get_eval_results, get_trainer_info, load_args, save_args, save_cmd @@ -276,6 +276,10 @@ class Runner: else: self.do_train, self.running_data = do_train, data args = self._parse_train_args(data) if do_train else self._parse_eval_args(data) + + os.makedirs(args["output_dir"], exist_ok=True) + save_args(os.path.join(args["output_dir"], LLAMABOARD_CONFIG), self._form_config_dict(data)) + env = deepcopy(os.environ) env["LLAMABOARD_ENABLED"] = "1" if args.get("deepspeed", None) is not None: @@ -284,6 +288,16 @@ class Runner: self.trainer = Popen("llamafactory-cli train {}".format(save_cmd(args)), env=env, shell=True) yield from self.monitor() + def _form_config_dict(self, data: Dict["Component", Any]) -> Dict[str, Any]: + config_dict = {} + skip_ids = ["top.lang", "top.model_path", "train.output_dir", "train.config_path", "train.device_count"] + for elem, value in data.items(): + elem_id = self.manager.get_id_by_elem(elem) + if elem_id not in skip_ids: + config_dict[elem_id] = value + + return config_dict + def preview_train(self, data): yield from self._preview(data, do_train=True) @@ -349,28 +363,24 @@ class Runner: } yield return_dict - def save_args(self, data: dict): + def save_args(self, data): output_box = self.manager.get_elem_by_id("train.output_box") error = self._initialize(data, do_train=True, from_preview=True) if error: gr.Warning(error) return {output_box: error} - config_dict: Dict[str, Any] = {} lang = data[self.manager.get_elem_by_id("top.lang")] config_path = data[self.manager.get_elem_by_id("train.config_path")] - skip_ids = ["top.lang", "top.model_path", "train.output_dir", "train.config_path", "train.device_count"] - for elem, value in data.items(): - elem_id = self.manager.get_id_by_elem(elem) - if elem_id not in skip_ids: - config_dict[elem_id] = value + os.makedirs(DEFAULT_CONFIG_DIR, exist_ok=True) + save_path = os.path.join(DEFAULT_CONFIG_DIR, config_path) - save_path = save_args(config_path, config_dict) + save_args(save_path, self._form_config_dict(data)) return {output_box: ALERTS["info_config_saved"][lang] + save_path} def load_args(self, lang: str, config_path: str): output_box = self.manager.get_elem_by_id("train.output_box") - config_dict = load_args(config_path) + config_dict = load_args(os.path.join(DEFAULT_CONFIG_DIR, config_path)) if config_dict is None: gr.Warning(ALERTS["err_config_not_found"][lang]) return {output_box: ALERTS["err_config_not_found"][lang]} @@ -380,3 +390,17 @@ class Runner: output_dict[self.manager.get_elem_by_id(elem_id)] = value return output_dict + + def check_output_dir(self, lang: str, model_name: str, finetuning_type: str, output_dir: str): + output_box = self.manager.get_elem_by_id("train.output_box") + output_dict: Dict["Component", Any] = {output_box: LOCALES["output_box"][lang]["value"]} + if model_name and output_dir and os.path.isdir(get_save_dir(model_name, finetuning_type, output_dir)): + gr.Warning(ALERTS["warn_output_dir_exists"][lang]) + output_dict[output_box] = ALERTS["warn_output_dir_exists"][lang] + + output_dir = get_save_dir(model_name, finetuning_type, output_dir) + config_dict = load_args(os.path.join(output_dir, LLAMABOARD_CONFIG)) # load llamaboard config + for elem_id, value in config_dict.items(): + output_dict[self.manager.get_elem_by_id(elem_id)] = value + + return output_dict diff --git a/src/llamafactory/webui/utils.py b/src/llamafactory/webui/utils.py index 
23e62dca..e39f2aa4 100644 --- a/src/llamafactory/webui/utils.py +++ b/src/llamafactory/webui/utils.py @@ -8,10 +8,10 @@ import psutil from transformers.trainer_utils import get_last_checkpoint from yaml import safe_dump, safe_load -from ..extras.constants import PEFT_METHODS, RUNNING_LOG, TRAINER_CONFIG, TRAINER_LOG, TRAINING_STAGES +from ..extras.constants import PEFT_METHODS, RUNNING_LOG, TRAINER_LOG, TRAINING_ARGS, TRAINING_STAGES from ..extras.packages import is_gradio_available, is_matplotlib_available from ..extras.ploting import gen_loss_plot -from .common import DEFAULT_CACHE_DIR, DEFAULT_CONFIG_DIR, get_arg_save_path, get_save_dir +from .common import DEFAULT_CACHE_DIR, DEFAULT_CONFIG_DIR, get_save_dir from .locales import ALERTS @@ -93,10 +93,10 @@ def save_cmd(args: Dict[str, Any]) -> str: output_dir = args["output_dir"] os.makedirs(output_dir, exist_ok=True) - with open(os.path.join(output_dir, TRAINER_CONFIG), "w", encoding="utf-8") as f: + with open(os.path.join(output_dir, TRAINING_ARGS), "w", encoding="utf-8") as f: safe_dump(clean_cmd(args), f) - return os.path.join(output_dir, TRAINER_CONFIG) + return os.path.join(output_dir, TRAINING_ARGS) def get_eval_results(path: os.PathLike) -> str: @@ -157,22 +157,19 @@ def load_args(config_path: str) -> Optional[Dict[str, Any]]: Loads saved arguments. """ try: - with open(get_arg_save_path(config_path), "r", encoding="utf-8") as f: + with open(config_path, "r", encoding="utf-8") as f: return safe_load(f) except Exception: return None -def save_args(config_path: str, config_dict: Dict[str, Any]) -> str: +def save_args(config_path: str, config_dict: Dict[str, Any]): r""" Saves arguments. """ - os.makedirs(DEFAULT_CONFIG_DIR, exist_ok=True) - with open(get_arg_save_path(config_path), "w", encoding="utf-8") as f: + with open(config_path, "w", encoding="utf-8") as f: safe_dump(config_dict, f) - return str(get_arg_save_path(config_path)) - def list_config_paths(current_time: str) -> "gr.Dropdown": r""" @@ -181,13 +178,13 @@ def list_config_paths(current_time: str) -> "gr.Dropdown": config_files = ["{}.yaml".format(current_time)] if os.path.isdir(DEFAULT_CONFIG_DIR): for file_name in os.listdir(DEFAULT_CONFIG_DIR): - if file_name.endswith(".yaml"): + if file_name.endswith(".yaml") and file_name not in config_files: config_files.append(file_name) return gr.Dropdown(choices=config_files) -def list_output_dirs(model_name: str, finetuning_type: str, current_time: str) -> "gr.Dropdown": +def list_output_dirs(model_name: Optional[str], finetuning_type: str, current_time: str) -> "gr.Dropdown": r""" Lists all the directories that can resume from. """ @@ -203,14 +200,6 @@ def list_output_dirs(model_name: str, finetuning_type: str, current_time: str) - return gr.Dropdown(choices=output_dirs) -def check_output_dir(lang: str, model_name: str, finetuning_type: str, output_dir: str) -> None: - r""" - Check if output dir exists. - """ - if model_name and output_dir and os.path.isdir(get_save_dir(model_name, finetuning_type, output_dir)): - gr.Warning(ALERTS["warn_output_dir_exists"][lang]) - - def create_ds_config() -> None: r""" Creates deepspeed config. 
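Editor's note, not part of the patch: the commit above changes save_args() and load_args() to take full file paths, so the runner decides whether a config goes to DEFAULT_CONFIG_DIR (the "save arguments" button) or into the run's output_dir as llamaboard_config.yaml for resuming. A minimal sketch of that YAML round-trip, assuming PyYAML (safe_dump/safe_load, as in the diff); the directory name and config keys below are invented for illustration.

import os
from yaml import safe_dump, safe_load

# Hypothetical output directory and form values; the real ones come from the Gradio UI.
output_dir = os.path.join("saves", "demo-model", "lora", "train_demo")
config_dict = {"top.model_name": "Demo-Model", "train.learning_rate": "5e-5"}

os.makedirs(output_dir, exist_ok=True)
config_path = os.path.join(output_dir, "llamaboard_config.yaml")

# What save_args() does once the caller has built the path.
with open(config_path, "w", encoding="utf-8") as f:
    safe_dump(config_dict, f)

# What load_args() does when check_output_dir() restores a previous run.
with open(config_path, "r", encoding="utf-8") as f:
    restored = safe_load(f)

assert restored == config_dict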
From bad35d173013e111019e968c5789e1d4417625c5 Mon Sep 17 00:00:00 2001 From: hoshi-hiyouga Date: Sat, 8 Jun 2024 00:45:02 +0800 Subject: [PATCH 02/15] fix #4139 Former-commit-id: cfd62283a9772fc854b852d2a1b71699f79a0048 --- src/llamafactory/model/adapter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llamafactory/model/adapter.py b/src/llamafactory/model/adapter.py index d17873f7..bd14a52f 100644 --- a/src/llamafactory/model/adapter.py +++ b/src/llamafactory/model/adapter.py @@ -239,7 +239,7 @@ def init_adapter( ) model = get_peft_model(model, lora_config) - if cast_trainable_params_to_fp32: + if is_trainable and cast_trainable_params_to_fp32: for param in filter(lambda p: p.requires_grad, model.parameters()): param.data = param.data.to(torch.float32) From 4f0ce9be4ed5ee4514753585f013d8734b8db9e7 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sat, 8 Jun 2024 00:47:23 +0800 Subject: [PATCH 03/15] reorganize adapter code Former-commit-id: 54cd743ebfbd296ae9eaf10c33f59e127f451785 --- src/llamafactory/hparams/model_args.py | 9 +- src/llamafactory/model/adapter.py | 408 +++++++++++++------------ 2 files changed, 224 insertions(+), 193 deletions(-) diff --git a/src/llamafactory/hparams/model_args.py b/src/llamafactory/hparams/model_args.py index 20271173..6352a420 100644 --- a/src/llamafactory/hparams/model_args.py +++ b/src/llamafactory/hparams/model_args.py @@ -15,7 +15,12 @@ class ModelArguments: ) adapter_name_or_path: Optional[str] = field( default=None, - metadata={"help": "Path to the adapter weight or identifier from huggingface.co/models."}, + metadata={ + "help": ( + "Path to the adapter weight or identifier from huggingface.co/models. " + "Use commas to separate multiple adapters." + ) + }, ) cache_dir: Optional[str] = field( default=None, @@ -35,7 +40,7 @@ class ModelArguments: ) new_special_tokens: Optional[str] = field( default=None, - metadata={"help": "Special tokens to be added into the tokenizer."}, + metadata={"help": "Special tokens to be added into the tokenizer. 
Use commas to separate multiple tokens."}, ) model_revision: str = field( default="main", diff --git a/src/llamafactory/model/adapter.py b/src/llamafactory/model/adapter.py index bd14a52f..f4e501a7 100644 --- a/src/llamafactory/model/adapter.py +++ b/src/llamafactory/model/adapter.py @@ -21,6 +21,218 @@ if TYPE_CHECKING: logger = get_logger(__name__) +def _setup_full_tuning( + model: "PreTrainedModel", + model_args: "ModelArguments", + finetuning_args: "FinetuningArguments", + cast_trainable_params_to_fp32: bool, +) -> None: + logger.info("Fine-tuning method: Full") + forbidden_modules = set() + if model_args.visual_inputs and finetuning_args.freeze_vision_tower: + forbidden_modules.add("vision_tower") + + if model_args.visual_inputs and finetuning_args.train_mm_proj_only: + forbidden_modules.add("language_model") + + for name, param in model.named_parameters(): + if not any(forbidden_module in name for forbidden_module in forbidden_modules): + if cast_trainable_params_to_fp32: + param.data = param.data.to(torch.float32) + else: + param.requires_grad_(False) + + +def _setup_freeze_tuning( + model: "PreTrainedModel", + model_args: "ModelArguments", + finetuning_args: "FinetuningArguments", + cast_trainable_params_to_fp32: bool, +) -> None: + logger.info("Fine-tuning method: Freeze") + if model_args.visual_inputs: + config = model.config.text_config + else: + config = model.config + + num_layers = ( + getattr(config, "num_hidden_layers", None) + or getattr(config, "num_layers", None) + or getattr(config, "n_layer", None) + ) + if not num_layers: + raise ValueError("Current model does not support freeze tuning.") + + if finetuning_args.use_llama_pro: + if num_layers % finetuning_args.freeze_trainable_layers != 0: + raise ValueError( + "`num_layers` {} should be divisible by `num_layer_trainable` {}.".format( + num_layers, finetuning_args.freeze_trainable_layers + ) + ) + + stride = num_layers // finetuning_args.freeze_trainable_layers + trainable_layer_ids = range(stride - 1, num_layers + stride - 1, stride) + elif finetuning_args.freeze_trainable_layers > 0: # fine-tuning the last n layers if num_layer_trainable > 0 + trainable_layer_ids = range(max(0, num_layers - finetuning_args.freeze_trainable_layers), num_layers) + else: # fine-tuning the first n layers if num_layer_trainable < 0 + trainable_layer_ids = range(min(-finetuning_args.freeze_trainable_layers, num_layers)) + + hidden_modules = set() + non_hidden_modules = set() + for name, _ in model.named_parameters(): + if ".0." in name: + hidden_modules.add(name.split(".0.")[-1].split(".")[0]) + elif ".1." 
in name: # MoD starts from layer 1 + hidden_modules.add(name.split(".1.")[-1].split(".")[0]) + + if re.search(r"\.\d+\.", name) is None: + non_hidden_modules.add(name.split(".")[-2]) + + trainable_layers = [] + for module_name in finetuning_args.freeze_trainable_modules: + if module_name != "all" and module_name not in hidden_modules: + raise ValueError( + "Module {} is not found, please choose from {}".format(module_name, ", ".join(hidden_modules)) + ) + + for idx in trainable_layer_ids: + trainable_layers.append(".{:d}.{}".format(idx, module_name if module_name != "all" else "")) + + if finetuning_args.freeze_extra_modules: + for module_name in finetuning_args.freeze_extra_modules: + if module_name not in non_hidden_modules: + raise ValueError( + "Module {} is not found, please choose from {}".format(module_name, ", ".join(non_hidden_modules)) + ) + + trainable_layers.append(module_name) + + forbidden_modules = set() + if model_args.visual_inputs and finetuning_args.freeze_vision_tower: + forbidden_modules.add("vision_tower") + + for name, param in model.named_parameters(): + if any(trainable_layer in name for trainable_layer in trainable_layers) and not any( + forbidden_module in name for forbidden_module in forbidden_modules + ): + if cast_trainable_params_to_fp32: + param.data = param.data.to(torch.float32) + else: + param.requires_grad_(False) + + logger.info("Set trainable layers: {}".format(",".join(trainable_layers))) + + +def _setup_lora_tuning( + config: "PretrainedConfig", + model: "PreTrainedModel", + model_args: "ModelArguments", + finetuning_args: "FinetuningArguments", + is_trainable: bool, + cast_trainable_params_to_fp32: bool, +) -> "PeftModel": + logger.info("Fine-tuning method: {}".format("DoRA" if finetuning_args.use_dora else "LoRA")) + adapter_to_resume = None + + if model_args.adapter_name_or_path is not None: + is_mergeable = True + if getattr(model, "quantization_method", None): # merge lora in quantized model is unstable + assert len(model_args.adapter_name_or_path) == 1, "Quantized model only accepts a single adapter." + is_mergeable = False + + if is_deepspeed_zero3_enabled(): + assert len(model_args.adapter_name_or_path) == 1, "Cannot use multiple adapters in DeepSpeed ZeRO-3." + is_mergeable = False + + if model_args.use_unsloth: + assert len(model_args.adapter_name_or_path) == 1, "Unsloth model only accepts a single adapter." 
+ is_mergeable = False + + if (is_trainable and not finetuning_args.create_new_adapter) or (not is_mergeable): + adapter_to_merge = model_args.adapter_name_or_path[:-1] + adapter_to_resume = model_args.adapter_name_or_path[-1] + else: + adapter_to_merge = model_args.adapter_name_or_path + + for adapter in adapter_to_merge: + model: "LoraModel" = PeftModel.from_pretrained(model, adapter, offload_folder=model_args.offload_folder) + model = model.merge_and_unload() + + if len(adapter_to_merge) > 0: + logger.info("Merged {} adapter(s).".format(len(adapter_to_merge))) + + if adapter_to_resume is not None: # resume lora training + if model_args.use_unsloth: + model = load_unsloth_peft_model(config, model_args, is_trainable=is_trainable) + else: + model = PeftModel.from_pretrained( + model, + adapter_to_resume, + is_trainable=is_trainable, + offload_folder=model_args.offload_folder, + ) + + if is_trainable and adapter_to_resume is None: # create new lora weights while training + if len(finetuning_args.lora_target) == 1 and finetuning_args.lora_target[0] == "all": + target_modules = find_all_linear_modules(model, finetuning_args.freeze_vision_tower) + else: + target_modules = finetuning_args.lora_target + + if finetuning_args.use_llama_pro: + target_modules = find_expanded_modules(model, target_modules, finetuning_args.freeze_trainable_layers) + + if model_args.visual_inputs and finetuning_args.freeze_vision_tower: + target_modules = "^(?!.*vision_tower).*(?:{}).*".format("|".join(target_modules)) + + if ( + finetuning_args.use_dora + and getattr(model, "quantization_method", None) is not None + and getattr(model, "quantization_method", None) != QuantizationMethod.BITS_AND_BYTES + ): + raise ValueError("DoRA is not compatible with PTQ-quantized models.") + + if model_args.resize_vocab and finetuning_args.additional_target is None: + input_embeddings = model.get_input_embeddings() + output_embeddings = model.get_output_embeddings() + module_names = set() + for name, module in model.named_modules(): + if module in [input_embeddings, output_embeddings]: + module_names.add(name.split(".")[-1]) + + finetuning_args.additional_target = module_names + logger.warning("Vocab has been resized, add {} to trainable params.".format(",".join(module_names))) + + peft_kwargs = { + "r": finetuning_args.lora_rank, + "target_modules": target_modules, + "lora_alpha": finetuning_args.lora_alpha, + "lora_dropout": finetuning_args.lora_dropout, + "use_rslora": finetuning_args.use_rslora, + "modules_to_save": finetuning_args.additional_target, + } + + if model_args.use_unsloth: + model = get_unsloth_peft_model(model, model_args, peft_kwargs) + else: + lora_config = LoraConfig( + task_type=TaskType.CAUSAL_LM, + inference_mode=False, + use_dora=finetuning_args.use_dora, + **peft_kwargs, + ) + model = get_peft_model(model, lora_config) + + if is_trainable and cast_trainable_params_to_fp32: + for param in filter(lambda p: p.requires_grad, model.parameters()): + param.data = param.data.to(torch.float32) + + if model_args.adapter_name_or_path is not None: + logger.info("Loaded adapter(s): {}".format(",".join(model_args.adapter_name_or_path))) + + return model + + def init_adapter( config: "PretrainedConfig", model: "PreTrainedModel", @@ -35,7 +247,6 @@ def init_adapter( Note that the trainable parameters must be cast to float32. 
""" - if (not is_trainable) and model_args.adapter_name_or_path is None: logger.info("Adapter is not found at evaluation, load the base model.") return model @@ -51,199 +262,14 @@ def init_adapter( cast_trainable_params_to_fp32 = True if is_trainable and finetuning_args.finetuning_type == "full": - logger.info("Fine-tuning method: Full") - - forbidden_modules = set() - if model_args.visual_inputs and finetuning_args.freeze_vision_tower: - forbidden_modules.add("vision_tower") - - if model_args.visual_inputs and finetuning_args.train_mm_proj_only: - forbidden_modules.add("language_model") - - for name, param in model.named_parameters(): - if not any(forbidden_module in name for forbidden_module in forbidden_modules): - if cast_trainable_params_to_fp32: - param.data = param.data.to(torch.float32) - else: - param.requires_grad_(False) + _setup_full_tuning(model, model_args, finetuning_args, cast_trainable_params_to_fp32) if is_trainable and finetuning_args.finetuning_type == "freeze": - logger.info("Fine-tuning method: Freeze") - - if model_args.visual_inputs: - config = model.config.text_config - else: - config = model.config - - num_layers = ( - getattr(config, "num_hidden_layers", None) - or getattr(config, "num_layers", None) - or getattr(config, "n_layer", None) - ) - if not num_layers: - raise ValueError("Current model does not support freeze tuning.") - - if finetuning_args.use_llama_pro: - if num_layers % finetuning_args.freeze_trainable_layers != 0: - raise ValueError( - "`num_layers` {} should be divisible by `num_layer_trainable` {}.".format( - num_layers, finetuning_args.freeze_trainable_layers - ) - ) - - stride = num_layers // finetuning_args.freeze_trainable_layers - trainable_layer_ids = range(stride - 1, num_layers + stride - 1, stride) - elif finetuning_args.freeze_trainable_layers > 0: # fine-tuning the last n layers if num_layer_trainable > 0 - trainable_layer_ids = range(max(0, num_layers - finetuning_args.freeze_trainable_layers), num_layers) - else: # fine-tuning the first n layers if num_layer_trainable < 0 - trainable_layer_ids = range(min(-finetuning_args.freeze_trainable_layers, num_layers)) - - hidden_modules = set() - non_hidden_modules = set() - for name, _ in model.named_parameters(): - if ".0." in name: - hidden_modules.add(name.split(".0.")[-1].split(".")[0]) - elif ".1." 
in name: # MoD starts from layer 1 - hidden_modules.add(name.split(".1.")[-1].split(".")[0]) - - if re.search(r"\.\d+\.", name) is None: - non_hidden_modules.add(name.split(".")[-2]) - - trainable_layers = [] - for module_name in finetuning_args.freeze_trainable_modules: - if module_name != "all" and module_name not in hidden_modules: - raise ValueError( - "Module {} is not found, please choose from {}".format(module_name, ", ".join(hidden_modules)) - ) - - for idx in trainable_layer_ids: - trainable_layers.append(".{:d}.{}".format(idx, module_name if module_name != "all" else "")) - - if finetuning_args.freeze_extra_modules: - for module_name in finetuning_args.freeze_extra_modules: - if module_name not in non_hidden_modules: - raise ValueError( - "Module {} is not found, please choose from {}".format( - module_name, ", ".join(non_hidden_modules) - ) - ) - - trainable_layers.append(module_name) - - forbidden_modules = set() - if model_args.visual_inputs and finetuning_args.freeze_vision_tower: - forbidden_modules.add("vision_tower") - - for name, param in model.named_parameters(): - if any(trainable_layer in name for trainable_layer in trainable_layers) and not any( - forbidden_module in name for forbidden_module in forbidden_modules - ): - if cast_trainable_params_to_fp32: - param.data = param.data.to(torch.float32) - else: - param.requires_grad_(False) - - logger.info("Set trainable layers: {}".format(",".join(map(str, trainable_layer_ids)))) + _setup_freeze_tuning(model, model_args, finetuning_args, cast_trainable_params_to_fp32) if finetuning_args.finetuning_type == "lora": - logger.info("Fine-tuning method: {}".format("DoRA" if finetuning_args.use_dora else "LoRA")) - adapter_to_resume = None - - if model_args.adapter_name_or_path is not None: - is_mergeable = True - if getattr(model, "quantization_method", None): # merge lora in quantized model is unstable - assert len(model_args.adapter_name_or_path) == 1, "Quantized model only accepts a single adapter." - is_mergeable = False - - if is_deepspeed_zero3_enabled(): - assert len(model_args.adapter_name_or_path) == 1, "Cannot use multiple adapters in DeepSpeed ZeRO-3." - is_mergeable = False - - if model_args.use_unsloth: - assert len(model_args.adapter_name_or_path) == 1, "Unsloth model only accepts a single adapter." 
- is_mergeable = False - - if (is_trainable and not finetuning_args.create_new_adapter) or (not is_mergeable): - adapter_to_merge = model_args.adapter_name_or_path[:-1] - adapter_to_resume = model_args.adapter_name_or_path[-1] - else: - adapter_to_merge = model_args.adapter_name_or_path - - for adapter in adapter_to_merge: - model: "LoraModel" = PeftModel.from_pretrained( - model, adapter, offload_folder=model_args.offload_folder - ) - model = model.merge_and_unload() - - if len(adapter_to_merge) > 0: - logger.info("Merged {} adapter(s).".format(len(adapter_to_merge))) - - if adapter_to_resume is not None: # resume lora training - if model_args.use_unsloth: - model = load_unsloth_peft_model(config, model_args, is_trainable=is_trainable) - else: - model = PeftModel.from_pretrained( - model, - adapter_to_resume, - is_trainable=is_trainable, - offload_folder=model_args.offload_folder, - ) - - if is_trainable and adapter_to_resume is None: # create new lora weights while training - if len(finetuning_args.lora_target) == 1 and finetuning_args.lora_target[0] == "all": - target_modules = find_all_linear_modules(model, finetuning_args.freeze_vision_tower) - else: - target_modules = finetuning_args.lora_target - - if finetuning_args.use_llama_pro: - target_modules = find_expanded_modules(model, target_modules, finetuning_args.freeze_trainable_layers) - - if model_args.visual_inputs and finetuning_args.freeze_vision_tower: - target_modules = "^(?!.*vision_tower).*(?:{}).*".format("|".join(target_modules)) - - if ( - finetuning_args.use_dora - and getattr(model, "quantization_method", None) is not None - and getattr(model, "quantization_method", None) != QuantizationMethod.BITS_AND_BYTES - ): - raise ValueError("DoRA is not compatible with PTQ-quantized models.") - - if model_args.resize_vocab and finetuning_args.additional_target is None: - input_embeddings = model.get_input_embeddings() - output_embeddings = model.get_output_embeddings() - module_names = set() - for name, module in model.named_modules(): - if module in [input_embeddings, output_embeddings]: - module_names.add(name.split(".")[-1]) - - finetuning_args.additional_target = module_names - logger.warning("Vocab has been resized, add {} to trainable params.".format(",".join(module_names))) - - peft_kwargs = { - "r": finetuning_args.lora_rank, - "target_modules": target_modules, - "lora_alpha": finetuning_args.lora_alpha, - "lora_dropout": finetuning_args.lora_dropout, - "use_rslora": finetuning_args.use_rslora, - "modules_to_save": finetuning_args.additional_target, - } - - if model_args.use_unsloth: - model = get_unsloth_peft_model(model, model_args, peft_kwargs) - else: - lora_config = LoraConfig( - task_type=TaskType.CAUSAL_LM, - inference_mode=False, - use_dora=finetuning_args.use_dora, - **peft_kwargs, - ) - model = get_peft_model(model, lora_config) - - if is_trainable and cast_trainable_params_to_fp32: - for param in filter(lambda p: p.requires_grad, model.parameters()): - param.data = param.data.to(torch.float32) - - if model_args.adapter_name_or_path is not None: - logger.info("Loaded adapter(s): {}".format(",".join(model_args.adapter_name_or_path))) + model = _setup_lora_tuning( + config, model, model_args, finetuning_args, is_trainable, cast_trainable_params_to_fp32 + ) return model From 64cf35cccc3a6437e83b7dfc58678a02e73a6a3d Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sat, 8 Jun 2024 00:58:10 +0800 Subject: [PATCH 04/15] Delete .readthedocs.yaml Former-commit-id: 4b55f35662c9f424f07196ada7f94746f78f95e7 
--- .readthedocs.yaml | 19 ------------------- 1 file changed, 19 deletions(-) delete mode 100644 .readthedocs.yaml diff --git a/.readthedocs.yaml b/.readthedocs.yaml deleted file mode 100644 index 3a9eaea1..00000000 --- a/.readthedocs.yaml +++ /dev/null @@ -1,19 +0,0 @@ -# Read the Docs configuration file -# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details - -version: 2 - -build: - os: ubuntu-22.04 - tools: - python: "3.8" - -sphinx: - configuration: docs/source/conf.py - -formats: - - pdf - -python: - install: - - requirements: docs/requirements-docs.txt From 4f3e680b5770bd0b0a2b29dccbf5146202d10153 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sat, 8 Jun 2024 01:35:58 +0800 Subject: [PATCH 05/15] init unittest Former-commit-id: 1c7f0ab51906b20190f8d4db932623cff76efc01 --- README.md | 1 - README_zh.md | 1 - docker-compose.yml | 2 -- {tests => scripts}/test_toolcall.py | 2 +- tests/model/test_attn.py | 35 +++++++++++++++++++++++++++++ tests/test_throughput.py | 30 ------------------------- 6 files changed, 36 insertions(+), 35 deletions(-) rename {tests => scripts}/test_toolcall.py (97%) create mode 100644 tests/model/test_attn.py delete mode 100644 tests/test_throughput.py diff --git a/README.md b/README.md index 77d9c709..44897420 100644 --- a/README.md +++ b/README.md @@ -430,7 +430,6 @@ docker run --gpus=all \ -v ./hf_cache:/root/.cache/huggingface/ \ -v ./data:/app/data \ -v ./output:/app/output \ - -e CUDA_VISIBLE_DEVICES=0 \ -p 7860:7860 \ --shm-size 16G \ --name llama_factory \ diff --git a/README_zh.md b/README_zh.md index 9a52a963..8321d202 100644 --- a/README_zh.md +++ b/README_zh.md @@ -428,7 +428,6 @@ docker run --gpus=all \ -v ./hf_cache:/root/.cache/huggingface/ \ -v ./data:/app/data \ -v ./output:/app/output \ - -e CUDA_VISIBLE_DEVICES=0 \ -p 7860:7860 \ --shm-size 16G \ --name llama_factory \ diff --git a/docker-compose.yml b/docker-compose.yml index 333dc51e..9602a3e3 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -10,8 +10,6 @@ services: - ./hf_cache:/root/.cache/huggingface/ - ./data:/app/data - ./output:/app/output - environment: - - CUDA_VISIBLE_DEVICES=0 ports: - "7860:7860" ipc: host diff --git a/tests/test_toolcall.py b/scripts/test_toolcall.py similarity index 97% rename from tests/test_toolcall.py rename to scripts/test_toolcall.py index d36e7fec..7e460017 100644 --- a/tests/test_toolcall.py +++ b/scripts/test_toolcall.py @@ -20,7 +20,7 @@ def calculate_gpa(grades: Sequence[str], hours: Sequence[int]) -> float: def main(): client = OpenAI( - api_key="0", + api_key="{}".format(os.environ.get("API_KEY", "0")), base_url="http://localhost:{}/v1".format(os.environ.get("API_PORT", 8000)), ) tools = [ diff --git a/tests/model/test_attn.py b/tests/model/test_attn.py new file mode 100644 index 00000000..12d920ef --- /dev/null +++ b/tests/model/test_attn.py @@ -0,0 +1,35 @@ +import os + +from transformers.utils import is_flash_attn_2_available, is_torch_sdpa_available + +from llamafactory.hparams import get_infer_args +from llamafactory.model import load_model, load_tokenizer + + +TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-LlamaForCausalLM") + + +def test_attention(): + attention_available = ["off"] + if is_torch_sdpa_available(): + attention_available.append("sdpa") + + if is_flash_attn_2_available(): + attention_available.append("fa2") + + llama_attention_classes = { + "off": "LlamaAttention", + "sdpa": "LlamaSdpaAttention", + "fa2": "LlamaFlashAttention2", + } + for requested_attention in 
attention_available: + model_args, _, finetuning_args, _ = get_infer_args({ + "model_name_or_path": TINY_LLAMA, + "template": "llama2", + "flash_attn": requested_attention, + }) + tokenizer = load_tokenizer(model_args) + model = load_model(tokenizer["tokenizer"], model_args, finetuning_args) + for module in model.modules(): + if "Attention" in module.__class__.__name__: + assert module.__class__.__name__ == llama_attention_classes[requested_attention] diff --git a/tests/test_throughput.py b/tests/test_throughput.py deleted file mode 100644 index e8048910..00000000 --- a/tests/test_throughput.py +++ /dev/null @@ -1,30 +0,0 @@ -import os -import time - -from openai import OpenAI -from transformers.utils.versions import require_version - - -require_version("openai>=1.5.0", "To fix: pip install openai>=1.5.0") - - -def main(): - client = OpenAI( - api_key="0", - base_url="http://localhost:{}/v1".format(os.environ.get("API_PORT", 8000)), - ) - messages = [{"role": "user", "content": "Write a long essay about environment protection as long as possible."}] - num_tokens = 0 - start_time = time.time() - for _ in range(8): - result = client.chat.completions.create(messages=messages, model="test") - num_tokens += result.usage.completion_tokens - - elapsed_time = time.time() - start_time - print("Throughput: {:.2f} tokens/s".format(num_tokens / elapsed_time)) - # --infer_backend hf: 27.22 tokens/s (1.0x) - # --infer_backend vllm: 73.03 tokens/s (2.7x) - - -if __name__ == "__main__": - main() From 6a5e3816cf36c9ea24b24318734da9edf733612a Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sat, 8 Jun 2024 01:48:30 +0800 Subject: [PATCH 06/15] add ci Former-commit-id: aa2578bea072354656dbdb62b970e16a8b86f16b --- .github/workflows/tests.yml | 46 ++++++++++++++++++++++++++++++------- Makefile | 5 +++- 2 files changed, 42 insertions(+), 9 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index f891f711..a8246986 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -2,28 +2,58 @@ name: tests on: push: - branches: [ "main" ] + branches: + - main + paths: + - "**.py" + - "requirements.txt" + - ".github/workflows/*.yml" pull_request: - branches: [ "main" ] + branches: + - main + paths: + - "**.py" + - "requirements.txt" + - ".github/workflows/*.yml" jobs: check_code_quality: - runs-on: ubuntu-latest - steps: - uses: actions/checkout@v4 - - name: Set up Python uses: actions/setup-python@v5 with: python-version: "3.8" - + cache: "pip" + cache-dependency-path: "setup.py" - name: Install dependencies run: | python -m pip install --upgrade pip - python -m pip install ruff - + python -m pip install .[torch,metrics,quality] - name: Check quality run: | make style && make quality + + pytest: + needs: check_code_quality + strategy: + matrix: + python-version: ["3.8", "3.9"] + os: ["ubuntu-latest", "windows-latest"] + runs-on: ${{ matrix.os }} + steps: + - uses: actions/checkout@v4 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + cache: "pip" + cache-dependency-path: "setup.py" + - name: Install dependencies + run: | + python -m pip install --upgrade pip + python -m pip install .[torch,metrics,quality] + - name: Test with pytest + run: | + make test diff --git a/Makefile b/Makefile index 3a4a12c9..65be047b 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -.PHONY: quality style +.PHONY: quality style test check_dirs := scripts src tests @@ -9,3 +9,6 @@ 
quality: style: ruff check $(check_dirs) --fix ruff format $(check_dirs) + +test: + pytest tests/ From d3eb985bb62382bac38dad6d6d01ac1ea4e57490 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sat, 8 Jun 2024 01:57:36 +0800 Subject: [PATCH 07/15] fix ci Former-commit-id: 7f20e4722ae6ac907b36a3219dcd09d2ff5d071a --- .github/workflows/tests.yml | 6 +++--- setup.py | 2 +- tests/model/{test_attn.py => test_attention.py} | 14 ++++++++------ 3 files changed, 12 insertions(+), 10 deletions(-) rename tests/model/{test_attn.py => test_attention.py} (73%) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index a8246986..a66b579b 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -30,10 +30,10 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - python -m pip install .[torch,metrics,quality] + python -m pip install .[torch,dev] - name: Check quality run: | - make style && make quality + make style && make quality pytest: needs: check_code_quality @@ -53,7 +53,7 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - python -m pip install .[torch,metrics,quality] + python -m pip install .[torch,dev] - name: Test with pytest run: | make test diff --git a/setup.py b/setup.py index c32be8af..405ac46e 100644 --- a/setup.py +++ b/setup.py @@ -33,7 +33,7 @@ extra_require = { "aqlm": ["aqlm[gpu]>=1.1.0"], "qwen": ["transformers_stream_generator"], "modelscope": ["modelscope"], - "quality": ["ruff"], + "dev": ["ruff", "pytest"], } diff --git a/tests/model/test_attn.py b/tests/model/test_attention.py similarity index 73% rename from tests/model/test_attn.py rename to tests/model/test_attention.py index 12d920ef..6dd46050 100644 --- a/tests/model/test_attn.py +++ b/tests/model/test_attention.py @@ -23,13 +23,15 @@ def test_attention(): "fa2": "LlamaFlashAttention2", } for requested_attention in attention_available: - model_args, _, finetuning_args, _ = get_infer_args({ - "model_name_or_path": TINY_LLAMA, - "template": "llama2", - "flash_attn": requested_attention, - }) + model_args, _, finetuning_args, _ = get_infer_args( + { + "model_name_or_path": TINY_LLAMA, + "template": "llama2", + "flash_attn": requested_attention, + } + ) tokenizer = load_tokenizer(model_args) model = load_model(tokenizer["tokenizer"], model_args, finetuning_args) for module in model.modules(): if "Attention" in module.__class__.__name__: - assert module.__class__.__name__ == llama_attention_classes[requested_attention] + assert module.__class__.__name__ == llama_attention_classes[requested_attention] From de9e773764273db6d25ce2b6913e763e637e707a Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sat, 8 Jun 2024 02:00:44 +0800 Subject: [PATCH 08/15] fix ci Former-commit-id: 42d9b26fc81d66cb0485a2ae148c3e719df59b7d --- .github/workflows/tests.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index a66b579b..818d58fc 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -39,8 +39,10 @@ jobs: needs: check_code_quality strategy: matrix: - python-version: ["3.8", "3.9"] - os: ["ubuntu-latest", "windows-latest"] + python-version: + - "3.8" + os: + - "ubuntu-latest" runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@v4 From 3547a26f86087a35fc005ceffba195d4106500d5 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sat, 8 Jun 2024 02:42:34 +0800 Subject: [PATCH 09/15] add ultrafeedback and 
fineweb #4085 #4132 Former-commit-id: 12d79f89c5082eb29842b501e1cb88433a248ba3 --- .github/workflows/tests.yml | 24 +----------------------- README.md | 3 +++ README_zh.md | 3 +++ data/dataset_info.json | 31 +++++++++++++++++++++++++++++++ 4 files changed, 38 insertions(+), 23 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 818d58fc..32edf6a8 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -17,7 +17,7 @@ on: - ".github/workflows/*.yml" jobs: - check_code_quality: + tests: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 @@ -34,28 +34,6 @@ jobs: - name: Check quality run: | make style && make quality - - pytest: - needs: check_code_quality - strategy: - matrix: - python-version: - - "3.8" - os: - - "ubuntu-latest" - runs-on: ${{ matrix.os }} - steps: - - uses: actions/checkout@v4 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - cache: "pip" - cache-dependency-path: "setup.py" - - name: Install dependencies - run: | - python -m pip install --upgrade pip - python -m pip install .[torch,dev] - name: Test with pytest run: | make test diff --git a/README.md b/README.md index 44897420..fb6c5782 100644 --- a/README.md +++ b/README.md @@ -214,6 +214,8 @@ You also can add a custom chat template to [template.py](src/llamafactory/data/t - [Wikipedia (zh)](https://huggingface.co/datasets/pleisto/wikipedia-cn-20230720-filtered) - [Pile (en)](https://huggingface.co/datasets/EleutherAI/pile) - [SkyPile (zh)](https://huggingface.co/datasets/Skywork/SkyPile-150B) +- [FineWeb (en)](https://huggingface.co/datasets/HuggingFaceFW/fineweb) +- [FineWeb-Edu (en)](https://huggingface.co/datasets/HuggingFaceFW/fineweb-edu) - [The Stack (en)](https://huggingface.co/datasets/bigcode/the-stack) - [StarCoder (en)](https://huggingface.co/datasets/bigcode/starcoderdata) @@ -273,6 +275,7 @@ You also can add a custom chat template to [template.py](src/llamafactory/data/t
Preference datasets - [DPO mixed (en&zh)](https://huggingface.co/datasets/hiyouga/DPO-En-Zh-20k) +- [UltraFeedback (en)](https://huggingface.co/datasets/HuggingFaceH4/ultrafeedback_binarized) - [Orca DPO Pairs (en)](https://huggingface.co/datasets/Intel/orca_dpo_pairs) - [HH-RLHF (en)](https://huggingface.co/datasets/Anthropic/hh-rlhf) - [Nectar (en)](https://huggingface.co/datasets/berkeley-nest/Nectar) diff --git a/README_zh.md b/README_zh.md index 8321d202..142254df 100644 --- a/README_zh.md +++ b/README_zh.md @@ -214,6 +214,8 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/ec36a9dd-37f4-4f72-81bd - [Wikipedia (zh)](https://huggingface.co/datasets/pleisto/wikipedia-cn-20230720-filtered) - [Pile (en)](https://huggingface.co/datasets/EleutherAI/pile) - [SkyPile (zh)](https://huggingface.co/datasets/Skywork/SkyPile-150B) +- [FineWeb (en)](https://huggingface.co/datasets/HuggingFaceFW/fineweb) +- [FineWeb-Edu (en)](https://huggingface.co/datasets/HuggingFaceFW/fineweb-edu) - [The Stack (en)](https://huggingface.co/datasets/bigcode/the-stack) - [StarCoder (en)](https://huggingface.co/datasets/bigcode/starcoderdata) @@ -273,6 +275,7 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/ec36a9dd-37f4-4f72-81bd
偏好数据集 - [DPO mixed (en&zh)](https://huggingface.co/datasets/hiyouga/DPO-En-Zh-20k) +- [UltraFeedback (en)](https://huggingface.co/datasets/HuggingFaceH4/ultrafeedback_binarized) - [Orca DPO Pairs (en)](https://huggingface.co/datasets/Intel/orca_dpo_pairs) - [HH-RLHF (en)](https://huggingface.co/datasets/Anthropic/hh-rlhf) - [Nectar (en)](https://huggingface.co/datasets/berkeley-nest/Nectar) diff --git a/data/dataset_info.json b/data/dataset_info.json index 2d9b0c83..8c5cbb45 100644 --- a/data/dataset_info.json +++ b/data/dataset_info.json @@ -391,6 +391,16 @@ "rejected": "rejected" } }, + "ultrafeedback": { + "hf_hub_url": "llamafactory/ultrafeedback_binarized", + "ms_hub_url": "llamafactory/ultrafeedback_binarized", + "ranking": true, + "columns": { + "prompt": "instruction", + "chosen": "chosen", + "rejected": "rejected" + } + }, "orca_pairs": { "hf_hub_url": "Intel/orca_dpo_pairs", "ranking": true, @@ -448,6 +458,15 @@ "assistant_tag": "assistant" } }, + "ultrafeedback_kto": { + "hf_hub_url": "argilla/ultrafeedback-binarized-preferences-cleaned-kto", + "ms_hub_url": "AI-ModelScope/ultrafeedback-binarized-preferences-cleaned-kto", + "columns": { + "prompt": "prompt", + "response": "completion", + "kto_tag": "label" + } + }, "wiki_demo": { "file_name": "wiki_demo.txt", "columns": { @@ -501,6 +520,18 @@ "prompt": "text" } }, + "fileweb": { + "hf_hub_url": "HuggingFaceFW/fineweb", + "columns": { + "prompt": "text" + } + }, + "fileweb_edu": { + "hf_hub_url": "HuggingFaceFW/fineweb-edu", + "columns": { + "prompt": "text" + } + }, "the_stack": { "hf_hub_url": "bigcode/the-stack", "ms_hub_url": "AI-ModelScope/the-stack", From ce40d1269269bc3e698da8714b54f038b330ae70 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sat, 8 Jun 2024 05:20:54 +0800 Subject: [PATCH 10/15] release v0.8.0 Former-commit-id: 5aa4ce47567146cd97c61623018153b41d7c1278 --- src/llamafactory/data/template.py | 11 +--- src/llamafactory/extras/env.py | 2 +- tests/data/test_supervised.py | 44 +++++++++++++ .../model/{ => model_utils}/test_attention.py | 4 +- tests/model/test_freeze.py | 61 +++++++++++++++++++ tests/model/test_full.py | 33 ++++++++++ 6 files changed, 142 insertions(+), 13 deletions(-) create mode 100644 tests/data/test_supervised.py rename tests/model/{ => model_utils}/test_attention.py (88%) create mode 100644 tests/model/test_freeze.py create mode 100644 tests/model/test_full.py diff --git a/src/llamafactory/data/template.py b/src/llamafactory/data/template.py index 3dce5ec6..b600c567 100644 --- a/src/llamafactory/data/template.py +++ b/src/llamafactory/data/template.py @@ -700,17 +700,8 @@ _register_template( _register_template( name="llama2", format_user=StringFormatter(slots=[{"bos_token"}, "[INST] {{content}} [/INST]"]), + format_assistant=StringFormatter(slots=[" {{content}} ", {"eos_token"}]), format_system=StringFormatter(slots=["<>\n{{content}}\n<>\n\n"]), - default_system=( - "You are a helpful, respectful and honest assistant. " - "Always answer as helpfully as possible, while being safe. " - "Your answers should not include any harmful, unethical, " - "racist, sexist, toxic, dangerous, or illegal content. " - "Please ensure that your responses are socially unbiased and positive in nature.\n\n" - "If a question does not make any sense, or is not factually coherent, " - "explain why instead of answering something not correct. " - "If you don't know the answer to a question, please don't share false information." 
- ), ) diff --git a/src/llamafactory/extras/env.py b/src/llamafactory/extras/env.py index fdccf86b..cd81442d 100644 --- a/src/llamafactory/extras/env.py +++ b/src/llamafactory/extras/env.py @@ -12,7 +12,7 @@ from transformers.utils import is_bitsandbytes_available, is_torch_cuda_availabl from .packages import is_vllm_available -VERSION = "0.7.2.dev0" +VERSION = "0.8.0" def print_env() -> None: diff --git a/tests/data/test_supervised.py b/tests/data/test_supervised.py new file mode 100644 index 00000000..bb7f71df --- /dev/null +++ b/tests/data/test_supervised.py @@ -0,0 +1,44 @@ +import os + +import pytest +from datasets import load_dataset + +from llamafactory.data import get_dataset +from llamafactory.hparams import get_train_args +from llamafactory.model import load_tokenizer + + +TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-LlamaForCausalLM") + +TRAINING_ARGS = { + "model_name_or_path": TINY_LLAMA, + "stage": "sft", + "do_train": True, + "finetuning_type": "full", + "dataset": "llamafactory/tiny_dataset", + "dataset_dir": "ONLINE", + "template": "llama3", + "cutoff_len": 1024, + "overwrite_cache": True, + "output_dir": "dummy_dir", + "overwrite_output_dir": True, + "fp16": True, +} + + +@pytest.mark.parametrize("test_num", [5]) +def test_supervised(test_num: int): + model_args, data_args, training_args, _, _ = get_train_args(TRAINING_ARGS) + tokenizer_module = load_tokenizer(model_args) + tokenizer = tokenizer_module["tokenizer"] + tokenized_data = get_dataset(model_args, data_args, training_args, stage="sft", **tokenizer_module) + + original_data = load_dataset(TRAINING_ARGS["dataset"], split="train") + for test_idx in range(test_num): + decode_result = tokenizer.decode(tokenized_data["input_ids"][test_idx]) + messages = [ + {"role": "user", "content": original_data[test_idx]["instruction"]}, + {"role": "assistant", "content": original_data[test_idx]["output"]}, + ] + templated_result = tokenizer.apply_chat_template(messages, tokenize=False) + assert decode_result == templated_result diff --git a/tests/model/test_attention.py b/tests/model/model_utils/test_attention.py similarity index 88% rename from tests/model/test_attention.py rename to tests/model/model_utils/test_attention.py index 6dd46050..4d414289 100644 --- a/tests/model/test_attention.py +++ b/tests/model/model_utils/test_attention.py @@ -30,8 +30,8 @@ def test_attention(): "flash_attn": requested_attention, } ) - tokenizer = load_tokenizer(model_args) - model = load_model(tokenizer["tokenizer"], model_args, finetuning_args) + tokenizer_module = load_tokenizer(model_args) + model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args) for module in model.modules(): if "Attention" in module.__class__.__name__: assert module.__class__.__name__ == llama_attention_classes[requested_attention] diff --git a/tests/model/test_freeze.py b/tests/model/test_freeze.py new file mode 100644 index 00000000..c6cdec78 --- /dev/null +++ b/tests/model/test_freeze.py @@ -0,0 +1,61 @@ +import os + +import torch + +from llamafactory.hparams import get_train_args +from llamafactory.model import load_model, load_tokenizer + + +TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-LlamaForCausalLM") + +TRAINING_ARGS = { + "model_name_or_path": TINY_LLAMA, + "stage": "sft", + "do_train": True, + "finetuning_type": "freeze", + "dataset": "llamafactory/tiny_dataset", + "dataset_dir": "ONLINE", + "template": "llama3", + "cutoff_len": 1024, + "overwrite_cache": True, + "output_dir": "dummy_dir", + 
"overwrite_output_dir": True, + "fp16": True, +} + + +def test_freeze_all_modules(): + model_args, _, _, finetuning_args, _ = get_train_args( + { + "freeze_trainable_layers": 1, + **TRAINING_ARGS, + } + ) + tokenizer_module = load_tokenizer(model_args) + model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) + for name, param in model.named_parameters(): + if name.startswith("model.layers.1."): + assert param.requires_grad is True + assert param.dtype == torch.float32 + else: + assert param.requires_grad is False + assert param.dtype == torch.float16 + + +def test_freeze_extra_modules(): + model_args, _, _, finetuning_args, _ = get_train_args( + { + "freeze_trainable_layers": 1, + "freeze_extra_modules": "embed_tokens,lm_head", + **TRAINING_ARGS, + } + ) + tokenizer_module = load_tokenizer(model_args) + model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) + for name, param in model.named_parameters(): + if name.startswith("model.layers.1.") or any(module in name for module in ["embed_tokens", "lm_head"]): + assert param.requires_grad is True + assert param.dtype == torch.float32 + else: + assert param.requires_grad is False + assert param.dtype == torch.float16 diff --git a/tests/model/test_full.py b/tests/model/test_full.py new file mode 100644 index 00000000..ef57a980 --- /dev/null +++ b/tests/model/test_full.py @@ -0,0 +1,33 @@ +import os + +import torch + +from llamafactory.hparams import get_train_args +from llamafactory.model import load_model, load_tokenizer + + +TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-LlamaForCausalLM") + +TRAINING_ARGS = { + "model_name_or_path": TINY_LLAMA, + "stage": "sft", + "do_train": True, + "finetuning_type": "full", + "dataset": "llamafactory/tiny_dataset", + "dataset_dir": "ONLINE", + "template": "llama3", + "cutoff_len": 1024, + "overwrite_cache": True, + "output_dir": "dummy_dir", + "overwrite_output_dir": True, + "fp16": True, +} + + +def test_full(): + model_args, _, _, finetuning_args, _ = get_train_args(TRAINING_ARGS) + tokenizer_module = load_tokenizer(model_args) + model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) + for param in model.parameters(): + assert param.requires_grad is True + assert param.dtype == torch.float32 From de3400a5212e34764dd91bb9fe6f759012cdb7f5 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sat, 8 Jun 2024 06:46:09 +0800 Subject: [PATCH 11/15] set dev version Former-commit-id: 3ac11e77cccf686e0da499bd152997133b49a265 --- src/llamafactory/extras/env.py | 2 +- tests/model/test_lora.py | 72 ++++++++++++++++++++++++++++++++++ 2 files changed, 73 insertions(+), 1 deletion(-) create mode 100644 tests/model/test_lora.py diff --git a/src/llamafactory/extras/env.py b/src/llamafactory/extras/env.py index cd81442d..2b9c6458 100644 --- a/src/llamafactory/extras/env.py +++ b/src/llamafactory/extras/env.py @@ -12,7 +12,7 @@ from transformers.utils import is_bitsandbytes_available, is_torch_cuda_availabl from .packages import is_vllm_available -VERSION = "0.8.0" +VERSION = "0.8.1.dev0" def print_env() -> None: diff --git a/tests/model/test_lora.py b/tests/model/test_lora.py new file mode 100644 index 00000000..1f2c02ae --- /dev/null +++ b/tests/model/test_lora.py @@ -0,0 +1,72 @@ +import os + +import torch + +from llamafactory.hparams import get_train_args +from llamafactory.model import load_model, load_tokenizer + + +TINY_LLAMA = os.environ.get("TINY_LLAMA", 
"llamafactory/tiny-random-LlamaForCausalLM") + +TRAINING_ARGS = { + "model_name_or_path": TINY_LLAMA, + "stage": "sft", + "do_train": True, + "finetuning_type": "lora", + "dataset": "llamafactory/tiny_dataset", + "dataset_dir": "ONLINE", + "template": "llama3", + "cutoff_len": 1024, + "overwrite_cache": True, + "output_dir": "dummy_dir", + "overwrite_output_dir": True, + "fp16": True, +} + + +def test_lora_all_modules(): + model_args, _, _, finetuning_args, _ = get_train_args( + { + "lora_target": "all", + **TRAINING_ARGS, + } + ) + tokenizer_module = load_tokenizer(model_args) + model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) + linear_modules = set() + for name, param in model.named_parameters(): + if any(module in name for module in ["lora_A", "lora_B"]): + linear_modules.add(name.split(".lora_", maxsplit=1)[0].split(".")[-1]) + assert param.requires_grad is True + assert param.dtype == torch.float32 + else: + assert param.requires_grad is False + assert param.dtype == torch.float16 + + assert linear_modules == {"q_proj", "k_proj", "v_proj", "o_proj", "up_proj", "gate_proj", "down_proj"} + + +def test_lora_extra_modules(): + model_args, _, _, finetuning_args, _ = get_train_args( + { + "lora_target": "all", + "additional_target": "embed_tokens,lm_head", + **TRAINING_ARGS, + } + ) + tokenizer_module = load_tokenizer(model_args) + model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) + extra_modules = set() + for name, param in model.named_parameters(): + if any(module in name for module in ["lora_A", "lora_B"]): + assert param.requires_grad is True + assert param.dtype == torch.float32 + elif "modules_to_save" in name: + extra_modules.add(name.split(".modules_to_save", maxsplit=1)[0].split(".")[-1]) + assert param.requires_grad is True + assert param.dtype == torch.float32 + else: + assert param.requires_grad is False + assert param.dtype == torch.float16 + + assert extra_modules == {"embed_tokens", "lm_head"} From 1a261add61b3269eb98b726f966495408bb1018e Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sat, 8 Jun 2024 07:15:45 +0800 Subject: [PATCH 12/15] fix llamafactory-cli env Former-commit-id: 972ec9c668de1a9b6d872187dbc0c1d94f6fec6b --- src/llamafactory/extras/env.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/src/llamafactory/extras/env.py b/src/llamafactory/extras/env.py index 2b9c6458..1d4e43f1 100644 --- a/src/llamafactory/extras/env.py +++ b/src/llamafactory/extras/env.py @@ -6,10 +6,7 @@ import peft import torch import transformers import trl -from transformers.integrations import is_deepspeed_available -from transformers.utils import is_bitsandbytes_available, is_torch_cuda_available, is_torch_npu_available - -from .packages import is_vllm_available +from transformers.utils import is_torch_cuda_available, is_torch_npu_available VERSION = "0.8.1.dev0" @@ -37,19 +34,25 @@ def print_env() -> None: info["NPU type"] = torch.npu.get_device_name() info["CANN version"] = torch.version.cann - if is_deepspeed_available(): + try: import deepspeed # type: ignore info["DeepSpeed version"] = deepspeed.__version__ + except Exception: + pass - if is_bitsandbytes_available(): + try: import bitsandbytes info["Bitsandbytes version"] = bitsandbytes.__version__ + except Exception: + pass - if is_vllm_available(): + try: import vllm info["vLLM version"] = vllm.__version__ + except Exception: + pass print("\n" + "\n".join(["- {}: {}".format(key, value) 
for key, value in info.items()]) + "\n") From 1c31809652f9e970c681ba846f41b266035253f2 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sat, 8 Jun 2024 21:11:32 +0800 Subject: [PATCH 13/15] update git workflows Former-commit-id: 4c4f950f3936edbce84796c9a5e11b61ea07864e --- .github/workflows/label_issue.yml | 17 +++++++++++++++++ .github/workflows/tests.yml | 17 ++++++++--------- 2 files changed, 25 insertions(+), 9 deletions(-) create mode 100644 .github/workflows/label_issue.yml diff --git a/.github/workflows/label_issue.yml b/.github/workflows/label_issue.yml new file mode 100644 index 00000000..b9a5543c --- /dev/null +++ b/.github/workflows/label_issue.yml @@ -0,0 +1,17 @@ +name: label_issue + +on: + issues: + types: + - opened + +jobs: + label_issue: + runs-on: ubuntu-latest + + steps: + - env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + ISSUE_URL: ${{ github.event.issue.html_url }} + run: | + gh issue edit $ISSUE_URL --add-label "pending" diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 32edf6a8..6ddcbc05 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -3,14 +3,7 @@ name: tests on: push: branches: - - main - paths: - - "**.py" - - "requirements.txt" - - ".github/workflows/*.yml" - pull_request: - branches: - - main + - $default-branch paths: - "**.py" - "requirements.txt" @@ -19,21 +12,27 @@ on: jobs: tests: runs-on: ubuntu-latest + steps: - - uses: actions/checkout@v4 + - name: Checkout + uses: actions/checkout@v4 + - name: Set up Python uses: actions/setup-python@v5 with: python-version: "3.8" cache: "pip" cache-dependency-path: "setup.py" + - name: Install dependencies run: | python -m pip install --upgrade pip python -m pip install .[torch,dev] + - name: Check quality run: | make style && make quality + - name: Test with pytest run: | make test From 0c44309e155f95f97853bdbf4d11e6254f434558 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sat, 8 Jun 2024 21:15:36 +0800 Subject: [PATCH 14/15] Update tests.yml Former-commit-id: 25c635ef2849fd173617b6a8d8d3f5d4f800a893 --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 6ddcbc05..f3ac96db 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -3,7 +3,7 @@ name: tests on: push: branches: - - $default-branch + - main paths: - "**.py" - "requirements.txt" From e4b9c8010449357055da24cc33e8c2b2386a2889 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sat, 8 Jun 2024 21:25:35 +0800 Subject: [PATCH 15/15] add pr ci Former-commit-id: b61d25cd705db821664f87adf37d2ed56c68130f --- .github/workflows/tests.yml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index f3ac96db..96092662 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -8,6 +8,15 @@ on: - "**.py" - "requirements.txt" - ".github/workflows/*.yml" + pull_request: + types: + - review_requested + branches: + - main + paths: + - "**.py" + - "requirements.txt" + - ".github/workflows/*.yml" jobs: tests:
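Editor's note, not part of the patches: PATCH 12 above rewrites print_env() to probe optional packages with try/except instead of the transformers availability helpers, so a missing or broken extra never crashes llamafactory-cli env. A small standalone sketch of that pattern, using the same three packages the patch reports; the helper name is illustrative only.

import importlib

def optional_versions():
    # Collect version strings for optional dependencies without failing on absent ones.
    info = {}
    for label, module_name in [
        ("DeepSpeed version", "deepspeed"),
        ("Bitsandbytes version", "bitsandbytes"),
        ("vLLM version", "vllm"),
    ]:
        try:
            module = importlib.import_module(module_name)
            info[label] = module.__version__
        except Exception:  # package missing or failed at import time
            pass
    return info

if __name__ == "__main__":
    for key, value in optional_versions().items():
        print("- {}: {}".format(key, value))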