From 5e1cb05b95b8a5801edc1aa8e0894bd6bceb6435 Mon Sep 17 00:00:00 2001 From: zhangzc <2608882093@qq.com> Date: Wed, 27 Mar 2024 14:22:50 +0800 Subject: [PATCH 001/162] Supports custom data set sampling quantity Former-commit-id: fa8325401df27595de4611a89dfcc14644956abd --- data/README.md | 5 +++-- data/README_zh.md | 3 ++- src/llmtuner/data/loader.py | 13 +++++++++++++ src/llmtuner/data/parser.py | 4 +++- 4 files changed, 21 insertions(+), 4 deletions(-) diff --git a/data/README.md b/data/README.md index fa2c9ee0..c4a1b298 100644 --- a/data/README.md +++ b/data/README.md @@ -27,8 +27,9 @@ If you are using a custom dataset, please provide your dataset definition in the "assistant_tag": "the value of the role_tag represents the assistant. (default: gpt)", "observation_tag": "the value of the role_tag represents the tool results. (default: observation)", "function_tag": "the value of the role_tag represents the function call. (default: function_call)", - "system_tag": "the value of the role_tag represents the system prompt. (default: system, can override system column)" - } + "system_tag": "the value of the role_tag represents the system prompt. (default: system, can override system column)", + }, + "sample_num": "the number of samples from this dataset can be greater than the total amount of the dataset. (default: None)" } ``` diff --git a/data/README_zh.md b/data/README_zh.md index e0004f4a..6396688a 100644 --- a/data/README_zh.md +++ b/data/README_zh.md @@ -28,7 +28,8 @@ "observation_tag": "消息中代表工具返回结果的 role_tag(默认:observation)", "function_tag": "消息中代表工具调用的 role_tag(默认:function_call)", "system_tag": "消息中代表系统提示的 role_tag(默认:system,会覆盖 system 列)" - } + }, + "sample_num": "从该数据集采样的数量,可大于该数据集总量(默认:None)" } ``` diff --git a/src/llmtuner/data/loader.py b/src/llmtuner/data/loader.py index 935695ad..bebe5718 100644 --- a/src/llmtuner/data/loader.py +++ b/src/llmtuner/data/loader.py @@ -1,5 +1,7 @@ import inspect import os +import numpy as np +from numpy.random import RandomState from typing import TYPE_CHECKING, Literal, Union from datasets import load_dataset, load_from_disk @@ -108,6 +110,17 @@ def load_single_dataset( num_samples = min(data_args.max_samples, len(dataset)) dataset = dataset.select(range(num_samples)) + if dataset_attr.sample_num: + dataset_sample_num = dataset_attr.sample_num + logger.info(f"从 {dataset_attr.dataset_name} 采样 {dataset_sample_num} 条训练样本") + random_state = RandomState(42) + idx = random_state.permutation(len(dataset))[:dataset_sample_num] + dataset_sample_num -= len(idx) + if dataset_sample_num > 0: + idx2 = random_state.choice(len(dataset), dataset_sample_num) + idx = np.concatenate([idx, idx2], axis=0) + dataset = dataset.select(idx) + return align_dataset(dataset, dataset_attr, data_args) diff --git a/src/llmtuner/data/parser.py b/src/llmtuner/data/parser.py index 861396a0..9746b5b2 100644 --- a/src/llmtuner/data/parser.py +++ b/src/llmtuner/data/parser.py @@ -44,6 +44,7 @@ class DatasetAttr: observation_tag: Optional[str] = "observation" function_tag: Optional[str] = "function_call" system_tag: Optional[str] = "system" + sample_num: Optional[int] = None def __repr__(self) -> str: return self.dataset_name @@ -90,7 +91,8 @@ def get_dataset_list(data_args: "DataArguments") -> List["DatasetAttr"]: dataset_attr.set_attr("folder", dataset_info[name]) dataset_attr.set_attr("ranking", dataset_info[name], default=False) dataset_attr.set_attr("formatting", dataset_info[name], default="alpaca") - + dataset_attr.set_attr("sample_num", dataset_info[name]) + if "columns" in 
dataset_info[name]: column_names = ["system"] if dataset_attr.formatting == "alpaca": From aaadaa18f6d276db026037ce4718b8745420d380 Mon Sep 17 00:00:00 2001 From: BUAADreamer <1428195643@qq.com> Date: Tue, 21 May 2024 08:57:14 +0800 Subject: [PATCH 002/162] support pretraining of llava Former-commit-id: 6a4c8cf0a6a1674c693b9337f018ff8df7477f8f --- data/dataset_info.json | 14 ++++ data/mllm_pt_demo.json | 92 ++++++++++++++++++++++++++ src/llamafactory/hparams/model_args.py | 4 ++ src/llamafactory/model/loader.py | 5 ++ 4 files changed, 115 insertions(+) create mode 100644 data/mllm_pt_demo.json diff --git a/data/dataset_info.json b/data/dataset_info.json index b985582e..5a90e077 100644 --- a/data/dataset_info.json +++ b/data/dataset_info.json @@ -38,6 +38,20 @@ "assistant_tag": "assistant" } }, + "mllm_pt_demo": { + "file_name": "mllm_pt_demo.json", + "formatting": "sharegpt", + "columns": { + "messages": "messages", + "images": "images" + }, + "tags": { + "role_tag": "role", + "content_tag": "content", + "user_tag": "user", + "assistant_tag": "assistant" + } + }, "alpaca_en": { "hf_hub_url": "llamafactory/alpaca_en", "ms_hub_url": "llamafactory/alpaca_en" diff --git a/data/mllm_pt_demo.json b/data/mllm_pt_demo.json new file mode 100644 index 00000000..2ee01ce6 --- /dev/null +++ b/data/mllm_pt_demo.json @@ -0,0 +1,92 @@ +[ + { + "messages": [ + { + "content": "Render a clear and concise summary of the photo.", + "role": "user" + }, + { + "content": "There are two soccer players on the field.", + "role": "assistant" + } + ], + "images": [ + "mllm_demo_data/1.jpg" + ] + }, + { + "messages": [ + { + "content": "Write a terse but informative summary of the picture.", + "role": "user" + }, + { + "content": "A soccer player is sliding on his knees to celebrate", + "role": "assistant" + } + ], + "images": [ + "mllm_demo_data/2.jpg" + ] + }, + { + "messages": [ + { + "content": "What is this?", + "role": "user" + }, + { + "content": "A man is giving a speech.", + "role": "assistant" + } + ], + "images": [ + "mllm_demo_data/3.jpg" + ] + }, + { + "messages": [ + { + "content": "对照片进行简明扼要的概括。", + "role": "user" + }, + { + "content": "两个足球运动员在场上", + "role": "assistant" + } + ], + "images": [ + "mllm_demo_data/1.jpg" + ] + }, + { + "messages": [ + { + "content": "为图片写一个简短但内容丰富的摘要。", + "role": "user" + }, + { + "content": "一个足球运动员在跪地滑行庆祝", + "role": "assistant" + } + ], + "images": [ + "mllm_demo_data/2.jpg" + ] + }, + { + "messages": [ + { + "content": "这是什么?", + "role": "user" + }, + { + "content": "一个男人在演讲", + "role": "assistant" + } + ], + "images": [ + "mllm_demo_data/3.jpg" + ] + } +] \ No newline at end of file diff --git a/src/llamafactory/hparams/model_args.py b/src/llamafactory/hparams/model_args.py index 5885bb09..255051dc 100644 --- a/src/llamafactory/hparams/model_args.py +++ b/src/llamafactory/hparams/model_args.py @@ -85,6 +85,10 @@ class ModelArguments: default=False, metadata={"help": "Whethor or not to use multimodal LLM that accepts visual inputs."}, ) + tune_mm_proj: bool = field( + default=False, + metadata={"help": "Whethor or not only finetune mm_projector for MLLM."}, + ) moe_aux_loss_coef: Optional[float] = field( default=None, metadata={"help": "Coefficient of the auxiliary router loss in mixture-of-experts model."}, diff --git a/src/llamafactory/model/loader.py b/src/llamafactory/model/loader.py index 49b347d5..d9784593 100644 --- a/src/llamafactory/model/loader.py +++ b/src/llamafactory/model/loader.py @@ -163,6 +163,11 @@ def load_model( else: model.train() + if 
model_args.visual_inputs and model_args.tune_mm_proj: + lm_params = [param for name, param in model.named_parameters() if "language_model" in name] + for param in lm_params: + param.requires_grad_(False) + trainable_params, all_param = count_parameters(model) if is_trainable: param_stats = "trainable params: {:d} || all params: {:d} || trainable%: {:.4f}".format( From 765cd370daba83dcbdaec0f99b9a86e7baaa6708 Mon Sep 17 00:00:00 2001 From: BUAADreamer <1428195643@qq.com> Date: Mon, 27 May 2024 18:59:00 +0800 Subject: [PATCH 003/162] add regex of only tune lm and mm_proj Former-commit-id: 38d540b3e69bceabafafab524fcfc78aeb05612d --- sites/paligemma-pt.yaml | 49 ++++++++++++++++++++++++++ sites/paligemma.yaml | 49 ++++++++++++++++++++++++++ sites/paligemma_lora.yaml | 40 +++++++++++++++++++++ src/llamafactory/model/adapter.py | 7 ++++ src/llamafactory/model/loader.py | 5 --- src/llamafactory/model/utils/visual.py | 7 +++- 6 files changed, 151 insertions(+), 6 deletions(-) create mode 100644 sites/paligemma-pt.yaml create mode 100644 sites/paligemma.yaml create mode 100644 sites/paligemma_lora.yaml diff --git a/sites/paligemma-pt.yaml b/sites/paligemma-pt.yaml new file mode 100644 index 00000000..4305cf5f --- /dev/null +++ b/sites/paligemma-pt.yaml @@ -0,0 +1,49 @@ +# model +model_name_or_path: google/paligemma-3b-mix-448 +visual_inputs: true +tune_mm_proj: true +#print_param_status: true + +# method +stage: sft +do_train: true +finetuning_type: full + +# ddp +ddp_timeout: 180000000 +deepspeed: examples/deepspeed/ds_z2_offload_config.json + +# dataset +dataset: mllm_pt_demo +dataset_dir: data +template: gemma +cutoff_len: 2048 +max_samples: 3 +#val_size: 0.0001 +overwrite_cache: true +preprocessing_num_workers: 16 + +# output +output_dir: saves/paligemma/full/sft_llava_pt_test +logging_steps: 1 +save_steps: 50 +plot_loss: true +overwrite_output_dir: true +#save_strategy: epoch +#save_total_limit: 2 + +# train +per_device_train_batch_size: 1 +gradient_accumulation_steps: 16 +learning_rate: 0.00001 +num_train_epochs: 100 +lr_scheduler_type: cosine +warmup_steps: 0.1 +#bf16: true +pure_bf16: true + +# eval +do_eval: false +#per_device_eval_batch_size: 1 +#evaluation_strategy: steps +#eval_steps: 500 diff --git a/sites/paligemma.yaml b/sites/paligemma.yaml new file mode 100644 index 00000000..f3257cfc --- /dev/null +++ b/sites/paligemma.yaml @@ -0,0 +1,49 @@ +# model +model_name_or_path: google/paligemma-3b-mix-448 +visual_inputs: true +#print_param_status: true +use_fast_tokenizer: false + +# method +stage: sft +do_train: true +finetuning_type: full + +# ddp +ddp_timeout: 180000000 +deepspeed: examples/deepspeed/ds_z2_offload_config.json + +# dataset +dataset: mllm_demo +dataset_dir: data +template: gemma +cutoff_len: 2048 +max_samples: 3 +#val_size: 0.0001 +overwrite_cache: true +preprocessing_num_workers: 16 + +# output +output_dir: saves/paligemma/full/sft_llava_1k +logging_steps: 1 +save_steps: 50 +plot_loss: true +overwrite_output_dir: true +#save_strategy: epoch +#save_total_limit: 2 + +# train +per_device_train_batch_size: 1 +gradient_accumulation_steps: 16 +learning_rate: 0.00001 +num_train_epochs: 100 +lr_scheduler_type: cosine +warmup_steps: 0.1 +#bf16: true +pure_bf16: true + +# eval +do_eval: false +#per_device_eval_batch_size: 1 +#evaluation_strategy: steps +#eval_steps: 500 diff --git a/sites/paligemma_lora.yaml b/sites/paligemma_lora.yaml new file mode 100644 index 00000000..0693a6ae --- /dev/null +++ b/sites/paligemma_lora.yaml @@ -0,0 +1,40 @@ +### model +model_name_or_path: 
google/paligemma-3b-mix-448 +visual_inputs: true +use_fast_tokenizer: false + +### method +stage: sft +do_train: true +finetuning_type: lora +lora_target: q_proj,v_proj + +### dataset +dataset: mllm_demo +template: gemma +cutoff_len: 1024 +max_samples: 1000 +overwrite_cache: true +preprocessing_num_workers: 16 + +### output +output_dir: saves/paligemma/lora/sft_mllm +logging_steps: 10 +save_steps: 500 +plot_loss: true +overwrite_output_dir: true + +### train +per_device_train_batch_size: 1 +gradient_accumulation_steps: 8 +learning_rate: 0.0001 +num_train_epochs: 3.0 +lr_scheduler_type: cosine +warmup_steps: 0.1 +fp16: true + +### eval +val_size: 0.1 +per_device_eval_batch_size: 1 +evaluation_strategy: steps +eval_steps: 500 diff --git a/src/llamafactory/model/adapter.py b/src/llamafactory/model/adapter.py index f37f3bbb..015db8a0 100644 --- a/src/llamafactory/model/adapter.py +++ b/src/llamafactory/model/adapter.py @@ -10,6 +10,7 @@ from ..extras.logging import get_logger from .utils.misc import find_all_linear_modules, find_expanded_modules from .utils.quantization import QuantizationMethod from .utils.unsloth import get_unsloth_peft_model, load_unsloth_peft_model +from .utils.visual import filter_vision_tower_linear if TYPE_CHECKING: @@ -58,6 +59,9 @@ def init_adapter( if model_args.visual_inputs and hasattr(model, "vision_tower"): # freeze vision model model.vision_tower.requires_grad_(False) + if model_args.visual_inputs and hasattr(model, "language_model") and model_args.tune_mm_proj: # freeze language model if only tune mm_proj + model.language_model.requires_grad_(False) + if finetuning_args.finetuning_type == "freeze" and is_trainable: logger.info("Fine-tuning method: Freeze") num_layers = ( @@ -180,6 +184,9 @@ def init_adapter( if finetuning_args.use_llama_pro: target_modules = find_expanded_modules(model, target_modules, finetuning_args.num_layer_trainable) + if model_args.visual_inputs: + target_modules = filter_vision_tower_linear(target_modules) + if ( finetuning_args.use_dora and getattr(model, "quantization_method", None) is not None diff --git a/src/llamafactory/model/loader.py b/src/llamafactory/model/loader.py index d9784593..49b347d5 100644 --- a/src/llamafactory/model/loader.py +++ b/src/llamafactory/model/loader.py @@ -163,11 +163,6 @@ def load_model( else: model.train() - if model_args.visual_inputs and model_args.tune_mm_proj: - lm_params = [param for name, param in model.named_parameters() if "language_model" in name] - for param in lm_params: - param.requires_grad_(False) - trainable_params, all_param = count_parameters(model) if is_trainable: param_stats = "trainable params: {:d} || all params: {:d} || trainable%: {:.4f}".format( diff --git a/src/llamafactory/model/utils/visual.py b/src/llamafactory/model/utils/visual.py index c8260b7f..a91777ba 100644 --- a/src/llamafactory/model/utils/visual.py +++ b/src/llamafactory/model/utils/visual.py @@ -1,4 +1,4 @@ -from typing import TYPE_CHECKING, Tuple +from typing import TYPE_CHECKING, Tuple, List import torch import transformers.models @@ -82,3 +82,8 @@ def configure_visual_model(config: "PretrainedConfig") -> None: if getattr(config, "is_yi_vl_derived_model", None): logger.info("Detected Yi-VL model, applying projector patch.") transformers.models.llava.modeling_llava.LlavaMultiModalProjector = LlavaMultiModalProjectorForYiVL + + +def filter_vision_tower_linear(target_modules: List[str]) -> str: + target_modules = f"^(?!.*vision_tower).*(?:{'|'.join(target_modules)}).*" + return target_modules From 
f67e4f14ab22d0c5215e3fd76cad31a2bd3dda74 Mon Sep 17 00:00:00 2001 From: BUAADreamer <1428195643@qq.com> Date: Mon, 27 May 2024 19:00:15 +0800 Subject: [PATCH 004/162] add only tune lm and mm_proj Former-commit-id: ba12ca430ec527fbfe4cd1eace0adb5c7712146a --- sites/paligemma-pt.yaml | 49 --------------------------------------- sites/paligemma.yaml | 49 --------------------------------------- sites/paligemma_lora.yaml | 40 -------------------------------- 3 files changed, 138 deletions(-) delete mode 100644 sites/paligemma-pt.yaml delete mode 100644 sites/paligemma.yaml delete mode 100644 sites/paligemma_lora.yaml diff --git a/sites/paligemma-pt.yaml b/sites/paligemma-pt.yaml deleted file mode 100644 index 4305cf5f..00000000 --- a/sites/paligemma-pt.yaml +++ /dev/null @@ -1,49 +0,0 @@ -# model -model_name_or_path: google/paligemma-3b-mix-448 -visual_inputs: true -tune_mm_proj: true -#print_param_status: true - -# method -stage: sft -do_train: true -finetuning_type: full - -# ddp -ddp_timeout: 180000000 -deepspeed: examples/deepspeed/ds_z2_offload_config.json - -# dataset -dataset: mllm_pt_demo -dataset_dir: data -template: gemma -cutoff_len: 2048 -max_samples: 3 -#val_size: 0.0001 -overwrite_cache: true -preprocessing_num_workers: 16 - -# output -output_dir: saves/paligemma/full/sft_llava_pt_test -logging_steps: 1 -save_steps: 50 -plot_loss: true -overwrite_output_dir: true -#save_strategy: epoch -#save_total_limit: 2 - -# train -per_device_train_batch_size: 1 -gradient_accumulation_steps: 16 -learning_rate: 0.00001 -num_train_epochs: 100 -lr_scheduler_type: cosine -warmup_steps: 0.1 -#bf16: true -pure_bf16: true - -# eval -do_eval: false -#per_device_eval_batch_size: 1 -#evaluation_strategy: steps -#eval_steps: 500 diff --git a/sites/paligemma.yaml b/sites/paligemma.yaml deleted file mode 100644 index f3257cfc..00000000 --- a/sites/paligemma.yaml +++ /dev/null @@ -1,49 +0,0 @@ -# model -model_name_or_path: google/paligemma-3b-mix-448 -visual_inputs: true -#print_param_status: true -use_fast_tokenizer: false - -# method -stage: sft -do_train: true -finetuning_type: full - -# ddp -ddp_timeout: 180000000 -deepspeed: examples/deepspeed/ds_z2_offload_config.json - -# dataset -dataset: mllm_demo -dataset_dir: data -template: gemma -cutoff_len: 2048 -max_samples: 3 -#val_size: 0.0001 -overwrite_cache: true -preprocessing_num_workers: 16 - -# output -output_dir: saves/paligemma/full/sft_llava_1k -logging_steps: 1 -save_steps: 50 -plot_loss: true -overwrite_output_dir: true -#save_strategy: epoch -#save_total_limit: 2 - -# train -per_device_train_batch_size: 1 -gradient_accumulation_steps: 16 -learning_rate: 0.00001 -num_train_epochs: 100 -lr_scheduler_type: cosine -warmup_steps: 0.1 -#bf16: true -pure_bf16: true - -# eval -do_eval: false -#per_device_eval_batch_size: 1 -#evaluation_strategy: steps -#eval_steps: 500 diff --git a/sites/paligemma_lora.yaml b/sites/paligemma_lora.yaml deleted file mode 100644 index 0693a6ae..00000000 --- a/sites/paligemma_lora.yaml +++ /dev/null @@ -1,40 +0,0 @@ -### model -model_name_or_path: google/paligemma-3b-mix-448 -visual_inputs: true -use_fast_tokenizer: false - -### method -stage: sft -do_train: true -finetuning_type: lora -lora_target: q_proj,v_proj - -### dataset -dataset: mllm_demo -template: gemma -cutoff_len: 1024 -max_samples: 1000 -overwrite_cache: true -preprocessing_num_workers: 16 - -### output -output_dir: saves/paligemma/lora/sft_mllm -logging_steps: 10 -save_steps: 500 -plot_loss: true -overwrite_output_dir: true - -### train 
-per_device_train_batch_size: 1 -gradient_accumulation_steps: 8 -learning_rate: 0.0001 -num_train_epochs: 3.0 -lr_scheduler_type: cosine -warmup_steps: 0.1 -fp16: true - -### eval -val_size: 0.1 -per_device_eval_batch_size: 1 -evaluation_strategy: steps -eval_steps: 500 From 136e64081f2a994d61bdbc92fadd90a5ae63a500 Mon Sep 17 00:00:00 2001 From: BUAADreamer <1428195643@qq.com> Date: Mon, 27 May 2024 20:10:31 +0800 Subject: [PATCH 005/162] remove mllm_pt_demo.json Former-commit-id: 5402589f021056f9c9e7b68421282039a508d5b9 --- data/dataset_info.json | 2 +- data/mllm_pt_demo.json | 92 ------------------------------------------ 2 files changed, 1 insertion(+), 93 deletions(-) delete mode 100644 data/mllm_pt_demo.json diff --git a/data/dataset_info.json b/data/dataset_info.json index 5a90e077..1deb3d6d 100644 --- a/data/dataset_info.json +++ b/data/dataset_info.json @@ -39,7 +39,7 @@ } }, "mllm_pt_demo": { - "file_name": "mllm_pt_demo.json", + "hf_hub_url": "BUAADreamer/mllm_pt_demo", "formatting": "sharegpt", "columns": { "messages": "messages", diff --git a/data/mllm_pt_demo.json b/data/mllm_pt_demo.json deleted file mode 100644 index 2ee01ce6..00000000 --- a/data/mllm_pt_demo.json +++ /dev/null @@ -1,92 +0,0 @@ -[ - { - "messages": [ - { - "content": "Render a clear and concise summary of the photo.", - "role": "user" - }, - { - "content": "There are two soccer players on the field.", - "role": "assistant" - } - ], - "images": [ - "mllm_demo_data/1.jpg" - ] - }, - { - "messages": [ - { - "content": "Write a terse but informative summary of the picture.", - "role": "user" - }, - { - "content": "A soccer player is sliding on his knees to celebrate", - "role": "assistant" - } - ], - "images": [ - "mllm_demo_data/2.jpg" - ] - }, - { - "messages": [ - { - "content": "What is this?", - "role": "user" - }, - { - "content": "A man is giving a speech.", - "role": "assistant" - } - ], - "images": [ - "mllm_demo_data/3.jpg" - ] - }, - { - "messages": [ - { - "content": "对照片进行简明扼要的概括。", - "role": "user" - }, - { - "content": "两个足球运动员在场上", - "role": "assistant" - } - ], - "images": [ - "mllm_demo_data/1.jpg" - ] - }, - { - "messages": [ - { - "content": "为图片写一个简短但内容丰富的摘要。", - "role": "user" - }, - { - "content": "一个足球运动员在跪地滑行庆祝", - "role": "assistant" - } - ], - "images": [ - "mllm_demo_data/2.jpg" - ] - }, - { - "messages": [ - { - "content": "这是什么?", - "role": "user" - }, - { - "content": "一个男人在演讲", - "role": "assistant" - } - ], - "images": [ - "mllm_demo_data/3.jpg" - ] - } -] \ No newline at end of file From a72387666319e2e5c7fd62079d1b0c9f96d8d0ad Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Mon, 27 May 2024 20:23:24 +0800 Subject: [PATCH 006/162] support Aya23 Former-commit-id: 071935b90006e2c79e39bb9ee0c5d48c6c910501 --- src/llamafactory/extras/constants.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/llamafactory/extras/constants.py b/src/llamafactory/extras/constants.py index 087612fc..32ca5387 100644 --- a/src/llamafactory/extras/constants.py +++ b/src/llamafactory/extras/constants.py @@ -86,6 +86,19 @@ def register_model_group( VISION_MODELS.add(prefix) +register_model_group( + models={ + "Aya-23-8B-Chat": { + DownloadSource.DEFAULT: "CohereForAI/aya-23-8B", + }, + "Aya-23-35B-Chat": { + DownloadSource.DEFAULT: "CohereForAI/aya-23-35B", + }, + }, + template="cohere", +) + + register_model_group( models={ "Baichuan-7B-Base": { From a3dd6f887c7c268c09265a3c8d9194f0c5ed89b0 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Mon, 27 May 2024 
20:37:57 +0800 Subject: [PATCH 007/162] fix full/freeze tuning for mllm Former-commit-id: df5860ddb593d5b82163a585d12160b41dbce0f3 --- data/dataset_info.json | 28 +++++------ src/llamafactory/hparams/finetuning_args.py | 16 ++++++- src/llamafactory/hparams/model_args.py | 4 -- src/llamafactory/model/adapter.py | 50 +++++++++++++------- src/llamafactory/model/utils/misc.py | 30 +++++------- src/llamafactory/model/utils/quantization.py | 2 + src/llamafactory/model/utils/visual.py | 7 +-- 7 files changed, 76 insertions(+), 61 deletions(-) diff --git a/data/dataset_info.json b/data/dataset_info.json index 0a148431..2d9b0c83 100644 --- a/data/dataset_info.json +++ b/data/dataset_info.json @@ -38,20 +38,6 @@ "assistant_tag": "assistant" } }, - "mllm_pt_demo": { - "hf_hub_url": "BUAADreamer/mllm_pt_demo", - "formatting": "sharegpt", - "columns": { - "messages": "messages", - "images": "images" - }, - "tags": { - "role_tag": "role", - "content_tag": "content", - "user_tag": "user", - "assistant_tag": "assistant" - } - }, "alpaca_en": { "hf_hub_url": "llamafactory/alpaca_en", "ms_hub_url": "llamafactory/alpaca_en" @@ -322,6 +308,20 @@ "assistant_tag": "assistant" } }, + "mllm_pt_demo": { + "hf_hub_url": "BUAADreamer/mllm_pt_demo", + "formatting": "sharegpt", + "columns": { + "messages": "messages", + "images": "images" + }, + "tags": { + "role_tag": "role", + "content_tag": "content", + "user_tag": "user", + "assistant_tag": "assistant" + } + }, "oasst_de": { "hf_hub_url": "mayflowergmbh/oasst_de" }, diff --git a/src/llamafactory/hparams/finetuning_args.py b/src/llamafactory/hparams/finetuning_args.py index 05b246ae..b9322f18 100644 --- a/src/llamafactory/hparams/finetuning_args.py +++ b/src/llamafactory/hparams/finetuning_args.py @@ -311,6 +311,14 @@ class FinetuningArguments(FreezeArguments, LoraArguments, RLHFArguments, GaloreA default=False, metadata={"help": "Whether or not to make only the parameters in the expanded blocks trainable."}, ) + freeze_vision_tower: bool = field( + default=True, + metadata={"help": "Whether ot not to freeze vision tower in MLLM training."}, + ) + train_mm_proj_only: bool = field( + default=False, + metadata={"help": "Whether or not to train the multimodal projector for MLLM only."}, + ) plot_loss: bool = field( default=False, metadata={"help": "Whether or not to save the training loss curves."}, @@ -328,6 +336,7 @@ class FinetuningArguments(FreezeArguments, LoraArguments, RLHFArguments, GaloreA self.lora_target = split_arg(self.lora_target) self.additional_target = split_arg(self.additional_target) self.galore_target = split_arg(self.galore_target) + self.freeze_vision_tower = self.freeze_vision_tower or self.train_mm_proj_only assert self.finetuning_type in ["lora", "freeze", "full"], "Invalid fine-tuning method." assert self.ref_model_quantization_bit in [None, 8, 4], "We only accept 4-bit or 8-bit quantization." 
@@ -345,7 +354,7 @@ class FinetuningArguments(FreezeArguments, LoraArguments, RLHFArguments, GaloreA raise ValueError("`dpo_label_smoothing` is only valid for sigmoid loss function.") if self.use_llama_pro and self.finetuning_type == "full": - raise ValueError("`use_llama_pro` is only valid for the Freeze or LoRA training.") + raise ValueError("`use_llama_pro` is only valid for Freeze or LoRA training.") if self.finetuning_type == "lora" and (self.use_galore or self.use_badam): raise ValueError("Cannot use LoRA with GaLore or BAdam together.") @@ -354,4 +363,7 @@ class FinetuningArguments(FreezeArguments, LoraArguments, RLHFArguments, GaloreA raise ValueError("Cannot use GaLore with BAdam together.") if self.loraplus_lr_ratio is not None and self.finetuning_type != "lora": - raise ValueError("`loraplus_lr_ratio` is only valid for the LoRA training.") + raise ValueError("`loraplus_lr_ratio` is only valid for LoRA training.") + + if self.train_mm_proj_only and self.finetuning_type != "full": + raise ValueError("`train_mm_proj_only` is only valid for full training.") diff --git a/src/llamafactory/hparams/model_args.py b/src/llamafactory/hparams/model_args.py index 781c7e99..650d1c22 100644 --- a/src/llamafactory/hparams/model_args.py +++ b/src/llamafactory/hparams/model_args.py @@ -85,10 +85,6 @@ class ModelArguments: default=False, metadata={"help": "Whethor or not to use multimodal LLM that accepts visual inputs."}, ) - tune_mm_proj: bool = field( - default=False, - metadata={"help": "Whethor or not only finetune mm_projector for MLLM."}, - ) moe_aux_loss_coef: Optional[float] = field( default=None, metadata={"help": "Coefficient of the auxiliary router loss in mixture-of-experts model."}, diff --git a/src/llamafactory/model/adapter.py b/src/llamafactory/model/adapter.py index 015db8a0..a9204ef0 100644 --- a/src/llamafactory/model/adapter.py +++ b/src/llamafactory/model/adapter.py @@ -10,7 +10,6 @@ from ..extras.logging import get_logger from .utils.misc import find_all_linear_modules, find_expanded_modules from .utils.quantization import QuantizationMethod from .utils.unsloth import get_unsloth_peft_model, load_unsloth_peft_model -from .utils.visual import filter_vision_tower_linear if TYPE_CHECKING: @@ -53,21 +52,33 @@ def init_adapter( if finetuning_args.finetuning_type == "full" and is_trainable: logger.info("Fine-tuning method: Full") - if cast_trainable_params_to_fp32: - model = model.float() - if model_args.visual_inputs and hasattr(model, "vision_tower"): # freeze vision model - model.vision_tower.requires_grad_(False) + forbidden_modules = set() + if model_args.visual_inputs and finetuning_args.freeze_vision_tower: + forbidden_modules.add("vision_tower") - if model_args.visual_inputs and hasattr(model, "language_model") and model_args.tune_mm_proj: # freeze language model if only tune mm_proj - model.language_model.requires_grad_(False) + if model_args.visual_inputs and finetuning_args.train_mm_proj_only: + forbidden_modules.add("language_model") + + for name, param in model.named_parameters(): + if not any(forbidden_module in name for forbidden_module in forbidden_modules): + if cast_trainable_params_to_fp32: + param.data = param.data.to(torch.float32) + else: + param.requires_grad_(False) if finetuning_args.finetuning_type == "freeze" and is_trainable: logger.info("Fine-tuning method: Freeze") + + if model_args.visual_inputs: + config = model.config.text_config + else: + config = model.config + num_layers = ( - getattr(model.config, "num_hidden_layers", None) - or 
getattr(model.config, "num_layers", None) - or getattr(model.config, "n_layer", None) + getattr(config, "num_hidden_layers", None) + or getattr(config, "num_layers", None) + or getattr(config, "n_layer", None) ) if not num_layers: raise ValueError("Current model does not support freeze tuning.") @@ -119,16 +130,19 @@ def init_adapter( trainable_layers.append(module_name) + forbidden_modules = set() + if model_args.visual_inputs and finetuning_args.freeze_vision_tower: + forbidden_modules.add("vision_tower") + for name, param in model.named_parameters(): - if any(trainable_layer in name for trainable_layer in trainable_layers): + if any(trainable_layer in name for trainable_layer in trainable_layers) and not any( + forbidden_module in name for forbidden_module in forbidden_modules + ): if cast_trainable_params_to_fp32: param.data = param.data.to(torch.float32) else: param.requires_grad_(False) - if model_args.visual_inputs and hasattr(model, "vision_tower"): # freeze vision model - model.vision_tower.requires_grad_(False) - logger.info("Set trainable layers: {}".format(",".join(map(str, trainable_layer_ids)))) if finetuning_args.finetuning_type == "lora": @@ -177,15 +191,15 @@ def init_adapter( if is_trainable and adapter_to_resume is None: # create new lora weights while training if len(finetuning_args.lora_target) == 1 and finetuning_args.lora_target[0] == "all": - target_modules = find_all_linear_modules(model) + target_modules = find_all_linear_modules(model, finetuning_args.freeze_vision_tower) else: target_modules = finetuning_args.lora_target if finetuning_args.use_llama_pro: - target_modules = find_expanded_modules(model, target_modules, finetuning_args.num_layer_trainable) + target_modules = find_expanded_modules(model, target_modules, finetuning_args.freeze_trainable_layers) - if model_args.visual_inputs: - target_modules = filter_vision_tower_linear(target_modules) + if model_args.visual_inputs and finetuning_args.freeze_vision_tower: + target_modules = "^(?!.*vision_tower).*(?:{}).*".format("|".join(target_modules)) if ( finetuning_args.use_dora diff --git a/src/llamafactory/model/utils/misc.py b/src/llamafactory/model/utils/misc.py index eca68866..4851bd29 100644 --- a/src/llamafactory/model/utils/misc.py +++ b/src/llamafactory/model/utils/misc.py @@ -1,9 +1,6 @@ from typing import TYPE_CHECKING, List -import torch - from ...extras.logging import get_logger -from .quantization import QuantizationMethod if TYPE_CHECKING: @@ -13,29 +10,28 @@ if TYPE_CHECKING: logger = get_logger(__name__) -def find_all_linear_modules(model: "PreTrainedModel") -> List[str]: +def find_all_linear_modules(model: "PreTrainedModel", freeze_vision_tower: bool) -> List[str]: r""" Finds all available modules to apply lora or galore. 
""" - quantization_method = getattr(model, "quantization_method", None) - if quantization_method is None: - linear_cls = torch.nn.Linear - elif quantization_method == QuantizationMethod.BITS_AND_BYTES: - import bitsandbytes as bnb + forbidden_modules = {"lm_head"} - linear_cls = bnb.nn.Linear4bit if getattr(model, "is_loaded_in_4bit", False) else bnb.nn.Linear8bitLt - else: - raise ValueError("Finding linear modules for {} models is not supported.".format(quantization_method)) - - output_layer_names = ["lm_head"] if model.config.model_type == "chatglm": - output_layer_names.append("output_layer") + forbidden_modules.add("output_layer") elif model.config.model_type == "internlm2": - output_layer_names.append("output") + forbidden_modules.add("output") + elif model.config.model_type in ["llava", "paligemma"]: + forbidden_modules.add("multi_modal_projector") + + if freeze_vision_tower: + forbidden_modules.add("vision_tower") module_names = set() for name, module in model.named_modules(): - if isinstance(module, linear_cls) and not any(output_layer in name for output_layer in output_layer_names): + if any(forbidden_module in name for forbidden_module in forbidden_modules): + continue + + if "Linear" in module.__class__.__name__ and "Embedding" not in module.__class__.__name__: module_names.add(name.split(".")[-1]) logger.info("Found linear modules: {}".format(",".join(module_names))) diff --git a/src/llamafactory/model/utils/quantization.py b/src/llamafactory/model/utils/quantization.py index 161ad5aa..02a54f07 100644 --- a/src/llamafactory/model/utils/quantization.py +++ b/src/llamafactory/model/utils/quantization.py @@ -35,6 +35,8 @@ class QuantizationMethod(str, Enum): AWQ = "awq" AQLM = "aqlm" QUANTO = "quanto" + EETQ = "eetq" + HQQ = "hqq" def _get_quantization_dataset(tokenizer: "PreTrainedTokenizer", model_args: "ModelArguments") -> List[str]: diff --git a/src/llamafactory/model/utils/visual.py b/src/llamafactory/model/utils/visual.py index a91777ba..c8260b7f 100644 --- a/src/llamafactory/model/utils/visual.py +++ b/src/llamafactory/model/utils/visual.py @@ -1,4 +1,4 @@ -from typing import TYPE_CHECKING, Tuple, List +from typing import TYPE_CHECKING, Tuple import torch import transformers.models @@ -82,8 +82,3 @@ def configure_visual_model(config: "PretrainedConfig") -> None: if getattr(config, "is_yi_vl_derived_model", None): logger.info("Detected Yi-VL model, applying projector patch.") transformers.models.llava.modeling_llava.LlavaMultiModalProjector = LlavaMultiModalProjectorForYiVL - - -def filter_vision_tower_linear(target_modules: List[str]) -> str: - target_modules = f"^(?!.*vision_tower).*(?:{'|'.join(target_modules)}).*" - return target_modules From db745355bb0aaa5a132ccc52929c26eeddbe86c4 Mon Sep 17 00:00:00 2001 From: Jianbai Ye Date: Mon, 27 May 2024 20:42:08 +0800 Subject: [PATCH 008/162] add openchat-3.6-8B support Former-commit-id: b66f39d50d896d7597a1506e67ec210b31c9b700 --- src/llamafactory/data/template.py | 17 +++++++++++++++++ src/llamafactory/extras/constants.py | 9 +++++++++ 2 files changed, 26 insertions(+) diff --git a/src/llamafactory/data/template.py b/src/llamafactory/data/template.py index 979390ce..eba23271 100644 --- a/src/llamafactory/data/template.py +++ b/src/llamafactory/data/template.py @@ -756,6 +756,23 @@ _register_template( force_system=True, ) +_register_template( + name="openchat-3.6", + format_user=StringFormatter( + slots=[ + ( + "<|start_header_id|>GPT4 Correct User<|end_header_id|>\n\n{{content}}<|eot_id|>" + "<|start_header_id|>GPT4 Correct 
Assistant<|end_header_id|>\n\n" + ) + ] + ), + format_system=StringFormatter( + slots=[{"bos_token"}, "<|start_header_id|>System<|end_header_id|>\n\n{{content}}<|eot_id|>"] + ), + stop_words=["<|eot_id|>"], + replace_eos=True, +) + _register_template( name="orion", diff --git a/src/llamafactory/extras/constants.py b/src/llamafactory/extras/constants.py index 087612fc..503df641 100644 --- a/src/llamafactory/extras/constants.py +++ b/src/llamafactory/extras/constants.py @@ -706,6 +706,15 @@ register_model_group( template="openchat", ) +register_model_group( + models={ + "OpenChat3.6-8B-Chat": { + DownloadSource.DEFAULT: "openchat/openchat-3.6-8b-20240522", + } + }, + template="openchat-3.6", +) + register_model_group( models={ From 234b4a4f2e60788e86aaee5f02568eafb8716543 Mon Sep 17 00:00:00 2001 From: hoshi-hiyouga Date: Mon, 27 May 2024 20:51:26 +0800 Subject: [PATCH 009/162] Update template.py Former-commit-id: af869e4c48eb426c4078415533f6dab89123a9d8 --- src/llamafactory/data/template.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/llamafactory/data/template.py b/src/llamafactory/data/template.py index eba23271..3d719e5b 100644 --- a/src/llamafactory/data/template.py +++ b/src/llamafactory/data/template.py @@ -761,16 +761,15 @@ _register_template( format_user=StringFormatter( slots=[ ( - "<|start_header_id|>GPT4 Correct User<|end_header_id|>\n\n{{content}}<|eot_id|>" + "<|start_header_id|>GPT4 Correct User<|end_header_id|>\n\n{{content}}<|eot_id|>", "<|start_header_id|>GPT4 Correct Assistant<|end_header_id|>\n\n" ) ] ), - format_system=StringFormatter( - slots=[{"bos_token"}, "<|start_header_id|>System<|end_header_id|>\n\n{{content}}<|eot_id|>"] - ), + format_system=StringFormatter(slots=[{"bos_token"}, "{{content}}"]), stop_words=["<|eot_id|>"], replace_eos=True, + force_system=True, ) From f734d04f41e3453580dac5183511b3d4974cc271 Mon Sep 17 00:00:00 2001 From: hoshi-hiyouga Date: Mon, 27 May 2024 20:51:56 +0800 Subject: [PATCH 010/162] Update template.py Former-commit-id: f4dabce0a71c9978e051e70886941b64b928ffe2 --- src/llamafactory/data/template.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llamafactory/data/template.py b/src/llamafactory/data/template.py index 3d719e5b..5a12524d 100644 --- a/src/llamafactory/data/template.py +++ b/src/llamafactory/data/template.py @@ -761,7 +761,7 @@ _register_template( format_user=StringFormatter( slots=[ ( - "<|start_header_id|>GPT4 Correct User<|end_header_id|>\n\n{{content}}<|eot_id|>", + "<|start_header_id|>GPT4 Correct User<|end_header_id|>\n\n{{content}}<|eot_id|>" "<|start_header_id|>GPT4 Correct Assistant<|end_header_id|>\n\n" ) ] From 7e9372bb2f685be2a68253356cc97a1d31009337 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Mon, 27 May 2024 20:54:26 +0800 Subject: [PATCH 011/162] tiny fix Former-commit-id: 4c47b3dcef9e400a1c35fce1ad53619a0a86fe81 --- src/llamafactory/data/template.py | 3 +-- src/llamafactory/extras/constants.py | 1 + 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/llamafactory/data/template.py b/src/llamafactory/data/template.py index 5a12524d..8332b0cb 100644 --- a/src/llamafactory/data/template.py +++ b/src/llamafactory/data/template.py @@ -742,7 +742,6 @@ _register_template( _register_template( name="olmo", format_user=StringFormatter(slots=["<|user|>\n{{content}}<|assistant|>\n"]), - format_assistant=StringFormatter(slots=["{{content}}", {"eos_token"}]), format_system=StringFormatter(slots=[{"eos_token"}, "{{content}}"]), 
force_system=True, ) @@ -751,11 +750,11 @@ _register_template( _register_template( name="openchat", format_user=StringFormatter(slots=["GPT4 Correct User: {{content}}", {"eos_token"}, "GPT4 Correct Assistant:"]), - format_assistant=StringFormatter(slots=["{{content}}", {"eos_token"}]), format_system=StringFormatter(slots=[{"bos_token"}, "{{content}}"]), force_system=True, ) + _register_template( name="openchat-3.6", format_user=StringFormatter( diff --git a/src/llamafactory/extras/constants.py b/src/llamafactory/extras/constants.py index 3cb221bf..d1fcd4f6 100644 --- a/src/llamafactory/extras/constants.py +++ b/src/llamafactory/extras/constants.py @@ -719,6 +719,7 @@ register_model_group( template="openchat", ) + register_model_group( models={ "OpenChat3.6-8B-Chat": { From 7aae43aa0e977ce99cd1f0829fa40a767b40a95e Mon Sep 17 00:00:00 2001 From: Yimi81 <1548222878@qq.com> Date: Mon, 27 May 2024 13:11:25 +0000 Subject: [PATCH 012/162] fix yi template Former-commit-id: b3669c8989c3adda305416245e32e9e5a3b7caac --- src/llamafactory/data/template.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/llamafactory/data/template.py b/src/llamafactory/data/template.py index 8332b0cb..f13b303b 100644 --- a/src/llamafactory/data/template.py +++ b/src/llamafactory/data/template.py @@ -872,6 +872,7 @@ _register_template( _register_template( name="yi", format_user=StringFormatter(slots=["<|im_start|>user\n{{content}}<|im_end|>\n<|im_start|>assistant\n"]), + format_system=StringFormatter(slots=["<|im_start|>system\n{{content}}<|im_end|>\n"]), format_separator=EmptyFormatter(slots=["\n"]), stop_words=["<|im_end|>"], replace_eos=True, From 60f6fc39351867add400d767ca9886f7dc487bcb Mon Sep 17 00:00:00 2001 From: MengqingCao Date: Tue, 28 May 2024 01:33:54 +0000 Subject: [PATCH 013/162] add Ascend npu doc and dependency Former-commit-id: 803d9f142a294f8c1e0b4e2046c214b0857ccfd6 --- README.md | 20 +++++++++++++++++++- README_zh.md | 20 +++++++++++++++++++- setup.py | 1 + 3 files changed, 39 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index fcc96882..ca70110f 100644 --- a/README.md +++ b/README.md @@ -347,7 +347,25 @@ To enable FlashAttention-2 on the Windows platform, you need to install the prec Join [NPU user group](assets/wechat_npu.jpg). -To utilize Ascend NPU devices for (distributed) training and inference, you need to install the **[torch-npu](https://gitee.com/ascend/pytorch)** library and the **[Ascend CANN Kernels](https://www.hiascend.com/developer/download/community/result?module=cann)**. +Use `pip install -e .[torch_npu]` to install LLaMA-Factory with **[torch-npu](https://gitee.com/ascend/pytorch)** library. + +To utilize Ascend NPU devices for (distributed) training and inference, you need to install the **[Ascend CANN Toolkit and Kernels](https://www.hiascend.com/developer/download/community/result?module=cann)**. 
You can follow chapter **[install CANN](https://www.hiascend.com/document/detail/zh/CANNCommunityEdition/80RC2alpha002/quickstart/quickstart/quickstart_18_0004.html)** in the installation tutorial to install CANN Toolkit and the kernels, or use the fast installation as following: + +```bash +# replace the url according to your choice +# install CANN Toolkit +wget https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/Milan-ASL/Milan-ASL%20V100R001C17SPC701/Ascend-cann-toolkit_8.0.RC1.alpha001_linux-"$(uname -i)".run +chmod +x Ascend-cann-toolkit_8.0.RC1.alpha001_linux-"$(uname -i)".run +./Ascend-cann-toolkit_8.0.RC1.alpha001_linux-"$(uname -i)".run --install + +# install CANN Kernels +wget https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/Milan-ASL/Milan-ASL%20V100R001C18B800TP015/Ascend-cann-kernels-910b_8.0.RC1.alpha001_linux.run +chmod +x Ascend-cann-kernels-910b_8.0.RC1.alpha001_linux.run +./Ascend-cann-kernels-910b_8.0.RC1.alpha001_linux.run --install + +# set env variables +source /usr/local/Ascend/ascend-toolkit/set_env.sh +``` | Requirement | Minimum | Recommend | | ------------ | ------- | --------- | diff --git a/README_zh.md b/README_zh.md index 2e0b4f34..4f659588 100644 --- a/README_zh.md +++ b/README_zh.md @@ -347,7 +347,25 @@ pip install https://github.com/jllllll/bitsandbytes-windows-webui/releases/downl 加入 [NPU 用户群](assets/wechat_npu.jpg)。 -如果使用昇腾 NPU 设备进行(分布式)训练或推理,需要安装 **[torch-npu](https://gitee.com/ascend/pytorch)** 库和 **[Ascend CANN Kernels](https://www.hiascend.com/developer/download/community/result?module=cann)**。 +使用 `pip install -e .[torch_npu]` 来安装带有 **[torch-npu](https://gitee.com/ascend/pytorch)** 的 LLaMA-Factory。 + +如果使用昇腾 NPU 设备进行(分布式)训练或推理,需要安装 **[Ascend CANN Toolkit and Kernels](https://www.hiascend.com/developer/download/community/result?module=cann)**。可以参考安装教程中的**[安装 CANN](https://www.hiascend.com/document/detail/zh/CANNCommunityEdition/80RC2alpha002/quickstart/quickstart/quickstart_18_0004.html)**章节来进行安装,或使用以下快速安装指令: + +```bash +# 请替换 URL 为你需要的 CANN 版本对应的 URL +# 安装 CANN Toolkit +wget https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/Milan-ASL/Milan-ASL%20V100R001C17SPC701/Ascend-cann-toolkit_8.0.RC1.alpha001_linux-"$(uname -i)".run +chmod +x Ascend-cann-toolkit_8.0.RC1.alpha001_linux-"$(uname -i)".run +./Ascend-cann-toolkit_8.0.RC1.alpha001_linux-"$(uname -i)".run --install + +# 安装 CANN Kernels +wget https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/Milan-ASL/Milan-ASL%20V100R001C18B800TP015/Ascend-cann-kernels-910b_8.0.RC1.alpha001_linux.run +chmod +x Ascend-cann-kernels-910b_8.0.RC1.alpha001_linux.run +./Ascend-cann-kernels-910b_8.0.RC1.alpha001_linux.run --install + +# 设置环境变量 +source /usr/local/Ascend/ascend-toolkit/set_env.sh +``` | 依赖项 | 至少 | 推荐 | | ------------ | ------- | --------- | diff --git a/setup.py b/setup.py index 4d948450..87620c37 100644 --- a/setup.py +++ b/setup.py @@ -33,6 +33,7 @@ extra_require = { "qwen": ["tiktoken", "transformers_stream_generator"], "modelscope": ["modelscope"], "quality": ["ruff"], + "torch_npu": ["torch==2.2.0", "torch_npu==2.2.0", "decorator"] } From f867958f91fef88ddd91dcf275d83f204497a84f Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Tue, 28 May 2024 13:44:22 +0800 Subject: [PATCH 014/162] fix #3931 Former-commit-id: 47e0072416b545d9718af4fa266a83f747b9a4f7 --- .dockerignore | 2 ++ README.md | 2 +- README_zh.md | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/.dockerignore b/.dockerignore index ce67d58a..2ac0e11d 100644 --- a/.dockerignore +++ b/.dockerignore @@ -4,6 
+4,8 @@ .venv cache data +hf_cache +output examples .dockerignore .gitattributes diff --git a/README.md b/README.md index 78312e07..974b30d0 100644 --- a/README.md +++ b/README.md @@ -45,7 +45,7 @@ Choose your path: ## Features - **Various models**: LLaMA, LLaVA, Mistral, Mixtral-MoE, Qwen, Yi, Gemma, Baichuan, ChatGLM, Phi, etc. -- **Integrated methods**: (Continuous) pre-training, (multimodal) supervised fine-tuning, reward modeling, PPO, DPO, KTO and ORPO. +- **Integrated methods**: (Continuous) pre-training, (multimodal) supervised fine-tuning, reward modeling, PPO, DPO, KTO, ORPO, etc. - **Scalable resources**: 32-bit full-tuning, 16-bit freeze-tuning, 16-bit LoRA and 2/4/8-bit QLoRA via AQLM/AWQ/GPTQ/LLM.int8. - **Advanced algorithms**: GaLore, BAdam, DoRA, LongLoRA, LLaMA Pro, Mixture-of-Depths, LoRA+, LoftQ and Agent tuning. - **Practical tricks**: FlashAttention-2, Unsloth, RoPE scaling, NEFTune and rsLoRA. diff --git a/README_zh.md b/README_zh.md index 5acf3dd1..7106bbab 100644 --- a/README_zh.md +++ b/README_zh.md @@ -45,7 +45,7 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/ec36a9dd-37f4-4f72-81bd ## 项目特色 - **多种模型**:LLaMA、LLaVA、Mistral、Mixtral-MoE、Qwen、Yi、Gemma、Baichuan、ChatGLM、Phi 等等。 -- **集成方法**:(增量)预训练、(多模态)指令监督微调、奖励模型训练、PPO 训练、DPO 训练、KTO 训练和 ORPO 训练。 +- **集成方法**:(增量)预训练、(多模态)指令监督微调、奖励模型训练、PPO 训练、DPO 训练、KTO 训练、ORPO 训练等等。 - **多种精度**:32 比特全参数微调、16 比特冻结微调、16 比特 LoRA 微调和基于 AQLM/AWQ/GPTQ/LLM.int8 的 2/4/8 比特 QLoRA 微调。 - **先进算法**:GaLore、BAdam、DoRA、LongLoRA、LLaMA Pro、Mixture-of-Depths、LoRA+、LoftQ 和 Agent 微调。 - **实用技巧**:FlashAttention-2、Unsloth、RoPE scaling、NEFTune 和 rsLoRA。 From dbd4ba35c4645f53da0f1da7474f889ef29e2b39 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Tue, 28 May 2024 16:19:56 +0800 Subject: [PATCH 015/162] update readme Former-commit-id: 30ef8ee1e86136f38f105b67f70c417d20552f41 --- README.md | 15 ++++++++++++--- README_zh.md | 15 ++++++++++++--- 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 974b30d0..9a4bd934 100644 --- a/README.md +++ b/README.md @@ -174,9 +174,9 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/ | [Yuan](https://huggingface.co/IEITYuan) | 2B/51B/102B | q_proj,v_proj | yuan | > [!NOTE] -> **Default module** is used for the `--lora_target` argument, you can use `--lora_target all` to specify all the available modules for better convergence. +> **Default module** is used for the `lora_target` argument, you can use `lora_target: all` to specify all the available modules for better convergence. > -> For the "base" models, the `--template` argument can be chosen from `default`, `alpaca`, `vicuna` etc. But make sure to use the **corresponding template** for the "instruct/chat" models. +> For the "base" models, the `template` argument can be chosen from `default`, `alpaca`, `vicuna` etc. But make sure to use the **corresponding template** for the "instruct/chat" models. > > Remember to use the **SAME** template in training and inference. @@ -448,7 +448,16 @@ If you have trouble with downloading models and datasets from Hugging Face, you export USE_MODELSCOPE_HUB=1 # `set USE_MODELSCOPE_HUB=1` for Windows ``` -Train the model by specifying a model ID of the ModelScope Hub as the `--model_name_or_path`. You can find a full list of model IDs at [ModelScope Hub](https://modelscope.cn/models), e.g., `LLM-Research/Meta-Llama-3-8B-Instruct`. +Train the model by specifying a model ID of the ModelScope Hub as the `model_name_or_path`. 
You can find a full list of model IDs at [ModelScope Hub](https://modelscope.cn/models), e.g., `LLM-Research/Meta-Llama-3-8B-Instruct`. + +### Use W&B Logging + +To use [Weights & Biases](https://wandb.ai) for logging experimental results, you need to add the following arguments. + +```yaml +report_to: wandb +run_name: test_run # optional +``` ## Projects using LLaMA Factory diff --git a/README_zh.md b/README_zh.md index 7106bbab..73426a7f 100644 --- a/README_zh.md +++ b/README_zh.md @@ -174,9 +174,9 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/ec36a9dd-37f4-4f72-81bd | [Yuan](https://huggingface.co/IEITYuan) | 2B/51B/102B | q_proj,v_proj | yuan | > [!NOTE] -> **默认模块**应作为 `--lora_target` 参数的默认值,可使用 `--lora_target all` 参数指定全部模块以取得更好的效果。 +> **默认模块**应作为 `lora_target` 参数的默认值,可使用 `lora_target: all` 参数指定全部模块以取得更好的效果。 > -> 对于所有“基座”(Base)模型,`--template` 参数可以是 `default`, `alpaca`, `vicuna` 等任意值。但“对话”(Instruct/Chat)模型请务必使用**对应的模板**。 +> 对于所有“基座”(Base)模型,`template` 参数可以是 `default`, `alpaca`, `vicuna` 等任意值。但“对话”(Instruct/Chat)模型请务必使用**对应的模板**。 > > 请务必在训练和推理时使用**完全一致**的模板。 @@ -446,7 +446,16 @@ CUDA_VISIBLE_DEVICES=0,1 API_PORT=8000 llamafactory-cli api examples/inference/l export USE_MODELSCOPE_HUB=1 # Windows 使用 `set USE_MODELSCOPE_HUB=1` ``` -将 `--model_name_or_path` 设置为模型 ID 来加载对应的模型。在[魔搭社区](https://modelscope.cn/models)查看所有可用的模型,例如 `LLM-Research/Meta-Llama-3-8B-Instruct`。 +将 `model_name_or_path` 设置为模型 ID 来加载对应的模型。在[魔搭社区](https://modelscope.cn/models)查看所有可用的模型,例如 `LLM-Research/Meta-Llama-3-8B-Instruct`。 + +### 使用 W&B 面板 + +若要使用 [Weights & Biases](https://wandb.ai) 记录实验数据,请添加下面的参数。 + +```yaml +report_to: wandb +run_name: test_run # 可选 +``` ## 使用了 LLaMA Factory 的项目 From 1d5f6960060f7d8d4433e32e02312e94c15412f7 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Tue, 28 May 2024 16:41:34 +0800 Subject: [PATCH 016/162] update readme Former-commit-id: e2c7de1b5147801b301cfc5da0e2866273da18f5 --- README.md | 3 ++- README_zh.md | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 9a4bd934..b9225a94 100644 --- a/README.md +++ b/README.md @@ -8,9 +8,10 @@ [![GitHub pull request](https://img.shields.io/badge/PRs-welcome-blue)](https://github.com/hiyouga/LLaMA-Factory/pulls) [![Discord](https://dcbadge.vercel.app/api/server/rKfvV9r9FK?compact=true&style=flat)](https://discord.gg/rKfvV9r9FK) [![Twitter](https://img.shields.io/twitter/follow/llamafactory_ai)](https://twitter.com/llamafactory_ai) +[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1eRTPn37ltBbYsISy9Aw2NuI2Aq5CQrD9?usp=sharing) +[![Open in DSW](https://gallery.pai-ml.com/assets/open-in-dsw.svg)](https://gallery.pai-ml.com/#/preview/deepLearning/nlp/llama_factory) [![Spaces](https://img.shields.io/badge/🤗-Open%20in%20Spaces-blue)](https://huggingface.co/spaces/hiyouga/LLaMA-Board) [![Studios](https://img.shields.io/badge/ModelScope-Open%20in%20Studios-blue)](https://modelscope.cn/studios/hiyouga/LLaMA-Board) -[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1eRTPn37ltBbYsISy9Aw2NuI2Aq5CQrD9?usp=sharing) [![GitHub Tread](https://trendshift.io/api/badge/repositories/4535)](https://trendshift.io/repositories/4535) diff --git a/README_zh.md b/README_zh.md index 73426a7f..0f47b88e 100644 --- a/README_zh.md +++ b/README_zh.md @@ -8,9 +8,10 @@ [![GitHub pull 
request](https://img.shields.io/badge/PRs-welcome-blue)](https://github.com/hiyouga/LLaMA-Factory/pulls) [![Discord](https://dcbadge.vercel.app/api/server/rKfvV9r9FK?compact=true&style=flat)](https://discord.gg/rKfvV9r9FK) [![Twitter](https://img.shields.io/twitter/follow/llamafactory_ai)](https://twitter.com/llamafactory_ai) +[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1d5KQtbemerlSDSxZIfAaWXhKr30QypiK?usp=sharing) +[![Open in DSW](https://gallery.pai-ml.com/assets/open-in-dsw.svg)](https://gallery.pai-ml.com/#/preview/deepLearning/nlp/llama_factory) [![Spaces](https://img.shields.io/badge/🤗-Open%20in%20Spaces-blue)](https://huggingface.co/spaces/hiyouga/LLaMA-Board) [![Studios](https://img.shields.io/badge/ModelScope-Open%20in%20Studios-blue)](https://modelscope.cn/studios/hiyouga/LLaMA-Board) -[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1d5KQtbemerlSDSxZIfAaWXhKr30QypiK?usp=sharing) [![GitHub Tread](https://trendshift.io/api/badge/repositories/4535)](https://trendshift.io/repositories/4535) From edbc4bdac43fcdcdee0565cb5bc0ef7257bd4544 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Tue, 28 May 2024 19:24:22 +0800 Subject: [PATCH 017/162] support DDP in webui Former-commit-id: d059262ff8dc857f597d2657546ec625726a664a --- examples/README.md | 16 +++++++----- examples/README_zh.md | 16 +++++++----- examples/accelerate/master_config.yaml | 18 ------------- examples/accelerate/single_config.yaml | 16 ------------ examples/accelerate/slave_config.yaml | 18 ------------- examples/full_multi_gpu/multi_node.sh | 15 ----------- examples/full_multi_gpu/predict.sh | 5 ---- examples/full_multi_gpu/single_node.sh | 15 ----------- examples/lora_multi_gpu/ds_zero3.sh | 15 ----------- examples/lora_multi_gpu/multi_node.sh | 6 ----- examples/lora_multi_gpu/single_node.sh | 5 ---- examples/lora_multi_npu/ds_zero0.sh | 15 ----------- src/llamafactory/cli.py | 36 +++++++++++++++++++++++++- src/llamafactory/extras/misc.py | 17 +++++++++--- src/llamafactory/hparams/parser.py | 5 ---- src/llamafactory/launcher.py | 9 +++++++ src/llamafactory/webui/locales.py | 5 ---- src/llamafactory/webui/runner.py | 9 ++----- src/llamafactory/webui/utils.py | 3 +-- 19 files changed, 78 insertions(+), 166 deletions(-) delete mode 100644 examples/accelerate/master_config.yaml delete mode 100644 examples/accelerate/single_config.yaml delete mode 100644 examples/accelerate/slave_config.yaml delete mode 100644 examples/full_multi_gpu/multi_node.sh delete mode 100644 examples/full_multi_gpu/predict.sh delete mode 100644 examples/full_multi_gpu/single_node.sh delete mode 100644 examples/lora_multi_gpu/ds_zero3.sh delete mode 100644 examples/lora_multi_gpu/multi_node.sh delete mode 100644 examples/lora_multi_gpu/single_node.sh delete mode 100644 examples/lora_multi_npu/ds_zero0.sh create mode 100644 src/llamafactory/launcher.py diff --git a/examples/README.md b/examples/README.md index 9c6d5fb0..727b27c8 100644 --- a/examples/README.md +++ b/examples/README.md @@ -110,19 +110,20 @@ CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/qlora_single_gpu/llama3_l #### Supervised Fine-Tuning with Accelerate on Single Node ```bash -bash examples/lora_multi_gpu/single_node.sh +CUDA_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train examples/lora_multi_gpu/llama3_lora_sft.yaml ``` #### Supervised Fine-Tuning with Accelerate on Multiple Nodes ```bash -bash 
examples/lora_multi_gpu/multi_node.sh +CUDA_VISIBLE_DEVICES=0,1,2,3 NNODES=2 RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/lora_multi_gpu/llama3_lora_sft.yaml +CUDA_VISIBLE_DEVICES=0,1,2,3 NNODES=2 RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/lora_multi_gpu/llama3_lora_sft.yaml ``` #### Supervised Fine-Tuning with DeepSpeed ZeRO-3 (Weight Sharding) ```bash -bash examples/lora_multi_gpu/ds_zero3.sh +CUDA_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train examples/lora_multi_gpu/llama3_lora_sft_ds.yaml ``` ### LoRA Fine-Tuning on Multiple NPUs @@ -130,7 +131,7 @@ bash examples/lora_multi_gpu/ds_zero3.sh #### Supervised Fine-Tuning with DeepSpeed ZeRO-0 ```bash -bash examples/lora_multi_npu/ds_zero0.sh +ASCEND_RT_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train examples/lora_multi_npu/llama3_lora_sft_ds.yaml ``` ### Full-Parameter Fine-Tuning on Multiple GPUs @@ -138,19 +139,20 @@ bash examples/lora_multi_npu/ds_zero0.sh #### Supervised Fine-Tuning with Accelerate on Single Node ```bash -bash examples/full_multi_gpu/single_node.sh +CUDA_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train examples/full_multi_gpu/llama3_full_sft.yaml ``` #### Supervised Fine-Tuning with Accelerate on Multiple Nodes ```bash -bash examples/full_multi_gpu/multi_node.sh +CUDA_VISIBLE_DEVICES=0,1,2,3 NNODES=2 RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/full_multi_gpu/llama3_full_sft.yaml +CUDA_VISIBLE_DEVICES=0,1,2,3 NNODES=2 RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/full_multi_gpu/llama3_full_sft.yaml ``` #### Batch Predicting and Computing BLEU and ROUGE Scores ```bash -bash examples/full_multi_gpu/predict.sh +CUDA_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train examples/full_multi_gpu/llama3_full_predict.yaml ``` ### Merging LoRA Adapters and Quantization diff --git a/examples/README_zh.md b/examples/README_zh.md index 0ff33398..6974faa9 100644 --- a/examples/README_zh.md +++ b/examples/README_zh.md @@ -110,19 +110,20 @@ CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/qlora_single_gpu/llama3_l #### 使用 Accelerate 进行单节点训练 ```bash -bash examples/lora_multi_gpu/single_node.sh +CUDA_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train examples/lora_multi_gpu/llama3_lora_sft.yaml ``` #### 使用 Accelerate 进行多节点训练 ```bash -bash examples/lora_multi_gpu/multi_node.sh +CUDA_VISIBLE_DEVICES=0,1,2,3 NNODES=2 RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/lora_multi_gpu/llama3_lora_sft.yaml +CUDA_VISIBLE_DEVICES=0,1,2,3 NNODES=2 RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/lora_multi_gpu/llama3_lora_sft.yaml ``` #### 使用 DeepSpeed ZeRO-3 平均分配显存 ```bash -bash examples/lora_multi_gpu/ds_zero3.sh +CUDA_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train examples/lora_multi_gpu/llama3_lora_sft_ds.yaml ``` ### 多 NPU LoRA 微调 @@ -130,7 +131,7 @@ bash examples/lora_multi_gpu/ds_zero3.sh #### 使用 DeepSpeed ZeRO-0 训练 ```bash -bash examples/lora_multi_npu/ds_zero0.sh +ASCEND_RT_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train examples/lora_multi_npu/llama3_lora_sft_ds.yaml ``` ### 多 GPU 全参数微调 @@ -138,19 +139,20 @@ bash examples/lora_multi_npu/ds_zero0.sh #### 使用 DeepSpeed 进行单节点训练 ```bash -bash examples/full_multi_gpu/single_node.sh +CUDA_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train examples/full_multi_gpu/llama3_full_sft.yaml ``` #### 使用 DeepSpeed 进行多节点训练 ```bash -bash examples/full_multi_gpu/multi_node.sh +CUDA_VISIBLE_DEVICES=0,1,2,3 NNODES=2 
RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/full_multi_gpu/llama3_full_sft.yaml +CUDA_VISIBLE_DEVICES=0,1,2,3 NNODES=2 RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/full_multi_gpu/llama3_full_sft.yaml ``` #### 批量预测并计算 BLEU 和 ROUGE 分数 ```bash -bash examples/full_multi_gpu/predict.sh +CUDA_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train examples/full_multi_gpu/llama3_full_predict.yaml ``` ### 合并 LoRA 适配器与模型量化 diff --git a/examples/accelerate/master_config.yaml b/examples/accelerate/master_config.yaml deleted file mode 100644 index a1018313..00000000 --- a/examples/accelerate/master_config.yaml +++ /dev/null @@ -1,18 +0,0 @@ -compute_environment: LOCAL_MACHINE -debug: false -distributed_type: MULTI_GPU -downcast_bf16: 'no' -gpu_ids: all -machine_rank: 0 -main_process_ip: 192.168.0.1 -main_process_port: 29555 -main_training_function: main -mixed_precision: fp16 -num_machines: 2 # the number of nodes -num_processes: 8 # the number of GPUs in all nodes -rdzv_backend: static -same_network: true -tpu_env: [] -tpu_use_cluster: false -tpu_use_sudo: false -use_cpu: false diff --git a/examples/accelerate/single_config.yaml b/examples/accelerate/single_config.yaml deleted file mode 100644 index 97f8c633..00000000 --- a/examples/accelerate/single_config.yaml +++ /dev/null @@ -1,16 +0,0 @@ -compute_environment: LOCAL_MACHINE -debug: false -distributed_type: MULTI_GPU -downcast_bf16: 'no' -gpu_ids: all -machine_rank: 0 -main_training_function: main -mixed_precision: fp16 -num_machines: 1 # the number of nodes -num_processes: 4 # the number of GPUs in all nodes -rdzv_backend: static -same_network: true -tpu_env: [] -tpu_use_cluster: false -tpu_use_sudo: false -use_cpu: false diff --git a/examples/accelerate/slave_config.yaml b/examples/accelerate/slave_config.yaml deleted file mode 100644 index e610fd0e..00000000 --- a/examples/accelerate/slave_config.yaml +++ /dev/null @@ -1,18 +0,0 @@ -compute_environment: LOCAL_MACHINE -debug: false -distributed_type: MULTI_GPU -downcast_bf16: 'no' -gpu_ids: all -machine_rank: 1 -main_process_ip: 192.168.0.1 -main_process_port: 29555 -main_training_function: main -mixed_precision: fp16 -num_machines: 2 # the number of nodes -num_processes: 8 # the number of GPUs in all nodes -rdzv_backend: static -same_network: true -tpu_env: [] -tpu_use_cluster: false -tpu_use_sudo: false -use_cpu: false diff --git a/examples/full_multi_gpu/multi_node.sh b/examples/full_multi_gpu/multi_node.sh deleted file mode 100644 index 34c038d4..00000000 --- a/examples/full_multi_gpu/multi_node.sh +++ /dev/null @@ -1,15 +0,0 @@ -#!/bin/bash - -NPROC_PER_NODE=4 -NNODES=2 -RANK=0 -MASTER_ADDR=192.168.0.1 -MASTER_PORT=29500 - -CUDA_VISIBLE_DEVICES=0,1,2,3 torchrun \ - --nproc_per_node $NPROC_PER_NODE \ - --nnodes $NNODES \ - --node_rank $RANK \ - --master_addr $MASTER_ADDR \ - --master_port $MASTER_PORT \ - src/train.py examples/full_multi_gpu/llama3_full_sft.yaml diff --git a/examples/full_multi_gpu/predict.sh b/examples/full_multi_gpu/predict.sh deleted file mode 100644 index 2445f444..00000000 --- a/examples/full_multi_gpu/predict.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/bash - -CUDA_VISIBLE_DEVICES=0,1,2,3 accelerate launch \ - --config_file examples/accelerate/single_config.yaml \ - src/train.py examples/full_multi_gpu/llama3_full_predict.yaml diff --git a/examples/full_multi_gpu/single_node.sh b/examples/full_multi_gpu/single_node.sh deleted file mode 100644 index ac29c097..00000000 --- 
a/examples/full_multi_gpu/single_node.sh +++ /dev/null @@ -1,15 +0,0 @@ -#!/bin/bash - -NPROC_PER_NODE=4 -NNODES=1 -RANK=0 -MASTER_ADDR=127.0.0.1 -MASTER_PORT=29500 - -CUDA_VISIBLE_DEVICES=0,1,2,3 torchrun \ - --nproc_per_node $NPROC_PER_NODE \ - --nnodes $NNODES \ - --node_rank $RANK \ - --master_addr $MASTER_ADDR \ - --master_port $MASTER_PORT \ - src/train.py examples/full_multi_gpu/llama3_full_sft.yaml diff --git a/examples/lora_multi_gpu/ds_zero3.sh b/examples/lora_multi_gpu/ds_zero3.sh deleted file mode 100644 index 90ea00dd..00000000 --- a/examples/lora_multi_gpu/ds_zero3.sh +++ /dev/null @@ -1,15 +0,0 @@ -#!/bin/bash - -NPROC_PER_NODE=4 -NNODES=1 -RANK=0 -MASTER_ADDR=127.0.0.1 -MASTER_PORT=29500 - -CUDA_VISIBLE_DEVICES=0,1,2,3 torchrun \ - --nproc_per_node $NPROC_PER_NODE \ - --nnodes $NNODES \ - --node_rank $RANK \ - --master_addr $MASTER_ADDR \ - --master_port $MASTER_PORT \ - src/train.py examples/lora_multi_gpu/llama3_lora_sft_ds.yaml diff --git a/examples/lora_multi_gpu/multi_node.sh b/examples/lora_multi_gpu/multi_node.sh deleted file mode 100644 index 401fac5f..00000000 --- a/examples/lora_multi_gpu/multi_node.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash -# also launch it on slave machine using slave_config.yaml - -CUDA_VISIBLE_DEVICES=0,1,2,3 accelerate launch \ - --config_file examples/accelerate/master_config.yaml \ - src/train.py examples/lora_multi_gpu/llama3_lora_sft.yaml diff --git a/examples/lora_multi_gpu/single_node.sh b/examples/lora_multi_gpu/single_node.sh deleted file mode 100644 index 885a0e8c..00000000 --- a/examples/lora_multi_gpu/single_node.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/bash - -CUDA_VISIBLE_DEVICES=0,1,2,3 accelerate launch \ - --config_file examples/accelerate/single_config.yaml \ - src/train.py examples/lora_multi_gpu/llama3_lora_sft.yaml diff --git a/examples/lora_multi_npu/ds_zero0.sh b/examples/lora_multi_npu/ds_zero0.sh deleted file mode 100644 index 4ffaa1b0..00000000 --- a/examples/lora_multi_npu/ds_zero0.sh +++ /dev/null @@ -1,15 +0,0 @@ -#!/bin/bash - -NPROC_PER_NODE=4 -NNODES=1 -RANK=0 -MASTER_ADDR=127.0.0.1 -MASTER_PORT=29500 - -ASCEND_RT_VISIBLE_DEVICES=0,1,2,3 torchrun \ - --nproc_per_node $NPROC_PER_NODE \ - --nnodes $NNODES \ - --node_rank $RANK \ - --master_addr $MASTER_ADDR \ - --master_port $MASTER_PORT \ - src/train.py examples/lora_multi_npu/llama3_lora_sft_ds.yaml diff --git a/src/llamafactory/cli.py b/src/llamafactory/cli.py index f9b63ded..26975f3c 100644 --- a/src/llamafactory/cli.py +++ b/src/llamafactory/cli.py @@ -1,9 +1,16 @@ +import os +import random +import subprocess import sys from enum import Enum, unique +from llamafactory import launcher + from .api.app import run_api from .chat.chat_model import run_chat from .eval.evaluator import run_eval +from .extras.logging import get_logger +from .extras.misc import get_device_count from .train.tuner import export_model, run_exp from .webui.interface import run_web_demo, run_web_ui @@ -37,6 +44,8 @@ WELCOME = ( + "-" * 58 ) +logger = get_logger(__name__) + @unique class Command(str, Enum): @@ -62,7 +71,32 @@ def main(): elif command == Command.EXPORT: export_model() elif command == Command.TRAIN: - run_exp() + if get_device_count() > 1: + nnodes = os.environ.get("NNODES", "1") + node_rank = os.environ.get("RANK", "0") + nproc_per_node = os.environ.get("NPROC_PER_NODE", str(get_device_count())) + master_addr = os.environ.get("MASTER_ADDR", "127.0.0.1") + master_port = os.environ.get("MASTER_PORT", str(random.randint(20001, 29999))) + logger.info("Initializing 
distributed tasks at: {}:{}".format(master_addr, master_port)) + subprocess.run( + [ + "torchrun", + "--nnodes", + nnodes, + "--node_rank", + node_rank, + "--nproc_per_node", + nproc_per_node, + "--master_addr", + master_addr, + "--master_port", + master_port, + launcher.__file__, + *sys.argv[1:], + ] + ) + else: + run_exp() elif command == Command.WEBDEMO: run_web_demo() elif command == Command.WEBUI: diff --git a/src/llamafactory/extras/misc.py b/src/llamafactory/extras/misc.py index 0dc07d28..2c7f170c 100644 --- a/src/llamafactory/extras/misc.py +++ b/src/llamafactory/extras/misc.py @@ -165,13 +165,15 @@ def get_current_device() -> torch.device: def get_device_count() -> int: r""" - Gets the number of available GPU devices. + Gets the number of available GPU or NPU devices. """ - if not torch.cuda.is_available(): + if is_torch_npu_available(): + return torch.npu.device_count() + elif is_torch_cuda_available(): + return torch.cuda.device_count() + else: return 0 - return torch.cuda.device_count() - def get_logits_processor() -> "LogitsProcessorList": r""" @@ -194,6 +196,13 @@ def infer_optim_dtype(model_dtype: torch.dtype) -> torch.dtype: return torch.float32 +def is_gpu_or_npu_available() -> bool: + r""" + Checks if the GPU or NPU is available. + """ + return is_torch_npu_available() or is_torch_cuda_available() + + def has_tokenized_data(path: os.PathLike) -> bool: r""" Checks if the path has a tokenized dataset. diff --git a/src/llamafactory/hparams/parser.py b/src/llamafactory/hparams/parser.py index 6311297e..c6869e4c 100644 --- a/src/llamafactory/hparams/parser.py +++ b/src/llamafactory/hparams/parser.py @@ -10,7 +10,6 @@ from transformers.trainer_utils import get_last_checkpoint from transformers.utils import is_torch_bf16_gpu_available from transformers.utils.versions import require_version -from ..extras.constants import TRAINER_CONFIG from ..extras.logging import get_logger from ..extras.misc import check_dependencies, get_current_device from .data_args import DataArguments @@ -252,10 +251,6 @@ def get_train_args(args: Optional[Dict[str, Any]] = None) -> _TRAIN_CLS: and can_resume_from_checkpoint ): last_checkpoint = get_last_checkpoint(training_args.output_dir) - files = os.listdir(training_args.output_dir) - if last_checkpoint is None and len(files) > 0 and (len(files) != 1 or files[0] != TRAINER_CONFIG): - raise ValueError("Output directory already exists and is not empty. 
Please set `overwrite_output_dir`.") - if last_checkpoint is not None: training_args.resume_from_checkpoint = last_checkpoint logger.info( diff --git a/src/llamafactory/launcher.py b/src/llamafactory/launcher.py new file mode 100644 index 00000000..de154db9 --- /dev/null +++ b/src/llamafactory/launcher.py @@ -0,0 +1,9 @@ +from llamafactory.train.tuner import run_exp + + +def launch(): + run_exp() + + +if __name__ == "__main__": + launch() diff --git a/src/llamafactory/webui/locales.py b/src/llamafactory/webui/locales.py index bd4a4205..570a8b42 100644 --- a/src/llamafactory/webui/locales.py +++ b/src/llamafactory/webui/locales.py @@ -1469,11 +1469,6 @@ ALERTS = { "ru": "Обучение недоступно в демонстрационном режиме, сначала скопируйте пространство в частное.", "zh": "展示模式不支持训练,请先复制到私人空间。", }, - "err_device_count": { - "en": "Multiple GPUs are not supported yet.", - "ru": "Пока не поддерживается множественные GPU.", - "zh": "尚不支持多 GPU 训练。", - }, "err_tool_name": { "en": "Tool name not found.", "ru": "Имя инструмента не найдено.", diff --git a/src/llamafactory/webui/runner.py b/src/llamafactory/webui/runner.py index 57595a08..1310b999 100644 --- a/src/llamafactory/webui/runner.py +++ b/src/llamafactory/webui/runner.py @@ -6,10 +6,9 @@ from typing import TYPE_CHECKING, Any, Dict, Generator, Optional import psutil from transformers.trainer import TRAINING_ARGS_NAME -from transformers.utils import is_torch_cuda_available from ..extras.constants import TRAINING_STAGES -from ..extras.misc import get_device_count, torch_gc +from ..extras.misc import is_gpu_or_npu_available, torch_gc from ..extras.packages import is_gradio_available from .common import get_module, get_save_dir, load_args, load_config, save_args from .locales import ALERTS @@ -64,16 +63,13 @@ class Runner: if not from_preview and self.demo_mode: return ALERTS["err_demo"][lang] - if not from_preview and get_device_count() > 1: - return ALERTS["err_device_count"][lang] - if do_train: stage = TRAINING_STAGES[get("train.training_stage")] reward_model = get("train.reward_model") if stage == "ppo" and not reward_model: return ALERTS["err_no_reward_model"][lang] - if not from_preview and not is_torch_cuda_available(): + if not from_preview and not is_gpu_or_npu_available(): gr.Warning(ALERTS["warn_no_cuda"][lang]) return "" @@ -273,7 +269,6 @@ class Runner: self.do_train, self.running_data = do_train, data args = self._parse_train_args(data) if do_train else self._parse_eval_args(data) env = deepcopy(os.environ) - env["CUDA_VISIBLE_DEVICES"] = os.environ.get("CUDA_VISIBLE_DEVICES", "0") env["LLAMABOARD_ENABLED"] = "1" self.trainer = Popen("llamafactory-cli train {}".format(save_cmd(args)), env=env, shell=True) yield from self.monitor() diff --git a/src/llamafactory/webui/utils.py b/src/llamafactory/webui/utils.py index 3d34f0d2..ceeb9352 100644 --- a/src/llamafactory/webui/utils.py +++ b/src/llamafactory/webui/utils.py @@ -42,8 +42,7 @@ def clean_cmd(args: Dict[str, Any]) -> Dict[str, Any]: def gen_cmd(args: Dict[str, Any]) -> str: - current_devices = os.environ.get("CUDA_VISIBLE_DEVICES", "0") - cmd_lines = ["CUDA_VISIBLE_DEVICES={} llamafactory-cli train ".format(current_devices)] + cmd_lines = ["llamafactory-cli train "] for k, v in clean_cmd(args).items(): cmd_lines.append(" --{} {} ".format(k, str(v))) From 89776097bc9d8bd626d450c1a22c3e0c51e4dacb Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Tue, 28 May 2024 19:35:52 +0800 Subject: [PATCH 018/162] update readme Former-commit-id: 
bc861f76706df3f643028f1dfc8ec2044b067a08 --- README.md | 7 +++---- README_zh.md | 5 ++--- src/llamafactory/cli.py | 3 +-- 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index b9225a94..fdc77777 100644 --- a/README.md +++ b/README.md @@ -395,9 +395,6 @@ See [examples/README.md](examples/README.md) for advanced usage (including distr ### Fine-Tuning with LLaMA Board GUI (powered by [Gradio](https://github.com/gradio-app/gradio)) -> [!IMPORTANT] -> LLaMA Board GUI only supports training on a single GPU. - #### Use local environment ```bash @@ -451,7 +448,7 @@ export USE_MODELSCOPE_HUB=1 # `set USE_MODELSCOPE_HUB=1` for Windows Train the model by specifying a model ID of the ModelScope Hub as the `model_name_or_path`. You can find a full list of model IDs at [ModelScope Hub](https://modelscope.cn/models), e.g., `LLM-Research/Meta-Llama-3-8B-Instruct`. -### Use W&B Logging +### Use W&B Logger To use [Weights & Biases](https://wandb.ai) for logging experimental results, you need to add the following arguments. @@ -460,6 +457,8 @@ report_to: wandb run_name: test_run # optional ``` +Set `WANDB_API_KEY` to [your key](https://wandb.ai/authorize) when launching training tasks to log in with your W&B account. + ## Projects using LLaMA Factory If you have a project that should be incorporated, please contact via email or create a pull request. diff --git a/README_zh.md b/README_zh.md index 0f47b88e..65a87705 100644 --- a/README_zh.md +++ b/README_zh.md @@ -395,9 +395,6 @@ CUDA_VISIBLE_DEVICES=0 llamafactory-cli export examples/merge_lora/llama3_lora_s ### LLaMA Board 可视化微调(由 [Gradio](https://github.com/gradio-app/gradio) 驱动) -> [!IMPORTANT] -> LLaMA Board 可视化界面目前仅支持单 GPU 训练。 - #### 使用本地环境 ```bash @@ -458,6 +455,8 @@ report_to: wandb run_name: test_run # 可选 ``` +在启动训练任务时,将 `WANDB_API_KEY` 设置为[密钥](https://wandb.ai/authorize)来登录 W&B 账户。 + ## 使用了 LLaMA Factory 的项目 如果您有项目希望添加至下述列表,请通过邮件联系或者创建一个 PR。 diff --git a/src/llamafactory/cli.py b/src/llamafactory/cli.py index 26975f3c..a74445a6 100644 --- a/src/llamafactory/cli.py +++ b/src/llamafactory/cli.py @@ -4,8 +4,7 @@ import subprocess import sys from enum import Enum, unique -from llamafactory import launcher - +from . 
import launcher from .api.app import run_api from .chat.chat_model import run_chat from .eval.evaluator import run_eval From fdfb5e548583593e505fa698d94a513b3265772d Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Tue, 28 May 2024 21:27:27 +0800 Subject: [PATCH 019/162] bump vllm version to 0.4.1 Former-commit-id: a00fd39a4c2f270620711f2bfbad8d460fb4aa89 --- setup.py | 2 +- src/llamafactory/hparams/parser.py | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 4d948450..b44e83ac 100644 --- a/setup.py +++ b/setup.py @@ -24,7 +24,7 @@ extra_require = { "metrics": ["nltk", "jieba", "rouge-chinese"], "deepspeed": ["deepspeed>=0.10.0,<=0.14.0"], "bitsandbytes": ["bitsandbytes>=0.39.0"], - "vllm": ["vllm>=0.4.0"], + "vllm": ["vllm>=0.4.1"], "galore": ["galore-torch"], "badam": ["badam"], "gptq": ["optimum>=1.16.0", "auto-gptq>=0.5.0"], diff --git a/src/llamafactory/hparams/parser.py b/src/llamafactory/hparams/parser.py index c6869e4c..adb206f8 100644 --- a/src/llamafactory/hparams/parser.py +++ b/src/llamafactory/hparams/parser.py @@ -6,6 +6,7 @@ from typing import Any, Dict, Optional, Tuple import torch import transformers from transformers import HfArgumentParser, Seq2SeqTrainingArguments +from transformers.integrations import is_deepspeed_zero3_enabled from transformers.trainer_utils import get_last_checkpoint from transformers.utils import is_torch_bf16_gpu_available from transformers.utils.versions import require_version @@ -63,6 +64,9 @@ def _verify_model_args(model_args: "ModelArguments", finetuning_args: "Finetunin if model_args.adapter_name_or_path is not None and finetuning_args.finetuning_type != "lora": raise ValueError("Adapter is only valid for the LoRA method.") + if model_args.use_unsloth and is_deepspeed_zero3_enabled(): + raise ValueError("Unsloth is incompatible with DeepSpeed ZeRO-3.") + if model_args.quantization_bit is not None: if finetuning_args.finetuning_type != "lora": raise ValueError("Quantization is only compatible with the LoRA method.") @@ -89,7 +93,7 @@ def _check_extra_dependencies( require_version("mixture-of-depth>=1.1.6", "To fix: pip install mixture-of-depth>=1.1.6") if model_args.infer_backend == "vllm": - require_version("vllm>=0.4.0", "To fix: pip install vllm>=0.4.0") + require_version("vllm>=0.4.1", "To fix: pip install vllm>=0.4.1") if finetuning_args.use_galore: require_version("galore_torch", "To fix: pip install galore_torch") From e15389be7d393b2fadbf49dac9890a01b5b3120c Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Tue, 28 May 2024 21:43:26 +0800 Subject: [PATCH 020/162] clean kto trainer Former-commit-id: 76402bd78cbd3a99a544f0ac019468b569b0e1d1 --- src/llamafactory/train/kto/trainer.py | 95 ++++++++++++++------------- 1 file changed, 50 insertions(+), 45 deletions(-) diff --git a/src/llamafactory/train/kto/trainer.py b/src/llamafactory/train/kto/trainer.py index 096fd935..292e61c7 100644 --- a/src/llamafactory/train/kto/trainer.py +++ b/src/llamafactory/train/kto/trainer.py @@ -1,7 +1,7 @@ from collections import defaultdict from contextlib import nullcontext from types import MethodType -from typing import TYPE_CHECKING, Dict, Optional, Tuple, Union +from typing import TYPE_CHECKING, Dict, Literal, Optional, Tuple, Union import torch from transformers import Trainer @@ -101,42 +101,39 @@ class CustomKTOTrainer(KTOTrainer): return -all_logps def forward( - self, model: "PreTrainedModel", batch: Dict[str, "torch.Tensor"] - ) -> Tuple["torch.Tensor", "torch.Tensor", 
"torch.Tensor", "torch.Tensor", "torch.Tensor"]: - with torch.no_grad(): - kl_model_inputs = {"input_ids": batch["kl_input_ids"], "attention_mask": batch["kl_attention_mask"]} - if "pixel_values" in batch: - kl_model_inputs["pixel_values"] = batch["pixel_values"] - - if "kl_token_type_ids" in batch: - kl_model_inputs["token_type_ids"] = batch["kl_token_type_ids"] - - kl_logits = model(**kl_model_inputs, return_dict=True, use_cache=False).logits.to(torch.float32) - - model_inputs = {"input_ids": batch["input_ids"], "attention_mask": batch["attention_mask"]} + self, model: "PreTrainedModel", batch: Dict[str, "torch.Tensor"], prefix: Literal["", "kl_"] = "" + ) -> Tuple["torch.Tensor", "torch.Tensor"]: + r""" + Runs forward pass and computes the log probabilities. + """ + batch = {k: v.detach().clone() for k, v in batch.items()} # avoid error + model_inputs = { + "input_ids": batch["{}input_ids".format(prefix)], + "attention_mask": batch["{}attention_mask".format(prefix)], + } if "pixel_values" in batch: model_inputs["pixel_values"] = batch["pixel_values"] - if "token_type_ids" in batch: - model_inputs["token_type_ids"] = batch["token_type_ids"] + if "{}token_type_ids".format(prefix) in batch: + model_inputs["token_type_ids"] = batch["{}token_type_ids".format(prefix)] - target_logits = model(**model_inputs, return_dict=True, use_cache=False).logits.to(torch.float32) + logits = model(**model_inputs, return_dict=True, use_cache=False).logits.to(torch.float32) - target_logps = self.get_batch_logps( - logits=target_logits, - labels=batch["labels"], + logps = self.get_batch_logps( + logits=logits, + labels=batch["{}labels".format(prefix)], average_log_prob=False, is_encoder_decoder=self.is_encoder_decoder, label_pad_token_id=self.label_pad_token_id, ) + return logits, logps - kl_logps = self.get_batch_logps( - logits=kl_logits, - labels=batch["kl_labels"], - average_log_prob=False, - is_encoder_decoder=self.is_encoder_decoder, - label_pad_token_id=self.label_pad_token_id, - ) + def concatenated_forward( + self, model: "PreTrainedModel", batch: Dict[str, "torch.Tensor"] + ) -> Tuple["torch.Tensor", "torch.Tensor", "torch.Tensor", "torch.Tensor", "torch.Tensor"]: + target_logits, target_logps = self.forward(model, batch) + with torch.no_grad(): + _, kl_logps = self.forward(model, batch, prefix="kl_") if len(target_logps) != len(batch["kto_tags"]): raise ValueError("Mismatched shape of inputs and labels.") @@ -152,6 +149,30 @@ class CustomKTOTrainer(KTOTrainer): return chosen_logps, rejected_logps, chosen_logits, rejected_logits, kl_logps + def compute_reference_log_probs( + self, batch: Dict[str, "torch.Tensor"] + ) -> Tuple["torch.Tensor", "torch.Tensor", "torch.Tensor"]: + r""" + Computes log probabilities of the reference model. 
+ """ + if self.ref_model is None: + ref_model = self.model + ref_context = self.accelerator.unwrap_model(self.model).disable_adapter() + else: + ref_model = self.ref_model + ref_context = nullcontext() + + with torch.no_grad(), ref_context: + ( + reference_chosen_logps, + reference_rejected_logps, + _, + _, + reference_kl_logps, + ) = self.concatenated_forward(ref_model, batch) + + return reference_chosen_logps, reference_rejected_logps, reference_kl_logps + def get_batch_loss_metrics( self, model: "PreTrainedModel", @@ -167,25 +188,9 @@ class CustomKTOTrainer(KTOTrainer): policy_chosen_logits, _, policy_kl_logps, - ) = self.forward(model, batch) - - with torch.no_grad(): - if self.ref_model is None: - ref_model = self.model - ref_context = self.accelerator.unwrap_model(self.model).disable_adapter() - else: - ref_model = self.ref_model - ref_context = nullcontext() - - with ref_context: - ( - reference_chosen_logps, - reference_rejected_logps, - _, - _, - reference_kl_logps, - ) = self.forward(ref_model, batch) + ) = self.concatenated_forward(model, batch) + reference_chosen_logps, reference_rejected_logps, reference_kl_logps = self.compute_reference_log_probs(batch) losses, chosen_rewards, rejected_rewards, kl = self.kto_loss( policy_chosen_logps, policy_rejected_logps, From 0de2ab5d163368adc728b3465441bf10bdc3901a Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Wed, 29 May 2024 00:14:29 +0800 Subject: [PATCH 021/162] update dpo, kto trainer Former-commit-id: 4a6cc3c7046f8b27d05ea53ef216bab6fa7ebfaf --- src/llamafactory/train/dpo/trainer.py | 10 +++++----- src/llamafactory/train/kto/trainer.py | 12 +++++++----- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/src/llamafactory/train/dpo/trainer.py b/src/llamafactory/train/dpo/trainer.py index f3c2443c..542335a3 100644 --- a/src/llamafactory/train/dpo/trainer.py +++ b/src/llamafactory/train/dpo/trainer.py @@ -7,7 +7,7 @@ import torch import torch.nn.functional as F from transformers import Trainer from trl import DPOTrainer -from trl.trainer.utils import disable_dropout_in_model +from trl.trainer import disable_dropout_in_model from ...extras.constants import IGNORE_INDEX from ..utils import create_custom_optimzer, create_custom_scheduler @@ -179,7 +179,7 @@ class CustomDPOTrainer(DPOTrainer): return chosen_logps, rejected_logps, chosen_logits, rejected_logits def compute_reference_log_probs( - self, batch: Dict[str, "torch.Tensor"] + self, model: "PreTrainedModel", batch: Dict[str, "torch.Tensor"] ) -> Tuple[Optional["torch.Tensor"], Optional["torch.Tensor"]]: r""" Computes log probabilities of the reference model. 
@@ -188,8 +188,8 @@ class CustomDPOTrainer(DPOTrainer): return None, None if self.ref_model is None: - ref_model = self.model - ref_context = self.accelerator.unwrap_model(self.model).disable_adapter() + ref_model = model + ref_context = self.accelerator.unwrap_model(model).disable_adapter() else: ref_model = self.ref_model ref_context = nullcontext() @@ -221,7 +221,7 @@ class CustomDPOTrainer(DPOTrainer): policy_rejected_logits, ) = self.concatenated_forward(model, batch) - reference_chosen_logps, reference_rejected_logps = self.compute_reference_log_probs(batch) + reference_chosen_logps, reference_rejected_logps = self.compute_reference_log_probs(model, batch) losses, chosen_rewards, rejected_rewards = self.compute_preference_loss( policy_chosen_logps, policy_rejected_logps, diff --git a/src/llamafactory/train/kto/trainer.py b/src/llamafactory/train/kto/trainer.py index 292e61c7..82ae722d 100644 --- a/src/llamafactory/train/kto/trainer.py +++ b/src/llamafactory/train/kto/trainer.py @@ -6,7 +6,7 @@ from typing import TYPE_CHECKING, Dict, Literal, Optional, Tuple, Union import torch from transformers import Trainer from trl import KTOTrainer -from trl.trainer.utils import disable_dropout_in_model +from trl.trainer import disable_dropout_in_model from ...extras.constants import IGNORE_INDEX from ..utils import create_custom_optimzer, create_custom_scheduler @@ -150,14 +150,14 @@ class CustomKTOTrainer(KTOTrainer): return chosen_logps, rejected_logps, chosen_logits, rejected_logits, kl_logps def compute_reference_log_probs( - self, batch: Dict[str, "torch.Tensor"] + self, model: "PreTrainedModel", batch: Dict[str, "torch.Tensor"] ) -> Tuple["torch.Tensor", "torch.Tensor", "torch.Tensor"]: r""" Computes log probabilities of the reference model. """ if self.ref_model is None: - ref_model = self.model - ref_context = self.accelerator.unwrap_model(self.model).disable_adapter() + ref_model = model + ref_context = self.accelerator.unwrap_model(model).disable_adapter() else: ref_model = self.ref_model ref_context = nullcontext() @@ -190,7 +190,9 @@ class CustomKTOTrainer(KTOTrainer): policy_kl_logps, ) = self.concatenated_forward(model, batch) - reference_chosen_logps, reference_rejected_logps, reference_kl_logps = self.compute_reference_log_probs(batch) + reference_chosen_logps, reference_rejected_logps, reference_kl_logps = self.compute_reference_log_probs( + model, batch + ) losses, chosen_rewards, rejected_rewards, kl = self.kto_loss( policy_chosen_logps, policy_rejected_logps, From 3e80365646e83d687b72b2b50cbb7e63b3794f15 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Wed, 29 May 2024 00:23:23 +0800 Subject: [PATCH 022/162] 10x generate in ppo w/ zero3 https://github.com/huggingface/trl/pull/1483 Former-commit-id: 5dc43ba8b373d8803bc22d88b3d0d95ef8b9c7f8 --- src/llamafactory/train/ppo/trainer.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/llamafactory/train/ppo/trainer.py b/src/llamafactory/train/ppo/trainer.py index 985664b7..27353c72 100644 --- a/src/llamafactory/train/ppo/trainer.py +++ b/src/llamafactory/train/ppo/trainer.py @@ -13,6 +13,7 @@ from transformers.trainer_utils import PREFIX_CHECKPOINT_DIR from transformers.utils import SAFE_WEIGHTS_NAME, WEIGHTS_NAME from trl import PPOConfig, PPOTrainer from trl.core import PPODecorators, logprobs_from_logits +from trl.models.utils import unwrap_model_for_generation from ...extras.callbacks import FixValueHeadModelCallback, LogCallback from ...extras.logging import get_logger @@ -322,10 
+323,10 @@ class CustomPPOTrainer(PPOTrainer, Trainer): for k, v in batch.items(): batch[k] = v[:, start_index:] - unwrapped_model: "AutoModelForCausalLMWithValueHead" = self.accelerator.unwrap_model(self.model) - generate_output: torch.Tensor = unwrapped_model.generate( - generation_config=self.generation_config, logits_processor=get_logits_processor(), **batch - ) + with unwrap_model_for_generation(self.model, self.accelerator) as unwrapped_model: + generate_output: torch.Tensor = unwrapped_model.generate( + generation_config=self.generation_config, logits_processor=get_logits_processor(), **batch + ) if self.model_args.upcast_layernorm: restore_layernorm(self.model, layernorm_params) From 21dbc483dea84da0868cc83e5294e67c33d76eb0 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Wed, 29 May 2024 01:13:17 +0800 Subject: [PATCH 023/162] add ds config to webui Former-commit-id: 66d72b263d36dc81de9f6152077663b613035977 --- src/llamafactory/webui/components/train.py | 12 ++++- src/llamafactory/webui/engine.py | 4 +- src/llamafactory/webui/locales.py | 42 +++++++++++++++ src/llamafactory/webui/runner.py | 8 ++- src/llamafactory/webui/utils.py | 61 ++++++++++++++++++++++ 5 files changed, 123 insertions(+), 4 deletions(-) diff --git a/src/llamafactory/webui/components/train.py b/src/llamafactory/webui/components/train.py index d399106f..8db5c2ba 100644 --- a/src/llamafactory/webui/components/train.py +++ b/src/llamafactory/webui/components/train.py @@ -3,6 +3,7 @@ from typing import TYPE_CHECKING, Dict from transformers.trainer_utils import SchedulerType from ...extras.constants import TRAINING_STAGES +from ...extras.misc import get_device_count from ...extras.packages import is_gradio_available from ..common import DEFAULT_DATA_DIR, autoset_packing, list_adapters, list_dataset from ..components.data import create_preview_box @@ -258,6 +259,11 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]: output_dir = gr.Textbox() config_path = gr.Textbox() + with gr.Row(): + device_count = gr.Textbox(value=str(get_device_count()), interactive=False) + ds_stage = gr.Dropdown(choices=["none", "2", "3"], value="none") + ds_offload = gr.Checkbox() + with gr.Row(): resume_btn = gr.Checkbox(visible=False, interactive=False) progress_bar = gr.Slider(visible=False, interactive=False) @@ -268,6 +274,7 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]: with gr.Column(scale=1): loss_viewer = gr.Plot() + input_elems.update({output_dir, config_path, device_count, ds_stage, ds_offload}) elem_dict.update( dict( cmd_preview_btn=cmd_preview_btn, @@ -277,14 +284,15 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]: stop_btn=stop_btn, output_dir=output_dir, config_path=config_path, + device_count=device_count, + ds_stage=ds_stage, + ds_offload=ds_offload, resume_btn=resume_btn, progress_bar=progress_bar, output_box=output_box, loss_viewer=loss_viewer, ) ) - - input_elems.update({output_dir, config_path}) output_elems = [output_box, progress_bar, loss_viewer] cmd_preview_btn.click(engine.runner.preview_train, input_elems, output_elems, concurrency_limit=None) diff --git a/src/llamafactory/webui/engine.py b/src/llamafactory/webui/engine.py index 964d65a2..fb568737 100644 --- a/src/llamafactory/webui/engine.py +++ b/src/llamafactory/webui/engine.py @@ -5,7 +5,7 @@ from .common import get_model_path, list_dataset, load_config from .locales import LOCALES from .manager import Manager from .runner import Runner -from .utils import get_time +from .utils import 
get_time, save_ds_config if TYPE_CHECKING: @@ -19,6 +19,8 @@ class Engine: self.manager = Manager() self.runner = Runner(self.manager, demo_mode) self.chatter = WebChatModel(self.manager, demo_mode, lazy_init=(not pure_chat)) + if not demo_mode: + save_ds_config() def _update_component(self, input_dict: Dict[str, Dict[str, Any]]) -> Dict["Component", "Component"]: r""" diff --git a/src/llamafactory/webui/locales.py b/src/llamafactory/webui/locales.py index 570a8b42..4657f9a3 100644 --- a/src/llamafactory/webui/locales.py +++ b/src/llamafactory/webui/locales.py @@ -1103,6 +1103,48 @@ LOCALES = { "info": "保存训练参数的配置文件路径。", }, }, + "device_count": { + "en": { + "label": "Device count", + "info": "Number of devices available.", + }, + "ru": { + "label": "Количество устройств", + "info": "Количество доступных устройств.", + }, + "zh": { + "label": "设备数量", + "info": "当前可用的运算设备数。", + }, + }, + "ds_stage": { + "en": { + "label": "DeepSpeed stage", + "info": "DeepSpeed stage for distributed training.", + }, + "ru": { + "label": "Этап DeepSpeed", + "info": "Этап DeepSpeed для распределенного обучения.", + }, + "zh": { + "label": "DeepSpeed stage", + "info": "多卡训练的 DeepSpeed stage。", + }, + }, + "ds_offload": { + "en": { + "label": "Enable offload", + "info": "Enable DeepSpeed offload (slow down training).", + }, + "ru": { + "label": "Включить выгрузку", + "info": "включить выгрузку DeepSpeed (замедлит обучение).", + }, + "zh": { + "label": "使用 offload", + "info": "使用 DeepSpeed offload(会减慢速度)。", + }, + }, "output_box": { "en": { "value": "Ready.", diff --git a/src/llamafactory/webui/runner.py b/src/llamafactory/webui/runner.py index 1310b999..c2e46e97 100644 --- a/src/llamafactory/webui/runner.py +++ b/src/llamafactory/webui/runner.py @@ -10,7 +10,7 @@ from transformers.trainer import TRAINING_ARGS_NAME from ..extras.constants import TRAINING_STAGES from ..extras.misc import is_gpu_or_npu_available, torch_gc from ..extras.packages import is_gradio_available -from .common import get_module, get_save_dir, load_args, load_config, save_args +from .common import DEFAULT_CACHE_DIR, get_module, get_save_dir, load_args, load_config, save_args from .locales import ALERTS from .utils import gen_cmd, get_eval_results, get_trainer_info, save_cmd @@ -201,6 +201,12 @@ class Runner: args["eval_steps"] = args["save_steps"] args["per_device_eval_batch_size"] = args["per_device_train_batch_size"] + # ds config + if get("train.ds_stage") != "none": + ds_stage = get("train.ds_stage") + ds_offload = "offload_" if get("train.ds_offload") else "" + args["deepspeed"] = os.path.join(DEFAULT_CACHE_DIR, "ds_z{}_{}config.json".format(ds_stage, ds_offload)) + return args def _parse_eval_args(self, data: Dict["Component", Any]) -> Dict[str, Any]: diff --git a/src/llamafactory/webui/utils.py b/src/llamafactory/webui/utils.py index ceeb9352..654d1f8d 100644 --- a/src/llamafactory/webui/utils.py +++ b/src/llamafactory/webui/utils.py @@ -8,6 +8,7 @@ from yaml import safe_dump from ..extras.constants import RUNNING_LOG, TRAINER_CONFIG, TRAINER_LOG from ..extras.packages import is_gradio_available, is_matplotlib_available from ..extras.ploting import gen_loss_plot +from .common import DEFAULT_CACHE_DIR from .locales import ALERTS @@ -103,3 +104,63 @@ def save_cmd(args: Dict[str, Any]) -> str: safe_dump(clean_cmd(args), f) return os.path.join(output_dir, TRAINER_CONFIG) + + +def save_ds_config() -> None: + os.makedirs(DEFAULT_CACHE_DIR, exist_ok=True) + ds_config = { + "train_batch_size": "auto", + "train_micro_batch_size_per_gpu": 
"auto", + "gradient_accumulation_steps": "auto", + "gradient_clipping": "auto", + "zero_allow_untested_optimizer": True, + "fp16": { + "enabled": "auto", + "loss_scale": 0, + "loss_scale_window": 1000, + "initial_scale_power": 16, + "hysteresis": 2, + "min_loss_scale": 1, + }, + "bf16": {"enabled": "auto"}, + } + offload_config = { + "device": "cpu", + "pin_memory": True, + } + ds_config["zero_optimization"] = { + "stage": 2, + "allgather_partitions": True, + "allgather_bucket_size": 5e8, + "overlap_comm": True, + "reduce_scatter": True, + "reduce_bucket_size": 5e8, + "contiguous_gradients": True, + "round_robin_gradients": True, + } + with open(os.path.join(DEFAULT_CACHE_DIR, "ds_z2_config.json"), "w", encoding="utf-8") as f: + json.dump(ds_config, f, indent=2) + + ds_config["zero_optimization"]["offload_optimizer"] = offload_config + with open(os.path.join(DEFAULT_CACHE_DIR, "ds_z2_offload_config.json"), "w", encoding="utf-8") as f: + json.dump(ds_config, f, indent=2) + + ds_config["zero_optimization"] = { + "stage": 3, + "overlap_comm": True, + "contiguous_gradients": True, + "sub_group_size": 1e9, + "reduce_bucket_size": "auto", + "stage3_prefetch_bucket_size": "auto", + "stage3_param_persistence_threshold": "auto", + "stage3_max_live_parameters": 1e9, + "stage3_max_reuse_distance": 1e9, + "stage3_gather_16bit_weights_on_model_save": True, + } + with open(os.path.join(DEFAULT_CACHE_DIR, "ds_z3_config.json"), "w", encoding="utf-8") as f: + json.dump(ds_config, f, indent=2) + + ds_config["zero_optimization"]["offload_optimizer"] = offload_config + ds_config["zero_optimization"]["offload_param"] = offload_config + with open(os.path.join(DEFAULT_CACHE_DIR, "ds_z3_offload_config.json"), "w", encoding="utf-8") as f: + json.dump(ds_config, f, indent=2) From 51139c5fc4a1e9f0148a65416265870c8ee385ce Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Wed, 29 May 2024 01:20:07 +0800 Subject: [PATCH 024/162] fix hf chat engine Former-commit-id: 76ce52911690ab0dd8ffa5587127afb4ec942abe --- src/llamafactory/chat/hf_engine.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/llamafactory/chat/hf_engine.py b/src/llamafactory/chat/hf_engine.py index 2148f8cd..ad0e90fe 100644 --- a/src/llamafactory/chat/hf_engine.py +++ b/src/llamafactory/chat/hf_engine.py @@ -8,6 +8,7 @@ import torch from transformers import GenerationConfig, TextIteratorStreamer from ..data import get_template_and_fix_tokenizer +from ..extras.logging import get_logger from ..extras.misc import get_logits_processor from ..model import load_model, load_tokenizer from .base_engine import BaseEngine, Response @@ -23,6 +24,9 @@ if TYPE_CHECKING: from ..hparams import DataArguments, FinetuningArguments, GeneratingArguments, ModelArguments +logger = get_logger(__name__) + + class HuggingfaceEngine(BaseEngine): def __init__( self, @@ -92,7 +96,7 @@ class HuggingfaceEngine(BaseEngine): stop: Optional[Union[str, List[str]]] = input_kwargs.pop("stop", None) if stop is not None: - raise ValueError("Stop parameter is not supported in Huggingface engine yet.") + logger.warning("Stop parameter is not supported in Huggingface engine yet.") generating_args = generating_args.copy() generating_args.update( From 8bd3c0bae2dec9d13ed8f2d7b3a9f87a2a6a9c82 Mon Sep 17 00:00:00 2001 From: hzhaoy Date: Wed, 29 May 2024 15:00:37 +0800 Subject: [PATCH 025/162] add TeleChat-12B/TeleChat-12B-v2 models Former-commit-id: e0675385c88af03aaef8d51586c8a282829c4051 --- src/llamafactory/data/template.py | 10 ++++++++++ 
src/llamafactory/extras/constants.py | 16 ++++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/src/llamafactory/data/template.py b/src/llamafactory/data/template.py index f13b303b..1a9f08ff 100644 --- a/src/llamafactory/data/template.py +++ b/src/llamafactory/data/template.py @@ -918,3 +918,13 @@ _register_template( format_user=StringFormatter(slots=[":{{content}}\n:"]), format_separator=EmptyFormatter(slots=["\n"]), ) + + +_register_template( + name="telechat", + format_user=StringFormatter(slots=["<_user>{{content}}<_bot>"]), + format_system=StringFormatter(slots=["<_system>{{content}}<_end>"]), + default_system="", + stop_words=["<_end>"], + replace_eos=True, +) diff --git a/src/llamafactory/extras/constants.py b/src/llamafactory/extras/constants.py index d1fcd4f6..bf2388ab 100644 --- a/src/llamafactory/extras/constants.py +++ b/src/llamafactory/extras/constants.py @@ -1355,3 +1355,19 @@ register_model_group( }, template="zephyr", ) + + +register_model_group( + models={ + "TeleChat-12B-Chat": { + DownloadSource.DEFAULT: "Tele-AI/TeleChat-12B", + DownloadSource.MODELSCOPE: "TeleAI/TeleChat-12B", + }, + "TeleChat-12B-v2-Chat": { + DownloadSource.DEFAULT: "Tele-AI/TeleChat-12B-v2", + DownloadSource.MODELSCOPE: "TeleAI/TeleChat-12B-v2", + }, + }, + module='query,key_value', + template="telechat", +) From ecada193581d0be9fe471104daf532a37e50b819 Mon Sep 17 00:00:00 2001 From: MengqingCao Date: Wed, 29 May 2024 09:53:31 +0000 Subject: [PATCH 026/162] update cann kernels url Former-commit-id: 23c65e9d7e8817b5815264e44cbf4a7bcb88d3d7 --- README.md | 2 +- README_zh.md | 2 +- setup.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index ca70110f..14a8f4a2 100644 --- a/README.md +++ b/README.md @@ -359,7 +359,7 @@ chmod +x Ascend-cann-toolkit_8.0.RC1.alpha001_linux-"$(uname -i)".run ./Ascend-cann-toolkit_8.0.RC1.alpha001_linux-"$(uname -i)".run --install # install CANN Kernels -wget https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/Milan-ASL/Milan-ASL%20V100R001C18B800TP015/Ascend-cann-kernels-910b_8.0.RC1.alpha001_linux.run +wget https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/Milan-ASL/Milan-ASL%20V100R001C17SPC701/Ascend-cann-kernels-910b_8.0.RC1.alpha001_linux.run chmod +x Ascend-cann-kernels-910b_8.0.RC1.alpha001_linux.run ./Ascend-cann-kernels-910b_8.0.RC1.alpha001_linux.run --install diff --git a/README_zh.md b/README_zh.md index 4f659588..66a4c031 100644 --- a/README_zh.md +++ b/README_zh.md @@ -359,7 +359,7 @@ chmod +x Ascend-cann-toolkit_8.0.RC1.alpha001_linux-"$(uname -i)".run ./Ascend-cann-toolkit_8.0.RC1.alpha001_linux-"$(uname -i)".run --install # 安装 CANN Kernels -wget https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/Milan-ASL/Milan-ASL%20V100R001C18B800TP015/Ascend-cann-kernels-910b_8.0.RC1.alpha001_linux.run +wget https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/Milan-ASL/Milan-ASL%20V100R001C17SPC701/Ascend-cann-kernels-910b_8.0.RC1.alpha001_linux.run chmod +x Ascend-cann-kernels-910b_8.0.RC1.alpha001_linux.run ./Ascend-cann-kernels-910b_8.0.RC1.alpha001_linux.run --install diff --git a/setup.py b/setup.py index 87620c37..f72eefb6 100644 --- a/setup.py +++ b/setup.py @@ -33,7 +33,7 @@ extra_require = { "qwen": ["tiktoken", "transformers_stream_generator"], "modelscope": ["modelscope"], "quality": ["ruff"], - "torch_npu": ["torch==2.2.0", "torch_npu==2.2.0", "decorator"] + "torch_npu": ["torch==2.1.0", "torch_npu==2.1.0", "decorator"] } From c21590ca5ec5aa302d902e02816d3ebf8702af02 Mon Sep 17 00:00:00 2001 From: 
MengqingCao Date: Wed, 29 May 2024 10:05:11 +0000 Subject: [PATCH 027/162] update torch-npu version Former-commit-id: a70d7fcf2967eb30280a1fb845b39db7878f535c --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index f72eefb6..06e6b750 100644 --- a/setup.py +++ b/setup.py @@ -33,7 +33,7 @@ extra_require = { "qwen": ["tiktoken", "transformers_stream_generator"], "modelscope": ["modelscope"], "quality": ["ruff"], - "torch_npu": ["torch==2.1.0", "torch_npu==2.1.0", "decorator"] + "torch_npu": ["torch==2.1.0", "torch_npu==2.1.0.post3", "decorator"] } From 0c722c879a90599783b7627ad02f660e487c6ff9 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Wed, 29 May 2024 18:39:11 +0800 Subject: [PATCH 028/162] update readme Former-commit-id: 440e9de66986ef7736361ce8ec3e23ce68655a56 --- .readthedocs.yaml | 19 +++++++++++++++ README.md | 26 +++++++++----------- README_zh.md | 28 ++++++++++------------ examples/accelerate/fsdp_config.yaml | 8 +++---- setup.py | 2 +- src/llamafactory/data/template.py | 19 +++++++-------- src/llamafactory/extras/constants.py | 36 +++++++++++++++------------- 7 files changed, 76 insertions(+), 62 deletions(-) create mode 100644 .readthedocs.yaml diff --git a/.readthedocs.yaml b/.readthedocs.yaml new file mode 100644 index 00000000..3a9eaea1 --- /dev/null +++ b/.readthedocs.yaml @@ -0,0 +1,19 @@ +# Read the Docs configuration file +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details + +version: 2 + +build: + os: ubuntu-22.04 + tools: + python: "3.8" + +sphinx: + configuration: docs/source/conf.py + +formats: + - pdf + +python: + install: + - requirements: docs/requirements-docs.txt diff --git a/README.md b/README.md index 3e5cdb91..4ca6d1ec 100644 --- a/README.md +++ b/README.md @@ -330,7 +330,7 @@ cd LLaMA-Factory pip install -e .[torch,metrics] ``` -Extra dependencies available: torch, metrics, deepspeed, bitsandbytes, vllm, galore, badam, gptq, awq, aqlm, qwen, modelscope, quality +Extra dependencies available: torch, torch_npu, metrics, deepspeed, bitsandbytes, vllm, galore, badam, gptq, awq, aqlm, qwen, modelscope, quality > [!TIP] > Use `pip install --no-deps -e .` to resolve package conflicts. @@ -351,32 +351,28 @@ To enable FlashAttention-2 on the Windows platform, you need to install the prec Join [NPU user group](assets/wechat_npu.jpg). -Use `pip install -e .[torch_npu]` to install LLaMA-Factory with **[torch-npu](https://gitee.com/ascend/pytorch)** library. - -To utilize Ascend NPU devices for (distributed) training and inference, you need to install the **[Ascend CANN Toolkit and Kernels](https://www.hiascend.com/developer/download/community/result?module=cann)**. You can follow chapter **[install CANN](https://www.hiascend.com/document/detail/zh/CANNCommunityEdition/80RC2alpha002/quickstart/quickstart/quickstart_18_0004.html)** in the installation tutorial to install CANN Toolkit and the kernels, or use the fast installation as following: +To install LLaMA Factory on Ascend NPU devices, please specify extra dependencies: `pip install -e .[torch_npu,metrics]`. Additionally, you need to install the **[Ascend CANN Toolkit and Kernels](https://www.hiascend.com/developer/download/community/result?module=cann)**. 
Please follow the [installation tutorial](https://www.hiascend.com/document/detail/en/CANNCommunityEdition/600alphaX/softwareinstall/instg/atlasdeploy_03_0031.html) or use the following commands: ```bash -# replace the url according to your choice +# replace the url according to your CANN version and devices # install CANN Toolkit wget https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/Milan-ASL/Milan-ASL%20V100R001C17SPC701/Ascend-cann-toolkit_8.0.RC1.alpha001_linux-"$(uname -i)".run -chmod +x Ascend-cann-toolkit_8.0.RC1.alpha001_linux-"$(uname -i)".run -./Ascend-cann-toolkit_8.0.RC1.alpha001_linux-"$(uname -i)".run --install +bash Ascend-cann-toolkit_8.0.RC1.alpha001_linux-"$(uname -i)".run --install # install CANN Kernels wget https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/Milan-ASL/Milan-ASL%20V100R001C17SPC701/Ascend-cann-kernels-910b_8.0.RC1.alpha001_linux.run -chmod +x Ascend-cann-kernels-910b_8.0.RC1.alpha001_linux.run -./Ascend-cann-kernels-910b_8.0.RC1.alpha001_linux.run --install +bash Ascend-cann-kernels-910b_8.0.RC1.alpha001_linux.run --install # set env variables source /usr/local/Ascend/ascend-toolkit/set_env.sh ``` -| Requirement | Minimum | Recommend | -| ------------ | ------- | --------- | -| CANN | 8.0.RC1 | 8.0.RC1 | -| torch | 2.2.0 | 2.2.0 | -| torch-npu | 2.2.0 | 2.2.0 | -| deepspeed | 0.13.2 | 0.13.2 | +| Requirement | Minimum | Recommend | +| ------------ | ------- | ----------- | +| CANN | 8.0.RC1 | 8.0.RC1 | +| torch | 2.1.0 | 2.1.0 | +| torch-npu | 2.1.0 | 2.1.0.post3 | +| deepspeed | 0.13.2 | 0.13.2 | Docker image: diff --git a/README_zh.md b/README_zh.md index b50d52ef..70938e38 100644 --- a/README_zh.md +++ b/README_zh.md @@ -330,7 +330,7 @@ cd LLaMA-Factory pip install -e .[torch,metrics] ``` -可选的额外依赖项:torch、metrics、deepspeed、bitsandbytes、vllm、galore、badam、gptq、awq、aqlm、qwen、modelscope、quality +可选的额外依赖项:torch、torch_npu、metrics、deepspeed、bitsandbytes、vllm、galore、badam、gptq、awq、aqlm、qwen、modelscope、quality > [!TIP] > 遇到包冲突时,可使用 `pip install --no-deps -e .` 解决。 @@ -351,39 +351,35 @@ pip install https://github.com/jllllll/bitsandbytes-windows-webui/releases/downl 加入 [NPU 用户群](assets/wechat_npu.jpg)。 -使用 `pip install -e .[torch_npu]` 来安装带有 **[torch-npu](https://gitee.com/ascend/pytorch)** 的 LLaMA-Factory。 - -如果使用昇腾 NPU 设备进行(分布式)训练或推理,需要安装 **[Ascend CANN Toolkit and Kernels](https://www.hiascend.com/developer/download/community/result?module=cann)**。可以参考安装教程中的**[安装 CANN](https://www.hiascend.com/document/detail/zh/CANNCommunityEdition/80RC2alpha002/quickstart/quickstart/quickstart_18_0004.html)**章节来进行安装,或使用以下快速安装指令: +在昇腾 NPU 设备上安装 LLaMA Factory 时,需要指定额外依赖项,使用 `pip install -e .[torch_npu,metrics]` 命令安装。此外,还需要安装 **[Ascend CANN Toolkit and Kernels](https://www.hiascend.com/developer/download/community/result?module=cann)**,安装方法请参考[安装教程](https://www.hiascend.com/document/detail/zh/CANNCommunityEdition/80RC2alpha002/quickstart/quickstart/quickstart_18_0004.html)或使用以下命令: ```bash -# 请替换 URL 为你需要的 CANN 版本对应的 URL +# 请替换 URL 为 CANN 版本和设备型号对应的 URL # 安装 CANN Toolkit wget https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/Milan-ASL/Milan-ASL%20V100R001C17SPC701/Ascend-cann-toolkit_8.0.RC1.alpha001_linux-"$(uname -i)".run -chmod +x Ascend-cann-toolkit_8.0.RC1.alpha001_linux-"$(uname -i)".run -./Ascend-cann-toolkit_8.0.RC1.alpha001_linux-"$(uname -i)".run --install +bash Ascend-cann-toolkit_8.0.RC1.alpha001_linux-"$(uname -i)".run --install # 安装 CANN Kernels wget 
https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/Milan-ASL/Milan-ASL%20V100R001C17SPC701/Ascend-cann-kernels-910b_8.0.RC1.alpha001_linux.run -chmod +x Ascend-cann-kernels-910b_8.0.RC1.alpha001_linux.run -./Ascend-cann-kernels-910b_8.0.RC1.alpha001_linux.run --install +bash Ascend-cann-kernels-910b_8.0.RC1.alpha001_linux.run --install # 设置环境变量 source /usr/local/Ascend/ascend-toolkit/set_env.sh ``` -| 依赖项 | 至少 | 推荐 | -| ------------ | ------- | --------- | -| CANN | 8.0.RC1 | 8.0.RC1 | -| torch | 2.2.0 | 2.2.0 | -| torch-npu | 2.2.0 | 2.2.0 | -| deepspeed | 0.13.2 | 0.13.2 | +| 依赖项 | 至少 | 推荐 | +| ------------ | ------- | ----------- | +| CANN | 8.0.RC1 | 8.0.RC1 | +| torch | 2.1.0 | 2.1.0 | +| torch-npu | 2.1.0 | 2.1.0.post3 | +| deepspeed | 0.13.2 | 0.13.2 | Docker 镜像: - 32GB:[下载地址](http://mirrors.cn-central-221.ovaijisuan.com/detail/130.html) - 64GB:[下载地址](http://mirrors.cn-central-221.ovaijisuan.com/detail/131.html) -请记得使用 `ASCEND_RT_VISIBLE_DEVICES` 而非 `CUDA_VISIBLE_DEVICES` 来指定您使用的设备。 +请使用 `ASCEND_RT_VISIBLE_DEVICES` 而非 `CUDA_VISIBLE_DEVICES` 来指定运算设备。 如果遇到无法正常推理的情况,请尝试设置 `do_sample: false`。 diff --git a/examples/accelerate/fsdp_config.yaml b/examples/accelerate/fsdp_config.yaml index 60025597..cd65e074 100644 --- a/examples/accelerate/fsdp_config.yaml +++ b/examples/accelerate/fsdp_config.yaml @@ -5,16 +5,16 @@ downcast_bf16: 'no' fsdp_config: fsdp_auto_wrap_policy: TRANSFORMER_BASED_WRAP fsdp_backward_prefetch: BACKWARD_PRE - fsdp_cpu_ram_efficient_loading: true fsdp_forward_prefetch: false - fsdp_offload_params: true + fsdp_cpu_ram_efficient_loading: true + fsdp_offload_params: true # offload may affect training speed fsdp_sharding_strategy: FULL_SHARD fsdp_state_dict_type: FULL_STATE_DICT fsdp_sync_module_states: true - fsdp_use_orig_params: false + fsdp_use_orig_params: true machine_rank: 0 main_training_function: main -mixed_precision: fp16 +mixed_precision: fp16 # or bf16 num_machines: 1 # the number of nodes num_processes: 2 # the number of GPUs in all nodes rdzv_backend: static diff --git a/setup.py b/setup.py index 45268d6e..45e73343 100644 --- a/setup.py +++ b/setup.py @@ -21,6 +21,7 @@ def get_requires(): extra_require = { "torch": ["torch>=1.13.1"], + "torch_npu": ["torch==2.1.0", "torch-npu==2.1.0.post3", "decorator"], "metrics": ["nltk", "jieba", "rouge-chinese"], "deepspeed": ["deepspeed>=0.10.0,<=0.14.0"], "bitsandbytes": ["bitsandbytes>=0.39.0"], @@ -33,7 +34,6 @@ extra_require = { "qwen": ["tiktoken", "transformers_stream_generator"], "modelscope": ["modelscope"], "quality": ["ruff"], - "torch_npu": ["torch==2.1.0", "torch_npu==2.1.0.post3", "decorator"] } diff --git a/src/llamafactory/data/template.py b/src/llamafactory/data/template.py index 1a9f08ff..35866e31 100644 --- a/src/llamafactory/data/template.py +++ b/src/llamafactory/data/template.py @@ -822,6 +822,15 @@ _register_template( ) +_register_template( + name="telechat", + format_user=StringFormatter(slots=["<_user>{{content}}<_bot>"]), + format_system=StringFormatter(slots=["<_system>{{content}}<_end>"]), + stop_words=["<_end>"], + replace_eos=True, +) + + _register_template( name="vicuna", format_user=StringFormatter(slots=["USER: {{content}} ASSISTANT:"]), @@ -918,13 +927,3 @@ _register_template( format_user=StringFormatter(slots=[":{{content}}\n:"]), format_separator=EmptyFormatter(slots=["\n"]), ) - - -_register_template( - name="telechat", - format_user=StringFormatter(slots=["<_user>{{content}}<_bot>"]), - format_system=StringFormatter(slots=["<_system>{{content}}<_end>"]), - default_system="", - 
stop_words=["<_end>"], - replace_eos=True, -) diff --git a/src/llamafactory/extras/constants.py b/src/llamafactory/extras/constants.py index bf2388ab..5e2ee3ce 100644 --- a/src/llamafactory/extras/constants.py +++ b/src/llamafactory/extras/constants.py @@ -1095,6 +1095,26 @@ register_model_group( ) +register_model_group( + models={ + "TeleChat-7B-Chat": { + DownloadSource.DEFAULT: "Tele-AI/telechat-7B", + DownloadSource.MODELSCOPE: "TeleAI/telechat-7B", + }, + "TeleChat-12B-Chat": { + DownloadSource.DEFAULT: "Tele-AI/TeleChat-12B", + DownloadSource.MODELSCOPE: "TeleAI/TeleChat-12B", + }, + "TeleChat-12B-v2-Chat": { + DownloadSource.DEFAULT: "Tele-AI/TeleChat-12B-v2", + DownloadSource.MODELSCOPE: "TeleAI/TeleChat-12B-v2", + }, + }, + module="query,key_value", + template="telechat", +) + + register_model_group( models={ "Vicuna1.5-7B-Chat": { @@ -1355,19 +1375,3 @@ register_model_group( }, template="zephyr", ) - - -register_model_group( - models={ - "TeleChat-12B-Chat": { - DownloadSource.DEFAULT: "Tele-AI/TeleChat-12B", - DownloadSource.MODELSCOPE: "TeleAI/TeleChat-12B", - }, - "TeleChat-12B-v2-Chat": { - DownloadSource.DEFAULT: "Tele-AI/TeleChat-12B-v2", - DownloadSource.MODELSCOPE: "TeleAI/TeleChat-12B-v2", - }, - }, - module='query,key_value', - template="telechat", -) From a61ee00736c8031c798b7e64677ec6a0c4f945fa Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Wed, 29 May 2024 20:55:51 +0800 Subject: [PATCH 029/162] fix #3965 Former-commit-id: 37d15ac55d0be0ff47d6a88f07e2d823117a4a36 --- src/llamafactory/data/template.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llamafactory/data/template.py b/src/llamafactory/data/template.py index 35866e31..a117b689 100644 --- a/src/llamafactory/data/template.py +++ b/src/llamafactory/data/template.py @@ -544,7 +544,7 @@ _register_template( ) ] ), - format_system=EmptyFormatter(slots=[{"bos_token"}]), + format_system=StringFormatter(slots=[{"bos_token"}, "{{content}}"]), force_system=True, ) From 50940fe9126de429666edcdcc03764f76bca495b Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Wed, 29 May 2024 20:58:23 +0800 Subject: [PATCH 030/162] fix cohere system Former-commit-id: 5d629b29e705c8ff8dd4521719d9c0e67a3fe0a2 --- src/llamafactory/data/template.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/llamafactory/data/template.py b/src/llamafactory/data/template.py index a117b689..00527b44 100644 --- a/src/llamafactory/data/template.py +++ b/src/llamafactory/data/template.py @@ -544,8 +544,13 @@ _register_template( ) ] ), - format_system=StringFormatter(slots=[{"bos_token"}, "{{content}}"]), - force_system=True, + format_system=StringFormatter( + slots=[{"bos_token"}, "<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>{{content}}<|END_OF_TURN_TOKEN|>"] + ), + default_system=( + "You are Command-R, a brilliant, sophisticated, AI-assistant trained to assist human users " + "by providing thorough responses. You are trained by Cohere." 
+ ), ) From 219a16130a12bb523ddb78104db1732191c1f550 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Wed, 29 May 2024 23:55:38 +0800 Subject: [PATCH 031/162] better llamaboard * easily resume from checkpoint * support full and freeze checkpoints * faster ui Former-commit-id: 84cfb2452cc86b037ccddee6e833f8eb7c129fa4 --- src/llamafactory/__init__.py | 2 +- src/llamafactory/extras/constants.py | 21 +++- src/llamafactory/hparams/parser.py | 13 ++- src/llamafactory/webui/chatter.py | 33 +++--- src/llamafactory/webui/common.py | 109 +++++++++++++------- src/llamafactory/webui/components/eval.py | 4 +- src/llamafactory/webui/components/export.py | 29 +++--- src/llamafactory/webui/components/top.py | 23 ++--- src/llamafactory/webui/components/train.py | 33 +++--- src/llamafactory/webui/engine.py | 17 ++- src/llamafactory/webui/locales.py | 24 ++--- src/llamafactory/webui/manager.py | 2 +- src/llamafactory/webui/runner.py | 79 +++++++------- src/llamafactory/webui/utils.py | 107 ++++++++++++++++--- 14 files changed, 303 insertions(+), 193 deletions(-) diff --git a/src/llamafactory/__init__.py b/src/llamafactory/__init__.py index b889e268..78230937 100644 --- a/src/llamafactory/__init__.py +++ b/src/llamafactory/__init__.py @@ -1,4 +1,4 @@ -# Level: api, webui > chat, eval, train > data, model > extras, hparams +# Level: api, webui > chat, eval, train > data, model > hparams > extras from .cli import VERSION diff --git a/src/llamafactory/extras/constants.py b/src/llamafactory/extras/constants.py index 5e2ee3ce..f365016f 100644 --- a/src/llamafactory/extras/constants.py +++ b/src/llamafactory/extras/constants.py @@ -2,6 +2,19 @@ from collections import OrderedDict, defaultdict from enum import Enum from typing import Dict, Optional +from peft.utils import SAFETENSORS_WEIGHTS_NAME as SAFE_ADAPTER_WEIGHTS_NAME +from peft.utils import WEIGHTS_NAME as ADAPTER_WEIGHTS_NAME +from transformers.utils import SAFE_WEIGHTS_INDEX_NAME, SAFE_WEIGHTS_NAME, WEIGHTS_INDEX_NAME, WEIGHTS_NAME + + +CHECKPOINT_NAMES = { + SAFE_ADAPTER_WEIGHTS_NAME, + ADAPTER_WEIGHTS_NAME, + SAFE_WEIGHTS_INDEX_NAME, + SAFE_WEIGHTS_NAME, + WEIGHTS_INDEX_NAME, + WEIGHTS_NAME, +} CHOICES = ["A", "B", "C", "D"] @@ -26,9 +39,9 @@ LAYERNORM_NAMES = {"norm", "ln"} METHODS = ["full", "freeze", "lora"] -MOD_SUPPORTED_MODELS = ["bloom", "falcon", "gemma", "llama", "mistral", "mixtral", "phi", "starcoder2"] +MOD_SUPPORTED_MODELS = {"bloom", "falcon", "gemma", "llama", "mistral", "mixtral", "phi", "starcoder2"} -PEFT_METHODS = ["lora"] +PEFT_METHODS = {"lora"} RUNNING_LOG = "running_log.txt" @@ -49,9 +62,9 @@ TRAINING_STAGES = { "Pre-Training": "pt", } -STAGES_USE_PAIR_DATA = ["rm", "dpo", "orpo"] +STAGES_USE_PAIR_DATA = {"rm", "dpo"} -SUPPORTED_CLASS_FOR_S2ATTN = ["llama"] +SUPPORTED_CLASS_FOR_S2ATTN = {"llama"} V_HEAD_WEIGHTS_NAME = "value_head.bin" diff --git a/src/llamafactory/hparams/parser.py b/src/llamafactory/hparams/parser.py index adb206f8..b3c673be 100644 --- a/src/llamafactory/hparams/parser.py +++ b/src/llamafactory/hparams/parser.py @@ -11,6 +11,7 @@ from transformers.trainer_utils import get_last_checkpoint from transformers.utils import is_torch_bf16_gpu_available from transformers.utils.versions import require_version +from ..extras.constants import CHECKPOINT_NAMES from ..extras.logging import get_logger from ..extras.misc import check_dependencies, get_current_device from .data_args import DataArguments @@ -255,13 +256,15 @@ def get_train_args(args: Optional[Dict[str, Any]] = None) -> _TRAIN_CLS: and 
can_resume_from_checkpoint ): last_checkpoint = get_last_checkpoint(training_args.output_dir) + if last_checkpoint is None and any( + os.path.isfile(os.path.join(training_args.output_dir, name)) for name in CHECKPOINT_NAMES + ): + raise ValueError("Output directory already exists and is not empty. Please set `overwrite_output_dir`.") + if last_checkpoint is not None: training_args.resume_from_checkpoint = last_checkpoint - logger.info( - "Resuming training from {}. Change `output_dir` or use `overwrite_output_dir` to avoid.".format( - training_args.resume_from_checkpoint - ) - ) + logger.info("Resuming training from {}.".format(training_args.resume_from_checkpoint)) + logger.info("Change `output_dir` or use `overwrite_output_dir` to avoid.") if ( finetuning_args.stage in ["rm", "ppo"] diff --git a/src/llamafactory/webui/chatter.py b/src/llamafactory/webui/chatter.py index a92f6ef7..c82710d3 100644 --- a/src/llamafactory/webui/chatter.py +++ b/src/llamafactory/webui/chatter.py @@ -6,6 +6,7 @@ from numpy.typing import NDArray from ..chat import ChatModel from ..data import Role +from ..extras.constants import PEFT_METHODS from ..extras.misc import torch_gc from ..extras.packages import is_gradio_available from .common import get_save_dir @@ -44,13 +45,14 @@ class WebChatModel(ChatModel): def load_model(self, data) -> Generator[str, None, None]: get = lambda elem_id: data[self.manager.get_elem_by_id(elem_id)] - lang = get("top.lang") + lang, model_name, model_path = get("top.lang"), get("top.model_name"), get("top.model_path") + finetuning_type, checkpoint_path = get("top.finetuning_type"), get("top.checkpoint_path") error = "" if self.loaded: error = ALERTS["err_exists"][lang] - elif not get("top.model_name"): + elif not model_name: error = ALERTS["err_no_model"][lang] - elif not get("top.model_path"): + elif not model_path: error = ALERTS["err_no_path"][lang] elif self.demo_mode: error = ALERTS["err_demo"][lang] @@ -60,21 +62,10 @@ class WebChatModel(ChatModel): yield error return - if get("top.adapter_path"): - adapter_name_or_path = ",".join( - [ - get_save_dir(get("top.model_name"), get("top.finetuning_type"), adapter) - for adapter in get("top.adapter_path") - ] - ) - else: - adapter_name_or_path = None - yield ALERTS["info_loading"][lang] args = dict( - model_name_or_path=get("top.model_path"), - adapter_name_or_path=adapter_name_or_path, - finetuning_type=get("top.finetuning_type"), + model_name_or_path=model_path, + finetuning_type=finetuning_type, quantization_bit=int(get("top.quantization_bit")) if get("top.quantization_bit") in ["8", "4"] else None, template=get("top.template"), flash_attn="fa2" if get("top.booster") == "flashattn2" else "auto", @@ -83,8 +74,16 @@ class WebChatModel(ChatModel): rope_scaling=get("top.rope_scaling") if get("top.rope_scaling") in ["linear", "dynamic"] else None, infer_backend=get("infer.infer_backend"), ) - super().__init__(args) + if checkpoint_path: + if finetuning_type in PEFT_METHODS: # list + args["adapter_name_or_path"] = ",".join( + [get_save_dir(model_name, finetuning_type, adapter) for adapter in checkpoint_path] + ) + else: # str + args["model_name_or_path"] = get_save_dir(model_name, finetuning_type, checkpoint_path) + + super().__init__(args) yield ALERTS["info_loaded"][lang] def unload_model(self, data) -> Generator[str, None, None]: diff --git a/src/llamafactory/webui/common.py b/src/llamafactory/webui/common.py index ea82fd88..62004bce 100644 --- a/src/llamafactory/webui/common.py +++ b/src/llamafactory/webui/common.py @@ -1,12 +1,12 
@@ import json import os from collections import defaultdict -from typing import Any, Dict, Optional +from typing import Any, Dict, Optional, Tuple -from peft.utils import SAFETENSORS_WEIGHTS_NAME, WEIGHTS_NAME from yaml import safe_dump, safe_load from ..extras.constants import ( + CHECKPOINT_NAMES, DATA_CONFIG, DEFAULT_MODULE, DEFAULT_TEMPLATE, @@ -29,7 +29,6 @@ if is_gradio_available(): logger = get_logger(__name__) -ADAPTER_NAMES = {WEIGHTS_NAME, SAFETENSORS_WEIGHTS_NAME} DEFAULT_CACHE_DIR = "cache" DEFAULT_CONFIG_DIR = "config" DEFAULT_DATA_DIR = "data" @@ -38,19 +37,31 @@ USER_CONFIG = "user_config.yaml" def get_save_dir(*paths: str) -> os.PathLike: + r""" + Gets the path to saved model checkpoints. + """ paths = (path.replace(os.path.sep, "").replace(" ", "").strip() for path in paths) return os.path.join(DEFAULT_SAVE_DIR, *paths) def get_config_path() -> os.PathLike: + r""" + Gets the path to user config. + """ return os.path.join(DEFAULT_CACHE_DIR, USER_CONFIG) -def get_save_path(config_path: str) -> os.PathLike: +def get_arg_save_path(config_path: str) -> os.PathLike: + r""" + Gets the path to saved arguments. + """ return os.path.join(DEFAULT_CONFIG_DIR, config_path) def load_config() -> Dict[str, Any]: + r""" + Loads user config if exists. + """ try: with open(get_config_path(), "r", encoding="utf-8") as f: return safe_load(f) @@ -59,6 +70,9 @@ def load_config() -> Dict[str, Any]: def save_config(lang: str, model_name: Optional[str] = None, model_path: Optional[str] = None) -> None: + r""" + Saves user config. + """ os.makedirs(DEFAULT_CACHE_DIR, exist_ok=True) user_config = load_config() user_config["lang"] = lang or user_config["lang"] @@ -69,23 +83,10 @@ def save_config(lang: str, model_name: Optional[str] = None, model_path: Optiona safe_dump(user_config, f) -def load_args(config_path: str) -> Optional[Dict[str, Any]]: - try: - with open(get_save_path(config_path), "r", encoding="utf-8") as f: - return safe_load(f) - except Exception: - return None - - -def save_args(config_path: str, config_dict: Dict[str, Any]) -> str: - os.makedirs(DEFAULT_CONFIG_DIR, exist_ok=True) - with open(get_save_path(config_path), "w", encoding="utf-8") as f: - safe_dump(config_dict, f) - - return str(get_save_path(config_path)) - - -def get_model_path(model_name: str) -> str: +def get_model_path(model_name: str) -> Optional[str]: + r""" + Gets the model path according to the model name. + """ user_config = load_config() path_dict: Dict[DownloadSource, str] = SUPPORTED_MODELS.get(model_name, defaultdict(str)) model_path = user_config["path_dict"].get(model_name, None) or path_dict.get(DownloadSource.DEFAULT, None) @@ -99,40 +100,71 @@ def get_model_path(model_name: str) -> str: def get_prefix(model_name: str) -> str: + r""" + Gets the prefix of the model name to obtain the model family. + """ return model_name.split("-")[0] +def get_model_info(model_name: str) -> Tuple[str, str, bool]: + r""" + Gets the necessary information of this model. + + Returns: + model_path (str) + template (str) + visual (bool) + """ + return get_model_path(model_name), get_template(model_name), get_visual(model_name) + + def get_module(model_name: str) -> str: - return DEFAULT_MODULE.get(get_prefix(model_name), "q_proj,v_proj") + r""" + Gets the LoRA modules of this model. + """ + return DEFAULT_MODULE.get(get_prefix(model_name), "all") def get_template(model_name: str) -> str: + r""" + Gets the template name if the model is a chat model. 
+ """ if model_name and model_name.endswith("Chat") and get_prefix(model_name) in DEFAULT_TEMPLATE: return DEFAULT_TEMPLATE[get_prefix(model_name)] return "default" def get_visual(model_name: str) -> bool: + r""" + Judges if the model is a vision language model. + """ return get_prefix(model_name) in VISION_MODELS -def list_adapters(model_name: str, finetuning_type: str) -> "gr.Dropdown": - if finetuning_type not in PEFT_METHODS: - return gr.Dropdown(value=[], choices=[], interactive=False) - - adapters = [] - if model_name and finetuning_type == "lora": +def list_checkpoints(model_name: str, finetuning_type: str) -> "gr.Dropdown": + r""" + Lists all available checkpoints. + """ + checkpoints = [] + if model_name: save_dir = get_save_dir(model_name, finetuning_type) if save_dir and os.path.isdir(save_dir): - for adapter in os.listdir(save_dir): - if os.path.isdir(os.path.join(save_dir, adapter)) and any( - os.path.isfile(os.path.join(save_dir, adapter, name)) for name in ADAPTER_NAMES + for checkpoint in os.listdir(save_dir): + if os.path.isdir(os.path.join(save_dir, checkpoint)) and any( + os.path.isfile(os.path.join(save_dir, checkpoint, name)) for name in CHECKPOINT_NAMES ): - adapters.append(adapter) - return gr.Dropdown(value=[], choices=adapters, interactive=True) + checkpoints.append(checkpoint) + + if finetuning_type in PEFT_METHODS: + return gr.Dropdown(value=[], choices=checkpoints, multiselect=True) + else: + return gr.Dropdown(value=None, choices=checkpoints, multiselect=False) def load_dataset_info(dataset_dir: str) -> Dict[str, Dict[str, Any]]: + r""" + Loads dataset_info.json. + """ if dataset_dir == "ONLINE": logger.info("dataset_dir is ONLINE, using online dataset.") return {} @@ -145,12 +177,11 @@ def load_dataset_info(dataset_dir: str) -> Dict[str, Dict[str, Any]]: return {} -def list_dataset(dataset_dir: str = None, training_stage: str = list(TRAINING_STAGES.keys())[0]) -> "gr.Dropdown": +def list_datasets(dataset_dir: str = None, training_stage: str = list(TRAINING_STAGES.keys())[0]) -> "gr.Dropdown": + r""" + Lists all available datasets in the dataset dir for the training stage. 
+ """ dataset_info = load_dataset_info(dataset_dir if dataset_dir is not None else DEFAULT_DATA_DIR) ranking = TRAINING_STAGES[training_stage] in STAGES_USE_PAIR_DATA datasets = [k for k, v in dataset_info.items() if v.get("ranking", False) == ranking] - return gr.Dropdown(value=[], choices=datasets) - - -def autoset_packing(training_stage: str = list(TRAINING_STAGES.keys())[0]) -> "gr.Button": - return gr.Button(value=(TRAINING_STAGES[training_stage] == "pt")) + return gr.Dropdown(choices=datasets) diff --git a/src/llamafactory/webui/components/eval.py b/src/llamafactory/webui/components/eval.py index 8b70283b..99215fc2 100644 --- a/src/llamafactory/webui/components/eval.py +++ b/src/llamafactory/webui/components/eval.py @@ -1,7 +1,7 @@ from typing import TYPE_CHECKING, Dict from ...extras.packages import is_gradio_available -from ..common import DEFAULT_DATA_DIR, list_dataset +from ..common import DEFAULT_DATA_DIR, list_datasets from .data import create_preview_box @@ -74,6 +74,6 @@ def create_eval_tab(engine: "Engine") -> Dict[str, "Component"]: stop_btn.click(engine.runner.set_abort) resume_btn.change(engine.runner.monitor, outputs=output_elems, concurrency_limit=None) - dataset_dir.change(list_dataset, [dataset_dir], [dataset], queue=False) + dataset.focus(list_datasets, [dataset_dir], [dataset], queue=False) return elem_dict diff --git a/src/llamafactory/webui/components/export.py b/src/llamafactory/webui/components/export.py index 134b77e0..2f354011 100644 --- a/src/llamafactory/webui/components/export.py +++ b/src/llamafactory/webui/components/export.py @@ -1,5 +1,6 @@ -from typing import TYPE_CHECKING, Dict, Generator, List +from typing import TYPE_CHECKING, Dict, Generator, List, Union +from ...extras.constants import PEFT_METHODS from ...extras.misc import torch_gc from ...extras.packages import is_gradio_available from ...train.tuner import export_model @@ -24,8 +25,8 @@ def save_model( lang: str, model_name: str, model_path: str, - adapter_path: List[str], finetuning_type: str, + checkpoint_path: Union[str, List[str]], template: str, visual_inputs: bool, export_size: int, @@ -45,9 +46,9 @@ def save_model( error = ALERTS["err_no_export_dir"][lang] elif export_quantization_bit in GPTQ_BITS and not export_quantization_dataset: error = ALERTS["err_no_dataset"][lang] - elif export_quantization_bit not in GPTQ_BITS and not adapter_path: + elif export_quantization_bit not in GPTQ_BITS and not checkpoint_path: error = ALERTS["err_no_adapter"][lang] - elif export_quantization_bit in GPTQ_BITS and adapter_path: + elif export_quantization_bit in GPTQ_BITS and isinstance(checkpoint_path, list): error = ALERTS["err_gptq_lora"][lang] if error: @@ -55,16 +56,8 @@ def save_model( yield error return - if adapter_path: - adapter_name_or_path = ",".join( - [get_save_dir(model_name, finetuning_type, adapter) for adapter in adapter_path] - ) - else: - adapter_name_or_path = None - args = dict( model_name_or_path=model_path, - adapter_name_or_path=adapter_name_or_path, finetuning_type=finetuning_type, template=template, visual_inputs=visual_inputs, @@ -77,6 +70,14 @@ def save_model( export_legacy_format=export_legacy_format, ) + if checkpoint_path: + if finetuning_type in PEFT_METHODS: # list + args["adapter_name_or_path"] = ",".join( + [get_save_dir(model_name, finetuning_type, adapter) for adapter in checkpoint_path] + ) + else: # str + args["model_name_or_path"] = get_save_dir(model_name, finetuning_type, checkpoint_path) + yield ALERTS["info_exporting"][lang] export_model(args) torch_gc() @@ 
-86,7 +87,7 @@ def save_model( def create_export_tab(engine: "Engine") -> Dict[str, "Component"]: with gr.Row(): export_size = gr.Slider(minimum=1, maximum=100, value=1, step=1) - export_quantization_bit = gr.Dropdown(choices=["none", "8", "4", "3", "2"], value="none") + export_quantization_bit = gr.Dropdown(choices=["none"] + GPTQ_BITS, value="none") export_quantization_dataset = gr.Textbox(value="data/c4_demo.json") export_device = gr.Radio(choices=["cpu", "cuda"], value="cpu") export_legacy_format = gr.Checkbox() @@ -104,8 +105,8 @@ def create_export_tab(engine: "Engine") -> Dict[str, "Component"]: engine.manager.get_elem_by_id("top.lang"), engine.manager.get_elem_by_id("top.model_name"), engine.manager.get_elem_by_id("top.model_path"), - engine.manager.get_elem_by_id("top.adapter_path"), engine.manager.get_elem_by_id("top.finetuning_type"), + engine.manager.get_elem_by_id("top.checkpoint_path"), engine.manager.get_elem_by_id("top.template"), engine.manager.get_elem_by_id("top.visual_inputs"), export_size, diff --git a/src/llamafactory/webui/components/top.py b/src/llamafactory/webui/components/top.py index a75a4d62..ca093584 100644 --- a/src/llamafactory/webui/components/top.py +++ b/src/llamafactory/webui/components/top.py @@ -3,7 +3,7 @@ from typing import TYPE_CHECKING, Dict from ...data import templates from ...extras.constants import METHODS, SUPPORTED_MODELS from ...extras.packages import is_gradio_available -from ..common import get_model_path, get_template, get_visual, list_adapters, save_config +from ..common import get_model_info, list_checkpoints, save_config from ..utils import can_quantize @@ -25,8 +25,7 @@ def create_top() -> Dict[str, "Component"]: with gr.Row(): finetuning_type = gr.Dropdown(choices=METHODS, value="lora", scale=1) - adapter_path = gr.Dropdown(multiselect=True, allow_custom_value=True, scale=5) - refresh_btn = gr.Button(scale=1) + checkpoint_path = gr.Dropdown(multiselect=True, allow_custom_value=True, scale=6) with gr.Accordion(open=False) as advanced_tab: with gr.Row(): @@ -36,27 +35,17 @@ def create_top() -> Dict[str, "Component"]: booster = gr.Radio(choices=["none", "flashattn2", "unsloth"], value="none", scale=3) visual_inputs = gr.Checkbox(scale=1) - model_name.change(list_adapters, [model_name, finetuning_type], [adapter_path], queue=False).then( - get_model_path, [model_name], [model_path], queue=False - ).then(get_template, [model_name], [template], queue=False).then( - get_visual, [model_name], [visual_inputs], queue=False - ) # do not save config since the below line will save - + model_name.change(get_model_info, [model_name], [model_path, template, visual_inputs], queue=False) model_path.change(save_config, inputs=[lang, model_name, model_path], queue=False) - - finetuning_type.change(list_adapters, [model_name, finetuning_type], [adapter_path], queue=False).then( - can_quantize, [finetuning_type], [quantization_bit], queue=False - ) - - refresh_btn.click(list_adapters, [model_name, finetuning_type], [adapter_path], queue=False) + finetuning_type.change(can_quantize, [finetuning_type], [quantization_bit], queue=False) + checkpoint_path.focus(list_checkpoints, [model_name, finetuning_type], [checkpoint_path], queue=False) return dict( lang=lang, model_name=model_name, model_path=model_path, finetuning_type=finetuning_type, - adapter_path=adapter_path, - refresh_btn=refresh_btn, + checkpoint_path=checkpoint_path, advanced_tab=advanced_tab, quantization_bit=quantization_bit, template=template, diff --git 
a/src/llamafactory/webui/components/train.py b/src/llamafactory/webui/components/train.py index 8db5c2ba..6f742bb1 100644 --- a/src/llamafactory/webui/components/train.py +++ b/src/llamafactory/webui/components/train.py @@ -5,8 +5,9 @@ from transformers.trainer_utils import SchedulerType from ...extras.constants import TRAINING_STAGES from ...extras.misc import get_device_count from ...extras.packages import is_gradio_available -from ..common import DEFAULT_DATA_DIR, autoset_packing, list_adapters, list_dataset -from ..components.data import create_preview_box +from ..common import DEFAULT_DATA_DIR, list_checkpoints, list_datasets +from ..utils import change_stage, check_output_dir, list_output_dirs +from .data import create_preview_box if is_gradio_available(): @@ -256,11 +257,12 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]: with gr.Row(): with gr.Column(scale=3): with gr.Row(): - output_dir = gr.Textbox() + initial_dir = gr.Textbox(visible=False, interactive=False) + output_dir = gr.Dropdown(allow_custom_value=True) config_path = gr.Textbox() with gr.Row(): - device_count = gr.Textbox(value=str(get_device_count()), interactive=False) + device_count = gr.Textbox(value=str(get_device_count() or 1), interactive=False) ds_stage = gr.Dropdown(choices=["none", "2", "3"], value="none") ds_offload = gr.Checkbox() @@ -282,6 +284,7 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]: arg_load_btn=arg_load_btn, start_btn=start_btn, stop_btn=stop_btn, + initial_dir=initial_dir, output_dir=output_dir, config_path=config_path, device_count=device_count, @@ -295,24 +298,24 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]: ) output_elems = [output_box, progress_bar, loss_viewer] + lang = engine.manager.get_elem_by_id("top.lang") + model_name = engine.manager.get_elem_by_id("top.model_name") + finetuning_type = engine.manager.get_elem_by_id("top.finetuning_type") + cmd_preview_btn.click(engine.runner.preview_train, input_elems, output_elems, concurrency_limit=None) arg_save_btn.click(engine.runner.save_args, input_elems, output_elems, concurrency_limit=None) arg_load_btn.click( - engine.runner.load_args, - [engine.manager.get_elem_by_id("top.lang"), config_path], - list(input_elems) + [output_box], - concurrency_limit=None, + engine.runner.load_args, [lang, config_path], list(input_elems) + [output_box], concurrency_limit=None ) start_btn.click(engine.runner.run_train, input_elems, output_elems) stop_btn.click(engine.runner.set_abort) resume_btn.change(engine.runner.monitor, outputs=output_elems, concurrency_limit=None) - dataset_dir.change(list_dataset, [dataset_dir, training_stage], [dataset], queue=False) - training_stage.change(list_dataset, [dataset_dir, training_stage], [dataset], queue=False).then( - list_adapters, - [engine.manager.get_elem_by_id("top.model_name"), engine.manager.get_elem_by_id("top.finetuning_type")], - [reward_model], - queue=False, - ).then(autoset_packing, [training_stage], [packing], queue=False) + training_stage.change(change_stage, [training_stage], [dataset, packing], queue=False) + dataset.focus(list_datasets, [dataset_dir, training_stage], [dataset], queue=False) + reward_model.focus(list_checkpoints, [model_name, finetuning_type], [reward_model], queue=False) + output_dir.change( + list_output_dirs, [model_name, finetuning_type, initial_dir], [output_dir], concurrency_limit=None + ).then(check_output_dir, inputs=[lang, model_name, finetuning_type, output_dir], concurrency_limit=None) return elem_dict diff --git 
a/src/llamafactory/webui/engine.py b/src/llamafactory/webui/engine.py index fb568737..00877115 100644 --- a/src/llamafactory/webui/engine.py +++ b/src/llamafactory/webui/engine.py @@ -1,11 +1,11 @@ from typing import TYPE_CHECKING, Any, Dict from .chatter import WebChatModel -from .common import get_model_path, list_dataset, load_config +from .common import load_config from .locales import LOCALES from .manager import Manager from .runner import Runner -from .utils import get_time, save_ds_config +from .utils import create_ds_config, get_time if TYPE_CHECKING: @@ -20,7 +20,7 @@ class Engine: self.runner = Runner(self.manager, demo_mode) self.chatter = WebChatModel(self.manager, demo_mode, lazy_init=(not pure_chat)) if not demo_mode: - save_ds_config() + create_ds_config() def _update_component(self, input_dict: Dict[str, Dict[str, Any]]) -> Dict["Component", "Component"]: r""" @@ -40,16 +40,15 @@ class Engine: init_dict = {"top.lang": {"value": lang}, "infer.chat_box": {"visible": self.chatter.loaded}} if not self.pure_chat: - init_dict["train.dataset"] = {"choices": list_dataset().choices} - init_dict["eval.dataset"] = {"choices": list_dataset().choices} - init_dict["train.output_dir"] = {"value": "train_{}".format(get_time())} - init_dict["train.config_path"] = {"value": "{}.yaml".format(get_time())} - init_dict["eval.output_dir"] = {"value": "eval_{}".format(get_time())} + current_time = get_time() + init_dict["train.initial_dir"] = {"value": "train_{}".format(current_time)} + init_dict["train.output_dir"] = {"value": "train_{}".format(current_time)} + init_dict["train.config_path"] = {"value": "{}.yaml".format(current_time)} + init_dict["eval.output_dir"] = {"value": "eval_{}".format(current_time)} init_dict["infer.image_box"] = {"visible": False} if user_config.get("last_model", None): init_dict["top.model_name"] = {"value": user_config["last_model"]} - init_dict["top.model_path"] = {"value": get_model_path(user_config["last_model"])} yield self._update_component(init_dict) diff --git a/src/llamafactory/webui/locales.py b/src/llamafactory/webui/locales.py index 4657f9a3..5b11c853 100644 --- a/src/llamafactory/webui/locales.py +++ b/src/llamafactory/webui/locales.py @@ -46,26 +46,15 @@ LOCALES = { "label": "微调方法", }, }, - "adapter_path": { + "checkpoint_path": { "en": { - "label": "Adapter path", + "label": "Checkpoint path", }, "ru": { - "label": "Путь к адаптеру", + "label": "Путь контрольной точки", }, "zh": { - "label": "适配器路径", - }, - }, - "refresh_btn": { - "en": { - "value": "Refresh adapters", - }, - "ru": { - "value": "Обновить адаптеры", - }, - "zh": { - "value": "刷新适配器", + "label": "检查点路径", }, }, "advanced_tab": { @@ -1531,6 +1520,11 @@ ALERTS = { "ru": "Среда CUDA не обнаружена.", "zh": "未检测到 CUDA 环境。", }, + "warn_output_dir_exists": { + "en": "Output dir already exists, will resume training from here.", + "ru": "Выходной каталог уже существует, обучение будет продолжено отсюда.", + "zh": "输出目录已存在,将从该断点恢复训练。", + }, "info_aborting": { "en": "Aborted, wait for terminating...", "ru": "Прервано, ожидание завершения...", diff --git a/src/llamafactory/webui/manager.py b/src/llamafactory/webui/manager.py index f65fa804..326fdb8d 100644 --- a/src/llamafactory/webui/manager.py +++ b/src/llamafactory/webui/manager.py @@ -55,7 +55,7 @@ class Manager: self._id_to_elem["top.model_name"], self._id_to_elem["top.model_path"], self._id_to_elem["top.finetuning_type"], - self._id_to_elem["top.adapter_path"], + self._id_to_elem["top.checkpoint_path"], self._id_to_elem["top.quantization_bit"], 
self._id_to_elem["top.template"], self._id_to_elem["top.rope_scaling"], diff --git a/src/llamafactory/webui/runner.py b/src/llamafactory/webui/runner.py index c2e46e97..7a305d62 100644 --- a/src/llamafactory/webui/runner.py +++ b/src/llamafactory/webui/runner.py @@ -7,12 +7,12 @@ from typing import TYPE_CHECKING, Any, Dict, Generator, Optional import psutil from transformers.trainer import TRAINING_ARGS_NAME -from ..extras.constants import TRAINING_STAGES +from ..extras.constants import PEFT_METHODS, TRAINING_STAGES from ..extras.misc import is_gpu_or_npu_available, torch_gc from ..extras.packages import is_gradio_available -from .common import DEFAULT_CACHE_DIR, get_module, get_save_dir, load_args, load_config, save_args +from .common import DEFAULT_CACHE_DIR, get_module, get_save_dir, load_config from .locales import ALERTS -from .utils import gen_cmd, get_eval_results, get_trainer_info, save_cmd +from .utils import gen_cmd, get_eval_results, get_trainer_info, load_args, save_args, save_cmd if is_gradio_available(): @@ -85,26 +85,16 @@ class Runner: def _parse_train_args(self, data: Dict["Component", Any]) -> Dict[str, Any]: get = lambda elem_id: data[self.manager.get_elem_by_id(elem_id)] + model_name, finetuning_type = get("top.model_name"), get("top.finetuning_type") user_config = load_config() - if get("top.adapter_path"): - adapter_name_or_path = ",".join( - [ - get_save_dir(get("top.model_name"), get("top.finetuning_type"), adapter) - for adapter in get("top.adapter_path") - ] - ) - else: - adapter_name_or_path = None - args = dict( stage=TRAINING_STAGES[get("train.training_stage")], do_train=True, model_name_or_path=get("top.model_path"), - adapter_name_or_path=adapter_name_or_path, cache_dir=user_config.get("cache_dir", None), preprocessing_num_workers=16, - finetuning_type=get("top.finetuning_type"), + finetuning_type=finetuning_type, quantization_bit=int(get("top.quantization_bit")) if get("top.quantization_bit") in ["8", "4"] else None, template=get("top.template"), rope_scaling=get("top.rope_scaling") if get("top.rope_scaling") in ["linear", "dynamic"] else None, @@ -134,13 +124,23 @@ class Runner: report_to="all" if get("train.report_to") else "none", use_galore=get("train.use_galore"), use_badam=get("train.use_badam"), - output_dir=get_save_dir(get("top.model_name"), get("top.finetuning_type"), get("train.output_dir")), + output_dir=get_save_dir(model_name, finetuning_type, get("train.output_dir")), fp16=(get("train.compute_type") == "fp16"), bf16=(get("train.compute_type") == "bf16"), pure_bf16=(get("train.compute_type") == "pure_bf16"), plot_loss=True, + ddp_timeout=180000000, ) + # checkpoints + if get("top.checkpoint_path"): + if finetuning_type in PEFT_METHODS: # list + args["adapter_name_or_path"] = ",".join( + [get_save_dir(model_name, finetuning_type, adapter) for adapter in get("top.checkpoint_path")] + ) + else: # str + args["model_name_or_path"] = get_save_dir(model_name, finetuning_type, get("top.checkpoint_path")) + # freeze config if args["finetuning_type"] == "freeze": args["freeze_trainable_layers"] = get("train.freeze_trainable_layers") @@ -156,7 +156,7 @@ class Runner: args["create_new_adapter"] = get("train.create_new_adapter") args["use_rslora"] = get("train.use_rslora") args["use_dora"] = get("train.use_dora") - args["lora_target"] = get("train.lora_target") or get_module(get("top.model_name")) + args["lora_target"] = get("train.lora_target") or get_module(model_name) args["additional_target"] = get("train.additional_target") or None if 
args["use_llama_pro"]: @@ -164,13 +164,14 @@ class Runner: # rlhf config if args["stage"] == "ppo": - args["reward_model"] = ",".join( - [ - get_save_dir(get("top.model_name"), get("top.finetuning_type"), adapter) - for adapter in get("train.reward_model") - ] - ) - args["reward_model_type"] = "lora" if args["finetuning_type"] == "lora" else "full" + if finetuning_type in PEFT_METHODS: + args["reward_model"] = ",".join( + [get_save_dir(model_name, finetuning_type, adapter) for adapter in get("train.reward_model")] + ) + else: + args["reward_model"] = get_save_dir(model_name, finetuning_type, get("train.reward_model")) + + args["reward_model_type"] = "lora" if finetuning_type == "lora" else "full" args["ppo_score_norm"] = get("train.ppo_score_norm") args["ppo_whiten_rewards"] = get("train.ppo_whiten_rewards") args["top_k"] = 0 @@ -211,25 +212,15 @@ class Runner: def _parse_eval_args(self, data: Dict["Component", Any]) -> Dict[str, Any]: get = lambda elem_id: data[self.manager.get_elem_by_id(elem_id)] + model_name, finetuning_type = get("top.model_name"), get("top.finetuning_type") user_config = load_config() - if get("top.adapter_path"): - adapter_name_or_path = ",".join( - [ - get_save_dir(get("top.model_name"), get("top.finetuning_type"), adapter) - for adapter in get("top.adapter_path") - ] - ) - else: - adapter_name_or_path = None - args = dict( stage="sft", model_name_or_path=get("top.model_path"), - adapter_name_or_path=adapter_name_or_path, cache_dir=user_config.get("cache_dir", None), preprocessing_num_workers=16, - finetuning_type=get("top.finetuning_type"), + finetuning_type=finetuning_type, quantization_bit=int(get("top.quantization_bit")) if get("top.quantization_bit") in ["8", "4"] else None, template=get("top.template"), rope_scaling=get("top.rope_scaling") if get("top.rope_scaling") in ["linear", "dynamic"] else None, @@ -245,7 +236,7 @@ class Runner: max_new_tokens=get("eval.max_new_tokens"), top_p=get("eval.top_p"), temperature=get("eval.temperature"), - output_dir=get_save_dir(get("top.model_name"), get("top.finetuning_type"), get("eval.output_dir")), + output_dir=get_save_dir(model_name, finetuning_type, get("eval.output_dir")), ) if get("eval.predict"): @@ -253,6 +244,14 @@ class Runner: else: args["do_eval"] = True + if get("top.checkpoint_path"): + if finetuning_type in PEFT_METHODS: # list + args["adapter_name_or_path"] = ",".join( + [get_save_dir(model_name, finetuning_type, adapter) for adapter in get("top.checkpoint_path")] + ) + else: # str + args["model_name_or_path"] = get_save_dir(model_name, finetuning_type, get("top.checkpoint_path")) + return args def _preview(self, data: Dict["Component", Any], do_train: bool) -> Generator[Dict["Component", str], None, None]: @@ -296,9 +295,7 @@ class Runner: self.running = True get = lambda elem_id: self.running_data[self.manager.get_elem_by_id(elem_id)] - lang = get("top.lang") - model_name = get("top.model_name") - finetuning_type = get("top.finetuning_type") + lang, model_name, finetuning_type = get("top.lang"), get("top.model_name"), get("top.finetuning_type") output_dir = get("{}.output_dir".format("train" if self.do_train else "eval")) output_path = get_save_dir(model_name, finetuning_type, output_dir) @@ -356,7 +353,7 @@ class Runner: config_dict: Dict[str, Any] = {} lang = data[self.manager.get_elem_by_id("top.lang")] config_path = data[self.manager.get_elem_by_id("train.config_path")] - skip_ids = ["top.lang", "top.model_path", "train.output_dir", "train.config_path"] + skip_ids = ["top.lang", "top.model_path", 
"train.output_dir", "train.config_path", "train.device_count"] for elem, value in data.items(): elem_id = self.manager.get_id_by_elem(elem) if elem_id not in skip_ids: diff --git a/src/llamafactory/webui/utils.py b/src/llamafactory/webui/utils.py index 654d1f8d..09cefa0e 100644 --- a/src/llamafactory/webui/utils.py +++ b/src/llamafactory/webui/utils.py @@ -3,12 +3,13 @@ import os from datetime import datetime from typing import Any, Dict, List, Optional, Tuple -from yaml import safe_dump +from transformers.trainer_utils import get_last_checkpoint +from yaml import safe_dump, safe_load -from ..extras.constants import RUNNING_LOG, TRAINER_CONFIG, TRAINER_LOG +from ..extras.constants import PEFT_METHODS, RUNNING_LOG, TRAINER_CONFIG, TRAINER_LOG, TRAINING_STAGES from ..extras.packages import is_gradio_available, is_matplotlib_available from ..extras.ploting import gen_loss_plot -from .common import DEFAULT_CACHE_DIR +from .common import DEFAULT_CACHE_DIR, DEFAULT_CONFIG_DIR, get_arg_save_path, get_save_dir from .locales import ALERTS @@ -17,13 +18,26 @@ if is_gradio_available(): def can_quantize(finetuning_type: str) -> "gr.Dropdown": - if finetuning_type != "lora": + r""" + Judges if the quantization is available in this finetuning type. + """ + if finetuning_type not in PEFT_METHODS: return gr.Dropdown(value="none", interactive=False) else: return gr.Dropdown(interactive=True) +def change_stage(training_stage: str = list(TRAINING_STAGES.keys())[0]) -> Tuple[List[str], bool]: + r""" + Modifys states after changing the training stage. + """ + return [], TRAINING_STAGES[training_stage] == "pt" + + def check_json_schema(text: str, lang: str) -> None: + r""" + Checks if the json schema is valid. + """ try: tools = json.loads(text) if tools: @@ -38,11 +52,17 @@ def check_json_schema(text: str, lang: str) -> None: def clean_cmd(args: Dict[str, Any]) -> Dict[str, Any]: + r""" + Removes args with NoneType or False or empty string value. + """ no_skip_keys = ["packing"] return {k: v for k, v in args.items() if (k in no_skip_keys) or (v is not None and v is not False and v != "")} def gen_cmd(args: Dict[str, Any]) -> str: + r""" + Generates arguments for previewing. + """ cmd_lines = ["llamafactory-cli train "] for k, v in clean_cmd(args).items(): cmd_lines.append(" --{} {} ".format(k, str(v))) @@ -52,17 +72,39 @@ def gen_cmd(args: Dict[str, Any]) -> str: return cmd_text +def save_cmd(args: Dict[str, Any]) -> str: + r""" + Saves arguments to launch training. + """ + output_dir = args["output_dir"] + os.makedirs(output_dir, exist_ok=True) + + with open(os.path.join(output_dir, TRAINER_CONFIG), "w", encoding="utf-8") as f: + safe_dump(clean_cmd(args), f) + + return os.path.join(output_dir, TRAINER_CONFIG) + + def get_eval_results(path: os.PathLike) -> str: + r""" + Gets scores after evaluation. + """ with open(path, "r", encoding="utf-8") as f: result = json.dumps(json.load(f), indent=4) return "```json\n{}\n```\n".format(result) def get_time() -> str: + r""" + Gets current date and time. + """ return datetime.now().strftime(r"%Y-%m-%d-%H-%M-%S") def get_trainer_info(output_path: os.PathLike, do_train: bool) -> Tuple[str, "gr.Slider", Optional["gr.Plot"]]: + r""" + Gets training infomation for monitor. 
+ """ running_log = "" running_progress = gr.Slider(visible=False) running_loss = None @@ -96,17 +138,56 @@ def get_trainer_info(output_path: os.PathLike, do_train: bool) -> Tuple[str, "gr return running_log, running_progress, running_loss -def save_cmd(args: Dict[str, Any]) -> str: - output_dir = args["output_dir"] - os.makedirs(output_dir, exist_ok=True) - - with open(os.path.join(output_dir, TRAINER_CONFIG), "w", encoding="utf-8") as f: - safe_dump(clean_cmd(args), f) - - return os.path.join(output_dir, TRAINER_CONFIG) +def load_args(config_path: str) -> Optional[Dict[str, Any]]: + r""" + Loads saved arguments. + """ + try: + with open(get_arg_save_path(config_path), "r", encoding="utf-8") as f: + return safe_load(f) + except Exception: + return None -def save_ds_config() -> None: +def save_args(config_path: str, config_dict: Dict[str, Any]) -> str: + r""" + Saves arguments. + """ + os.makedirs(DEFAULT_CONFIG_DIR, exist_ok=True) + with open(get_arg_save_path(config_path), "w", encoding="utf-8") as f: + safe_dump(config_dict, f) + + return str(get_arg_save_path(config_path)) + + +def list_output_dirs(model_name: str, finetuning_type: str, initial_dir: str) -> "gr.Dropdown": + r""" + Lists all the directories that can resume from. + """ + output_dirs = [initial_dir] + if model_name: + save_dir = get_save_dir(model_name, finetuning_type) + if save_dir and os.path.isdir(save_dir): + for folder in os.listdir(save_dir): + output_dir = os.path.join(save_dir, folder) + if os.path.isdir(output_dir) and get_last_checkpoint(output_dir) is not None: + output_dirs.append(folder) + + return gr.Dropdown(choices=output_dirs) + + +def check_output_dir(lang: str, model_name: str, finetuning_type: str, output_dir: str) -> None: + r""" + Check if output dir exists. + """ + if os.path.isdir(get_save_dir(model_name, finetuning_type, output_dir)): + gr.Warning(ALERTS["warn_output_dir_exists"][lang]) + + +def create_ds_config() -> None: + r""" + Creates deepspeed config. + """ os.makedirs(DEFAULT_CACHE_DIR, exist_ok=True) ds_config = { "train_batch_size": "auto", From 04dce0079e927f54091b21a75fa21c7a78194579 Mon Sep 17 00:00:00 2001 From: hoshi-hiyouga Date: Thu, 30 May 2024 00:04:26 +0800 Subject: [PATCH 032/162] Update README.md Former-commit-id: 65fb69e388c0a04c15ecd11441e567966f51fae5 --- data/README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/data/README.md b/data/README.md index dd7ca201..5ceae666 100644 --- a/data/README.md +++ b/data/README.md @@ -12,6 +12,7 @@ Currently we support datasets in **alpaca** and **sharegpt** format. "ranking": "whether the dataset is a preference dataset or not. (default: False)", "subset": "the name of the subset. (optional, default: None)", "folder": "the name of the folder of the dataset repository on the Hugging Face hub. (optional, default: None)", + "num_samples": "the number of samples in the dataset used for training. (optional, default: None)", "columns (optional)": { "prompt": "the column name in the dataset containing the prompts. (default: instruction)", "query": "the column name in the dataset containing the queries. (default: input)", @@ -32,9 +33,8 @@ Currently we support datasets in **alpaca** and **sharegpt** format. "assistant_tag": "the value of the role_tag represents the assistant. (default: gpt)", "observation_tag": "the value of the role_tag represents the tool results. (default: observation)", "function_tag": "the value of the role_tag represents the function call. 
(default: function_call)", - "system_tag": "the value of the role_tag represents the system prompt. (default: system, can override system column)", - }, - "sample_num": "the number of samples from this dataset can be greater than the total amount of the dataset. (default: None)" + "system_tag": "the value of the role_tag represents the system prompt. (default: system, can override system column)" + } } ``` From 9256750add96407d2bcb5cbe954c1cb651054e81 Mon Sep 17 00:00:00 2001 From: hoshi-hiyouga Date: Thu, 30 May 2024 00:04:47 +0800 Subject: [PATCH 033/162] Update README_zh.md Former-commit-id: 3007d260ed45169583a74497a53b661337dd5f71 --- data/README_zh.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/data/README_zh.md b/data/README_zh.md index 1427e48d..1795f352 100644 --- a/data/README_zh.md +++ b/data/README_zh.md @@ -12,6 +12,7 @@ "ranking": "是否为偏好数据集(可选,默认:False)", "subset": "数据集子集的名称(可选,默认:None)", "folder": "Hugging Face 仓库的文件夹名称(可选,默认:None)", + "num_samples": "该数据集中用于训练的样本数量。(可选,默认:None)", "columns(可选)": { "prompt": "数据集代表提示词的表头名称(默认:instruction)", "query": "数据集代表请求的表头名称(默认:input)", @@ -32,9 +33,8 @@ "assistant_tag": "消息中代表助手的 role_tag(默认:gpt)", "observation_tag": "消息中代表工具返回结果的 role_tag(默认:observation)", "function_tag": "消息中代表工具调用的 role_tag(默认:function_call)", - "system_tag": "消息中代表系统提示的 role_tag(默认:system,会覆盖 system 列)" - }, - "sample_num": "从该数据集采样的数量,可大于该数据集总量(默认:None)" + "system_tag": "消息中代表系统提示的 role_tag(默认:system,会覆盖 system column)" + } } ``` From b221962ccf38f38aa6056ee273f02d4dff8e379f Mon Sep 17 00:00:00 2001 From: hoshi-hiyouga Date: Thu, 30 May 2024 00:05:20 +0800 Subject: [PATCH 034/162] Update parser.py Former-commit-id: 310cc11e8c83f16fc5bccc349c38fea347ea9a97 --- src/llamafactory/data/parser.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/llamafactory/data/parser.py b/src/llamafactory/data/parser.py index 99b71cf0..ec97bfc1 100644 --- a/src/llamafactory/data/parser.py +++ b/src/llamafactory/data/parser.py @@ -20,11 +20,12 @@ class DatasetAttr: """ basic configs """ load_from: Literal["hf_hub", "ms_hub", "script", "file"] dataset_name: str + formatting: Literal["alpaca", "sharegpt"] = "alpaca" + ranking: bool = False """ extra configs """ subset: Optional[str] = None folder: Optional[str] = None - ranking: bool = False - formatting: Literal["alpaca", "sharegpt"] = "alpaca" + num_samples: Optional[int] = None """ common columns """ system: Optional[str] = None tools: Optional[str] = None @@ -48,7 +49,6 @@ class DatasetAttr: observation_tag: Optional[str] = "observation" function_tag: Optional[str] = "function_call" system_tag: Optional[str] = "system" - sample_num: Optional[int] = None def __repr__(self) -> str: return self.dataset_name @@ -103,12 +103,12 @@ def get_dataset_list(data_args: "DataArguments") -> List["DatasetAttr"]: else: dataset_attr = DatasetAttr("file", dataset_name=dataset_info[name]["file_name"]) + dataset_attr.set_attr("formatting", dataset_info[name], default="alpaca") + dataset_attr.set_attr("ranking", dataset_info[name], default=False) dataset_attr.set_attr("subset", dataset_info[name]) dataset_attr.set_attr("folder", dataset_info[name]) - dataset_attr.set_attr("ranking", dataset_info[name], default=False) - dataset_attr.set_attr("formatting", dataset_info[name], default="alpaca") - dataset_attr.set_attr("sample_num", dataset_info[name]) - + dataset_attr.set_attr("num_samples", dataset_info[name]) + if "columns" in dataset_info[name]: column_names = ["system", "tools", "images", 
"chosen", "rejected", "kto_tag"] if dataset_attr.formatting == "alpaca": From d5ee4854409fb8dec19b5d71b2921eae029c6c07 Mon Sep 17 00:00:00 2001 From: hoshi-hiyouga Date: Thu, 30 May 2024 00:12:12 +0800 Subject: [PATCH 035/162] Update loader.py Former-commit-id: 19d8fd62c18ee3ba0e431fc241f7d315cb716fef --- src/llamafactory/data/loader.py | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/src/llamafactory/data/loader.py b/src/llamafactory/data/loader.py index 5ce4392e..322eefa0 100644 --- a/src/llamafactory/data/loader.py +++ b/src/llamafactory/data/loader.py @@ -1,10 +1,9 @@ import inspect import os -import numpy as np -from numpy.random import RandomState import sys from typing import TYPE_CHECKING, Literal, Optional, Union +import numpy as np from datasets import load_dataset, load_from_disk from ..extras.constants import FILEEXT2TYPE @@ -108,20 +107,14 @@ def load_single_dataset( if data_args.streaming and (dataset_attr.load_from == "file"): # faster than specifying streaming=True dataset = dataset.to_iterable_dataset() # TODO: add num shards parameter - if data_args.max_samples is not None: # truncate dataset - num_samples = min(data_args.max_samples, len(dataset)) - dataset = dataset.select(range(num_samples)) + if dataset_attr.num_samples is not None and not data_args.streaming: + indexes = np.random.permutation(len(dataset))[: dataset_attr.num_samples] + dataset = dataset.select(indexes) + logger.info("Sampled {} examples from dataset {}.".format(dataset_attr.num_samples, dataset_attr)) - if dataset_attr.sample_num: - dataset_sample_num = dataset_attr.sample_num - logger.info(f"从 {dataset_attr.dataset_name} 采样 {dataset_sample_num} 条训练样本") - random_state = RandomState(42) - idx = random_state.permutation(len(dataset))[:dataset_sample_num] - dataset_sample_num -= len(idx) - if dataset_sample_num > 0: - idx2 = random_state.choice(len(dataset), dataset_sample_num) - idx = np.concatenate([idx, idx2], axis=0) - dataset = dataset.select(idx) + if data_args.max_samples is not None: # truncate dataset + indexes = np.random.permutation(len(dataset))[: data_args.max_samples] + dataset = dataset.select(indexes) return align_dataset(dataset, dataset_attr, data_args) From 88f4c583d3a64318111607e1a8f1f6782bccfe7b Mon Sep 17 00:00:00 2001 From: hoshi-hiyouga Date: Thu, 30 May 2024 00:17:21 +0800 Subject: [PATCH 036/162] Update loader.py Former-commit-id: aa7f335e3ad5a78e4ed5f99c120be28e9733ea2e --- src/llamafactory/data/loader.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/llamafactory/data/loader.py b/src/llamafactory/data/loader.py index 322eefa0..fa5b12c5 100644 --- a/src/llamafactory/data/loader.py +++ b/src/llamafactory/data/loader.py @@ -108,7 +108,13 @@ def load_single_dataset( dataset = dataset.to_iterable_dataset() # TODO: add num shards parameter if dataset_attr.num_samples is not None and not data_args.streaming: - indexes = np.random.permutation(len(dataset))[: dataset_attr.num_samples] + target_num = dataset_attr.num_samples + indexes = np.random.permutation(len(dataset))[:target_num] + target_num -= len(indexes) + if target_num > 0: + expand_indexes = np.random.choice(len(dataset), target_num) + indexes = np.concatenate((indexes, expand_indexes), axis=0) + dataset = dataset.select(indexes) logger.info("Sampled {} examples from dataset {}.".format(dataset_attr.num_samples, dataset_attr)) From ae773f935587b9ddc1f9dd0cd9a7a074b8843b98 Mon Sep 17 00:00:00 2001 From: hoshi-hiyouga Date: Thu, 30 May 2024 00:20:20 +0800 
Subject: [PATCH 037/162] Update loader.py

Former-commit-id: 0aa59322906d91c5e385c9c02ebb5dd64ba060f3
---
 src/llamafactory/data/loader.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/llamafactory/data/loader.py b/src/llamafactory/data/loader.py
index fa5b12c5..d4a19e27 100644
--- a/src/llamafactory/data/loader.py
+++ b/src/llamafactory/data/loader.py
@@ -115,6 +115,7 @@ def load_single_dataset(
             expand_indexes = np.random.choice(len(dataset), target_num)
             indexes = np.concatenate((indexes, expand_indexes), axis=0)

+        assert len(indexes) == dataset_attr.num_samples, "Sample num mismatched."
         dataset = dataset.select(indexes)
         logger.info("Sampled {} examples from dataset {}.".format(dataset_attr.num_samples, dataset_attr))

From 78aef175fe3ea9b1e120a6df8ec63471af97cbe9 Mon Sep 17 00:00:00 2001
From: hiyouga <467089858@qq.com>
Date: Thu, 30 May 2024 00:52:26 +0800
Subject: [PATCH 038/162] fix #3837

Former-commit-id: 72965aa3f13a9c085c29781b6790d80d00a545d8
---
 evaluation/mmlu/mmlu.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/evaluation/mmlu/mmlu.py b/evaluation/mmlu/mmlu.py
index f3218c38..a4530250 100644
--- a/evaluation/mmlu/mmlu.py
+++ b/evaluation/mmlu/mmlu.py
@@ -154,7 +154,7 @@ class MMLU(datasets.GeneratorBasedBuilder):
         ]

     def _generate_examples(self, filepath):
-        df = pd.read_csv(filepath)
+        df = pd.read_csv(filepath, header=None)
         df.columns = ["question", "A", "B", "C", "D", "answer"]

         for i, instance in enumerate(df.to_dict(orient="records")):

From 26d48595389197efbdc18d80d0e509189e3a10e5 Mon Sep 17 00:00:00 2001
From: faddddeout <39449491+injet-zhou@users.noreply.github.com>
Date: Thu, 30 May 2024 08:39:21 +0000
Subject: [PATCH 039/162] fix can't interrupt training when using multi GPUs in webui

Former-commit-id: a7fb02d52bc202c958490aa7081252be5d9eff50
---
 src/llamafactory/webui/runner.py | 34 +++++++++++++++++++++++++++++---
 1 file changed, 31 insertions(+), 3 deletions(-)

diff --git a/src/llamafactory/webui/runner.py b/src/llamafactory/webui/runner.py
index 7a305d62..2580ded2 100644
--- a/src/llamafactory/webui/runner.py
+++ b/src/llamafactory/webui/runner.py
@@ -1,5 +1,6 @@
 import os
 import signal
+import random
 from copy import deepcopy
 from subprocess import Popen, TimeoutExpired
 from typing import TYPE_CHECKING, Any, Dict, Generator, Optional
@@ -8,12 +9,12 @@
 import psutil
 from transformers.trainer import TRAINING_ARGS_NAME

 from ..extras.constants import PEFT_METHODS, TRAINING_STAGES
-from ..extras.misc import is_gpu_or_npu_available, torch_gc
+from ..extras.misc import is_gpu_or_npu_available, torch_gc, get_device_count
 from ..extras.packages import is_gradio_available
 from .common import DEFAULT_CACHE_DIR, get_module, get_save_dir, load_config
 from .locales import ALERTS
 from .utils import gen_cmd, get_eval_results, get_trainer_info, load_args, save_args, save_cmd
-
from ..
import launcher if is_gradio_available(): import gradio as gr @@ -41,6 +42,10 @@ class Runner: self.aborted = True if self.trainer is not None: for children in psutil.Process(self.trainer.pid).children(): # abort the child process + grand_children = children.children() + if len(grand_children) > 0: + for grand_child in grand_children: + os.kill(grand_child.pid, signal.SIGABRT) os.kill(children.pid, signal.SIGABRT) def _initialize(self, data: Dict["Component", Any], do_train: bool, from_preview: bool) -> str: @@ -275,7 +280,30 @@ class Runner: args = self._parse_train_args(data) if do_train else self._parse_eval_args(data) env = deepcopy(os.environ) env["LLAMABOARD_ENABLED"] = "1" - self.trainer = Popen("llamafactory-cli train {}".format(save_cmd(args)), env=env, shell=True) + if get_device_count() > 1: + nnodes = os.environ.get("NNODES", "1") + node_rank = os.environ.get("RANK", "0") + nproc_per_node = os.environ.get("NPROC_PER_NODE", str(get_device_count())) + master_addr = os.environ.get("MASTER_ADDR", "127.0.0.1") + master_port = os.environ.get("MASTER_PORT", str(random.randint(20001, 29999))) + + self.trainer = Popen([ + "torchrun", + "--nnodes", + nnodes, + "--node_rank", + node_rank, + "--nproc_per_node", + nproc_per_node, + "--master_addr", + master_addr, + "--master_port", + master_port, + launcher.__file__, + save_cmd(args) + ], env=env, shell=True) + else: + self.trainer = Popen("llamafactory-cli train {}".format(save_cmd(args)), env=env, shell=True) yield from self.monitor() def preview_train(self, data): From 047e388a5e7879fcb680a1d2f52aaa4aaa3328fb Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Thu, 30 May 2024 16:40:17 +0800 Subject: [PATCH 040/162] update readme Former-commit-id: 3b92d8c2ddb288b849f38e573ca168cab23315d2 --- README.md | 1 + README_zh.md | 1 + 2 files changed, 2 insertions(+) diff --git a/README.md b/README.md index 4ca6d1ec..95c0c345 100644 --- a/README.md +++ b/README.md @@ -26,6 +26,7 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/9840a653-7e9c-41c8-ae89 Choose your path: - **Colab**: https://colab.research.google.com/drive/1eRTPn37ltBbYsISy9Aw2NuI2Aq5CQrD9?usp=sharing +- **PAI-DSW**: https://gallery.pai-ml.com/#/preview/deepLearning/nlp/llama_factory - **Local machine**: Please refer to [usage](#getting-started) ## Table of Contents diff --git a/README_zh.md b/README_zh.md index 70938e38..b10db1e5 100644 --- a/README_zh.md +++ b/README_zh.md @@ -26,6 +26,7 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/ec36a9dd-37f4-4f72-81bd 选择你的打开方式: - **Colab**:https://colab.research.google.com/drive/1d5KQtbemerlSDSxZIfAaWXhKr30QypiK?usp=sharing +- **PAI-DSW**: https://gallery.pai-ml.com/#/preview/deepLearning/nlp/llama_factory - **本地机器**:请见[如何使用](#如何使用) ## 目录 From 9e15eca1c1368b139c8a7d22eb4e3a348d54819b Mon Sep 17 00:00:00 2001 From: Uminosachi <49424133+Uminosachi@users.noreply.github.com> Date: Fri, 31 May 2024 13:45:39 +0900 Subject: [PATCH 041/162] Set scheduler_specific_kwargs to get_scheduler Former-commit-id: f04e70dfab44480ef4c015c06470443237f69ba9 --- src/llamafactory/train/utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/llamafactory/train/utils.py b/src/llamafactory/train/utils.py index 23834f2d..230fdc1e 100644 --- a/src/llamafactory/train/utils.py +++ b/src/llamafactory/train/utils.py @@ -379,6 +379,7 @@ def create_custom_scheduler( optimizer=optimizer_dict[param], num_warmup_steps=training_args.get_warmup_steps(num_training_steps), num_training_steps=num_training_steps, + 
scheduler_specific_kwargs=training_args.lr_scheduler_kwargs, ) def scheduler_hook(param: "torch.nn.Parameter"): From e6fc5ab31ef04ff66561dfdd61a212961c658f8d Mon Sep 17 00:00:00 2001 From: statelesshz Date: Fri, 31 May 2024 13:18:18 +0800 Subject: [PATCH 042/162] Update bug-report.yml Former-commit-id: a8561502360c1e247eeacb46b77ffbcf3387c482 --- .github/ISSUE_TEMPLATE/bug-report.yml | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/bug-report.yml b/.github/ISSUE_TEMPLATE/bug-report.yml index ab2851c6..c2182542 100644 --- a/.github/ISSUE_TEMPLATE/bug-report.yml +++ b/.github/ISSUE_TEMPLATE/bug-report.yml @@ -13,6 +13,18 @@ body: - label: I have read the README and searched the existing issues. required: true + - type: textarea + id: system-info + validations: + required: true + attributes: + label: System Info + description: | + Please share your system info with us. You can run the command **transformers-cli env** and copy-paste its output below. + 请提供您的系统信息。您可以在命令行运行 **transformers-cli env** 并将其输出复制到该文本框中。 + + placeholder: transformers version, platform, python version, ... + - type: textarea id: reproduction validations: @@ -38,18 +50,6 @@ body: Please provide a clear and concise description of what you would expect to happen. 请提供您原本的目的,即这段代码的期望行为。 - - type: textarea - id: system-info - validations: - required: false - attributes: - label: System Info - description: | - Please share your system info with us. You can run the command **transformers-cli env** and copy-paste its output below. - 请提供您的系统信息。您可以在命令行运行 **transformers-cli env** 并将其输出复制到该文本框中。 - - placeholder: transformers version, platform, python version, ... - - type: textarea id: others validations: From 0fba220d5d12eb8501208cd316d11789490a69d5 Mon Sep 17 00:00:00 2001 From: Xu Song Date: Fri, 31 May 2024 14:35:48 +0800 Subject: [PATCH 043/162] Update model_args.py Former-commit-id: f1e018587e5722e41962abd60f74043a3e55f692 --- src/llamafactory/hparams/model_args.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llamafactory/hparams/model_args.py b/src/llamafactory/hparams/model_args.py index 650d1c22..995d5f12 100644 --- a/src/llamafactory/hparams/model_args.py +++ b/src/llamafactory/hparams/model_args.py @@ -107,7 +107,7 @@ class ModelArguments: ) vllm_maxlen: int = field( default=2048, - metadata={"help": "Maximum input length of the vLLM engine."}, + metadata={"help": "Maximum sequence length of the vLLM engine (including prompt and output)."}, ) vllm_gpu_util: float = field( default=0.9, From 3f849f2314483aa091010cf9f4d2057014fac125 Mon Sep 17 00:00:00 2001 From: ylfeng Date: Fri, 31 May 2024 15:33:54 +0800 Subject: [PATCH 044/162] supervised packing with greedy knapsack algorithm Former-commit-id: 24d12396c9aabd49da0b08719068f24679111cc6 --- .../data/processors/supervised.py | 102 ++++++++++++++++-- 1 file changed, 92 insertions(+), 10 deletions(-) diff --git a/src/llamafactory/data/processors/supervised.py b/src/llamafactory/data/processors/supervised.py index b119aa22..65aa4b4e 100644 --- a/src/llamafactory/data/processors/supervised.py +++ b/src/llamafactory/data/processors/supervised.py @@ -1,3 +1,5 @@ +import itertools +from collections import defaultdict from typing import TYPE_CHECKING, Any, Dict, List, Optional from ...extras.constants import IGNORE_INDEX @@ -16,6 +18,52 @@ if TYPE_CHECKING: logger = get_logger(__name__) +def binary_search_for_fit(numbers, capacity): + """ + Perform binary search to find the largest number 
that fits into the knapsack with the given capacity. + """ + left, right = 0, len(numbers) - 1 + result = -1 # If no number fits, return -1 + + while left <= right: + mid = (left + right) // 2 + if numbers[mid] <= capacity: + result = mid + left = mid + 1 + else: + right = mid - 1 + + return result + + +def efficient_greedy_knapsack(numbers, capacity): + """ + An efficient greedy algorithm with binary search for the knapsack problem. + """ + numbers.sort() # Sort numbers in ascending order for binary search + knapsacks = [] + + while numbers: + current_knapsack = [] + remaining_capacity = capacity + + while True: + index = binary_search_for_fit(numbers, remaining_capacity) + if index == -1: + break # No more numbers fit in this knapsack + + # Add the found number to the knapsack and update the remaining capacity + current_knapsack.append(numbers[index]) + remaining_capacity -= numbers[index] + + # Remove the number from the list + numbers.pop(index) + + knapsacks.append(current_knapsack) + + return knapsacks + + def preprocess_supervised_dataset( examples: Dict[str, List[Any]], template: "Template", @@ -115,16 +163,50 @@ def preprocess_packed_supervised_dataset( input_ids += [tokenizer.eos_token_id] labels += [tokenizer.eos_token_id] - total_length = len(input_ids) - block_size = data_args.cutoff_len - # we drop the small remainder, and if the total_length < block_size, we exclude this batch - total_length = (total_length // block_size) * block_size - # split by chunks of cutoff_len - for i in range(0, total_length, block_size): - if not all(label == IGNORE_INDEX for label in labels[i : i + block_size]): - model_inputs["input_ids"].append(input_ids[i : i + block_size]) - model_inputs["attention_mask"].append([1] * block_size) - model_inputs["labels"].append(labels[i : i + block_size]) + # prepare for packing + lengths = [] + length2examples_idx = defaultdict(list) + for idx, example in enumerate(input_ids): + length = len(example) + if length > data_args.cutoff_len: + logger.warning("Dropped example with length {} > cutoff_len {}".format(length, data_args.cutoff_len)) + continue + lengths.append(length) + length2examples_idx[length].append(idx) + + knapsacks = efficient_greedy_knapsack(lengths, data_args.cutoff_len) + + for knapsack in knapsacks: + packed_input_ids = [] + packed_labels = [] + + total_length = 0 + for length in knapsack: + total_length += length + idx = length2examples_idx[length].pop() + packed_input_ids.append(input_ids[idx]) + packed_labels.append(labels[idx]) + + # padding to cutoff_len + if total_length < data_args.cutoff_len: + pad_length = data_args.cutoff_len - total_length + packed_input_ids.append([tokenizer.eos_token_id] * pad_length) + packed_labels.append([IGNORE_INDEX] * pad_length) + elif total_length == data_args.cutoff_len: + pad_length = 0 + else: + logger.warning( + "Dropped packed example with total length {} > cutoff_len {}".format( + total_length, data_args.cutoff_len + ) + ) + continue + + # concat all + model_inputs["input_ids"].append(list(itertools.chain(*packed_input_ids))) + + model_inputs["labels"].append(list(itertools.chain(*packed_labels))) + model_inputs["attention_mask"].append([1] * total_length + [0] * pad_length) return model_inputs From cc62c225598e4b825e74d6b61a69a880c9e99596 Mon Sep 17 00:00:00 2001 From: ylfeng Date: Fri, 31 May 2024 21:40:41 +0800 Subject: [PATCH 045/162] fix eos Former-commit-id: 6e236c952958cbfe50b5dcb7b8eff6aea8477922 --- .../data/processors/supervised.py | 24 ++++++++++--------- 1 file changed, 13 insertions(+), 
From cc62c225598e4b825e74d6b61a69a880c9e99596 Mon Sep 17 00:00:00 2001
From: ylfeng
Date: Fri, 31 May 2024 21:40:41 +0800
Subject: [PATCH 045/162] fix eos

Former-commit-id: 6e236c952958cbfe50b5dcb7b8eff6aea8477922
---
 .../data/processors/supervised.py | 24 ++++++++++---------
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/src/llamafactory/data/processors/supervised.py b/src/llamafactory/data/processors/supervised.py
index 65aa4b4e..f94cebba 100644
--- a/src/llamafactory/data/processors/supervised.py
+++ b/src/llamafactory/data/processors/supervised.py
@@ -151,17 +151,11 @@ def preprocess_packed_supervised_dataset(
         ):
             if data_args.train_on_prompt:
                 source_mask = source_ids
-            elif len(input_ids) != 0 and template.efficient_eos:
-                source_mask = [tokenizer.eos_token_id] + [IGNORE_INDEX] * (len(source_ids) - 1)
             else:
                 source_mask = [IGNORE_INDEX] * len(source_ids)
 
-            input_ids += source_ids + target_ids
-            labels += source_mask + target_ids
-
-    if template.efficient_eos:
-        input_ids += [tokenizer.eos_token_id]
-        labels += [tokenizer.eos_token_id]
+            input_ids.append(source_ids + target_ids)
+            labels.append(source_mask + target_ids)
 
     # prepare for packing
     lengths = []
@@ -174,7 +168,8 @@ def preprocess_packed_supervised_dataset(
         lengths.append(length)
         length2examples_idx[length].append(idx)
 
-    knapsacks = efficient_greedy_knapsack(lengths, data_args.cutoff_len)
+    # cutoff_len - 1 for efficient_eos
+    knapsacks = efficient_greedy_knapsack(lengths, data_args.cutoff_len - int(template.efficient_eos))
 
     for knapsack in knapsacks:
         packed_input_ids = []
@@ -190,8 +185,15 @@ def preprocess_packed_supervised_dataset(
         # padding to cutoff_len
         if total_length < data_args.cutoff_len:
             pad_length = data_args.cutoff_len - total_length
-            packed_input_ids.append([tokenizer.eos_token_id] * pad_length)
-            packed_labels.append([IGNORE_INDEX] * pad_length)
+            if template.efficient_eos:
+                # make sure there is an eos token
+                packed_input_ids.append([tokenizer.eos_token_id] * pad_length)
+                packed_labels.append([tokenizer.eos_token_id] + [IGNORE_INDEX] * (pad_length - 1))
+            else:
+                # without an eos token, pad with 0 instead?
+ packed_input_ids.append([0] * pad_length) + packed_labels.append([tokenizer.eos_token_id] + [IGNORE_INDEX] * (pad_length - 1)) + elif total_length == data_args.cutoff_len: pad_length = 0 else: From 7226a1b43160914887e82a15b3074ada14943594 Mon Sep 17 00:00:00 2001 From: ylfeng Date: Fri, 31 May 2024 21:43:08 +0800 Subject: [PATCH 046/162] remove empty line Former-commit-id: 3164710971a6d6545629f5bf133f98de5ff0991a --- src/llamafactory/data/processors/supervised.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/llamafactory/data/processors/supervised.py b/src/llamafactory/data/processors/supervised.py index f94cebba..eaceb5b8 100644 --- a/src/llamafactory/data/processors/supervised.py +++ b/src/llamafactory/data/processors/supervised.py @@ -206,7 +206,6 @@ def preprocess_packed_supervised_dataset( # concat all model_inputs["input_ids"].append(list(itertools.chain(*packed_input_ids))) - model_inputs["labels"].append(list(itertools.chain(*packed_labels))) model_inputs["attention_mask"].append([1] * total_length + [0] * pad_length) From 2e843a4cf68a764ac6b156033f49c28b5ccd32ba Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Mon, 3 Jun 2024 18:28:27 +0800 Subject: [PATCH 047/162] fix data loader hint Former-commit-id: 25b56126a11591b0155e2f72b673dd8f45a6c8c9 --- src/llamafactory/data/loader.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/llamafactory/data/loader.py b/src/llamafactory/data/loader.py index d4a19e27..f5929f15 100644 --- a/src/llamafactory/data/loader.py +++ b/src/llamafactory/data/loader.py @@ -62,9 +62,9 @@ def load_single_dataset( raise ValueError("File {} not found.".format(local_path)) if data_path is None: - raise ValueError("File extension must be txt, csv, json or jsonl.") + raise ValueError("Allowed file types: {}.".format(",".join(FILEEXT2TYPE.keys()))) else: - raise NotImplementedError + raise NotImplementedError("Unknown load type: {}.".format(dataset_attr.load_from)) if dataset_attr.load_from == "ms_hub": try: From ee80c3acf19743a2cde86a0e51c404a76e38ac41 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Mon, 3 Jun 2024 18:29:38 +0800 Subject: [PATCH 048/162] bump versions transformers 4.37.2->4.41.2 datasets 2.14.3->2.16.0 accelerate 0.27.2->0.30.1 peft 0.10.0->0.11.1 trl 0.8.1->0.8.6 Former-commit-id: 5f1e041f7295bf42a41dd4d9e7f0c42fcc37fed2 --- requirements.txt | 10 +++++----- src/llamafactory/extras/misc.py | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/requirements.txt b/requirements.txt index f4a942e6..e17954e3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,8 +1,8 @@ -transformers>=4.37.2 -datasets>=2.14.3 -accelerate>=0.27.2 -peft>=0.10.0 -trl>=0.8.1 +transformers>=4.41.2 +datasets>=2.16.0 +accelerate>=0.30.1 +peft>=0.11.1 +trl>=0.8.6 gradio>=4.0.0 scipy einops diff --git a/src/llamafactory/extras/misc.py b/src/llamafactory/extras/misc.py index 2c7f170c..638c24cf 100644 --- a/src/llamafactory/extras/misc.py +++ b/src/llamafactory/extras/misc.py @@ -61,11 +61,11 @@ def check_dependencies() -> None: if os.environ.get("DISABLE_VERSION_CHECK", "0").lower() in ["true", "1"]: logger.warning("Version checking has been disabled, may lead to unexpected behaviors.") else: - require_version("transformers>=4.37.2", "To fix: pip install transformers>=4.37.2") - require_version("datasets>=2.14.3", "To fix: pip install datasets>=2.14.3") - require_version("accelerate>=0.27.2", "To fix: pip install accelerate>=0.27.2") - require_version("peft>=0.10.0", "To fix: pip 
install peft>=0.10.0") - require_version("trl>=0.8.2", "To fix: pip install trl>=0.8.2") + require_version("transformers>=4.41.2", "To fix: pip install transformers>=4.41.2") + require_version("datasets>=2.16.0", "To fix: pip install datasets>=2.16.0") + require_version("accelerate>=0.30.1", "To fix: pip install accelerate>=0.30.1") + require_version("peft>=0.11.1", "To fix: pip install peft>=0.11.1") + require_version("trl>=0.8.6", "To fix: pip install trl>=0.8.6") def count_parameters(model: torch.nn.Module) -> Tuple[int, int]: From d8d02e65a1c937ffc2adb48ef81427396221a93a Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Mon, 3 Jun 2024 18:38:36 +0800 Subject: [PATCH 049/162] fix #4022 Former-commit-id: 9541f2f1f1b7d7877eb734f051048e52003a3430 --- setup.py | 2 +- src/llamafactory/chat/vllm_engine.py | 4 +--- src/llamafactory/hparams/parser.py | 2 +- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/setup.py b/setup.py index 45e73343..23f532e7 100644 --- a/setup.py +++ b/setup.py @@ -25,7 +25,7 @@ extra_require = { "metrics": ["nltk", "jieba", "rouge-chinese"], "deepspeed": ["deepspeed>=0.10.0,<=0.14.0"], "bitsandbytes": ["bitsandbytes>=0.39.0"], - "vllm": ["vllm>=0.4.1"], + "vllm": ["vllm>=0.4.3"], "galore": ["galore-torch"], "badam": ["badam"], "gptq": ["optimum>=1.16.0", "auto-gptq>=0.5.0"], diff --git a/src/llamafactory/chat/vllm_engine.py b/src/llamafactory/chat/vllm_engine.py index 3310a864..8a067754 100644 --- a/src/llamafactory/chat/vllm_engine.py +++ b/src/llamafactory/chat/vllm_engine.py @@ -158,12 +158,10 @@ class VllmEngine(BaseEngine): ) result_generator = self.model.generate( - prompt=None, + inputs={"prompt_token_ids": prompt_ids, "multi_modal_data": multi_modal_data}, sampling_params=sampling_params, request_id=request_id, - prompt_token_ids=prompt_ids, lora_request=self.lora_request, - multi_modal_data=multi_modal_data, ) return result_generator diff --git a/src/llamafactory/hparams/parser.py b/src/llamafactory/hparams/parser.py index b3c673be..ff1fbf5d 100644 --- a/src/llamafactory/hparams/parser.py +++ b/src/llamafactory/hparams/parser.py @@ -94,7 +94,7 @@ def _check_extra_dependencies( require_version("mixture-of-depth>=1.1.6", "To fix: pip install mixture-of-depth>=1.1.6") if model_args.infer_backend == "vllm": - require_version("vllm>=0.4.1", "To fix: pip install vllm>=0.4.1") + require_version("vllm>=0.4.3", "To fix: pip install vllm>=0.4.3") if finetuning_args.use_galore: require_version("galore_torch", "To fix: pip install galore_torch") From 1ef396362bcb09eb8badbbd760f30ae6e104ab6a Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Mon, 3 Jun 2024 19:12:29 +0800 Subject: [PATCH 050/162] fix #4005 #4013 Former-commit-id: 8608fa268cde5cddf8d0c6c2eb2cb5fa246c1831 --- examples/README.md | 8 ++++---- examples/README_zh.md | 10 +++++----- examples/extras/badam/llama3_lora_sft.yaml | 4 ++-- examples/extras/fsdp_qlora/llama3_lora_sft.yaml | 4 ++-- examples/extras/galore/llama3_full_sft.yaml | 4 ++-- examples/extras/llama_pro/llama3_freeze_sft.yaml | 4 ++-- examples/extras/loraplus/llama3_lora_sft.yaml | 4 ++-- examples/extras/mod/llama3_full_sft.yaml | 4 ++-- examples/full_multi_gpu/llama3_full_sft.yaml | 4 ++-- examples/lora_multi_gpu/llama3_lora_sft.yaml | 4 ++-- examples/lora_multi_gpu/llama3_lora_sft_ds.yaml | 4 ++-- examples/lora_multi_npu/llama3_lora_sft_ds.yaml | 4 ++-- examples/lora_single_gpu/llama3_lora_dpo.yaml | 4 ++-- examples/lora_single_gpu/llama3_lora_kto.yaml | 4 ++-- examples/lora_single_gpu/llama3_lora_ppo.yaml | 
4 ++-- examples/lora_single_gpu/llama3_lora_pretrain.yaml | 4 ++-- examples/lora_single_gpu/llama3_lora_reward.yaml | 4 ++-- examples/lora_single_gpu/llama3_lora_sft.yaml | 4 ++-- examples/lora_single_gpu/llava1_5_lora_sft.yaml | 4 ++-- examples/qlora_single_gpu/llama3_lora_sft_aqlm.yaml | 4 ++-- examples/qlora_single_gpu/llama3_lora_sft_awq.yaml | 4 ++-- .../qlora_single_gpu/llama3_lora_sft_bitsandbytes.yaml | 4 ++-- examples/qlora_single_gpu/llama3_lora_sft_gptq.yaml | 4 ++-- src/llamafactory/hparams/model_args.py | 2 +- 24 files changed, 52 insertions(+), 52 deletions(-) diff --git a/examples/README.md b/examples/README.md index 727b27c8..f985d552 100644 --- a/examples/README.md +++ b/examples/README.md @@ -107,13 +107,13 @@ CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/qlora_single_gpu/llama3_l ### LoRA Fine-Tuning on Multiple GPUs -#### Supervised Fine-Tuning with Accelerate on Single Node +#### Supervised Fine-Tuning on Single Node ```bash CUDA_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train examples/lora_multi_gpu/llama3_lora_sft.yaml ``` -#### Supervised Fine-Tuning with Accelerate on Multiple Nodes +#### Supervised Fine-Tuning on Multiple Nodes ```bash CUDA_VISIBLE_DEVICES=0,1,2,3 NNODES=2 RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/lora_multi_gpu/llama3_lora_sft.yaml @@ -136,13 +136,13 @@ ASCEND_RT_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train examples/lora_multi_npu ### Full-Parameter Fine-Tuning on Multiple GPUs -#### Supervised Fine-Tuning with Accelerate on Single Node +#### Supervised Fine-Tuning on Single Node ```bash CUDA_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train examples/full_multi_gpu/llama3_full_sft.yaml ``` -#### Supervised Fine-Tuning with Accelerate on Multiple Nodes +#### Supervised Fine-Tuning on Multiple Nodes ```bash CUDA_VISIBLE_DEVICES=0,1,2,3 NNODES=2 RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/full_multi_gpu/llama3_full_sft.yaml diff --git a/examples/README_zh.md b/examples/README_zh.md index 6974faa9..cf5bbf49 100644 --- a/examples/README_zh.md +++ b/examples/README_zh.md @@ -107,13 +107,13 @@ CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/qlora_single_gpu/llama3_l ### 多 GPU LoRA 微调 -#### 使用 Accelerate 进行单节点训练 +#### 在单机上进行指令监督微调 ```bash CUDA_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train examples/lora_multi_gpu/llama3_lora_sft.yaml ``` -#### 使用 Accelerate 进行多节点训练 +#### 在多机上进行指令监督微调 ```bash CUDA_VISIBLE_DEVICES=0,1,2,3 NNODES=2 RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/lora_multi_gpu/llama3_lora_sft.yaml @@ -128,7 +128,7 @@ CUDA_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train examples/lora_multi_gpu/llam ### 多 NPU LoRA 微调 -#### 使用 DeepSpeed ZeRO-0 训练 +#### 使用 DeepSpeed ZeRO-0 进行指令监督微调 ```bash ASCEND_RT_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train examples/lora_multi_npu/llama3_lora_sft_ds.yaml @@ -136,13 +136,13 @@ ASCEND_RT_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train examples/lora_multi_npu ### 多 GPU 全参数微调 -#### 使用 DeepSpeed 进行单节点训练 +#### 在单机上进行指令监督微调 ```bash CUDA_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train examples/full_multi_gpu/llama3_full_sft.yaml ``` -#### 使用 DeepSpeed 进行多节点训练 +#### 在多机上进行指令监督微调 ```bash CUDA_VISIBLE_DEVICES=0,1,2,3 NNODES=2 RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/full_multi_gpu/llama3_full_sft.yaml diff --git a/examples/extras/badam/llama3_lora_sft.yaml b/examples/extras/badam/llama3_lora_sft.yaml index 4a482749..242e63ab 100644 --- 
a/examples/extras/badam/llama3_lora_sft.yaml +++ b/examples/extras/badam/llama3_lora_sft.yaml @@ -28,10 +28,10 @@ overwrite_output_dir: true ### train per_device_train_batch_size: 1 gradient_accumulation_steps: 8 -learning_rate: 0.0001 +learning_rate: 1.0e-4 num_train_epochs: 3.0 lr_scheduler_type: cosine -warmup_steps: 0.1 +warmup_ratio: 0.1 pure_bf16: true ### eval diff --git a/examples/extras/fsdp_qlora/llama3_lora_sft.yaml b/examples/extras/fsdp_qlora/llama3_lora_sft.yaml index e9c04fa9..920d8fdb 100644 --- a/examples/extras/fsdp_qlora/llama3_lora_sft.yaml +++ b/examples/extras/fsdp_qlora/llama3_lora_sft.yaml @@ -29,10 +29,10 @@ overwrite_output_dir: true ### train per_device_train_batch_size: 1 gradient_accumulation_steps: 8 -learning_rate: 0.0001 +learning_rate: 1.0e-4 num_train_epochs: 3.0 lr_scheduler_type: cosine -warmup_steps: 0.1 +warmup_ratio: 0.1 fp16: true ### eval diff --git a/examples/extras/galore/llama3_full_sft.yaml b/examples/extras/galore/llama3_full_sft.yaml index 87381fcc..3db31fed 100644 --- a/examples/extras/galore/llama3_full_sft.yaml +++ b/examples/extras/galore/llama3_full_sft.yaml @@ -29,10 +29,10 @@ overwrite_output_dir: true ### train per_device_train_batch_size: 1 gradient_accumulation_steps: 1 -learning_rate: 0.0001 +learning_rate: 1.0e-4 num_train_epochs: 3.0 lr_scheduler_type: cosine -warmup_steps: 0.1 +warmup_ratio: 0.1 pure_bf16: true ### eval diff --git a/examples/extras/llama_pro/llama3_freeze_sft.yaml b/examples/extras/llama_pro/llama3_freeze_sft.yaml index 8ace8db8..214f411a 100644 --- a/examples/extras/llama_pro/llama3_freeze_sft.yaml +++ b/examples/extras/llama_pro/llama3_freeze_sft.yaml @@ -27,10 +27,10 @@ overwrite_output_dir: true ### train per_device_train_batch_size: 1 gradient_accumulation_steps: 8 -learning_rate: 0.0001 +learning_rate: 1.0e-4 num_train_epochs: 3.0 lr_scheduler_type: cosine -warmup_steps: 0.1 +warmup_ratio: 0.1 fp16: true ### eval diff --git a/examples/extras/loraplus/llama3_lora_sft.yaml b/examples/extras/loraplus/llama3_lora_sft.yaml index 26c2b1d2..9936bcd3 100644 --- a/examples/extras/loraplus/llama3_lora_sft.yaml +++ b/examples/extras/loraplus/llama3_lora_sft.yaml @@ -26,10 +26,10 @@ overwrite_output_dir: true ### train per_device_train_batch_size: 1 gradient_accumulation_steps: 8 -learning_rate: 0.0001 +learning_rate: 1.0e-4 num_train_epochs: 3.0 lr_scheduler_type: cosine -warmup_steps: 0.1 +warmup_ratio: 0.1 fp16: true ### eval diff --git a/examples/extras/mod/llama3_full_sft.yaml b/examples/extras/mod/llama3_full_sft.yaml index 6b724ed0..edfec44e 100644 --- a/examples/extras/mod/llama3_full_sft.yaml +++ b/examples/extras/mod/llama3_full_sft.yaml @@ -26,10 +26,10 @@ overwrite_output_dir: true per_device_train_batch_size: 1 gradient_accumulation_steps: 8 optim: paged_adamw_8bit -learning_rate: 0.0001 +learning_rate: 1.0e-4 num_train_epochs: 3.0 lr_scheduler_type: cosine -warmup_steps: 0.1 +warmup_ratio: 0.1 pure_bf16: true ### eval diff --git a/examples/full_multi_gpu/llama3_full_sft.yaml b/examples/full_multi_gpu/llama3_full_sft.yaml index a96f1b8e..b8873e3a 100644 --- a/examples/full_multi_gpu/llama3_full_sft.yaml +++ b/examples/full_multi_gpu/llama3_full_sft.yaml @@ -28,10 +28,10 @@ overwrite_output_dir: true ### train per_device_train_batch_size: 1 gradient_accumulation_steps: 2 -learning_rate: 0.0001 +learning_rate: 1.0e-4 num_train_epochs: 3.0 lr_scheduler_type: cosine -warmup_steps: 0.1 +warmup_ratio: 0.1 fp16: true ### eval diff --git a/examples/lora_multi_gpu/llama3_lora_sft.yaml 
b/examples/lora_multi_gpu/llama3_lora_sft.yaml index 6389f21b..5e5dd9e6 100644 --- a/examples/lora_multi_gpu/llama3_lora_sft.yaml +++ b/examples/lora_multi_gpu/llama3_lora_sft.yaml @@ -28,10 +28,10 @@ overwrite_output_dir: true ### train per_device_train_batch_size: 1 gradient_accumulation_steps: 2 -learning_rate: 0.0001 +learning_rate: 1.0e-4 num_train_epochs: 3.0 lr_scheduler_type: cosine -warmup_steps: 0.1 +warmup_ratio: 0.1 fp16: true ### eval diff --git a/examples/lora_multi_gpu/llama3_lora_sft_ds.yaml b/examples/lora_multi_gpu/llama3_lora_sft_ds.yaml index 6011896a..e8dee216 100644 --- a/examples/lora_multi_gpu/llama3_lora_sft_ds.yaml +++ b/examples/lora_multi_gpu/llama3_lora_sft_ds.yaml @@ -29,10 +29,10 @@ overwrite_output_dir: true ### train per_device_train_batch_size: 1 gradient_accumulation_steps: 2 -learning_rate: 0.0001 +learning_rate: 1.0e-4 num_train_epochs: 3.0 lr_scheduler_type: cosine -warmup_steps: 0.1 +warmup_ratio: 0.1 fp16: true ### eval diff --git a/examples/lora_multi_npu/llama3_lora_sft_ds.yaml b/examples/lora_multi_npu/llama3_lora_sft_ds.yaml index 65ab6347..825b8450 100644 --- a/examples/lora_multi_npu/llama3_lora_sft_ds.yaml +++ b/examples/lora_multi_npu/llama3_lora_sft_ds.yaml @@ -29,10 +29,10 @@ overwrite_output_dir: true ### train per_device_train_batch_size: 1 gradient_accumulation_steps: 2 -learning_rate: 0.0001 +learning_rate: 1.0e-4 num_train_epochs: 3.0 lr_scheduler_type: cosine -warmup_steps: 0.1 +warmup_ratio: 0.1 fp16: true ### eval diff --git a/examples/lora_single_gpu/llama3_lora_dpo.yaml b/examples/lora_single_gpu/llama3_lora_dpo.yaml index f68244b7..62752e57 100644 --- a/examples/lora_single_gpu/llama3_lora_dpo.yaml +++ b/examples/lora_single_gpu/llama3_lora_dpo.yaml @@ -27,10 +27,10 @@ overwrite_output_dir: true ### train per_device_train_batch_size: 1 gradient_accumulation_steps: 8 -learning_rate: 0.000005 +learning_rate: 5.0e-6 num_train_epochs: 3.0 lr_scheduler_type: cosine -warmup_steps: 0.1 +warmup_ratio: 0.1 fp16: true ### eval diff --git a/examples/lora_single_gpu/llama3_lora_kto.yaml b/examples/lora_single_gpu/llama3_lora_kto.yaml index 4405aaec..6f689818 100644 --- a/examples/lora_single_gpu/llama3_lora_kto.yaml +++ b/examples/lora_single_gpu/llama3_lora_kto.yaml @@ -25,10 +25,10 @@ overwrite_output_dir: true ### train per_device_train_batch_size: 1 gradient_accumulation_steps: 8 -learning_rate: 0.000005 +learning_rate: 5.0e-6 num_train_epochs: 3.0 lr_scheduler_type: cosine -warmup_steps: 0.1 +warmup_ratio: 0.1 fp16: true ### eval diff --git a/examples/lora_single_gpu/llama3_lora_ppo.yaml b/examples/lora_single_gpu/llama3_lora_ppo.yaml index 88ce24f3..19e7ccb3 100644 --- a/examples/lora_single_gpu/llama3_lora_ppo.yaml +++ b/examples/lora_single_gpu/llama3_lora_ppo.yaml @@ -26,10 +26,10 @@ overwrite_output_dir: true ### train per_device_train_batch_size: 1 gradient_accumulation_steps: 8 -learning_rate: 0.00001 +learning_rate: 1.0e-5 num_train_epochs: 3.0 lr_scheduler_type: cosine -warmup_steps: 0.1 +warmup_ratio: 0.1 fp16: true ### generate diff --git a/examples/lora_single_gpu/llama3_lora_pretrain.yaml b/examples/lora_single_gpu/llama3_lora_pretrain.yaml index acb18ebf..54c5d89a 100644 --- a/examples/lora_single_gpu/llama3_lora_pretrain.yaml +++ b/examples/lora_single_gpu/llama3_lora_pretrain.yaml @@ -24,10 +24,10 @@ overwrite_output_dir: true ### train per_device_train_batch_size: 1 gradient_accumulation_steps: 8 -learning_rate: 0.0001 +learning_rate: 1.0e-4 num_train_epochs: 3.0 lr_scheduler_type: cosine -warmup_steps: 0.1 
+warmup_ratio: 0.1 fp16: true ### eval diff --git a/examples/lora_single_gpu/llama3_lora_reward.yaml b/examples/lora_single_gpu/llama3_lora_reward.yaml index 6bf2ca02..c82f9414 100644 --- a/examples/lora_single_gpu/llama3_lora_reward.yaml +++ b/examples/lora_single_gpu/llama3_lora_reward.yaml @@ -25,10 +25,10 @@ overwrite_output_dir: true ### train per_device_train_batch_size: 1 gradient_accumulation_steps: 8 -learning_rate: 0.00001 +learning_rate: 1.0e-5 num_train_epochs: 3.0 lr_scheduler_type: cosine -warmup_steps: 0.1 +warmup_ratio: 0.1 fp16: true ### eval diff --git a/examples/lora_single_gpu/llama3_lora_sft.yaml b/examples/lora_single_gpu/llama3_lora_sft.yaml index 5492bc34..429cb6af 100644 --- a/examples/lora_single_gpu/llama3_lora_sft.yaml +++ b/examples/lora_single_gpu/llama3_lora_sft.yaml @@ -25,10 +25,10 @@ overwrite_output_dir: true ### train per_device_train_batch_size: 1 gradient_accumulation_steps: 8 -learning_rate: 0.0001 +learning_rate: 1.0e-4 num_train_epochs: 3.0 lr_scheduler_type: cosine -warmup_steps: 0.1 +warmup_ratio: 0.1 fp16: true ### eval diff --git a/examples/lora_single_gpu/llava1_5_lora_sft.yaml b/examples/lora_single_gpu/llava1_5_lora_sft.yaml index 8e4226da..acab4884 100644 --- a/examples/lora_single_gpu/llava1_5_lora_sft.yaml +++ b/examples/lora_single_gpu/llava1_5_lora_sft.yaml @@ -26,10 +26,10 @@ overwrite_output_dir: true ### train per_device_train_batch_size: 1 gradient_accumulation_steps: 8 -learning_rate: 0.0001 +learning_rate: 1.0e-4 num_train_epochs: 3.0 lr_scheduler_type: cosine -warmup_steps: 0.1 +warmup_ratio: 0.1 fp16: true ### eval diff --git a/examples/qlora_single_gpu/llama3_lora_sft_aqlm.yaml b/examples/qlora_single_gpu/llama3_lora_sft_aqlm.yaml index d2658051..53cc12e2 100644 --- a/examples/qlora_single_gpu/llama3_lora_sft_aqlm.yaml +++ b/examples/qlora_single_gpu/llama3_lora_sft_aqlm.yaml @@ -25,10 +25,10 @@ overwrite_output_dir: true ### train per_device_train_batch_size: 1 gradient_accumulation_steps: 8 -learning_rate: 0.0001 +learning_rate: 1.0e-4 num_train_epochs: 3.0 lr_scheduler_type: cosine -warmup_steps: 0.1 +warmup_ratio: 0.1 fp16: true ### eval diff --git a/examples/qlora_single_gpu/llama3_lora_sft_awq.yaml b/examples/qlora_single_gpu/llama3_lora_sft_awq.yaml index ba6d8ea5..1a92f822 100644 --- a/examples/qlora_single_gpu/llama3_lora_sft_awq.yaml +++ b/examples/qlora_single_gpu/llama3_lora_sft_awq.yaml @@ -25,10 +25,10 @@ overwrite_output_dir: true ### train per_device_train_batch_size: 1 gradient_accumulation_steps: 8 -learning_rate: 0.0001 +learning_rate: 1.0e-4 num_train_epochs: 3.0 lr_scheduler_type: cosine -warmup_steps: 0.1 +warmup_ratio: 0.1 fp16: true ### eval diff --git a/examples/qlora_single_gpu/llama3_lora_sft_bitsandbytes.yaml b/examples/qlora_single_gpu/llama3_lora_sft_bitsandbytes.yaml index a3db35ff..c7f72c66 100644 --- a/examples/qlora_single_gpu/llama3_lora_sft_bitsandbytes.yaml +++ b/examples/qlora_single_gpu/llama3_lora_sft_bitsandbytes.yaml @@ -26,10 +26,10 @@ overwrite_output_dir: true ### train per_device_train_batch_size: 1 gradient_accumulation_steps: 8 -learning_rate: 0.0001 +learning_rate: 1.0e-4 num_train_epochs: 3.0 lr_scheduler_type: cosine -warmup_steps: 0.1 +warmup_ratio: 0.1 fp16: true ### eval diff --git a/examples/qlora_single_gpu/llama3_lora_sft_gptq.yaml b/examples/qlora_single_gpu/llama3_lora_sft_gptq.yaml index cc9a454e..45caf17c 100644 --- a/examples/qlora_single_gpu/llama3_lora_sft_gptq.yaml +++ b/examples/qlora_single_gpu/llama3_lora_sft_gptq.yaml @@ -25,10 +25,10 @@ overwrite_output_dir: 
true ### train per_device_train_batch_size: 1 gradient_accumulation_steps: 8 -learning_rate: 0.0001 +learning_rate: 1.0e-4 num_train_epochs: 3.0 lr_scheduler_type: cosine -warmup_steps: 0.1 +warmup_ratio: 0.1 fp16: true ### eval diff --git a/src/llamafactory/hparams/model_args.py b/src/llamafactory/hparams/model_args.py index 995d5f12..7003cbee 100644 --- a/src/llamafactory/hparams/model_args.py +++ b/src/llamafactory/hparams/model_args.py @@ -107,7 +107,7 @@ class ModelArguments: ) vllm_maxlen: int = field( default=2048, - metadata={"help": "Maximum sequence length of the vLLM engine (including prompt and output)."}, + metadata={"help": "Maximum sequence (prompt + response) length of the vLLM engine."}, ) vllm_gpu_util: float = field( default=0.9, From ba16749942881fd0d202de0c5f8f57381a6958bb Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Mon, 3 Jun 2024 19:24:10 +0800 Subject: [PATCH 051/162] update placeholder in issue template Former-commit-id: 5503a90d7e38273b67129e0b9eb62bd1fd23154f --- .github/ISSUE_TEMPLATE/bug-report.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/ISSUE_TEMPLATE/bug-report.yml b/.github/ISSUE_TEMPLATE/bug-report.yml index c2182542..82620fdb 100644 --- a/.github/ISSUE_TEMPLATE/bug-report.yml +++ b/.github/ISSUE_TEMPLATE/bug-report.yml @@ -38,7 +38,7 @@ body: 请合理使用 Markdown 标签来格式化您的文本。 placeholder: | - python src/train_bash.py ... + llamafactory-cli train ... - type: textarea id: expected-behavior From 59aca304c0acb3375d72ed951df623a7b3541758 Mon Sep 17 00:00:00 2001 From: "enji.zhou" Date: Mon, 3 Jun 2024 21:32:38 +0800 Subject: [PATCH 052/162] fix KTO Trainer Sampler Former-commit-id: 39eb1bfa272011554322e9bb2534f83b68282a70 --- src/llamafactory/train/kto/trainer.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/llamafactory/train/kto/trainer.py b/src/llamafactory/train/kto/trainer.py index 82ae722d..3f1220a9 100644 --- a/src/llamafactory/train/kto/trainer.py +++ b/src/llamafactory/train/kto/trainer.py @@ -4,6 +4,7 @@ from types import MethodType from typing import TYPE_CHECKING, Dict, Literal, Optional, Tuple, Union import torch +from torch.utils.data import RandomSampler from transformers import Trainer from trl import KTOTrainer from trl.trainer import disable_dropout_in_model @@ -173,6 +174,21 @@ class CustomKTOTrainer(KTOTrainer): return reference_chosen_logps, reference_rejected_logps, reference_kl_logps + def has_length(self,dataset): + """ + Checks if the dataset implements __len__() and it doesn't raise an error + """ + try: + return len(dataset) is not None + except TypeError: + # TypeError: len() of unsized object + return False + + def _get_train_sampler(self) -> Optional[torch.utils.data.Sampler]: + if self.train_dataset is None or not self.has_length(self.train_dataset): + return None + return RandomSampler(self.train_dataset) + def get_batch_loss_metrics( self, model: "PreTrainedModel", From ca60eca259f031db8318e12d8576668e6e5f2023 Mon Sep 17 00:00:00 2001 From: hoshi-hiyouga Date: Mon, 3 Jun 2024 22:08:38 +0800 Subject: [PATCH 053/162] Update trainer.py Former-commit-id: 8565d4b43db905374c328ae57c71fc226980d14f --- src/llamafactory/train/kto/trainer.py | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) diff --git a/src/llamafactory/train/kto/trainer.py b/src/llamafactory/train/kto/trainer.py index 3f1220a9..7c0343f5 100644 --- a/src/llamafactory/train/kto/trainer.py +++ b/src/llamafactory/train/kto/trainer.py @@ -4,7 +4,6 @@ from types import 
MethodType from typing import TYPE_CHECKING, Dict, Literal, Optional, Tuple, Union import torch -from torch.utils.data import RandomSampler from transformers import Trainer from trl import KTOTrainer from trl.trainer import disable_dropout_in_model @@ -14,6 +13,7 @@ from ..utils import create_custom_optimzer, create_custom_scheduler if TYPE_CHECKING: + import torch.utils.data from transformers import PreTrainedModel, ProcessorMixin from ...hparams import FinetuningArguments @@ -85,6 +85,12 @@ class CustomKTOTrainer(KTOTrainer): create_custom_scheduler(self.args, num_training_steps, optimizer) return super().create_scheduler(num_training_steps, optimizer) + def _get_train_sampler(self) -> Optional["torch.utils.data.Sampler"]: + r""" + Replaces the sequential sampler of KTO Trainer created by trl with the random sampler. + """ + return Trainer._get_train_sampler(self) + def _save(self, output_dir: Optional[str] = None, state_dict: Optional[Dict[str, "torch.Tensor"]] = None) -> None: super()._save(output_dir, state_dict) if self.processor is not None: @@ -174,21 +180,6 @@ class CustomKTOTrainer(KTOTrainer): return reference_chosen_logps, reference_rejected_logps, reference_kl_logps - def has_length(self,dataset): - """ - Checks if the dataset implements __len__() and it doesn't raise an error - """ - try: - return len(dataset) is not None - except TypeError: - # TypeError: len() of unsized object - return False - - def _get_train_sampler(self) -> Optional[torch.utils.data.Sampler]: - if self.train_dataset is None or not self.has_length(self.train_dataset): - return None - return RandomSampler(self.train_dataset) - def get_batch_loss_metrics( self, model: "PreTrainedModel", From 2dc5743fba86e248c7ffd17f255903765c5590bc Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Mon, 3 Jun 2024 22:53:54 +0800 Subject: [PATCH 054/162] remove gc warnings in DPO&KTO Former-commit-id: b649bdcbafb464a638387429b770fe258b41f8af --- src/llamafactory/train/dpo/trainer.py | 5 +++-- src/llamafactory/train/kto/trainer.py | 5 +++-- src/llamafactory/train/utils.py | 16 ++++++++++++++-- 3 files changed, 20 insertions(+), 6 deletions(-) diff --git a/src/llamafactory/train/dpo/trainer.py b/src/llamafactory/train/dpo/trainer.py index 542335a3..ec1de810 100644 --- a/src/llamafactory/train/dpo/trainer.py +++ b/src/llamafactory/train/dpo/trainer.py @@ -10,7 +10,7 @@ from trl import DPOTrainer from trl.trainer import disable_dropout_in_model from ...extras.constants import IGNORE_INDEX -from ..utils import create_custom_optimzer, create_custom_scheduler +from ..utils import create_custom_optimzer, create_custom_scheduler, get_ref_context if TYPE_CHECKING: @@ -69,6 +69,7 @@ class CustomDPOTrainer(DPOTrainer): self.ref_model = self._prepare_deepspeed(self.ref_model) else: self.ref_model = self.accelerator.prepare_model(self.ref_model, evaluation_mode=True) + self.ref_model.eval() if finetuning_args.use_badam: from badam import clip_grad_norm_for_sparse_tensor @@ -189,7 +190,7 @@ class CustomDPOTrainer(DPOTrainer): if self.ref_model is None: ref_model = model - ref_context = self.accelerator.unwrap_model(model).disable_adapter() + ref_context = get_ref_context(self.accelerator, model) else: ref_model = self.ref_model ref_context = nullcontext() diff --git a/src/llamafactory/train/kto/trainer.py b/src/llamafactory/train/kto/trainer.py index 7c0343f5..f29945f5 100644 --- a/src/llamafactory/train/kto/trainer.py +++ b/src/llamafactory/train/kto/trainer.py @@ -9,7 +9,7 @@ from trl import KTOTrainer from trl.trainer 
import disable_dropout_in_model from ...extras.constants import IGNORE_INDEX -from ..utils import create_custom_optimzer, create_custom_scheduler +from ..utils import create_custom_optimzer, create_custom_scheduler, get_ref_context if TYPE_CHECKING: @@ -68,6 +68,7 @@ class CustomKTOTrainer(KTOTrainer): self.ref_model = self._prepare_deepspeed(self.ref_model) else: self.ref_model = self.accelerator.prepare_model(self.ref_model, evaluation_mode=True) + self.ref_model.eval() if finetuning_args.use_badam: from badam import clip_grad_norm_for_sparse_tensor @@ -164,7 +165,7 @@ class CustomKTOTrainer(KTOTrainer): """ if self.ref_model is None: ref_model = model - ref_context = self.accelerator.unwrap_model(model).disable_adapter() + ref_context = get_ref_context(self.accelerator, model) else: ref_model = self.ref_model ref_context = nullcontext() diff --git a/src/llamafactory/train/utils.py b/src/llamafactory/train/utils.py index 230fdc1e..2b33af1c 100644 --- a/src/llamafactory/train/utils.py +++ b/src/llamafactory/train/utils.py @@ -1,3 +1,4 @@ +from contextlib import contextmanager from typing import TYPE_CHECKING, Callable, Dict, List, Optional, Union import torch @@ -17,8 +18,8 @@ if is_galore_available(): if TYPE_CHECKING: - from transformers import Seq2SeqTrainingArguments - from transformers.modeling_utils import PreTrainedModel + from accelerate import Accelerator + from transformers import PreTrainedModel, Seq2SeqTrainingArguments from trl import AutoModelForCausalLMWithValueHead from ..hparams import DataArguments @@ -156,6 +157,17 @@ def create_reward_model( return reward_model +@contextmanager +def get_ref_context(accelerator: "Accelerator", model: "PreTrainedModel"): + r""" + Gets adapter context for the reference model. + """ + with accelerator.unwrap_model(model).disable_adapter(): + model.eval() + yield + model.train() + + def _get_decay_parameter_names(model: "PreTrainedModel") -> List[str]: r""" Returns a list of names of parameters with weight decay. 
(weights in non-layernorm layers) From f1fe357ee20e29d82bda65a55fac55ad03511d16 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Mon, 3 Jun 2024 23:30:37 +0800 Subject: [PATCH 055/162] fix #4043 Former-commit-id: 67af68f4fc5232760c57b3a0ae780628da09db6a --- src/llamafactory/webui/components/eval.py | 2 +- src/llamafactory/webui/components/train.py | 21 ++++++++++++--------- src/llamafactory/webui/locales.py | 5 +++++ src/llamafactory/webui/runner.py | 9 +++++++-- src/llamafactory/webui/utils.py | 2 +- 5 files changed, 26 insertions(+), 13 deletions(-) diff --git a/src/llamafactory/webui/components/eval.py b/src/llamafactory/webui/components/eval.py index 99215fc2..0a7a0f44 100644 --- a/src/llamafactory/webui/components/eval.py +++ b/src/llamafactory/webui/components/eval.py @@ -57,7 +57,6 @@ def create_eval_tab(engine: "Engine") -> Dict[str, "Component"]: with gr.Row(): output_box = gr.Markdown() - output_elems = [output_box, progress_bar] elem_dict.update( dict( cmd_preview_btn=cmd_preview_btn, @@ -68,6 +67,7 @@ def create_eval_tab(engine: "Engine") -> Dict[str, "Component"]: output_box=output_box, ) ) + output_elems = [output_box, progress_bar] cmd_preview_btn.click(engine.runner.preview_eval, input_elems, output_elems, concurrency_limit=None) start_btn.click(engine.runner.run_eval, input_elems, output_elems) diff --git a/src/llamafactory/webui/components/train.py b/src/llamafactory/webui/components/train.py index 6f742bb1..93e5dfc1 100644 --- a/src/llamafactory/webui/components/train.py +++ b/src/llamafactory/webui/components/train.py @@ -298,22 +298,25 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]: ) output_elems = [output_box, progress_bar, loss_viewer] - lang = engine.manager.get_elem_by_id("top.lang") - model_name = engine.manager.get_elem_by_id("top.model_name") - finetuning_type = engine.manager.get_elem_by_id("top.finetuning_type") - cmd_preview_btn.click(engine.runner.preview_train, input_elems, output_elems, concurrency_limit=None) - arg_save_btn.click(engine.runner.save_args, input_elems, output_elems, concurrency_limit=None) - arg_load_btn.click( - engine.runner.load_args, [lang, config_path], list(input_elems) + [output_box], concurrency_limit=None - ) start_btn.click(engine.runner.run_train, input_elems, output_elems) stop_btn.click(engine.runner.set_abort) resume_btn.change(engine.runner.monitor, outputs=output_elems, concurrency_limit=None) - training_stage.change(change_stage, [training_stage], [dataset, packing], queue=False) + lang = engine.manager.get_elem_by_id("top.lang") + model_name: "gr.Dropdown" = engine.manager.get_elem_by_id("top.model_name") + finetuning_type: "gr.Dropdown" = engine.manager.get_elem_by_id("top.finetuning_type") + + arg_save_btn.click(engine.runner.save_args, input_elems, output_elems, concurrency_limit=None) + arg_load_btn.click( + engine.runner.load_args, [lang, config_path], list(input_elems) + [output_box], concurrency_limit=None + ) + dataset.focus(list_datasets, [dataset_dir, training_stage], [dataset], queue=False) + training_stage.change(change_stage, [training_stage], [dataset, packing], queue=False) reward_model.focus(list_checkpoints, [model_name, finetuning_type], [reward_model], queue=False) + model_name.change(list_output_dirs, [model_name, finetuning_type, initial_dir], [output_dir], queue=False) + finetuning_type.change(list_output_dirs, [model_name, finetuning_type, initial_dir], [output_dir], queue=False) output_dir.change( list_output_dirs, [model_name, finetuning_type, initial_dir], 
[output_dir], concurrency_limit=None ).then(check_output_dir, inputs=[lang, model_name, finetuning_type, output_dir], concurrency_limit=None) diff --git a/src/llamafactory/webui/locales.py b/src/llamafactory/webui/locales.py index 5b11c853..e30feab2 100644 --- a/src/llamafactory/webui/locales.py +++ b/src/llamafactory/webui/locales.py @@ -1475,6 +1475,11 @@ ALERTS = { "ru": "Пожалуйста, выберите адаптер.", "zh": "请选择适配器。", }, + "err_no_output_dir": { + "en": "Please provide output dir.", + "ru": "Пожалуйста, укажите выходную директорию.", + "zh": "请填写输出目录。", + }, "err_no_reward_model": { "en": "Please select a reward model.", "ru": "Пожалуйста, выберите модель вознаграждения.", diff --git a/src/llamafactory/webui/runner.py b/src/llamafactory/webui/runner.py index 7a305d62..fe213513 100644 --- a/src/llamafactory/webui/runner.py +++ b/src/llamafactory/webui/runner.py @@ -64,10 +64,15 @@ class Runner: return ALERTS["err_demo"][lang] if do_train: + if not get("train.output_dir"): + return ALERTS["err_no_output_dir"][lang] + stage = TRAINING_STAGES[get("train.training_stage")] - reward_model = get("train.reward_model") - if stage == "ppo" and not reward_model: + if stage == "ppo" and not get("train.reward_model"): return ALERTS["err_no_reward_model"][lang] + else: + if not get("eval.output_dir"): + return ALERTS["err_no_output_dir"][lang] if not from_preview and not is_gpu_or_npu_available(): gr.Warning(ALERTS["warn_no_cuda"][lang]) diff --git a/src/llamafactory/webui/utils.py b/src/llamafactory/webui/utils.py index 09cefa0e..0446cb47 100644 --- a/src/llamafactory/webui/utils.py +++ b/src/llamafactory/webui/utils.py @@ -180,7 +180,7 @@ def check_output_dir(lang: str, model_name: str, finetuning_type: str, output_di r""" Check if output dir exists. """ - if os.path.isdir(get_save_dir(model_name, finetuning_type, output_dir)): + if model_name and output_dir and os.path.isdir(get_save_dir(model_name, finetuning_type, output_dir)): gr.Warning(ALERTS["warn_output_dir_exists"][lang]) From 2e87a54bf1045244b5ab2c6f432557c9562ed6ca Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Tue, 4 Jun 2024 00:10:24 +0800 Subject: [PATCH 056/162] fix abort in webui DDP mode Former-commit-id: b90ac72d753b13a3eed9cb8b898fac2f2fe5153f --- src/llamafactory/cli.py | 31 ++++++++++-------------- src/llamafactory/webui/runner.py | 41 ++++---------------------------- src/llamafactory/webui/utils.py | 14 +++++++++++ 3 files changed, 32 insertions(+), 54 deletions(-) diff --git a/src/llamafactory/cli.py b/src/llamafactory/cli.py index a74445a6..c14ae6ec 100644 --- a/src/llamafactory/cli.py +++ b/src/llamafactory/cli.py @@ -71,28 +71,23 @@ def main(): export_model() elif command == Command.TRAIN: if get_device_count() > 1: - nnodes = os.environ.get("NNODES", "1") - node_rank = os.environ.get("RANK", "0") - nproc_per_node = os.environ.get("NPROC_PER_NODE", str(get_device_count())) master_addr = os.environ.get("MASTER_ADDR", "127.0.0.1") master_port = os.environ.get("MASTER_PORT", str(random.randint(20001, 29999))) logger.info("Initializing distributed tasks at: {}:{}".format(master_addr, master_port)) subprocess.run( - [ - "torchrun", - "--nnodes", - nnodes, - "--node_rank", - node_rank, - "--nproc_per_node", - nproc_per_node, - "--master_addr", - master_addr, - "--master_port", - master_port, - launcher.__file__, - *sys.argv[1:], - ] + ( + "torchrun --nnodes {nnodes} --node_rank {node_rank} --nproc_per_node {nproc_per_node} " + "--master_addr {master_addr} --master_port {master_port} {file_name} {args}" + 
).format( + nnodes=os.environ.get("NNODES", "1"), + node_rank=os.environ.get("RANK", "0"), + nproc_per_node=os.environ.get("NPROC_PER_NODE", str(get_device_count())), + master_addr=master_addr, + master_port=master_port, + file_name=launcher.__file__, + args=" ".join(sys.argv[1:]), + ), + shell=True, ) else: run_exp() diff --git a/src/llamafactory/webui/runner.py b/src/llamafactory/webui/runner.py index 36f593ae..6e1facef 100644 --- a/src/llamafactory/webui/runner.py +++ b/src/llamafactory/webui/runner.py @@ -1,20 +1,17 @@ import os -import signal -import random from copy import deepcopy from subprocess import Popen, TimeoutExpired from typing import TYPE_CHECKING, Any, Dict, Generator, Optional -import psutil from transformers.trainer import TRAINING_ARGS_NAME from ..extras.constants import PEFT_METHODS, TRAINING_STAGES -from ..extras.misc import is_gpu_or_npu_available, torch_gc, get_device_count +from ..extras.misc import is_gpu_or_npu_available, torch_gc from ..extras.packages import is_gradio_available from .common import DEFAULT_CACHE_DIR, get_module, get_save_dir, load_config from .locales import ALERTS -from .utils import gen_cmd, get_eval_results, get_trainer_info, load_args, save_args, save_cmd -from .. import launcher +from .utils import abort_leaf_process, gen_cmd, get_eval_results, get_trainer_info, load_args, save_args, save_cmd + if is_gradio_available(): import gradio as gr @@ -41,12 +38,7 @@ class Runner: def set_abort(self) -> None: self.aborted = True if self.trainer is not None: - for children in psutil.Process(self.trainer.pid).children(): # abort the child process - grand_children = children.children() - if len(grand_children) > 0: - for grand_child in grand_children: - os.kill(grand_child.pid, signal.SIGABRT) - os.kill(children.pid, signal.SIGABRT) + abort_leaf_process(self.trainer.pid) def _initialize(self, data: Dict["Component", Any], do_train: bool, from_preview: bool) -> str: get = lambda elem_id: data[self.manager.get_elem_by_id(elem_id)] @@ -285,30 +277,7 @@ class Runner: args = self._parse_train_args(data) if do_train else self._parse_eval_args(data) env = deepcopy(os.environ) env["LLAMABOARD_ENABLED"] = "1" - if get_device_count() > 1: - nnodes = os.environ.get("NNODES", "1") - node_rank = os.environ.get("RANK", "0") - nproc_per_node = os.environ.get("NPROC_PER_NODE", str(get_device_count())) - master_addr = os.environ.get("MASTER_ADDR", "127.0.0.1") - master_port = os.environ.get("MASTER_PORT", str(random.randint(20001, 29999))) - - self.trainer = Popen([ - "torchrun", - "--nnodes", - nnodes, - "--node_rank", - node_rank, - "--nproc_per_node", - nproc_per_node, - "--master_addr", - master_addr, - "--master_port", - master_port, - launcher.__file__, - save_cmd(args) - ], env=env, shell=True) - else: - self.trainer = Popen("llamafactory-cli train {}".format(save_cmd(args)), env=env, shell=True) + self.trainer = Popen("llamafactory-cli train {}".format(save_cmd(args)), env=env, shell=True) yield from self.monitor() def preview_train(self, data): diff --git a/src/llamafactory/webui/utils.py b/src/llamafactory/webui/utils.py index 0446cb47..fc258806 100644 --- a/src/llamafactory/webui/utils.py +++ b/src/llamafactory/webui/utils.py @@ -1,8 +1,10 @@ import json import os +import signal from datetime import datetime from typing import Any, Dict, List, Optional, Tuple +import psutil from transformers.trainer_utils import get_last_checkpoint from yaml import safe_dump, safe_load @@ -17,6 +19,18 @@ if is_gradio_available(): import gradio as gr +def 
abort_leaf_process(pid: int) -> None: + r""" + Aborts the leaf processes. + """ + children = psutil.Process(pid).children() + if children: + for child in children: + abort_leaf_process(child.pid) + else: + os.kill(pid, signal.SIGABRT) + + def can_quantize(finetuning_type: str) -> "gr.Dropdown": r""" Judges if the quantization is available in this finetuning type. From 920b091581ddc7deaf7c8fb2b3d3b8deec70fd33 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Tue, 4 Jun 2024 00:17:36 +0800 Subject: [PATCH 057/162] fix #3992 Former-commit-id: a48321fbf5196b88a11106cf74a74fbcea2ea50b --- src/llamafactory/data/loader.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/llamafactory/data/loader.py b/src/llamafactory/data/loader.py index f5929f15..4d0503c3 100644 --- a/src/llamafactory/data/loader.py +++ b/src/llamafactory/data/loader.py @@ -120,8 +120,8 @@ def load_single_dataset( logger.info("Sampled {} examples from dataset {}.".format(dataset_attr.num_samples, dataset_attr)) if data_args.max_samples is not None: # truncate dataset - indexes = np.random.permutation(len(dataset))[: data_args.max_samples] - dataset = dataset.select(indexes) + max_samples = min(data_args.max_samples, len(dataset)) + dataset = dataset.select(range(max_samples)) return align_dataset(dataset, dataset_attr, data_args) From 0e81997792275c49c6e981dd750c41b56bfb8d9a Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Tue, 4 Jun 2024 00:21:50 +0800 Subject: [PATCH 058/162] fix #3873 Former-commit-id: 1ac325b4d682bb493573c18bb0b67ceae8d0d372 --- src/llamafactory/chat/hf_engine.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/llamafactory/chat/hf_engine.py b/src/llamafactory/chat/hf_engine.py index ad0e90fe..28e6a409 100644 --- a/src/llamafactory/chat/hf_engine.py +++ b/src/llamafactory/chat/hf_engine.py @@ -83,6 +83,7 @@ class HuggingfaceEngine(BaseEngine): prompt_length = len(prompt_ids) inputs = torch.tensor([prompt_ids], device=model.device) + attention_mask = torch.ones_like(inputs, dtype=torch.bool) do_sample: Optional[bool] = input_kwargs.pop("do_sample", None) temperature: Optional[float] = input_kwargs.pop("temperature", None) @@ -136,6 +137,7 @@ class HuggingfaceEngine(BaseEngine): gen_kwargs = dict( inputs=inputs, + attention_mask=attention_mask, generation_config=GenerationConfig(**generating_args), logits_processor=get_logits_processor(), ) From 1cc9508fb32c8513c3de029237c7b3986da430ac Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Tue, 4 Jun 2024 00:31:10 +0800 Subject: [PATCH 059/162] tiny fix Former-commit-id: f9d50501aac1f60a3b445ca3fee9aa60995461ee --- examples/extras/fsdp_qlora/single_node.sh | 4 ---- scripts/llama_pro.py | 8 ++++---- src/llamafactory/data/loader.py | 2 +- 3 files changed, 5 insertions(+), 9 deletions(-) diff --git a/examples/extras/fsdp_qlora/single_node.sh b/examples/extras/fsdp_qlora/single_node.sh index 54ec2bd2..fac8cdee 100644 --- a/examples/extras/fsdp_qlora/single_node.sh +++ b/examples/extras/fsdp_qlora/single_node.sh @@ -1,10 +1,6 @@ #!/bin/bash # DO NOT use GPTQ/AWQ model in FSDP+QLoRA -pip install "transformers>=4.39.1" -pip install "accelerate>=0.28.0" -pip install "bitsandbytes>=0.43.0" - CUDA_VISIBLE_DEVICES=0,1 accelerate launch \ --config_file examples/accelerate/fsdp_config.yaml \ src/train.py examples/extras/fsdp_qlora/llama3_lora_sft.yaml diff --git a/scripts/llama_pro.py b/scripts/llama_pro.py index 997b3496..727998ae 100644 --- a/scripts/llama_pro.py +++ b/scripts/llama_pro.py @@ -104,10 
+104,10 @@ def block_expansion( print("Model weights saved in {}".format(output_dir)) print("Fine-tune this model with:") - print(" --model_name_or_path {} \\".format(output_dir)) - print(" --finetuning_type freeze \\") - print(" --freeze_trainable_layers {} \\".format(num_expand)) - print(" --use_llama_pro") + print("model_name_or_path: {}".format(output_dir)) + print("finetuning_type: freeze") + print("freeze_trainable_layers: {}".format(num_expand)) + print("use_llama_pro: true") if __name__ == "__main__": diff --git a/src/llamafactory/data/loader.py b/src/llamafactory/data/loader.py index 4d0503c3..7d013d27 100644 --- a/src/llamafactory/data/loader.py +++ b/src/llamafactory/data/loader.py @@ -179,7 +179,7 @@ def get_dataset( if training_args.should_save: dataset.save_to_disk(data_args.tokenized_path) logger.info("Tokenized dataset saved at {}.".format(data_args.tokenized_path)) - logger.info("Please restart the training with `--tokenized_path {}`.".format(data_args.tokenized_path)) + logger.info("Please restart the training with `tokenized_path: {}`.".format(data_args.tokenized_path)) sys.exit(0) From 1ca6d03bc1afc949739104a8d2a0d6f6497f752a Mon Sep 17 00:00:00 2001 From: hzhaoy Date: Tue, 4 Jun 2024 10:33:43 +0800 Subject: [PATCH 060/162] add: support selecting saved configuration files and loading training parameters Former-commit-id: 5c9b17c1dc9093da0ea813642bce9b5c9ae96274 --- src/llamafactory/webui/components/train.py | 5 +++-- src/llamafactory/webui/utils.py | 12 ++++++++++++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/src/llamafactory/webui/components/train.py b/src/llamafactory/webui/components/train.py index 6f742bb1..fabb91ea 100644 --- a/src/llamafactory/webui/components/train.py +++ b/src/llamafactory/webui/components/train.py @@ -6,7 +6,7 @@ from ...extras.constants import TRAINING_STAGES from ...extras.misc import get_device_count from ...extras.packages import is_gradio_available from ..common import DEFAULT_DATA_DIR, list_checkpoints, list_datasets -from ..utils import change_stage, check_output_dir, list_output_dirs +from ..utils import change_stage, check_output_dir, list_output_dirs, list_config_paths from .data import create_preview_box @@ -259,7 +259,7 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]: with gr.Row(): initial_dir = gr.Textbox(visible=False, interactive=False) output_dir = gr.Dropdown(allow_custom_value=True) - config_path = gr.Textbox() + config_path = gr.Dropdown(allow_custom_value=True) with gr.Row(): device_count = gr.Textbox(value=str(get_device_count() or 1), interactive=False) @@ -317,5 +317,6 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]: output_dir.change( list_output_dirs, [model_name, finetuning_type, initial_dir], [output_dir], concurrency_limit=None ).then(check_output_dir, inputs=[lang, model_name, finetuning_type, output_dir], concurrency_limit=None) + config_path.change(list_config_paths, outputs=[config_path], concurrency_limit=None) return elem_dict diff --git a/src/llamafactory/webui/utils.py b/src/llamafactory/webui/utils.py index 09cefa0e..37df1b52 100644 --- a/src/llamafactory/webui/utils.py +++ b/src/llamafactory/webui/utils.py @@ -176,6 +176,18 @@ def list_output_dirs(model_name: str, finetuning_type: str, initial_dir: str) -> return gr.Dropdown(choices=output_dirs) +def list_config_paths() -> "gr.Dropdown": + """ + Lists all the saved configuration files that can be loaded. 
+ """ + if os.path.exists(DEFAULT_CONFIG_DIR) and os.path.isdir(DEFAULT_CONFIG_DIR): + config_files = [file_name for file_name in os.listdir(DEFAULT_CONFIG_DIR) if file_name.endswith(".yaml")] + else: + config_files = [] + + return gr.Dropdown(choices=config_files) + + def check_output_dir(lang: str, model_name: str, finetuning_type: str, output_dir: str) -> None: r""" Check if output dir exists. From f4cf31a1a0e81f84c036cb71d656c56d1ffe84d7 Mon Sep 17 00:00:00 2001 From: faddddeout <39449491+injet-zhou@users.noreply.github.com> Date: Tue, 4 Jun 2024 11:04:29 +0000 Subject: [PATCH 061/162] add throughput entry to log Former-commit-id: 691f999f64c7bac78761e4354f89816d2f0d46fc --- src/llamafactory/extras/callbacks.py | 6 ++++-- src/llamafactory/webui/runner.py | 1 + 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/llamafactory/extras/callbacks.py b/src/llamafactory/extras/callbacks.py index 637b786d..441ebbfd 100644 --- a/src/llamafactory/extras/callbacks.py +++ b/src/llamafactory/extras/callbacks.py @@ -170,12 +170,14 @@ class LogCallback(TrainerCallback): percentage=round(self.cur_steps / self.max_steps * 100, 2) if self.max_steps != 0 else 100, elapsed_time=self.elapsed_time, remaining_time=self.remaining_time, + throughput="{:.2f}".format(state.num_input_tokens_seen / (time.time() - self.start_time)), + total_tokens=state.num_input_tokens_seen, ) logs = {k: v for k, v in logs.items() if v is not None} if self.webui_mode and all(key in logs for key in ["loss", "learning_rate", "epoch"]): logger.info( - "{{'loss': {:.4f}, 'learning_rate': {:2.4e}, 'epoch': {:.2f}}}".format( - logs["loss"], logs["learning_rate"], logs["epoch"] + "{{'loss': {:.4f}, 'learning_rate': {:2.4e}, 'epoch': {:.2f}, 'throughput': {}}}".format( + logs["loss"], logs["learning_rate"], logs["epoch"], logs["throughput"] ) ) diff --git a/src/llamafactory/webui/runner.py b/src/llamafactory/webui/runner.py index 6e1facef..6378a506 100644 --- a/src/llamafactory/webui/runner.py +++ b/src/llamafactory/webui/runner.py @@ -132,6 +132,7 @@ class Runner: pure_bf16=(get("train.compute_type") == "pure_bf16"), plot_loss=True, ddp_timeout=180000000, + include_num_input_tokens_seen=True, ) # checkpoints From d11ede9dc2991ac5b259db00b74e5f6657d7bf1f Mon Sep 17 00:00:00 2001 From: MengqingCao Date: Wed, 5 Jun 2024 07:06:40 +0000 Subject: [PATCH 062/162] add npu for model export Former-commit-id: ce020b6eb3f35c1db37ee4835e694eddcd0f59b0 --- src/llamafactory/hparams/model_args.py | 4 ++-- src/llamafactory/webui/components/export.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/llamafactory/hparams/model_args.py b/src/llamafactory/hparams/model_args.py index 7003cbee..99c02850 100644 --- a/src/llamafactory/hparams/model_args.py +++ b/src/llamafactory/hparams/model_args.py @@ -145,9 +145,9 @@ class ModelArguments: default=1, metadata={"help": "The file shard size (in GB) of the exported model."}, ) - export_device: Literal["cpu", "cuda"] = field( + export_device: Literal["cpu", "cuda", "npu"] = field( default="cpu", - metadata={"help": "The device used in model export, use cuda to avoid addmm errors."}, + metadata={"help": "The device used in model export, use cuda to avoid addmm errors; use npu/cuda to speed up exporting."}, ) export_quantization_bit: Optional[int] = field( default=None, diff --git a/src/llamafactory/webui/components/export.py b/src/llamafactory/webui/components/export.py index 2f354011..32bedffb 100644 --- a/src/llamafactory/webui/components/export.py +++ 
b/src/llamafactory/webui/components/export.py @@ -89,7 +89,7 @@ def create_export_tab(engine: "Engine") -> Dict[str, "Component"]: export_size = gr.Slider(minimum=1, maximum=100, value=1, step=1) export_quantization_bit = gr.Dropdown(choices=["none"] + GPTQ_BITS, value="none") export_quantization_dataset = gr.Textbox(value="data/c4_demo.json") - export_device = gr.Radio(choices=["cpu", "cuda"], value="cpu") + export_device = gr.Radio(choices=["cpu", "cuda", "npu"], value="cpu") export_legacy_format = gr.Checkbox() with gr.Row(): From 04a7065830b04e4973f3e34fe4db05103e110a78 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Wed, 5 Jun 2024 15:16:38 +0800 Subject: [PATCH 063/162] support glm-4 Former-commit-id: a10f4718fbf3f3c89dc7eb31cb8e1a46ca6adda5 --- README.md | 9 ++++-- README_zh.md | 9 ++++-- src/llamafactory/data/template.py | 13 +++++++++ src/llamafactory/extras/constants.py | 42 ++++++++++++++++++++-------- 4 files changed, 56 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index 95c0c345..7df6cb09 100644 --- a/README.md +++ b/README.md @@ -71,14 +71,16 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/ ## Changelog +[24/06/05] We supported fine-tuning the **GLM-4-9B** and **GLM-4-9B-Chat** models. + [24/05/26] We supported **[SimPO](https://arxiv.org/abs/2405.14734)** algorithm for preference learning. See [examples](examples/README.md) for usage. [24/05/20] We supported fine-tuning the **PaliGemma** series models. Note that the PaliGemma models are pre-trained models, you need to fine-tune them with `gemma` template for chat completion. -[24/05/18] We supported **[KTO](https://arxiv.org/abs/2402.01306)** algorithm for preference learning. See [examples](examples/README.md) for usage. -
Full Changelog +[24/05/18] We supported **[KTO](https://arxiv.org/abs/2402.01306)** algorithm for preference learning. See [examples](examples/README.md) for usage. + [24/05/14] We supported training and inference on the Ascend NPU devices. Check [installation](#installation) section for details. [24/04/26] We supported fine-tuning the **LLaVA-1.5** multimodal LLMs. See [examples](examples/README.md) for usage. @@ -157,6 +159,7 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/ | [DeepSeek (MoE)](https://huggingface.co/deepseek-ai) | 7B/16B/67B/236B | q_proj,v_proj | deepseek | | [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | query_key_value | falcon | | [Gemma/CodeGemma](https://huggingface.co/google) | 2B/7B | q_proj,v_proj | gemma | +| [GLM4](https://huggingface.co/THUDM) | 9B | query_key_value | glm4 | | [InternLM2](https://huggingface.co/internlm) | 7B/20B | wqkv | intern2 | | [LLaMA](https://github.com/facebookresearch/llama) | 7B/13B/33B/65B | q_proj,v_proj | - | | [LLaMA-2](https://huggingface.co/meta-llama) | 7B/13B/70B | q_proj,v_proj | llama2 | @@ -531,7 +534,7 @@ If you have a project that should be incorporated, please contact via email or c This repository is licensed under the [Apache-2.0 License](LICENSE). -Please follow the model licenses to use the corresponding model weights: [Baichuan2](https://huggingface.co/baichuan-inc/Baichuan2-7B-Base/blob/main/Community%20License%20for%20Baichuan%202%20Model.pdf) / [BLOOM](https://huggingface.co/spaces/bigscience/license) / [ChatGLM3](https://github.com/THUDM/ChatGLM3/blob/main/MODEL_LICENSE) / [Command-R](https://cohere.com/c4ai-cc-by-nc-license) / [DeepSeek](https://github.com/deepseek-ai/DeepSeek-LLM/blob/main/LICENSE-MODEL) / [Falcon](https://huggingface.co/tiiuae/falcon-180B/blob/main/LICENSE.txt) / [Gemma](https://ai.google.dev/gemma/terms) / [InternLM2](https://github.com/InternLM/InternLM#license) / [LLaMA](https://github.com/facebookresearch/llama/blob/main/MODEL_CARD.md) / [LLaMA-2 (LLaVA-1.5)](https://ai.meta.com/llama/license/) / [LLaMA-3](https://llama.meta.com/llama3/license/) / [Mistral](LICENSE) / [OLMo](LICENSE) / [Phi-1.5/2](https://huggingface.co/microsoft/phi-1_5/resolve/main/Research%20License.docx) / [Phi-3](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/blob/main/LICENSE) / [Qwen](https://github.com/QwenLM/Qwen/blob/main/Tongyi%20Qianwen%20LICENSE%20AGREEMENT) / [StarCoder2](https://huggingface.co/spaces/bigcode/bigcode-model-license-agreement) / [XVERSE](https://github.com/xverse-ai/XVERSE-13B/blob/main/MODEL_LICENSE.pdf) / [Yi](https://huggingface.co/01-ai/Yi-6B/blob/main/LICENSE) / [Yi-1.5](LICENSE) / [Yuan](https://github.com/IEIT-Yuan/Yuan-2.0/blob/main/LICENSE-Yuan) +Please follow the model licenses to use the corresponding model weights: [Baichuan2](https://huggingface.co/baichuan-inc/Baichuan2-7B-Base/blob/main/Community%20License%20for%20Baichuan%202%20Model.pdf) / [BLOOM](https://huggingface.co/spaces/bigscience/license) / [ChatGLM3](https://github.com/THUDM/ChatGLM3/blob/main/MODEL_LICENSE) / [Command-R](https://cohere.com/c4ai-cc-by-nc-license) / [DeepSeek](https://github.com/deepseek-ai/DeepSeek-LLM/blob/main/LICENSE-MODEL) / [Falcon](https://huggingface.co/tiiuae/falcon-180B/blob/main/LICENSE.txt) / [Gemma](https://ai.google.dev/gemma/terms) / [GLM4](https://huggingface.co/THUDM/glm-4-9b/blob/main/LICENSE) / [InternLM2](https://github.com/InternLM/InternLM#license) / [LLaMA](https://github.com/facebookresearch/llama/blob/main/MODEL_CARD.md) 
/ [LLaMA-2 (LLaVA-1.5)](https://ai.meta.com/llama/license/) / [LLaMA-3](https://llama.meta.com/llama3/license/) / [Mistral](LICENSE) / [OLMo](LICENSE) / [Phi-1.5/2](https://huggingface.co/microsoft/phi-1_5/resolve/main/Research%20License.docx) / [Phi-3](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/blob/main/LICENSE) / [Qwen](https://github.com/QwenLM/Qwen/blob/main/Tongyi%20Qianwen%20LICENSE%20AGREEMENT) / [StarCoder2](https://huggingface.co/spaces/bigcode/bigcode-model-license-agreement) / [XVERSE](https://github.com/xverse-ai/XVERSE-13B/blob/main/MODEL_LICENSE.pdf) / [Yi](https://huggingface.co/01-ai/Yi-6B/blob/main/LICENSE) / [Yi-1.5](LICENSE) / [Yuan](https://github.com/IEIT-Yuan/Yuan-2.0/blob/main/LICENSE-Yuan) ## Citation diff --git a/README_zh.md b/README_zh.md index b10db1e5..bdb61b8b 100644 --- a/README_zh.md +++ b/README_zh.md @@ -71,14 +71,16 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/ec36a9dd-37f4-4f72-81bd ## 更新日志 +[24/06/05] 我们支持了 **GLM-4-9B** 和 **GLM-4-9B-Chat** 模型的微调。 + [24/05/26] 我们支持了 **[SimPO](https://arxiv.org/abs/2405.14734)** 偏好对齐算法。详细用法请参照 [examples](examples/README_zh.md)。 [24/05/20] 我们支持了 **PaliGemma** 系列模型的微调。注意 PaliGemma 是预训练模型,你需要使用 `gemma` 模板进行微调使其获得对话能力。 -[24/05/18] 我们支持了 **[KTO](https://arxiv.org/abs/2402.01306)** 偏好对齐算法。详细用法请参照 [examples](examples/README_zh.md)。 -
展开日志 +[24/05/18] 我们支持了 **[KTO](https://arxiv.org/abs/2402.01306)** 偏好对齐算法。详细用法请参照 [examples](examples/README_zh.md)。 + [24/05/14] 我们支持了昇腾 NPU 设备的训练和推理。详情请查阅[安装](#安装-llama-factory)部分。 [24/04/26] 我们支持了多模态模型 **LLaVA-1.5** 的微调。详细用法请参照 [examples](examples/README_zh.md)。 @@ -157,6 +159,7 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/ec36a9dd-37f4-4f72-81bd | [DeepSeek (MoE)](https://huggingface.co/deepseek-ai) | 7B/16B/67B/236B | q_proj,v_proj | deepseek | | [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | query_key_value | falcon | | [Gemma/CodeGemma](https://huggingface.co/google) | 2B/7B | q_proj,v_proj | gemma | +| [GLM4](https://huggingface.co/THUDM) | 9B | query_key_value | glm4 | | [InternLM2](https://huggingface.co/internlm) | 7B/20B | wqkv | intern2 | | [LLaMA](https://github.com/facebookresearch/llama) | 7B/13B/33B/65B | q_proj,v_proj | - | | [LLaMA-2](https://huggingface.co/meta-llama) | 7B/13B/70B | q_proj,v_proj | llama2 | @@ -529,7 +532,7 @@ run_name: test_run # 可选 本仓库的代码依照 [Apache-2.0](LICENSE) 协议开源。 -使用模型权重时,请遵循对应的模型协议:[Baichuan2](https://huggingface.co/baichuan-inc/Baichuan2-7B-Base/blob/main/Community%20License%20for%20Baichuan%202%20Model.pdf) / [BLOOM](https://huggingface.co/spaces/bigscience/license) / [ChatGLM3](https://github.com/THUDM/ChatGLM3/blob/main/MODEL_LICENSE) / [Command-R](https://cohere.com/c4ai-cc-by-nc-license) / [DeepSeek](https://github.com/deepseek-ai/DeepSeek-LLM/blob/main/LICENSE-MODEL) / [Falcon](https://huggingface.co/tiiuae/falcon-180B/blob/main/LICENSE.txt) / [Gemma](https://ai.google.dev/gemma/terms) / [InternLM2](https://github.com/InternLM/InternLM#license) / [LLaMA](https://github.com/facebookresearch/llama/blob/main/MODEL_CARD.md) / [LLaMA-2 (LLaVA-1.5)](https://ai.meta.com/llama/license/) / [LLaMA-3](https://llama.meta.com/llama3/license/) / [Mistral](LICENSE) / [OLMo](LICENSE) / [Phi-1.5/2](https://huggingface.co/microsoft/phi-1_5/resolve/main/Research%20License.docx) / [Phi-3](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/blob/main/LICENSE) / [Qwen](https://github.com/QwenLM/Qwen/blob/main/Tongyi%20Qianwen%20LICENSE%20AGREEMENT) / [StarCoder2](https://huggingface.co/spaces/bigcode/bigcode-model-license-agreement) / [XVERSE](https://github.com/xverse-ai/XVERSE-13B/blob/main/MODEL_LICENSE.pdf) / [Yi](https://huggingface.co/01-ai/Yi-6B/blob/main/LICENSE) / [Yi-1.5](LICENSE) / [Yuan](https://github.com/IEIT-Yuan/Yuan-2.0/blob/main/LICENSE-Yuan) +使用模型权重时,请遵循对应的模型协议:[Baichuan2](https://huggingface.co/baichuan-inc/Baichuan2-7B-Base/blob/main/Community%20License%20for%20Baichuan%202%20Model.pdf) / [BLOOM](https://huggingface.co/spaces/bigscience/license) / [ChatGLM3](https://github.com/THUDM/ChatGLM3/blob/main/MODEL_LICENSE) / [Command-R](https://cohere.com/c4ai-cc-by-nc-license) / [DeepSeek](https://github.com/deepseek-ai/DeepSeek-LLM/blob/main/LICENSE-MODEL) / [Falcon](https://huggingface.co/tiiuae/falcon-180B/blob/main/LICENSE.txt) / [Gemma](https://ai.google.dev/gemma/terms) / [GLM4](https://huggingface.co/THUDM/glm-4-9b/blob/main/LICENSE) / [InternLM2](https://github.com/InternLM/InternLM#license) / [LLaMA](https://github.com/facebookresearch/llama/blob/main/MODEL_CARD.md) / [LLaMA-2 (LLaVA-1.5)](https://ai.meta.com/llama/license/) / [LLaMA-3](https://llama.meta.com/llama3/license/) / [Mistral](LICENSE) / [OLMo](LICENSE) / [Phi-1.5/2](https://huggingface.co/microsoft/phi-1_5/resolve/main/Research%20License.docx) / [Phi-3](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/blob/main/LICENSE) / 
[Qwen](https://github.com/QwenLM/Qwen/blob/main/Tongyi%20Qianwen%20LICENSE%20AGREEMENT) / [StarCoder2](https://huggingface.co/spaces/bigcode/bigcode-model-license-agreement) / [XVERSE](https://github.com/xverse-ai/XVERSE-13B/blob/main/MODEL_LICENSE.pdf) / [Yi](https://huggingface.co/01-ai/Yi-6B/blob/main/LICENSE) / [Yi-1.5](LICENSE) / [Yuan](https://github.com/IEIT-Yuan/Yuan-2.0/blob/main/LICENSE-Yuan) ## 引用 diff --git a/src/llamafactory/data/template.py b/src/llamafactory/data/template.py index 00527b44..fe0211c6 100644 --- a/src/llamafactory/data/template.py +++ b/src/llamafactory/data/template.py @@ -658,6 +658,19 @@ _register_template( ) +_register_template( + name="glm4", + format_user=StringFormatter(slots=["<|user|>\n{{content}}<|assistant|>"]), + format_assistant=StringFormatter(slots=["\n{{content}}"]), + format_system=StringFormatter(slots=["[gMASK]{{content}}"]), + format_function=FunctionFormatter(slots=["{{name}}\n{{arguments}}"]), + format_observation=StringFormatter(slots=["<|observation|>\n{{content}}<|assistant|>"]), + stop_words=["<|user|>", "<|observation|>"], + efficient_eos=True, + force_system=True, +) + + _register_template( name="intern", format_user=StringFormatter(slots=["<|User|>:{{content}}", {"token": ""}, "\n<|Bot|>:"]), diff --git a/src/llamafactory/extras/constants.py b/src/llamafactory/extras/constants.py index f365016f..4d7685c5 100644 --- a/src/llamafactory/extras/constants.py +++ b/src/llamafactory/extras/constants.py @@ -281,6 +281,26 @@ register_model_group( ) +register_model_group( + models={ + "CodeGemma-7B": { + DownloadSource.DEFAULT: "google/codegemma-7b", + }, + "CodeGemma-7B-Chat": { + DownloadSource.DEFAULT: "google/codegemma-7b-it", + DownloadSource.MODELSCOPE: "AI-ModelScope/codegemma-7b-it", + }, + "CodeGemma-1.1-2B": { + DownloadSource.DEFAULT: "google/codegemma-1.1-2b", + }, + "CodeGemma-1.1-7B-Chat": { + DownloadSource.DEFAULT: "google/codegemma-1.1-7b-it", + }, + }, + template="gemma", +) + + register_model_group( models={ "CommandR-35B-Chat": { @@ -469,21 +489,21 @@ register_model_group( register_model_group( models={ - "CodeGemma-7B": { - DownloadSource.DEFAULT: "google/codegemma-7b", + "GLM-4-9B": { + DownloadSource.DEFAULT: "THUDM/glm-4-9b", + DownloadSource.MODELSCOPE: "ZhipuAI/glm-4-9b", }, - "CodeGemma-7B-Chat": { - DownloadSource.DEFAULT: "google/codegemma-7b-it", - DownloadSource.MODELSCOPE: "AI-ModelScope/codegemma-7b-it", + "GLM-4-9B-Chat": { + DownloadSource.DEFAULT: "THUDM/glm-4-9b-chat", + DownloadSource.MODELSCOPE: "ZhipuAI/glm-4-9b-chat", }, - "CodeGemma-1.1-2B": { - DownloadSource.DEFAULT: "google/codegemma-1.1-2b", - }, - "CodeGemma-1.1-7B-Chat": { - DownloadSource.DEFAULT: "google/codegemma-1.1-7b-it", + "GLM-4-9B-1M-Chat": { + DownloadSource.DEFAULT: "THUDM/glm-4-9b-chat-1m", + DownloadSource.MODELSCOPE: "ZhipuAI/glm-4-9b-chat-1m", }, }, - template="gemma", + module="query_key_value", + template="glm4", ) From 48b2d0d12b4281d27d87a0ba4e4b786d2e63e092 Mon Sep 17 00:00:00 2001 From: MengqingCao Date: Wed, 5 Jun 2024 08:03:30 +0000 Subject: [PATCH 064/162] fix #4077 Former-commit-id: fedbe92f3b56294acc6c49f9a51e369cf2de3ead --- src/llamafactory/cli.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/llamafactory/cli.py b/src/llamafactory/cli.py index c14ae6ec..8a229a38 100644 --- a/src/llamafactory/cli.py +++ b/src/llamafactory/cli.py @@ -70,7 +70,11 @@ def main(): elif command == Command.EXPORT: export_model() elif command == Command.TRAIN: - if get_device_count() > 1: + if get_device_count() > 
0: + # NOTE (MengqingCao): why use torchrun when only one accelerator is available? + # DeepSpeed only warp model with DeepSpeedEngine when launching by distributed launcher, + # e.g., torchrun, causing some feature missing + # sa: https://github.com/huggingface/transformers/issues/24309 master_addr = os.environ.get("MASTER_ADDR", "127.0.0.1") master_port = os.environ.get("MASTER_PORT", str(random.randint(20001, 29999))) logger.info("Initializing distributed tasks at: {}:{}".format(master_addr, master_port)) From 3d2b0813318fa66226eb2703aff814322bf07a8f Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Wed, 5 Jun 2024 16:32:32 +0800 Subject: [PATCH 065/162] update readme Former-commit-id: 02d34db29a7a35c25711d49e98fd3167a2f4dfe7 --- README.md | 2 +- README_zh.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 7df6cb09..5e8bc8eb 100644 --- a/README.md +++ b/README.md @@ -71,7 +71,7 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/ ## Changelog -[24/06/05] We supported fine-tuning the **GLM-4-9B** and **GLM-4-9B-Chat** models. +[24/06/05] We supported fine-tuning the **[GLM-4-9B/GLM-4-9B-Chat](https://github.com/THUDM/GLM-4)** models. [24/05/26] We supported **[SimPO](https://arxiv.org/abs/2405.14734)** algorithm for preference learning. See [examples](examples/README.md) for usage. diff --git a/README_zh.md b/README_zh.md index bdb61b8b..d8e17b29 100644 --- a/README_zh.md +++ b/README_zh.md @@ -71,7 +71,7 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/ec36a9dd-37f4-4f72-81bd ## 更新日志 -[24/06/05] 我们支持了 **GLM-4-9B** 和 **GLM-4-9B-Chat** 模型的微调。 +[24/06/05] 我们支持了 **[GLM-4-9B/GLM-4-9B-Chat](https://github.com/THUDM/GLM-4)** 模型的微调。 [24/05/26] 我们支持了 **[SimPO](https://arxiv.org/abs/2405.14734)** 偏好对齐算法。详细用法请参照 [examples](examples/README_zh.md)。 From 861d93a06a7621cfba4c05224fa40dabb80966c2 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Wed, 5 Jun 2024 16:56:54 +0800 Subject: [PATCH 066/162] fix #4079 Former-commit-id: fda732d7f4616373844c97beff416880260f49db --- requirements.txt | 1 + setup.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index e17954e3..9e00555e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,6 +7,7 @@ gradio>=4.0.0 scipy einops sentencepiece +tiktoken protobuf uvicorn pydantic diff --git a/setup.py b/setup.py index 23f532e7..54f971ca 100644 --- a/setup.py +++ b/setup.py @@ -31,7 +31,7 @@ extra_require = { "gptq": ["optimum>=1.16.0", "auto-gptq>=0.5.0"], "awq": ["autoawq"], "aqlm": ["aqlm[gpu]>=1.1.0"], - "qwen": ["tiktoken", "transformers_stream_generator"], + "qwen": ["transformers_stream_generator"], "modelscope": ["modelscope"], "quality": ["ruff"], } From 80897e3d6fda7ecf27d78c30a9cd457d65969bbd Mon Sep 17 00:00:00 2001 From: MengqingCao Date: Wed, 5 Jun 2024 09:37:36 +0000 Subject: [PATCH 067/162] modify export_device option Former-commit-id: b2fc4a5499e21a5b9622c2285402efef6e27a74d --- src/llamafactory/webui/components/export.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llamafactory/webui/components/export.py b/src/llamafactory/webui/components/export.py index 32bedffb..791a833b 100644 --- a/src/llamafactory/webui/components/export.py +++ b/src/llamafactory/webui/components/export.py @@ -89,7 +89,7 @@ def create_export_tab(engine: "Engine") -> Dict[str, "Component"]: export_size = gr.Slider(minimum=1, maximum=100, value=1, step=1) export_quantization_bit = 
gr.Dropdown(choices=["none"] + GPTQ_BITS, value="none") export_quantization_dataset = gr.Textbox(value="data/c4_demo.json") - export_device = gr.Radio(choices=["cpu", "cuda", "npu"], value="cpu") + export_device = gr.Radio(choices=["cpu", "cuda/npu"], value="cpu") export_legacy_format = gr.Checkbox() with gr.Row(): From fc053cf81f0cc135e5fce7fcb4880fcbc7ec3db2 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Thu, 6 Jun 2024 00:50:32 +0800 Subject: [PATCH 068/162] fix #4090 Former-commit-id: d9f15f30a8f4bc64778a5c96baeb6801700d7a2c --- requirements.txt | 2 +- src/llamafactory/extras/misc.py | 2 +- src/llamafactory/train/dpo/trainer.py | 29 ++++++++++----------------- 3 files changed, 13 insertions(+), 20 deletions(-) diff --git a/requirements.txt b/requirements.txt index 9e00555e..7b6cbee9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,7 +2,7 @@ transformers>=4.41.2 datasets>=2.16.0 accelerate>=0.30.1 peft>=0.11.1 -trl>=0.8.6 +trl>=0.9.3 gradio>=4.0.0 scipy einops diff --git a/src/llamafactory/extras/misc.py b/src/llamafactory/extras/misc.py index 638c24cf..78f71847 100644 --- a/src/llamafactory/extras/misc.py +++ b/src/llamafactory/extras/misc.py @@ -65,7 +65,7 @@ def check_dependencies() -> None: require_version("datasets>=2.16.0", "To fix: pip install datasets>=2.16.0") require_version("accelerate>=0.30.1", "To fix: pip install accelerate>=0.30.1") require_version("peft>=0.11.1", "To fix: pip install peft>=0.11.1") - require_version("trl>=0.8.6", "To fix: pip install trl>=0.8.6") + require_version("trl>=0.9.3", "To fix: pip install trl>=0.9.3") def count_parameters(model: torch.nn.Module) -> Tuple[int, int]: diff --git a/src/llamafactory/train/dpo/trainer.py b/src/llamafactory/train/dpo/trainer.py index ec1de810..2bbe6a06 100644 --- a/src/llamafactory/train/dpo/trainer.py +++ b/src/llamafactory/train/dpo/trainer.py @@ -93,18 +93,6 @@ class CustomDPOTrainer(DPOTrainer): output_dir = output_dir if output_dir is not None else self.args.output_dir getattr(self.processor, "image_processor").save_pretrained(output_dir) - def sft_loss(self, batch: Dict[str, "torch.Tensor"], chosen_logits: "torch.FloatTensor") -> "torch.Tensor": - r""" - Computes supervised cross-entropy loss of given labels under the given logits. - - Returns: - A tensor of shape (batch_size,) containing the cross-entropy loss of each samples. - """ - batch_size = batch["input_ids"].size(0) // 2 - chosen_labels, _ = batch["labels"].split(batch_size, dim=0) - chosen_logps = self.get_batch_logps(chosen_logits, chosen_labels, average_log_prob=True) - return -chosen_logps - def odds_ratio_loss(self, chosen_logps: "torch.Tensor", rejected_logps: "torch.Tensor") -> "torch.Tensor": r""" Computes ORPO's odds ratio (OR) loss for batched log probabilities of the policy model. @@ -156,9 +144,9 @@ class CustomDPOTrainer(DPOTrainer): def concatenated_forward( self, model: "PreTrainedModel", batch: Dict[str, "torch.Tensor"] - ) -> Tuple["torch.Tensor", "torch.Tensor", "torch.Tensor", "torch.Tensor"]: + ) -> Tuple["torch.Tensor", "torch.Tensor", "torch.Tensor", "torch.Tensor", "torch.Tensor"]: r""" - Computes the sum log probabilities of the labels under the given logits if loss_type != IPO. + Computes the sum log probabilities of the labels under given logits if loss_type is not IPO, ORPO or SimPO. Otherwise the average log probabilities. 
""" @@ -167,17 +155,20 @@ class CustomDPOTrainer(DPOTrainer): all_logits: "torch.Tensor" = model(**batch, return_dict=True, use_cache=False).logits.to(torch.float32) - all_logps = self.get_batch_logps( + all_logps, valid_length = self.get_batch_logps( logits=all_logits, labels=batch["labels"], - average_log_prob=(self.loss_type in ["ipo", "orpo", "simpo"]), is_encoder_decoder=self.is_encoder_decoder, label_pad_token_id=self.label_pad_token_id, ) + if self.loss_type in ["ipo", "orpo", "simpo"]: + all_logps = all_logps / valid_length + batch_size = batch["input_ids"].size(0) // 2 chosen_logps, rejected_logps = all_logps.split(batch_size, dim=0) chosen_logits, rejected_logits = all_logits.split(batch_size, dim=0) - return chosen_logps, rejected_logps, chosen_logits, rejected_logits + chosen_length, _ = valid_length.split(batch_size, dim=0) + return chosen_logps, rejected_logps, chosen_logits, rejected_logits, chosen_logps / chosen_length def compute_reference_log_probs( self, model: "PreTrainedModel", batch: Dict[str, "torch.Tensor"] @@ -201,6 +192,7 @@ class CustomDPOTrainer(DPOTrainer): reference_rejected_logps, _, _, + _, ) = self.concatenated_forward(ref_model, batch) return reference_chosen_logps, reference_rejected_logps @@ -220,6 +212,7 @@ class CustomDPOTrainer(DPOTrainer): policy_rejected_logps, policy_chosen_logits, policy_rejected_logits, + policy_chosen_logps_avg, ) = self.concatenated_forward(model, batch) reference_chosen_logps, reference_rejected_logps = self.compute_reference_log_probs(model, batch) @@ -229,7 +222,7 @@ class CustomDPOTrainer(DPOTrainer): reference_chosen_logps, reference_rejected_logps, ) - sft_loss = self.sft_loss(batch, policy_chosen_logits) # compute chosen_logps with masks + sft_loss = -policy_chosen_logps_avg if self.ftx_gamma > 1e-6: losses += self.ftx_gamma * sft_loss From 1935f4a1e09d0ea171ac7aeb93c0edfb82705a29 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Thu, 6 Jun 2024 01:28:14 +0800 Subject: [PATCH 069/162] add llamafactory-cli env Former-commit-id: 1df077184845ff5f394b9324d46f8c382869e590 --- .github/ISSUE_TEMPLATE/bug-report.yml | 6 +-- src/llamafactory/cli.py | 6 ++- src/llamafactory/extras/env.py | 54 +++++++++++++++++++++++++++ src/llamafactory/extras/packages.py | 4 ++ 4 files changed, 65 insertions(+), 5 deletions(-) create mode 100644 src/llamafactory/extras/env.py diff --git a/.github/ISSUE_TEMPLATE/bug-report.yml b/.github/ISSUE_TEMPLATE/bug-report.yml index 82620fdb..1d962200 100644 --- a/.github/ISSUE_TEMPLATE/bug-report.yml +++ b/.github/ISSUE_TEMPLATE/bug-report.yml @@ -20,10 +20,10 @@ body: attributes: label: System Info description: | - Please share your system info with us. You can run the command **transformers-cli env** and copy-paste its output below. - 请提供您的系统信息。您可以在命令行运行 **transformers-cli env** 并将其输出复制到该文本框中。 + Please share your system info with us. You can run the command **llamafactory-cli env** and copy-paste its output below. + 请提供您的系统信息。您可以在命令行运行 **llamafactory-cli env** 并将其输出复制到该文本框中。 - placeholder: transformers version, platform, python version, ... + placeholder: llamafactory version, platform, python version, ... - type: textarea id: reproduction diff --git a/src/llamafactory/cli.py b/src/llamafactory/cli.py index c14ae6ec..fbe18d86 100644 --- a/src/llamafactory/cli.py +++ b/src/llamafactory/cli.py @@ -8,6 +8,7 @@ from . 
import launcher from .api.app import run_api from .chat.chat_model import run_chat from .eval.evaluator import run_eval +from .extras.env import VERSION, print_env from .extras.logging import get_logger from .extras.misc import get_device_count from .train.tuner import export_model, run_exp @@ -29,8 +30,6 @@ USAGE = ( + "-" * 70 ) -VERSION = "0.7.2.dev0" - WELCOME = ( "-" * 58 + "\n" @@ -50,6 +49,7 @@ logger = get_logger(__name__) class Command(str, Enum): API = "api" CHAT = "chat" + ENV = "env" EVAL = "eval" EXPORT = "export" TRAIN = "train" @@ -65,6 +65,8 @@ def main(): run_api() elif command == Command.CHAT: run_chat() + elif command == Command.ENV: + print_env() elif command == Command.EVAL: run_eval() elif command == Command.EXPORT: diff --git a/src/llamafactory/extras/env.py b/src/llamafactory/extras/env.py new file mode 100644 index 00000000..27453a6b --- /dev/null +++ b/src/llamafactory/extras/env.py @@ -0,0 +1,54 @@ +import platform + +import accelerate +import datasets +import peft +import torch +import transformers +import trl +from transformers.utils import is_bitsandbytes_available, is_torch_cuda_available, is_torch_npu_available + +from .packages import is_deepspeed_available, is_vllm_available + + +VERSION = "0.7.2.dev0" + + +def print_env() -> None: + info = { + "`llamafactory` version": VERSION, + "Platform": platform.platform(), + "Python version": platform.python_version(), + "PyTorch version": torch.__version__, + "Transformers version": transformers.__version__, + "Datasets version": datasets.__version__, + "Accelerate version": accelerate.__version__, + "PEFT version": peft.__version__, + "TRL version": trl.__version__, + } + + if is_torch_cuda_available(): + info["PyTorch version"] += " (GPU)" + info["GPU type"] = torch.cuda.get_device_name() + + if is_torch_npu_available(): + info["PyTorch version"] += " (NPU)" + info["NPU type"] = torch.npu.get_device_name() + info["CANN version"] = torch.version.cann + + if is_deepspeed_available(): + import deepspeed # type: ignore + + info["DeepSpeed version"] = deepspeed.__version__ + + if is_bitsandbytes_available(): + import bitsandbytes + + info["Bitsandbytes version"] = bitsandbytes.__version__ + + if is_vllm_available(): + import vllm + + info["vLLM version"] = vllm.__version__ + + print("\n".join(["- {}: {}".format(key, value) for key, value in info.items()]) + "\n") diff --git a/src/llamafactory/extras/packages.py b/src/llamafactory/extras/packages.py index 4c9e6492..fe056e2d 100644 --- a/src/llamafactory/extras/packages.py +++ b/src/llamafactory/extras/packages.py @@ -20,6 +20,10 @@ def _get_package_version(name: str) -> "Version": return version.parse("0.0.0") +def is_deepspeed_available(): + return _is_package_available("deepspeed") + + def is_fastapi_available(): return _is_package_available("fastapi") From eff00a8172c0573b76a0949b21dd75a089679406 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Thu, 6 Jun 2024 01:39:02 +0800 Subject: [PATCH 070/162] fix setup Former-commit-id: b2b80d434fcc0c3838d229098e1c21d26632204c --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 54f971ca..7a5b9304 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ from setuptools import find_packages, setup def get_version(): - with open(os.path.join("src", "llamafactory", "cli.py"), "r", encoding="utf-8") as f: + with open(os.path.join("src", "llamafactory", "extras", "env.py"), "r", encoding="utf-8") as f: file_content = f.read() pattern = 
r"{}\W*=\W*\"([^\"]+)\"".format("VERSION") (version,) = re.findall(pattern, file_content) From 0b671615d0b2fc02a752a7f35592809fd4841054 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Thu, 6 Jun 2024 01:49:20 +0800 Subject: [PATCH 071/162] update train hparams Former-commit-id: 1ca9fce55b55bf209f4b76152b586731932a3f39 --- examples/extras/badam/llama3_lora_sft.yaml | 2 +- examples/extras/fsdp_qlora/llama3_lora_sft.yaml | 2 +- examples/extras/galore/llama3_full_sft.yaml | 2 +- examples/extras/llama_pro/llama3_freeze_sft.yaml | 2 +- examples/extras/loraplus/llama3_lora_sft.yaml | 2 +- examples/extras/mod/llama3_full_sft.yaml | 2 +- examples/full_multi_gpu/llama3_full_sft.yaml | 2 +- examples/lora_multi_gpu/llama3_lora_sft.yaml | 2 +- examples/lora_multi_gpu/llama3_lora_sft_ds.yaml | 2 +- examples/lora_multi_npu/llama3_lora_sft_ds.yaml | 2 +- examples/lora_single_gpu/llama3_lora_dpo.yaml | 2 +- examples/lora_single_gpu/llama3_lora_kto.yaml | 2 +- examples/lora_single_gpu/llama3_lora_pretrain.yaml | 2 +- examples/lora_single_gpu/llama3_lora_reward.yaml | 2 +- examples/lora_single_gpu/llama3_lora_sft.yaml | 2 +- examples/lora_single_gpu/llava1_5_lora_sft.yaml | 2 +- examples/qlora_single_gpu/llama3_lora_sft_aqlm.yaml | 2 +- examples/qlora_single_gpu/llama3_lora_sft_awq.yaml | 2 +- examples/qlora_single_gpu/llama3_lora_sft_bitsandbytes.yaml | 2 +- examples/qlora_single_gpu/llama3_lora_sft_gptq.yaml | 2 +- src/llamafactory/extras/env.py | 2 +- src/llamafactory/webui/runner.py | 2 +- 22 files changed, 22 insertions(+), 22 deletions(-) diff --git a/examples/extras/badam/llama3_lora_sft.yaml b/examples/extras/badam/llama3_lora_sft.yaml index 242e63ab..a78de2fa 100644 --- a/examples/extras/badam/llama3_lora_sft.yaml +++ b/examples/extras/badam/llama3_lora_sft.yaml @@ -37,5 +37,5 @@ pure_bf16: true ### eval val_size: 0.1 per_device_eval_batch_size: 1 -evaluation_strategy: steps +eval_strategy: steps eval_steps: 500 diff --git a/examples/extras/fsdp_qlora/llama3_lora_sft.yaml b/examples/extras/fsdp_qlora/llama3_lora_sft.yaml index 920d8fdb..348459b8 100644 --- a/examples/extras/fsdp_qlora/llama3_lora_sft.yaml +++ b/examples/extras/fsdp_qlora/llama3_lora_sft.yaml @@ -38,5 +38,5 @@ fp16: true ### eval val_size: 0.1 per_device_eval_batch_size: 1 -evaluation_strategy: steps +eval_strategy: steps eval_steps: 500 diff --git a/examples/extras/galore/llama3_full_sft.yaml b/examples/extras/galore/llama3_full_sft.yaml index 3db31fed..605545de 100644 --- a/examples/extras/galore/llama3_full_sft.yaml +++ b/examples/extras/galore/llama3_full_sft.yaml @@ -38,5 +38,5 @@ pure_bf16: true ### eval val_size: 0.1 per_device_eval_batch_size: 1 -evaluation_strategy: steps +eval_strategy: steps eval_steps: 500 diff --git a/examples/extras/llama_pro/llama3_freeze_sft.yaml b/examples/extras/llama_pro/llama3_freeze_sft.yaml index 214f411a..444a1113 100644 --- a/examples/extras/llama_pro/llama3_freeze_sft.yaml +++ b/examples/extras/llama_pro/llama3_freeze_sft.yaml @@ -36,5 +36,5 @@ fp16: true ### eval val_size: 0.1 per_device_eval_batch_size: 1 -evaluation_strategy: steps +eval_strategy: steps eval_steps: 500 diff --git a/examples/extras/loraplus/llama3_lora_sft.yaml b/examples/extras/loraplus/llama3_lora_sft.yaml index 9936bcd3..960f613e 100644 --- a/examples/extras/loraplus/llama3_lora_sft.yaml +++ b/examples/extras/loraplus/llama3_lora_sft.yaml @@ -35,5 +35,5 @@ fp16: true ### eval val_size: 0.1 per_device_eval_batch_size: 1 -evaluation_strategy: steps +eval_strategy: steps eval_steps: 500 diff --git 
a/examples/extras/mod/llama3_full_sft.yaml b/examples/extras/mod/llama3_full_sft.yaml index edfec44e..df03c1e0 100644 --- a/examples/extras/mod/llama3_full_sft.yaml +++ b/examples/extras/mod/llama3_full_sft.yaml @@ -35,5 +35,5 @@ pure_bf16: true ### eval val_size: 0.1 per_device_eval_batch_size: 1 -evaluation_strategy: steps +eval_strategy: steps eval_steps: 500 diff --git a/examples/full_multi_gpu/llama3_full_sft.yaml b/examples/full_multi_gpu/llama3_full_sft.yaml index b8873e3a..40b62f24 100644 --- a/examples/full_multi_gpu/llama3_full_sft.yaml +++ b/examples/full_multi_gpu/llama3_full_sft.yaml @@ -37,5 +37,5 @@ fp16: true ### eval val_size: 0.1 per_device_eval_batch_size: 1 -evaluation_strategy: steps +eval_strategy: steps eval_steps: 500 diff --git a/examples/lora_multi_gpu/llama3_lora_sft.yaml b/examples/lora_multi_gpu/llama3_lora_sft.yaml index 5e5dd9e6..9be3c780 100644 --- a/examples/lora_multi_gpu/llama3_lora_sft.yaml +++ b/examples/lora_multi_gpu/llama3_lora_sft.yaml @@ -37,5 +37,5 @@ fp16: true ### eval val_size: 0.1 per_device_eval_batch_size: 1 -evaluation_strategy: steps +eval_strategy: steps eval_steps: 500 diff --git a/examples/lora_multi_gpu/llama3_lora_sft_ds.yaml b/examples/lora_multi_gpu/llama3_lora_sft_ds.yaml index e8dee216..41152243 100644 --- a/examples/lora_multi_gpu/llama3_lora_sft_ds.yaml +++ b/examples/lora_multi_gpu/llama3_lora_sft_ds.yaml @@ -38,5 +38,5 @@ fp16: true ### eval val_size: 0.1 per_device_eval_batch_size: 1 -evaluation_strategy: steps +eval_strategy: steps eval_steps: 500 diff --git a/examples/lora_multi_npu/llama3_lora_sft_ds.yaml b/examples/lora_multi_npu/llama3_lora_sft_ds.yaml index 825b8450..1ed24d04 100644 --- a/examples/lora_multi_npu/llama3_lora_sft_ds.yaml +++ b/examples/lora_multi_npu/llama3_lora_sft_ds.yaml @@ -38,5 +38,5 @@ fp16: true ### eval val_size: 0.1 per_device_eval_batch_size: 1 -evaluation_strategy: steps +eval_strategy: steps eval_steps: 500 diff --git a/examples/lora_single_gpu/llama3_lora_dpo.yaml b/examples/lora_single_gpu/llama3_lora_dpo.yaml index 62752e57..158c9e04 100644 --- a/examples/lora_single_gpu/llama3_lora_dpo.yaml +++ b/examples/lora_single_gpu/llama3_lora_dpo.yaml @@ -36,5 +36,5 @@ fp16: true ### eval val_size: 0.1 per_device_eval_batch_size: 1 -evaluation_strategy: steps +eval_strategy: steps eval_steps: 500 diff --git a/examples/lora_single_gpu/llama3_lora_kto.yaml b/examples/lora_single_gpu/llama3_lora_kto.yaml index 6f689818..ead221e9 100644 --- a/examples/lora_single_gpu/llama3_lora_kto.yaml +++ b/examples/lora_single_gpu/llama3_lora_kto.yaml @@ -34,5 +34,5 @@ fp16: true ### eval val_size: 0.1 per_device_eval_batch_size: 1 -evaluation_strategy: steps +eval_strategy: steps eval_steps: 500 diff --git a/examples/lora_single_gpu/llama3_lora_pretrain.yaml b/examples/lora_single_gpu/llama3_lora_pretrain.yaml index 54c5d89a..9167a893 100644 --- a/examples/lora_single_gpu/llama3_lora_pretrain.yaml +++ b/examples/lora_single_gpu/llama3_lora_pretrain.yaml @@ -33,5 +33,5 @@ fp16: true ### eval val_size: 0.1 per_device_eval_batch_size: 1 -evaluation_strategy: steps +eval_strategy: steps eval_steps: 500 diff --git a/examples/lora_single_gpu/llama3_lora_reward.yaml b/examples/lora_single_gpu/llama3_lora_reward.yaml index c82f9414..91663057 100644 --- a/examples/lora_single_gpu/llama3_lora_reward.yaml +++ b/examples/lora_single_gpu/llama3_lora_reward.yaml @@ -34,5 +34,5 @@ fp16: true ### eval val_size: 0.1 per_device_eval_batch_size: 1 -evaluation_strategy: steps +eval_strategy: steps eval_steps: 500 diff --git 
a/examples/lora_single_gpu/llama3_lora_sft.yaml b/examples/lora_single_gpu/llama3_lora_sft.yaml index 429cb6af..cc93d05a 100644 --- a/examples/lora_single_gpu/llama3_lora_sft.yaml +++ b/examples/lora_single_gpu/llama3_lora_sft.yaml @@ -34,5 +34,5 @@ fp16: true ### eval val_size: 0.1 per_device_eval_batch_size: 1 -evaluation_strategy: steps +eval_strategy: steps eval_steps: 500 diff --git a/examples/lora_single_gpu/llava1_5_lora_sft.yaml b/examples/lora_single_gpu/llava1_5_lora_sft.yaml index acab4884..95c1d40d 100644 --- a/examples/lora_single_gpu/llava1_5_lora_sft.yaml +++ b/examples/lora_single_gpu/llava1_5_lora_sft.yaml @@ -35,5 +35,5 @@ fp16: true ### eval val_size: 0.1 per_device_eval_batch_size: 1 -evaluation_strategy: steps +eval_strategy: steps eval_steps: 500 diff --git a/examples/qlora_single_gpu/llama3_lora_sft_aqlm.yaml b/examples/qlora_single_gpu/llama3_lora_sft_aqlm.yaml index 53cc12e2..23301de5 100644 --- a/examples/qlora_single_gpu/llama3_lora_sft_aqlm.yaml +++ b/examples/qlora_single_gpu/llama3_lora_sft_aqlm.yaml @@ -34,5 +34,5 @@ fp16: true ### eval val_size: 0.1 per_device_eval_batch_size: 1 -evaluation_strategy: steps +eval_strategy: steps eval_steps: 500 diff --git a/examples/qlora_single_gpu/llama3_lora_sft_awq.yaml b/examples/qlora_single_gpu/llama3_lora_sft_awq.yaml index 1a92f822..40a290a3 100644 --- a/examples/qlora_single_gpu/llama3_lora_sft_awq.yaml +++ b/examples/qlora_single_gpu/llama3_lora_sft_awq.yaml @@ -34,5 +34,5 @@ fp16: true ### eval val_size: 0.1 per_device_eval_batch_size: 1 -evaluation_strategy: steps +eval_strategy: steps eval_steps: 500 diff --git a/examples/qlora_single_gpu/llama3_lora_sft_bitsandbytes.yaml b/examples/qlora_single_gpu/llama3_lora_sft_bitsandbytes.yaml index c7f72c66..6652d8cf 100644 --- a/examples/qlora_single_gpu/llama3_lora_sft_bitsandbytes.yaml +++ b/examples/qlora_single_gpu/llama3_lora_sft_bitsandbytes.yaml @@ -35,5 +35,5 @@ fp16: true ### eval val_size: 0.1 per_device_eval_batch_size: 1 -evaluation_strategy: steps +eval_strategy: steps eval_steps: 500 diff --git a/examples/qlora_single_gpu/llama3_lora_sft_gptq.yaml b/examples/qlora_single_gpu/llama3_lora_sft_gptq.yaml index 45caf17c..323ea7c6 100644 --- a/examples/qlora_single_gpu/llama3_lora_sft_gptq.yaml +++ b/examples/qlora_single_gpu/llama3_lora_sft_gptq.yaml @@ -34,5 +34,5 @@ fp16: true ### eval val_size: 0.1 per_device_eval_batch_size: 1 -evaluation_strategy: steps +eval_strategy: steps eval_steps: 500 diff --git a/src/llamafactory/extras/env.py b/src/llamafactory/extras/env.py index 27453a6b..059730f1 100644 --- a/src/llamafactory/extras/env.py +++ b/src/llamafactory/extras/env.py @@ -51,4 +51,4 @@ def print_env() -> None: info["vLLM version"] = vllm.__version__ - print("\n".join(["- {}: {}".format(key, value) for key, value in info.items()]) + "\n") + print("\n" + "\n".join(["- {}: {}".format(key, value) for key, value in info.items()]) + "\n") diff --git a/src/llamafactory/webui/runner.py b/src/llamafactory/webui/runner.py index 6e1facef..d35fd903 100644 --- a/src/llamafactory/webui/runner.py +++ b/src/llamafactory/webui/runner.py @@ -200,7 +200,7 @@ class Runner: # eval config if get("train.val_size") > 1e-6 and args["stage"] != "ppo": args["val_size"] = get("train.val_size") - args["evaluation_strategy"] = "steps" + args["eval_strategy"] = "steps" args["eval_steps"] = args["save_steps"] args["per_device_eval_batch_size"] = args["per_device_train_batch_size"] From 7226fe780d4511f9f34cb5ccf084132f4fc3e2d9 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> 
Date: Thu, 6 Jun 2024 02:29:55 +0800 Subject: [PATCH 072/162] support image input in api #3971 #4061 Former-commit-id: c70aaf763ef22fb83ce3635e8ffd5ec4c89c1cb0 --- README.md | 3 +++ README_zh.md | 3 +++ src/llamafactory/api/chat.py | 39 ++++++++++++++++++++++++++------ src/llamafactory/api/protocol.py | 12 +++++++++- 4 files changed, 49 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 5e8bc8eb..3eebf355 100644 --- a/README.md +++ b/README.md @@ -456,6 +456,9 @@ docker compose -f ./docker-compose.yml up -d CUDA_VISIBLE_DEVICES=0,1 API_PORT=8000 llamafactory-cli api examples/inference/llama3_vllm.yaml ``` +> [!TIP] +> Visit https://platform.openai.com/docs/api-reference/chat/create for API document. + ### Download from ModelScope Hub If you have trouble with downloading models and datasets from Hugging Face, you can use ModelScope. diff --git a/README_zh.md b/README_zh.md index d8e17b29..09a7f330 100644 --- a/README_zh.md +++ b/README_zh.md @@ -454,6 +454,9 @@ docker compose -f ./docker-compose.yml up -d CUDA_VISIBLE_DEVICES=0,1 API_PORT=8000 llamafactory-cli api examples/inference/llama3_vllm.yaml ``` +> [!TIP] +> API 文档请查阅 https://platform.openai.com/docs/api-reference/chat/create。 + ### 从魔搭社区下载 如果您在 Hugging Face 模型和数据集的下载中遇到了问题,可以通过下述方法使用魔搭社区。 diff --git a/src/llamafactory/api/chat.py b/src/llamafactory/api/chat.py index b7a08f0b..712b6940 100644 --- a/src/llamafactory/api/chat.py +++ b/src/llamafactory/api/chat.py @@ -1,10 +1,11 @@ import json +import os import uuid from typing import TYPE_CHECKING, AsyncGenerator, Dict, List, Optional, Tuple from ..data import Role as DataRole from ..extras.logging import get_logger -from ..extras.packages import is_fastapi_available +from ..extras.packages import is_fastapi_available, is_pillow_available from .common import dictify, jsonify from .protocol import ( ChatCompletionMessage, @@ -25,7 +26,14 @@ if is_fastapi_available(): from fastapi import HTTPException, status +if is_pillow_available(): + import requests + from PIL import Image + + if TYPE_CHECKING: + from numpy.typing import NDArray + from ..chat import ChatModel from .protocol import ChatCompletionRequest, ScoreEvaluationRequest @@ -40,7 +48,9 @@ ROLE_MAPPING = { } -def _process_request(request: "ChatCompletionRequest") -> Tuple[List[Dict[str, str]], str, str]: +def _process_request( + request: "ChatCompletionRequest", +) -> Tuple[List[Dict[str, str]], Optional[str], Optional[str], Optional["NDArray"]]: logger.info("==== request ====\n{}".format(json.dumps(dictify(request), indent=2, ensure_ascii=False))) if len(request.messages) == 0: @@ -49,12 +59,13 @@ def _process_request(request: "ChatCompletionRequest") -> Tuple[List[Dict[str, s if request.messages[0].role == Role.SYSTEM: system = request.messages.pop(0).content else: - system = "" + system = None if len(request.messages) % 2 == 0: raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Only supports u/a/u/a/u...") input_messages = [] + image = None for i, message in enumerate(request.messages): if i % 2 == 0 and message.role not in [Role.USER, Role.TOOL]: raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid role") @@ -66,6 +77,18 @@ def _process_request(request: "ChatCompletionRequest") -> Tuple[List[Dict[str, s arguments = message.tool_calls[0].function.arguments content = json.dumps({"name": name, "argument": arguments}, ensure_ascii=False) input_messages.append({"role": ROLE_MAPPING[Role.FUNCTION], "content": content}) + elif isinstance(message.content, list): + for 
input_item in message.content: + if input_item.type == "text": + input_messages.append({"role": ROLE_MAPPING[message.role], "content": input_item.text}) + else: + image_url = input_item.image_url.url + if os.path.isfile(image_url): + image_path = open(image_url, "rb") + else: + image_path = requests.get(image_url, stream=True).raw + + image = Image.open(image_path).convert("RGB") else: input_messages.append({"role": ROLE_MAPPING[message.role], "content": message.content}) @@ -76,9 +99,9 @@ def _process_request(request: "ChatCompletionRequest") -> Tuple[List[Dict[str, s except Exception: raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid tools") else: - tools = "" + tools = None - return input_messages, system, tools + return input_messages, system, tools, image def _create_stream_chat_completion_chunk( @@ -97,11 +120,12 @@ async def create_chat_completion_response( request: "ChatCompletionRequest", chat_model: "ChatModel" ) -> "ChatCompletionResponse": completion_id = "chatcmpl-{}".format(uuid.uuid4().hex) - input_messages, system, tools = _process_request(request) + input_messages, system, tools, image = _process_request(request) responses = await chat_model.achat( input_messages, system, tools, + image, do_sample=request.do_sample, temperature=request.temperature, top_p=request.top_p, @@ -145,7 +169,7 @@ async def create_stream_chat_completion_response( request: "ChatCompletionRequest", chat_model: "ChatModel" ) -> AsyncGenerator[str, None]: completion_id = "chatcmpl-{}".format(uuid.uuid4().hex) - input_messages, system, tools = _process_request(request) + input_messages, system, tools, image = _process_request(request) if tools: raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Cannot stream function calls.") @@ -159,6 +183,7 @@ async def create_stream_chat_completion_response( input_messages, system, tools, + image, do_sample=request.do_sample, temperature=request.temperature, top_p=request.top_p, diff --git a/src/llamafactory/api/protocol.py b/src/llamafactory/api/protocol.py index 525fa6a7..055fa781 100644 --- a/src/llamafactory/api/protocol.py +++ b/src/llamafactory/api/protocol.py @@ -56,9 +56,19 @@ class FunctionCall(BaseModel): function: Function +class ImageURL(BaseModel): + url: str + + +class MultimodalInputItem(BaseModel): + type: Literal["text", "image_url"] + text: Optional[str] = None + image_url: Optional[ImageURL] = None + + class ChatMessage(BaseModel): role: Role - content: Optional[str] = None + content: Optional[Union[str, List[MultimodalInputItem]]] = None tool_calls: Optional[List[FunctionCall]] = None From c91655e952f5dec7cc5bb898478a4b0f40533045 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Thu, 6 Jun 2024 02:43:19 +0800 Subject: [PATCH 073/162] support train from scratch #4033 #4075 Former-commit-id: 1290b9d01077e62f8de7a23637daa2586cc82bfa --- src/llamafactory/hparams/model_args.py | 4 ++++ src/llamafactory/model/loader.py | 2 ++ 2 files changed, 6 insertions(+) diff --git a/src/llamafactory/hparams/model_args.py b/src/llamafactory/hparams/model_args.py index 7003cbee..a3b5b2a6 100644 --- a/src/llamafactory/hparams/model_args.py +++ b/src/llamafactory/hparams/model_args.py @@ -101,6 +101,10 @@ class ModelArguments: default=False, metadata={"help": "Whether or not to upcast the output of lm_head in fp32."}, ) + train_from_scratch: bool = field( + default=False, + metadata={"help": "Whether or not to randomly initialize the model weights."}, + ) infer_backend: Literal["huggingface", "vllm"] = field( 
default="huggingface", metadata={"help": "Backend engine used at inference."}, diff --git a/src/llamafactory/model/loader.py b/src/llamafactory/model/loader.py index 49b347d5..8f3309b3 100644 --- a/src/llamafactory/model/loader.py +++ b/src/llamafactory/model/loader.py @@ -131,6 +131,8 @@ def load_model( model = load_mod_pretrained_model(**init_kwargs) elif model_args.visual_inputs: model = AutoModelForVision2Seq.from_pretrained(**init_kwargs) + elif model_args.train_from_scratch: + model = AutoModelForCausalLM.from_config(config) else: model = AutoModelForCausalLM.from_pretrained(**init_kwargs) From c955a332d7b58dd029ff20e5ec97b58927c2d265 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Thu, 6 Jun 2024 02:53:27 +0800 Subject: [PATCH 074/162] add vllm_dtype arg #3387 #3717 Former-commit-id: a0dd3a6351bb78541d40fec1d2fc457d803c86a4 --- src/llamafactory/chat/vllm_engine.py | 7 ++----- src/llamafactory/hparams/model_args.py | 4 ++++ 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/llamafactory/chat/vllm_engine.py b/src/llamafactory/chat/vllm_engine.py index 8a067754..e193704a 100644 --- a/src/llamafactory/chat/vllm_engine.py +++ b/src/llamafactory/chat/vllm_engine.py @@ -3,7 +3,7 @@ from typing import TYPE_CHECKING, AsyncGenerator, AsyncIterator, Dict, List, Opt from ..data import get_template_and_fix_tokenizer from ..extras.logging import get_logger -from ..extras.misc import get_device_count, infer_optim_dtype +from ..extras.misc import get_device_count from ..extras.packages import is_vllm_available from ..model import load_config, load_tokenizer from ..model.utils.visual import LlavaMultiModalProjectorForYiVLForVLLM @@ -35,8 +35,6 @@ class VllmEngine(BaseEngine): generating_args: "GeneratingArguments", ) -> None: config = load_config(model_args) # may download model from ms hub - infer_dtype = infer_optim_dtype(model_dtype=getattr(config, "torch_dtype", None)) - infer_dtype = str(infer_dtype).split(".")[-1] self.can_generate = finetuning_args.stage == "sft" tokenizer_module = load_tokenizer(model_args) @@ -50,7 +48,7 @@ class VllmEngine(BaseEngine): "model": model_args.model_name_or_path, "trust_remote_code": True, "download_dir": model_args.cache_dir, - "dtype": infer_dtype, + "dtype": model_args.vllm_dtype, "max_model_len": model_args.vllm_maxlen, "tensor_parallel_size": get_device_count() or 1, "gpu_memory_utilization": model_args.vllm_gpu_util, @@ -70,7 +68,6 @@ class VllmEngine(BaseEngine): engine_args["image_input_shape"] = "1,3,{},{}".format(image_size, image_size) engine_args["image_feature_size"] = self.image_feature_size if getattr(config, "is_yi_vl_derived_model", None): - # bug in vllm 0.4.2, see: https://github.com/vllm-project/vllm/pull/4828 import vllm.model_executor.models.llava logger.info("Detected Yi-VL model, applying projector patch.") diff --git a/src/llamafactory/hparams/model_args.py b/src/llamafactory/hparams/model_args.py index a3b5b2a6..0434f426 100644 --- a/src/llamafactory/hparams/model_args.py +++ b/src/llamafactory/hparams/model_args.py @@ -125,6 +125,10 @@ class ModelArguments: default=8, metadata={"help": "Maximum rank of all LoRAs in the vLLM engine."}, ) + vllm_dtype: Literal["auto", "float16", "bfloat16", "float32"] = field( + default="auto", + metadata={"help": "Data type for model weights and activations in the vLLM engine."}, + ) offload_folder: str = field( default="offload", metadata={"help": "Path to offload model weights."}, From 79a39ca7031163a928dbc107bcf11fc41697ad6a Mon Sep 17 00:00:00 2001 From: hoshi-hiyouga 
Date: Thu, 6 Jun 2024 03:14:23 +0800 Subject: [PATCH 075/162] Update model_args.py Former-commit-id: 09c0afd94a8a5f5b45a61b32c983d50e1b9e2941 --- src/llamafactory/hparams/model_args.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/llamafactory/hparams/model_args.py b/src/llamafactory/hparams/model_args.py index 99c02850..024bc2f8 100644 --- a/src/llamafactory/hparams/model_args.py +++ b/src/llamafactory/hparams/model_args.py @@ -145,9 +145,9 @@ class ModelArguments: default=1, metadata={"help": "The file shard size (in GB) of the exported model."}, ) - export_device: Literal["cpu", "cuda", "npu"] = field( + export_device: Literal["cpu", "auto"] = field( default="cpu", - metadata={"help": "The device used in model export, use cuda to avoid addmm errors; use npu/cuda to speed up exporting."}, + metadata={"help": "The device used in model export, use `auto` to accelerate exporting."}, ) export_quantization_bit: Optional[int] = field( default=None, From f002409e6adf7046f67047ad24a41f00ea196075 Mon Sep 17 00:00:00 2001 From: hoshi-hiyouga Date: Thu, 6 Jun 2024 03:14:46 +0800 Subject: [PATCH 076/162] Update export.py Former-commit-id: 694833c1104d13929d4f181f014a121f25955dc5 --- src/llamafactory/webui/components/export.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llamafactory/webui/components/export.py b/src/llamafactory/webui/components/export.py index 791a833b..7e1493c8 100644 --- a/src/llamafactory/webui/components/export.py +++ b/src/llamafactory/webui/components/export.py @@ -89,7 +89,7 @@ def create_export_tab(engine: "Engine") -> Dict[str, "Component"]: export_size = gr.Slider(minimum=1, maximum=100, value=1, step=1) export_quantization_bit = gr.Dropdown(choices=["none"] + GPTQ_BITS, value="none") export_quantization_dataset = gr.Textbox(value="data/c4_demo.json") - export_device = gr.Radio(choices=["cpu", "cuda/npu"], value="cpu") + export_device = gr.Radio(choices=["cpu", "auto"], value="cpu") export_legacy_format = gr.Checkbox() with gr.Row(): From e9f9b1f250ef3a8a2f784d7a0ba696d3cde24891 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Thu, 6 Jun 2024 03:33:44 +0800 Subject: [PATCH 077/162] lint Former-commit-id: 9030501eaef97ea249347198272adf0d709503ec --- src/llamafactory/extras/env.py | 3 ++- src/llamafactory/extras/packages.py | 4 --- src/llamafactory/webui/components/train.py | 14 +++++------ src/llamafactory/webui/engine.py | 2 +- src/llamafactory/webui/utils.py | 29 +++++++++++----------- 5 files changed, 25 insertions(+), 27 deletions(-) diff --git a/src/llamafactory/extras/env.py b/src/llamafactory/extras/env.py index 059730f1..fdccf86b 100644 --- a/src/llamafactory/extras/env.py +++ b/src/llamafactory/extras/env.py @@ -6,9 +6,10 @@ import peft import torch import transformers import trl +from transformers.integrations import is_deepspeed_available from transformers.utils import is_bitsandbytes_available, is_torch_cuda_available, is_torch_npu_available -from .packages import is_deepspeed_available, is_vllm_available +from .packages import is_vllm_available VERSION = "0.7.2.dev0" diff --git a/src/llamafactory/extras/packages.py b/src/llamafactory/extras/packages.py index fe056e2d..4c9e6492 100644 --- a/src/llamafactory/extras/packages.py +++ b/src/llamafactory/extras/packages.py @@ -20,10 +20,6 @@ def _get_package_version(name: str) -> "Version": return version.parse("0.0.0") -def is_deepspeed_available(): - return _is_package_available("deepspeed") - - def is_fastapi_available(): return 
_is_package_available("fastapi") diff --git a/src/llamafactory/webui/components/train.py b/src/llamafactory/webui/components/train.py index eca8f9b3..74f8ef2a 100644 --- a/src/llamafactory/webui/components/train.py +++ b/src/llamafactory/webui/components/train.py @@ -6,7 +6,7 @@ from ...extras.constants import TRAINING_STAGES from ...extras.misc import get_device_count from ...extras.packages import is_gradio_available from ..common import DEFAULT_DATA_DIR, list_checkpoints, list_datasets -from ..utils import change_stage, check_output_dir, list_output_dirs, list_config_paths +from ..utils import change_stage, check_output_dir, list_config_paths, list_output_dirs from .data import create_preview_box @@ -257,7 +257,7 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]: with gr.Row(): with gr.Column(scale=3): with gr.Row(): - initial_dir = gr.Textbox(visible=False, interactive=False) + current_time = gr.Textbox(visible=False, interactive=False) output_dir = gr.Dropdown(allow_custom_value=True) config_path = gr.Dropdown(allow_custom_value=True) @@ -284,7 +284,7 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]: arg_load_btn=arg_load_btn, start_btn=start_btn, stop_btn=stop_btn, - initial_dir=initial_dir, + current_time=current_time, output_dir=output_dir, config_path=config_path, device_count=device_count, @@ -315,11 +315,11 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]: dataset.focus(list_datasets, [dataset_dir, training_stage], [dataset], queue=False) training_stage.change(change_stage, [training_stage], [dataset, packing], queue=False) reward_model.focus(list_checkpoints, [model_name, finetuning_type], [reward_model], queue=False) - model_name.change(list_output_dirs, [model_name, finetuning_type, initial_dir], [output_dir], queue=False) - finetuning_type.change(list_output_dirs, [model_name, finetuning_type, initial_dir], [output_dir], queue=False) + model_name.change(list_output_dirs, [model_name, finetuning_type, current_time], [output_dir], queue=False) + finetuning_type.change(list_output_dirs, [model_name, finetuning_type, current_time], [output_dir], queue=False) output_dir.change( - list_output_dirs, [model_name, finetuning_type, initial_dir], [output_dir], concurrency_limit=None + list_output_dirs, [model_name, finetuning_type, current_time], [output_dir], concurrency_limit=None ).then(check_output_dir, inputs=[lang, model_name, finetuning_type, output_dir], concurrency_limit=None) - config_path.change(list_config_paths, outputs=[config_path], concurrency_limit=None) + config_path.change(list_config_paths, [current_time], [config_path], queue=False) return elem_dict diff --git a/src/llamafactory/webui/engine.py b/src/llamafactory/webui/engine.py index 00877115..eb6142d3 100644 --- a/src/llamafactory/webui/engine.py +++ b/src/llamafactory/webui/engine.py @@ -41,7 +41,7 @@ class Engine: if not self.pure_chat: current_time = get_time() - init_dict["train.initial_dir"] = {"value": "train_{}".format(current_time)} + init_dict["train.current_time"] = {"value": current_time} init_dict["train.output_dir"] = {"value": "train_{}".format(current_time)} init_dict["train.config_path"] = {"value": "{}.yaml".format(current_time)} init_dict["eval.output_dir"] = {"value": "eval_{}".format(current_time)} diff --git a/src/llamafactory/webui/utils.py b/src/llamafactory/webui/utils.py index 0303aa31..23e62dca 100644 --- a/src/llamafactory/webui/utils.py +++ b/src/llamafactory/webui/utils.py @@ -174,11 +174,24 @@ def save_args(config_path: str, 
config_dict: Dict[str, Any]) -> str: return str(get_arg_save_path(config_path)) -def list_output_dirs(model_name: str, finetuning_type: str, initial_dir: str) -> "gr.Dropdown": +def list_config_paths(current_time: str) -> "gr.Dropdown": + r""" + Lists all the saved configuration files. + """ + config_files = ["{}.yaml".format(current_time)] + if os.path.isdir(DEFAULT_CONFIG_DIR): + for file_name in os.listdir(DEFAULT_CONFIG_DIR): + if file_name.endswith(".yaml"): + config_files.append(file_name) + + return gr.Dropdown(choices=config_files) + + +def list_output_dirs(model_name: str, finetuning_type: str, current_time: str) -> "gr.Dropdown": r""" Lists all the directories that can resume from. """ - output_dirs = [initial_dir] + output_dirs = ["train_{}".format(current_time)] if model_name: save_dir = get_save_dir(model_name, finetuning_type) if save_dir and os.path.isdir(save_dir): @@ -190,18 +203,6 @@ def list_output_dirs(model_name: str, finetuning_type: str, initial_dir: str) -> return gr.Dropdown(choices=output_dirs) -def list_config_paths() -> "gr.Dropdown": - """ - Lists all the saved configuration files that can be loaded. - """ - if os.path.exists(DEFAULT_CONFIG_DIR) and os.path.isdir(DEFAULT_CONFIG_DIR): - config_files = [file_name for file_name in os.listdir(DEFAULT_CONFIG_DIR) if file_name.endswith(".yaml")] - else: - config_files = [] - - return gr.Dropdown(choices=config_files) - - def check_output_dir(lang: str, model_name: str, finetuning_type: str, output_dir: str) -> None: r""" Check if output dir exists. From 8d9f3022d2b8832024d0755a636a6330a6153dc9 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Thu, 6 Jun 2024 03:42:50 +0800 Subject: [PATCH 078/162] add codestral 22B Former-commit-id: b011c7f527a57cb1d21c4e2c9631c2fb62bb835e --- src/llamafactory/extras/constants.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/llamafactory/extras/constants.py b/src/llamafactory/extras/constants.py index 4d7685c5..687e16cc 100644 --- a/src/llamafactory/extras/constants.py +++ b/src/llamafactory/extras/constants.py @@ -301,6 +301,16 @@ register_model_group( ) +register_model_group( + models={ + "Codestral-22B-v0.1-Chat": { + DownloadSource.DEFAULT: "mistralai/Codestral-22B-v0.1", + }, + }, + template="mistral", +) + + register_model_group( models={ "CommandR-35B-Chat": { From 990dd6d44c6c20e256e9e298b06fbbbcad9b0464 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Thu, 6 Jun 2024 03:53:28 +0800 Subject: [PATCH 079/162] lora modules: all by default Former-commit-id: 52c4ae87c7f4312704c31ef26b079b2c5b95ea5f --- README.md | 56 ++++++++--------- README_zh.md | 60 +++++++++---------- .../extras/fsdp_qlora/llama3_lora_sft.yaml | 2 +- examples/extras/loraplus/llama3_lora_sft.yaml | 2 +- examples/lora_multi_gpu/llama3_lora_sft.yaml | 2 +- .../lora_multi_gpu/llama3_lora_sft_ds.yaml | 2 +- .../lora_multi_npu/llama3_lora_sft_ds.yaml | 2 +- examples/lora_single_gpu/llama3_lora_dpo.yaml | 2 +- examples/lora_single_gpu/llama3_lora_kto.yaml | 2 +- examples/lora_single_gpu/llama3_lora_ppo.yaml | 2 +- .../lora_single_gpu/llama3_lora_pretrain.yaml | 2 +- .../lora_single_gpu/llama3_lora_reward.yaml | 2 +- examples/lora_single_gpu/llama3_lora_sft.yaml | 2 +- .../lora_single_gpu/llama3_preprocess.yaml | 2 +- .../lora_single_gpu/llava1_5_lora_sft.yaml | 2 +- .../llama3_lora_sft_aqlm.yaml | 2 +- .../qlora_single_gpu/llama3_lora_sft_awq.yaml | 2 +- .../llama3_lora_sft_bitsandbytes.yaml | 2 +- .../llama3_lora_sft_gptq.yaml | 2 +- 
src/llamafactory/extras/constants.py | 19 ------ src/llamafactory/hparams/finetuning_args.py | 15 +---- src/llamafactory/webui/common.py | 8 --- src/llamafactory/webui/runner.py | 4 +- 23 files changed, 78 insertions(+), 118 deletions(-) diff --git a/README.md b/README.md index 3eebf355..f3ced20e 100644 --- a/README.md +++ b/README.md @@ -149,34 +149,34 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/ ## Supported Models -| Model | Model size | Default module | Template | -| -------------------------------------------------------- | -------------------------------- | ----------------- | --------- | -| [Baichuan2](https://huggingface.co/baichuan-inc) | 7B/13B | W_pack | baichuan2 | -| [BLOOM](https://huggingface.co/bigscience) | 560M/1.1B/1.7B/3B/7.1B/176B | query_key_value | - | -| [BLOOMZ](https://huggingface.co/bigscience) | 560M/1.1B/1.7B/3B/7.1B/176B | query_key_value | - | -| [ChatGLM3](https://huggingface.co/THUDM) | 6B | query_key_value | chatglm3 | -| [Command-R](https://huggingface.co/CohereForAI) | 35B/104B | q_proj,v_proj | cohere | -| [DeepSeek (MoE)](https://huggingface.co/deepseek-ai) | 7B/16B/67B/236B | q_proj,v_proj | deepseek | -| [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | query_key_value | falcon | -| [Gemma/CodeGemma](https://huggingface.co/google) | 2B/7B | q_proj,v_proj | gemma | -| [GLM4](https://huggingface.co/THUDM) | 9B | query_key_value | glm4 | -| [InternLM2](https://huggingface.co/internlm) | 7B/20B | wqkv | intern2 | -| [LLaMA](https://github.com/facebookresearch/llama) | 7B/13B/33B/65B | q_proj,v_proj | - | -| [LLaMA-2](https://huggingface.co/meta-llama) | 7B/13B/70B | q_proj,v_proj | llama2 | -| [LLaMA-3](https://huggingface.co/meta-llama) | 8B/70B | q_proj,v_proj | llama3 | -| [LLaVA-1.5](https://huggingface.co/llava-hf) | 7B/13B | q_proj,v_proj | vicuna | -| [Mistral/Mixtral](https://huggingface.co/mistralai) | 7B/8x7B/8x22B | q_proj,v_proj | mistral | -| [OLMo](https://huggingface.co/allenai) | 1B/7B | q_proj,v_proj | - | -| [PaliGemma](https://huggingface.co/google) | 3B | q_proj,v_proj | gemma | -| [Phi-1.5/2](https://huggingface.co/microsoft) | 1.3B/2.7B | q_proj,v_proj | - | -| [Phi-3](https://huggingface.co/microsoft) | 4B/7B/14B | qkv_proj | phi | -| [Qwen](https://huggingface.co/Qwen) | 1.8B/7B/14B/72B | c_attn | qwen | -| [Qwen1.5 (Code/MoE)](https://huggingface.co/Qwen) | 0.5B/1.8B/4B/7B/14B/32B/72B/110B | q_proj,v_proj | qwen | -| [StarCoder2](https://huggingface.co/bigcode) | 3B/7B/15B | q_proj,v_proj | - | -| [XVERSE](https://huggingface.co/xverse) | 7B/13B/65B | q_proj,v_proj | xverse | -| [Yi (1/1.5)](https://huggingface.co/01-ai) | 6B/9B/34B | q_proj,v_proj | yi | -| [Yi-VL](https://huggingface.co/01-ai) | 6B/34B | q_proj,v_proj | yi_vl | -| [Yuan](https://huggingface.co/IEITYuan) | 2B/51B/102B | q_proj,v_proj | yuan | +| Model | Model size | Template | +| -------------------------------------------------------- | -------------------------------- | --------- | +| [Baichuan2](https://huggingface.co/baichuan-inc) | 7B/13B | baichuan2 | +| [BLOOM](https://huggingface.co/bigscience) | 560M/1.1B/1.7B/3B/7.1B/176B | - | +| [BLOOMZ](https://huggingface.co/bigscience) | 560M/1.1B/1.7B/3B/7.1B/176B | - | +| [ChatGLM3](https://huggingface.co/THUDM) | 6B | chatglm3 | +| [Command-R](https://huggingface.co/CohereForAI) | 35B/104B | cohere | +| [DeepSeek (MoE)](https://huggingface.co/deepseek-ai) | 7B/16B/67B/236B | deepseek | +| [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | falcon | +| 
[Gemma/CodeGemma](https://huggingface.co/google) | 2B/7B | gemma | +| [GLM4](https://huggingface.co/THUDM) | 9B | glm4 | +| [InternLM2](https://huggingface.co/internlm) | 7B/20B | intern2 | +| [LLaMA](https://github.com/facebookresearch/llama) | 7B/13B/33B/65B | - | +| [LLaMA-2](https://huggingface.co/meta-llama) | 7B/13B/70B | llama2 | +| [LLaMA-3](https://huggingface.co/meta-llama) | 8B/70B | llama3 | +| [LLaVA-1.5](https://huggingface.co/llava-hf) | 7B/13B | vicuna | +| [Mistral/Mixtral](https://huggingface.co/mistralai) | 7B/8x7B/8x22B | mistral | +| [OLMo](https://huggingface.co/allenai) | 1B/7B | - | +| [PaliGemma](https://huggingface.co/google) | 3B | gemma | +| [Phi-1.5/2](https://huggingface.co/microsoft) | 1.3B/2.7B | - | +| [Phi-3](https://huggingface.co/microsoft) | 4B/7B/14B | phi | +| [Qwen](https://huggingface.co/Qwen) | 1.8B/7B/14B/72B | qwen | +| [Qwen1.5 (Code/MoE)](https://huggingface.co/Qwen) | 0.5B/1.8B/4B/7B/14B/32B/72B/110B | qwen | +| [StarCoder2](https://huggingface.co/bigcode) | 3B/7B/15B | - | +| [XVERSE](https://huggingface.co/xverse) | 7B/13B/65B | xverse | +| [Yi (1/1.5)](https://huggingface.co/01-ai) | 6B/9B/34B | yi | +| [Yi-VL](https://huggingface.co/01-ai) | 6B/34B | yi_vl | +| [Yuan](https://huggingface.co/IEITYuan) | 2B/51B/102B | yuan | > [!NOTE] > **Default module** is used for the `lora_target` argument, you can use `lora_target: all` to specify all the available modules for better convergence. diff --git a/README_zh.md b/README_zh.md index 09a7f330..982c0123 100644 --- a/README_zh.md +++ b/README_zh.md @@ -149,41 +149,39 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/ec36a9dd-37f4-4f72-81bd ## 模型 -| 模型名 | 模型大小 | 默认模块 | Template | -| -------------------------------------------------------- | -------------------------------- | ----------------- | --------- | -| [Baichuan2](https://huggingface.co/baichuan-inc) | 7B/13B | W_pack | baichuan2 | -| [BLOOM](https://huggingface.co/bigscience) | 560M/1.1B/1.7B/3B/7.1B/176B | query_key_value | - | -| [BLOOMZ](https://huggingface.co/bigscience) | 560M/1.1B/1.7B/3B/7.1B/176B | query_key_value | - | -| [ChatGLM3](https://huggingface.co/THUDM) | 6B | query_key_value | chatglm3 | -| [Command-R](https://huggingface.co/CohereForAI) | 35B/104B | q_proj,v_proj | cohere | -| [DeepSeek (MoE)](https://huggingface.co/deepseek-ai) | 7B/16B/67B/236B | q_proj,v_proj | deepseek | -| [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | query_key_value | falcon | -| [Gemma/CodeGemma](https://huggingface.co/google) | 2B/7B | q_proj,v_proj | gemma | -| [GLM4](https://huggingface.co/THUDM) | 9B | query_key_value | glm4 | -| [InternLM2](https://huggingface.co/internlm) | 7B/20B | wqkv | intern2 | -| [LLaMA](https://github.com/facebookresearch/llama) | 7B/13B/33B/65B | q_proj,v_proj | - | -| [LLaMA-2](https://huggingface.co/meta-llama) | 7B/13B/70B | q_proj,v_proj | llama2 | -| [LLaMA-3](https://huggingface.co/meta-llama) | 8B/70B | q_proj,v_proj | llama3 | -| [LLaVA-1.5](https://huggingface.co/llava-hf) | 7B/13B | q_proj,v_proj | vicuna | -| [Mistral/Mixtral](https://huggingface.co/mistralai) | 7B/8x7B/8x22B | q_proj,v_proj | mistral | -| [OLMo](https://huggingface.co/allenai) | 1B/7B | q_proj,v_proj | - | -| [PaliGemma](https://huggingface.co/google) | 3B | q_proj,v_proj | gemma | -| [Phi-1.5/2](https://huggingface.co/microsoft) | 1.3B/2.7B | q_proj,v_proj | - | -| [Phi-3](https://huggingface.co/microsoft) | 4B/7B/14B | qkv_proj | phi | -| [Qwen](https://huggingface.co/Qwen) | 1.8B/7B/14B/72B | c_attn | qwen | -| 
[Qwen1.5 (Code/MoE)](https://huggingface.co/Qwen) | 0.5B/1.8B/4B/7B/14B/32B/72B/110B | q_proj,v_proj | qwen | -| [StarCoder2](https://huggingface.co/bigcode) | 3B/7B/15B | q_proj,v_proj | - | -| [XVERSE](https://huggingface.co/xverse) | 7B/13B/65B | q_proj,v_proj | xverse | -| [Yi (1/1.5)](https://huggingface.co/01-ai) | 6B/9B/34B | q_proj,v_proj | yi | -| [Yi-VL](https://huggingface.co/01-ai) | 6B/34B | q_proj,v_proj | yi_vl | -| [Yuan](https://huggingface.co/IEITYuan) | 2B/51B/102B | q_proj,v_proj | yuan | +| 模型名 | 模型大小 | Template | +| -------------------------------------------------------- | -------------------------------- | --------- | +| [Baichuan2](https://huggingface.co/baichuan-inc) | 7B/13B | baichuan2 | +| [BLOOM](https://huggingface.co/bigscience) | 560M/1.1B/1.7B/3B/7.1B/176B | - | +| [BLOOMZ](https://huggingface.co/bigscience) | 560M/1.1B/1.7B/3B/7.1B/176B | - | +| [ChatGLM3](https://huggingface.co/THUDM) | 6B | chatglm3 | +| [Command-R](https://huggingface.co/CohereForAI) | 35B/104B | cohere | +| [DeepSeek (MoE)](https://huggingface.co/deepseek-ai) | 7B/16B/67B/236B | deepseek | +| [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | falcon | +| [Gemma/CodeGemma](https://huggingface.co/google) | 2B/7B | gemma | +| [GLM4](https://huggingface.co/THUDM) | 9B | glm4 | +| [InternLM2](https://huggingface.co/internlm) | 7B/20B | intern2 | +| [LLaMA](https://github.com/facebookresearch/llama) | 7B/13B/33B/65B | - | +| [LLaMA-2](https://huggingface.co/meta-llama) | 7B/13B/70B | llama2 | +| [LLaMA-3](https://huggingface.co/meta-llama) | 8B/70B | llama3 | +| [LLaVA-1.5](https://huggingface.co/llava-hf) | 7B/13B | vicuna | +| [Mistral/Mixtral](https://huggingface.co/mistralai) | 7B/8x7B/8x22B | mistral | +| [OLMo](https://huggingface.co/allenai) | 1B/7B | - | +| [PaliGemma](https://huggingface.co/google) | 3B | gemma | +| [Phi-1.5/2](https://huggingface.co/microsoft) | 1.3B/2.7B | - | +| [Phi-3](https://huggingface.co/microsoft) | 4B/7B/14B | phi | +| [Qwen](https://huggingface.co/Qwen) | 1.8B/7B/14B/72B | qwen | +| [Qwen1.5 (Code/MoE)](https://huggingface.co/Qwen) | 0.5B/1.8B/4B/7B/14B/32B/72B/110B | qwen | +| [StarCoder2](https://huggingface.co/bigcode) | 3B/7B/15B | - | +| [XVERSE](https://huggingface.co/xverse) | 7B/13B/65B | xverse | +| [Yi (1/1.5)](https://huggingface.co/01-ai) | 6B/9B/34B | yi | +| [Yi-VL](https://huggingface.co/01-ai) | 6B/34B | yi_vl | +| [Yuan](https://huggingface.co/IEITYuan) | 2B/51B/102B | yuan | > [!NOTE] -> **默认模块**应作为 `lora_target` 参数的默认值,可使用 `lora_target: all` 参数指定全部模块以取得更好的效果。 -> > 对于所有“基座”(Base)模型,`template` 参数可以是 `default`, `alpaca`, `vicuna` 等任意值。但“对话”(Instruct/Chat)模型请务必使用**对应的模板**。 > -> 请务必在训练和推理时使用**完全一致**的模板。 +> 请务必在训练和推理时采用**完全一致**的模板。 项目所支持模型的完整列表请参阅 [constants.py](src/llamafactory/extras/constants.py)。 diff --git a/examples/extras/fsdp_qlora/llama3_lora_sft.yaml b/examples/extras/fsdp_qlora/llama3_lora_sft.yaml index 348459b8..084269ef 100644 --- a/examples/extras/fsdp_qlora/llama3_lora_sft.yaml +++ b/examples/extras/fsdp_qlora/llama3_lora_sft.yaml @@ -6,7 +6,7 @@ quantization_bit: 4 stage: sft do_train: true finetuning_type: lora -lora_target: q_proj,v_proj +lora_target: all ### ddp ddp_timeout: 180000000 diff --git a/examples/extras/loraplus/llama3_lora_sft.yaml b/examples/extras/loraplus/llama3_lora_sft.yaml index 960f613e..1ba654ec 100644 --- a/examples/extras/loraplus/llama3_lora_sft.yaml +++ b/examples/extras/loraplus/llama3_lora_sft.yaml @@ -5,7 +5,7 @@ model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct stage: sft 
do_train: true finetuning_type: lora -lora_target: q_proj,v_proj +lora_target: all loraplus_lr_ratio: 16.0 ### dataset diff --git a/examples/lora_multi_gpu/llama3_lora_sft.yaml b/examples/lora_multi_gpu/llama3_lora_sft.yaml index 9be3c780..348e53b9 100644 --- a/examples/lora_multi_gpu/llama3_lora_sft.yaml +++ b/examples/lora_multi_gpu/llama3_lora_sft.yaml @@ -5,7 +5,7 @@ model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct stage: sft do_train: true finetuning_type: lora -lora_target: q_proj,v_proj +lora_target: all ### ddp ddp_timeout: 180000000 diff --git a/examples/lora_multi_gpu/llama3_lora_sft_ds.yaml b/examples/lora_multi_gpu/llama3_lora_sft_ds.yaml index 41152243..1c432fa7 100644 --- a/examples/lora_multi_gpu/llama3_lora_sft_ds.yaml +++ b/examples/lora_multi_gpu/llama3_lora_sft_ds.yaml @@ -5,7 +5,7 @@ model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct stage: sft do_train: true finetuning_type: lora -lora_target: q_proj,v_proj +lora_target: all ### ddp ddp_timeout: 180000000 diff --git a/examples/lora_multi_npu/llama3_lora_sft_ds.yaml b/examples/lora_multi_npu/llama3_lora_sft_ds.yaml index 1ed24d04..a0ec8aa1 100644 --- a/examples/lora_multi_npu/llama3_lora_sft_ds.yaml +++ b/examples/lora_multi_npu/llama3_lora_sft_ds.yaml @@ -5,7 +5,7 @@ model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct stage: sft do_train: true finetuning_type: lora -lora_target: q_proj,v_proj +lora_target: all ### ddp ddp_timeout: 180000000 diff --git a/examples/lora_single_gpu/llama3_lora_dpo.yaml b/examples/lora_single_gpu/llama3_lora_dpo.yaml index 158c9e04..78344330 100644 --- a/examples/lora_single_gpu/llama3_lora_dpo.yaml +++ b/examples/lora_single_gpu/llama3_lora_dpo.yaml @@ -5,7 +5,7 @@ model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct stage: dpo do_train: true finetuning_type: lora -lora_target: q_proj,v_proj +lora_target: all pref_beta: 0.1 pref_loss: sigmoid # [sigmoid (dpo), orpo, simpo] diff --git a/examples/lora_single_gpu/llama3_lora_kto.yaml b/examples/lora_single_gpu/llama3_lora_kto.yaml index ead221e9..d5234c0a 100644 --- a/examples/lora_single_gpu/llama3_lora_kto.yaml +++ b/examples/lora_single_gpu/llama3_lora_kto.yaml @@ -5,7 +5,7 @@ model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct stage: kto do_train: true finetuning_type: lora -lora_target: q_proj,v_proj +lora_target: all ### dataset dataset: kto_en_demo diff --git a/examples/lora_single_gpu/llama3_lora_ppo.yaml b/examples/lora_single_gpu/llama3_lora_ppo.yaml index 19e7ccb3..98c842f9 100644 --- a/examples/lora_single_gpu/llama3_lora_ppo.yaml +++ b/examples/lora_single_gpu/llama3_lora_ppo.yaml @@ -6,7 +6,7 @@ reward_model: saves/llama3-8b/lora/reward stage: ppo do_train: true finetuning_type: lora -lora_target: q_proj,v_proj +lora_target: all ### dataset dataset: identity,alpaca_en_demo diff --git a/examples/lora_single_gpu/llama3_lora_pretrain.yaml b/examples/lora_single_gpu/llama3_lora_pretrain.yaml index 9167a893..db435ca9 100644 --- a/examples/lora_single_gpu/llama3_lora_pretrain.yaml +++ b/examples/lora_single_gpu/llama3_lora_pretrain.yaml @@ -5,7 +5,7 @@ model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct stage: pt do_train: true finetuning_type: lora -lora_target: q_proj,v_proj +lora_target: all ### dataset dataset: c4_demo diff --git a/examples/lora_single_gpu/llama3_lora_reward.yaml b/examples/lora_single_gpu/llama3_lora_reward.yaml index 91663057..1ce42ea4 100644 --- a/examples/lora_single_gpu/llama3_lora_reward.yaml +++ b/examples/lora_single_gpu/llama3_lora_reward.yaml @@ -5,7 +5,7 @@ model_name_or_path: 
meta-llama/Meta-Llama-3-8B-Instruct stage: rm do_train: true finetuning_type: lora -lora_target: q_proj,v_proj +lora_target: all ### dataset dataset: dpo_en_demo diff --git a/examples/lora_single_gpu/llama3_lora_sft.yaml b/examples/lora_single_gpu/llama3_lora_sft.yaml index cc93d05a..651b636f 100644 --- a/examples/lora_single_gpu/llama3_lora_sft.yaml +++ b/examples/lora_single_gpu/llama3_lora_sft.yaml @@ -5,7 +5,7 @@ model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct stage: sft do_train: true finetuning_type: lora -lora_target: q_proj,v_proj +lora_target: all ### dataset dataset: identity,alpaca_en_demo diff --git a/examples/lora_single_gpu/llama3_preprocess.yaml b/examples/lora_single_gpu/llama3_preprocess.yaml index 86dad37b..34bb9efc 100644 --- a/examples/lora_single_gpu/llama3_preprocess.yaml +++ b/examples/lora_single_gpu/llama3_preprocess.yaml @@ -5,7 +5,7 @@ model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct stage: sft do_train: true finetuning_type: lora -lora_target: q_proj,v_proj +lora_target: all ### dataset dataset: identity,alpaca_en_demo diff --git a/examples/lora_single_gpu/llava1_5_lora_sft.yaml b/examples/lora_single_gpu/llava1_5_lora_sft.yaml index 95c1d40d..df510a93 100644 --- a/examples/lora_single_gpu/llava1_5_lora_sft.yaml +++ b/examples/lora_single_gpu/llava1_5_lora_sft.yaml @@ -6,7 +6,7 @@ visual_inputs: true stage: sft do_train: true finetuning_type: lora -lora_target: q_proj,v_proj +lora_target: all ### dataset dataset: mllm_demo diff --git a/examples/qlora_single_gpu/llama3_lora_sft_aqlm.yaml b/examples/qlora_single_gpu/llama3_lora_sft_aqlm.yaml index 23301de5..d54d6af6 100644 --- a/examples/qlora_single_gpu/llama3_lora_sft_aqlm.yaml +++ b/examples/qlora_single_gpu/llama3_lora_sft_aqlm.yaml @@ -5,7 +5,7 @@ model_name_or_path: ISTA-DASLab/Meta-Llama-3-8B-Instruct-AQLM-2Bit-1x16 stage: sft do_train: true finetuning_type: lora -lora_target: q_proj,v_proj +lora_target: all ### dataset dataset: identity,alpaca_en_demo diff --git a/examples/qlora_single_gpu/llama3_lora_sft_awq.yaml b/examples/qlora_single_gpu/llama3_lora_sft_awq.yaml index 40a290a3..5cef178a 100644 --- a/examples/qlora_single_gpu/llama3_lora_sft_awq.yaml +++ b/examples/qlora_single_gpu/llama3_lora_sft_awq.yaml @@ -5,7 +5,7 @@ model_name_or_path: TechxGenus/Meta-Llama-3-8B-Instruct-AWQ stage: sft do_train: true finetuning_type: lora -lora_target: q_proj,v_proj +lora_target: all ### dataset dataset: identity,alpaca_en_demo diff --git a/examples/qlora_single_gpu/llama3_lora_sft_bitsandbytes.yaml b/examples/qlora_single_gpu/llama3_lora_sft_bitsandbytes.yaml index 6652d8cf..b308dcab 100644 --- a/examples/qlora_single_gpu/llama3_lora_sft_bitsandbytes.yaml +++ b/examples/qlora_single_gpu/llama3_lora_sft_bitsandbytes.yaml @@ -6,7 +6,7 @@ quantization_bit: 4 stage: sft do_train: true finetuning_type: lora -lora_target: q_proj,v_proj +lora_target: all ### dataset dataset: identity,alpaca_en_demo diff --git a/examples/qlora_single_gpu/llama3_lora_sft_gptq.yaml b/examples/qlora_single_gpu/llama3_lora_sft_gptq.yaml index 323ea7c6..b950042e 100644 --- a/examples/qlora_single_gpu/llama3_lora_sft_gptq.yaml +++ b/examples/qlora_single_gpu/llama3_lora_sft_gptq.yaml @@ -5,7 +5,7 @@ model_name_or_path: TechxGenus/Meta-Llama-3-8B-Instruct-GPTQ stage: sft do_train: true finetuning_type: lora -lora_target: q_proj,v_proj +lora_target: all ### dataset dataset: identity,alpaca_en_demo diff --git a/src/llamafactory/extras/constants.py b/src/llamafactory/extras/constants.py index 687e16cc..4d9cb26d 100644 --- 
a/src/llamafactory/extras/constants.py +++ b/src/llamafactory/extras/constants.py @@ -20,8 +20,6 @@ CHOICES = ["A", "B", "C", "D"] DATA_CONFIG = "dataset_info.json" -DEFAULT_MODULE = defaultdict(str) - DEFAULT_TEMPLATE = defaultdict(str) FILEEXT2TYPE = { @@ -80,7 +78,6 @@ class DownloadSource(str, Enum): def register_model_group( models: Dict[str, Dict[DownloadSource, str]], - module: Optional[str] = None, template: Optional[str] = None, vision: bool = False, ) -> None: @@ -91,8 +88,6 @@ def register_model_group( else: assert prefix == name.split("-")[0], "prefix should be identical." SUPPORTED_MODELS[name] = path - if module is not None: - DEFAULT_MODULE[prefix] = module if template is not None: DEFAULT_TEMPLATE[prefix] = template if vision: @@ -127,7 +122,6 @@ register_model_group( DownloadSource.MODELSCOPE: "baichuan-inc/Baichuan-13B-Chat", }, }, - module="W_pack", template="baichuan", ) @@ -151,7 +145,6 @@ register_model_group( DownloadSource.MODELSCOPE: "baichuan-inc/Baichuan2-13B-Chat", }, }, - module="W_pack", template="baichuan2", ) @@ -171,7 +164,6 @@ register_model_group( DownloadSource.MODELSCOPE: "AI-ModelScope/bloom-7b1", }, }, - module="query_key_value", ) @@ -190,7 +182,6 @@ register_model_group( DownloadSource.MODELSCOPE: "AI-ModelScope/bloomz-7b1-mt", }, }, - module="query_key_value", ) @@ -229,7 +220,6 @@ register_model_group( DownloadSource.MODELSCOPE: "ZhipuAI/chatglm2-6b", } }, - module="query_key_value", template="chatglm2", ) @@ -245,7 +235,6 @@ register_model_group( DownloadSource.MODELSCOPE: "ZhipuAI/chatglm3-6b", }, }, - module="query_key_value", template="chatglm3", ) @@ -344,7 +333,6 @@ register_model_group( DownloadSource.MODELSCOPE: "AI-ModelScope/dbrx-instruct", }, }, - module="Wqkv", template="dbrx", ) @@ -463,7 +451,6 @@ register_model_group( DownloadSource.MODELSCOPE: "modelscope/falcon-180B-chat", }, }, - module="query_key_value", template="falcon", ) @@ -512,7 +499,6 @@ register_model_group( DownloadSource.MODELSCOPE: "ZhipuAI/glm-4-9b-chat-1m", }, }, - module="query_key_value", template="glm4", ) @@ -559,7 +545,6 @@ register_model_group( DownloadSource.MODELSCOPE: "Shanghai_AI_Laboratory/internlm2-chat-20b", }, }, - module="wqkv", template="intern2", ) @@ -581,7 +566,6 @@ register_model_group( DownloadSource.MODELSCOPE: "DeepLang/LingoWhale-8B", } }, - module="qkv_proj", ) @@ -868,7 +852,6 @@ register_model_group( DownloadSource.MODELSCOPE: "LLM-Research/Phi-3-medium-128k-instruct", }, }, - module="qkv_proj", template="phi", ) @@ -940,7 +923,6 @@ register_model_group( DownloadSource.MODELSCOPE: "qwen/Qwen-72B-Chat-Int4", }, }, - module="c_attn", template="qwen", ) @@ -1153,7 +1135,6 @@ register_model_group( DownloadSource.MODELSCOPE: "TeleAI/TeleChat-12B-v2", }, }, - module="query,key_value", template="telechat", ) diff --git a/src/llamafactory/hparams/finetuning_args.py b/src/llamafactory/hparams/finetuning_args.py index b9322f18..08af31e4 100644 --- a/src/llamafactory/hparams/finetuning_args.py +++ b/src/llamafactory/hparams/finetuning_args.py @@ -24,12 +24,7 @@ class FreezeArguments: "help": ( "Name(s) of trainable modules for freeze (partial-parameter) fine-tuning. " "Use commas to separate multiple modules. " - "Use `all` to specify all the available modules. " - "LLaMA choices: [`mlp`, `self_attn`], " - "BLOOM & Falcon & ChatGLM choices: [`mlp`, `self_attention`], " - "Qwen choices: [`mlp`, `attn`], " - "InternLM2 choices: [`feed_forward`, `attention`], " - "Others choices: the same as LLaMA." + "Use `all` to specify all the available modules." 
) }, ) @@ -79,13 +74,7 @@ class LoraArguments: "help": ( "Name(s) of target modules to apply LoRA. " "Use commas to separate multiple modules. " - "Use `all` to specify all the linear modules. " - "LLaMA choices: [`q_proj`, `k_proj`, `v_proj`, `o_proj`, `gate_proj`, `up_proj`, `down_proj`], " - "BLOOM & Falcon & ChatGLM choices: [`query_key_value`, `dense`, `dense_h_to_4h`, `dense_4h_to_h`], " - "Baichuan choices: [`W_pack`, `o_proj`, `gate_proj`, `up_proj`, `down_proj`], " - "Qwen choices: [`c_attn`, `attn.c_proj`, `w1`, `w2`, `mlp.c_proj`], " - "InternLM2 choices: [`wqkv`, `wo`, `w1`, `w2`, `w3`], " - "Others choices: the same as LLaMA." + "Use `all` to specify all the linear modules." ) }, ) diff --git a/src/llamafactory/webui/common.py b/src/llamafactory/webui/common.py index 62004bce..304b56a5 100644 --- a/src/llamafactory/webui/common.py +++ b/src/llamafactory/webui/common.py @@ -8,7 +8,6 @@ from yaml import safe_dump, safe_load from ..extras.constants import ( CHECKPOINT_NAMES, DATA_CONFIG, - DEFAULT_MODULE, DEFAULT_TEMPLATE, PEFT_METHODS, STAGES_USE_PAIR_DATA, @@ -118,13 +117,6 @@ def get_model_info(model_name: str) -> Tuple[str, str, bool]: return get_model_path(model_name), get_template(model_name), get_visual(model_name) -def get_module(model_name: str) -> str: - r""" - Gets the LoRA modules of this model. - """ - return DEFAULT_MODULE.get(get_prefix(model_name), "all") - - def get_template(model_name: str) -> str: r""" Gets the template name if the model is a chat model. diff --git a/src/llamafactory/webui/runner.py b/src/llamafactory/webui/runner.py index 4ec1531a..e8fdd129 100644 --- a/src/llamafactory/webui/runner.py +++ b/src/llamafactory/webui/runner.py @@ -8,7 +8,7 @@ from transformers.trainer import TRAINING_ARGS_NAME from ..extras.constants import PEFT_METHODS, TRAINING_STAGES from ..extras.misc import is_gpu_or_npu_available, torch_gc from ..extras.packages import is_gradio_available -from .common import DEFAULT_CACHE_DIR, get_module, get_save_dir, load_config +from .common import DEFAULT_CACHE_DIR, get_save_dir, load_config from .locales import ALERTS from .utils import abort_leaf_process, gen_cmd, get_eval_results, get_trainer_info, load_args, save_args, save_cmd @@ -159,7 +159,7 @@ class Runner: args["create_new_adapter"] = get("train.create_new_adapter") args["use_rslora"] = get("train.use_rslora") args["use_dora"] = get("train.use_dora") - args["lora_target"] = get("train.lora_target") or get_module(model_name) + args["lora_target"] = get("train.lora_target") or "all" args["additional_target"] = get("train.additional_target") or None if args["use_llama_pro"]: From ed8dc92e3983f3ddd1e1b71ed3848e76bc102400 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Thu, 6 Jun 2024 16:25:42 +0800 Subject: [PATCH 080/162] update readme Former-commit-id: fb1f709af5199976e63d7188e088e33c75d19bfe --- README.md | 20 +++++++++----------- README_zh.md | 18 +++++++++--------- 2 files changed, 18 insertions(+), 20 deletions(-) diff --git a/README.md b/README.md index f3ced20e..1f9ff5a2 100644 --- a/README.md +++ b/README.md @@ -179,8 +179,6 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/ | [Yuan](https://huggingface.co/IEITYuan) | 2B/51B/102B | yuan | > [!NOTE] -> **Default module** is used for the `lora_target` argument, you can use `lora_target: all` to specify all the available modules for better convergence. -> > For the "base" models, the `template` argument can be chosen from `default`, `alpaca`, `vicuna` etc. 
But make sure to use the **corresponding template** for the "instruct/chat" models. > > Remember to use the **SAME** template in training and inference. @@ -291,21 +289,21 @@ huggingface-cli login | Mandatory | Minimum | Recommend | | ------------ | ------- | --------- | -| python | 3.8 | 3.10 | -| torch | 1.13.1 | 2.2.0 | -| transformers | 4.37.2 | 4.41.0 | -| datasets | 2.14.3 | 2.19.1 | -| accelerate | 0.27.2 | 0.30.1 | -| peft | 0.9.0 | 0.11.1 | -| trl | 0.8.2 | 0.8.6 | +| python | 3.8 | 3.11 | +| torch | 1.13.1 | 2.3.0 | +| transformers | 4.41.2 | 4.41.2 | +| datasets | 2.16.0 | 2.19.2 | +| accelerate | 0.30.1 | 0.30.1 | +| peft | 0.11.1 | 0.11.1 | +| trl | 0.9.3 | 0.9.3 | | Optional | Minimum | Recommend | | ------------ | ------- | --------- | | CUDA | 11.6 | 12.2 | | deepspeed | 0.10.0 | 0.14.0 | | bitsandbytes | 0.39.0 | 0.43.1 | -| vllm | 0.4.0 | 0.4.2 | -| flash-attn | 2.3.0 | 2.5.8 | +| vllm | 0.4.3 | 0.4.3 | +| flash-attn | 2.3.0 | 2.5.9 | ### Hardware Requirement diff --git a/README_zh.md b/README_zh.md index 982c0123..8422e667 100644 --- a/README_zh.md +++ b/README_zh.md @@ -289,21 +289,21 @@ huggingface-cli login | 必需项 | 至少 | 推荐 | | ------------ | ------- | --------- | -| python | 3.8 | 3.10 | -| torch | 1.13.1 | 2.2.0 | -| transformers | 4.37.2 | 4.41.0 | -| datasets | 2.14.3 | 2.19.1 | -| accelerate | 0.27.2 | 0.30.1 | -| peft | 0.9.0 | 0.11.1 | -| trl | 0.8.2 | 0.8.6 | +| python | 3.8 | 3.11 | +| torch | 1.13.1 | 2.3.0 | +| transformers | 4.41.2 | 4.41.2 | +| datasets | 2.16.0 | 2.19.2 | +| accelerate | 0.30.1 | 0.30.1 | +| peft | 0.11.1 | 0.11.1 | +| trl | 0.9.3 | 0.9.3 | | 可选项 | 至少 | 推荐 | | ------------ | ------- | --------- | | CUDA | 11.6 | 12.2 | | deepspeed | 0.10.0 | 0.14.0 | | bitsandbytes | 0.39.0 | 0.43.1 | -| vllm | 0.4.0 | 0.4.2 | -| flash-attn | 2.3.0 | 2.5.8 | +| vllm | 0.4.3 | 0.4.3 | +| flash-attn | 2.3.0 | 2.5.9 | ### 硬件依赖 From a4e1fcc881127f717440a2d7165c7339813e3c82 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Thu, 6 Jun 2024 16:59:18 +0800 Subject: [PATCH 081/162] update readme Former-commit-id: cc331fa2d28afe081937c50ea83d63add21d4e3a --- README.md | 4 ++-- README_zh.md | 4 ++-- setup.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 1f9ff5a2..77684757 100644 --- a/README.md +++ b/README.md @@ -329,7 +329,7 @@ huggingface-cli login ```bash git clone --depth 1 https://github.com/hiyouga/LLaMA-Factory.git cd LLaMA-Factory -pip install -e .[torch,metrics] +pip install -e '.[torch,metrics]' ``` Extra dependencies available: torch, torch_npu, metrics, deepspeed, bitsandbytes, vllm, galore, badam, gptq, awq, aqlm, qwen, modelscope, quality @@ -353,7 +353,7 @@ To enable FlashAttention-2 on the Windows platform, you need to install the prec Join [NPU user group](assets/wechat_npu.jpg). -To install LLaMA Factory on Ascend NPU devices, please specify extra dependencies: `pip install -e .[torch_npu,metrics]`. Additionally, you need to install the **[Ascend CANN Toolkit and Kernels](https://www.hiascend.com/developer/download/community/result?module=cann)**. Please follow the [installation tutorial](https://www.hiascend.com/document/detail/en/CANNCommunityEdition/600alphaX/softwareinstall/instg/atlasdeploy_03_0031.html) or use the following commands: +To install LLaMA Factory on Ascend NPU devices, please specify extra dependencies: `pip install -e '.[torch-npu,metrics]'`. 
Additionally, you need to install the **[Ascend CANN Toolkit and Kernels](https://www.hiascend.com/developer/download/community/result?module=cann)**. Please follow the [installation tutorial](https://www.hiascend.com/document/detail/en/CANNCommunityEdition/600alphaX/softwareinstall/instg/atlasdeploy_03_0031.html) or use the following commands: ```bash # replace the url according to your CANN version and devices diff --git a/README_zh.md b/README_zh.md index 8422e667..da5ff079 100644 --- a/README_zh.md +++ b/README_zh.md @@ -329,7 +329,7 @@ huggingface-cli login ```bash git clone --depth 1 https://github.com/hiyouga/LLaMA-Factory.git cd LLaMA-Factory -pip install -e .[torch,metrics] +pip install -e '.[torch,metrics]' ``` 可选的额外依赖项:torch、torch_npu、metrics、deepspeed、bitsandbytes、vllm、galore、badam、gptq、awq、aqlm、qwen、modelscope、quality @@ -353,7 +353,7 @@ pip install https://github.com/jllllll/bitsandbytes-windows-webui/releases/downl 加入 [NPU 用户群](assets/wechat_npu.jpg)。 -在昇腾 NPU 设备上安装 LLaMA Factory 时,需要指定额外依赖项,使用 `pip install -e .[torch_npu,metrics]` 命令安装。此外,还需要安装 **[Ascend CANN Toolkit and Kernels](https://www.hiascend.com/developer/download/community/result?module=cann)**,安装方法请参考[安装教程](https://www.hiascend.com/document/detail/zh/CANNCommunityEdition/80RC2alpha002/quickstart/quickstart/quickstart_18_0004.html)或使用以下命令: +在昇腾 NPU 设备上安装 LLaMA Factory 时,需要指定额外依赖项,使用 `pip install -e '.[torch-npu,metrics]'` 命令安装。此外,还需要安装 **[Ascend CANN Toolkit and Kernels](https://www.hiascend.com/developer/download/community/result?module=cann)**,安装方法请参考[安装教程](https://www.hiascend.com/document/detail/zh/CANNCommunityEdition/80RC2alpha002/quickstart/quickstart/quickstart_18_0004.html)或使用以下命令: ```bash # 请替换 URL 为 CANN 版本和设备型号对应的 URL diff --git a/setup.py b/setup.py index 7a5b9304..c32be8af 100644 --- a/setup.py +++ b/setup.py @@ -21,7 +21,7 @@ def get_requires(): extra_require = { "torch": ["torch>=1.13.1"], - "torch_npu": ["torch==2.1.0", "torch-npu==2.1.0.post3", "decorator"], + "torch-npu": ["torch==2.1.0", "torch-npu==2.1.0.post3", "decorator"], "metrics": ["nltk", "jieba", "rouge-chinese"], "deepspeed": ["deepspeed>=0.10.0,<=0.14.0"], "bitsandbytes": ["bitsandbytes>=0.39.0"], From e963a470c1a4c67ab59a3b118770f1f6b01f2a65 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Thu, 6 Jun 2024 17:29:19 +0800 Subject: [PATCH 082/162] fix base64 image read #4061 Former-commit-id: 66ccb2a27a04296b4600f2c85f428071bf14eeb0 --- src/llamafactory/api/chat.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/llamafactory/api/chat.py b/src/llamafactory/api/chat.py index 712b6940..50892a54 100644 --- a/src/llamafactory/api/chat.py +++ b/src/llamafactory/api/chat.py @@ -1,3 +1,5 @@ +import base64 +import io import json import os import uuid @@ -83,9 +85,12 @@ def _process_request( input_messages.append({"role": ROLE_MAPPING[message.role], "content": input_item.text}) else: image_url = input_item.image_url.url - if os.path.isfile(image_url): + if image_url.startswith("data:image"): # base64 image + image_data = base64.b64decode(image_url.split(",", maxsplit=1)[1]) + image_path = io.BytesIO(image_data) + elif os.path.isfile(image_url): # local file image_path = open(image_url, "rb") - else: + else: # web uri image_path = requests.get(image_url, stream=True).raw image = Image.open(image_path).convert("RGB") From 67246f52f2f9848003e7154b286455d05bed6cdc Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Thu, 6 Jun 2024 18:45:49 +0800 Subject: [PATCH 083/162] update trainers 
Former-commit-id: b7f6c4a171293cf4f3e88f15a811f847342f84ee --- src/llamafactory/api/chat.py | 7 +++++-- src/llamafactory/train/dpo/trainer.py | 8 +------- src/llamafactory/train/kto/trainer.py | 11 ++--------- src/llamafactory/train/ppo/utils.py | 7 ++++--- 4 files changed, 12 insertions(+), 21 deletions(-) diff --git a/src/llamafactory/api/chat.py b/src/llamafactory/api/chat.py index 50892a54..98957bc1 100644 --- a/src/llamafactory/api/chat.py +++ b/src/llamafactory/api/chat.py @@ -7,7 +7,7 @@ from typing import TYPE_CHECKING, AsyncGenerator, Dict, List, Optional, Tuple from ..data import Role as DataRole from ..extras.logging import get_logger -from ..extras.packages import is_fastapi_available, is_pillow_available +from ..extras.packages import is_fastapi_available, is_pillow_available, is_requests_available from .common import dictify, jsonify from .protocol import ( ChatCompletionMessage, @@ -29,10 +29,13 @@ if is_fastapi_available(): if is_pillow_available(): - import requests from PIL import Image +if is_requests_available(): + import requests + + if TYPE_CHECKING: from numpy.typing import NDArray diff --git a/src/llamafactory/train/dpo/trainer.py b/src/llamafactory/train/dpo/trainer.py index 2bbe6a06..6f1da34e 100644 --- a/src/llamafactory/train/dpo/trainer.py +++ b/src/llamafactory/train/dpo/trainer.py @@ -187,13 +187,7 @@ class CustomDPOTrainer(DPOTrainer): ref_context = nullcontext() with torch.no_grad(), ref_context: - ( - reference_chosen_logps, - reference_rejected_logps, - _, - _, - _, - ) = self.concatenated_forward(ref_model, batch) + reference_chosen_logps, reference_rejected_logps, *_ = self.concatenated_forward(ref_model, batch) return reference_chosen_logps, reference_rejected_logps diff --git a/src/llamafactory/train/kto/trainer.py b/src/llamafactory/train/kto/trainer.py index f29945f5..03cad5a7 100644 --- a/src/llamafactory/train/kto/trainer.py +++ b/src/llamafactory/train/kto/trainer.py @@ -146,15 +146,8 @@ class CustomKTOTrainer(KTOTrainer): if len(target_logps) != len(batch["kto_tags"]): raise ValueError("Mismatched shape of inputs and labels.") - chosen_idx = [i for i in range(len(target_logps)) if batch["kto_tags"][i]] - rejected_idx = [i for i in range(len(target_logps)) if not batch["kto_tags"][i]] - - chosen_logps = target_logps[chosen_idx, ...] - rejected_logps = target_logps[rejected_idx, ...] - - chosen_logits = target_logits[chosen_idx, ...] - rejected_logits = target_logits[rejected_idx, ...] 
- + chosen_logps, rejected_logps = target_logps[batch["kto_tags"]], target_logps[~batch["kto_tags"]] + chosen_logits, rejected_logits = target_logits[batch["kto_tags"]], target_logits[~batch["kto_tags"]] return chosen_logps, rejected_logps, chosen_logits, rejected_logits, kl_logps def compute_reference_log_probs( diff --git a/src/llamafactory/train/ppo/utils.py b/src/llamafactory/train/ppo/utils.py index e6bdb89c..e5025581 100644 --- a/src/llamafactory/train/ppo/utils.py +++ b/src/llamafactory/train/ppo/utils.py @@ -8,13 +8,14 @@ from transformers.integrations import is_deepspeed_zero3_enabled from ...extras.packages import is_requests_available +if is_requests_available(): + import requests + + if TYPE_CHECKING: from transformers import PreTrainedModel from trl import AutoModelForCausalLMWithValueHead -if is_requests_available(): - import requests - def get_rewards_from_server(server_url: str, messages: List[str]) -> List[torch.Tensor]: headers = {"Content-Type": "application/json"} From 56a6db6d8446aadc143dccbd42c73d234e11bfb2 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Thu, 6 Jun 2024 19:03:20 +0800 Subject: [PATCH 084/162] fix ppo dataset bug #4012 Former-commit-id: 7fc51b2e93698ae5e012566af8481f4d861c873d --- src/llamafactory/data/loader.py | 2 +- src/llamafactory/data/preprocess.py | 2 +- src/llamafactory/data/processors/pretrain.py | 2 +- src/llamafactory/train/ppo/workflow.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/llamafactory/data/loader.py b/src/llamafactory/data/loader.py index 7d013d27..859f9a93 100644 --- a/src/llamafactory/data/loader.py +++ b/src/llamafactory/data/loader.py @@ -130,7 +130,7 @@ def get_dataset( model_args: "ModelArguments", data_args: "DataArguments", training_args: "Seq2SeqTrainingArguments", - stage: Literal["pt", "sft", "rm", "kto"], + stage: Literal["pt", "sft", "rm", "ppo", "kto"], tokenizer: "PreTrainedTokenizer", processor: Optional["ProcessorMixin"] = None, ) -> Union["Dataset", "IterableDataset"]: diff --git a/src/llamafactory/data/preprocess.py b/src/llamafactory/data/preprocess.py index 336257ca..97789c39 100644 --- a/src/llamafactory/data/preprocess.py +++ b/src/llamafactory/data/preprocess.py @@ -23,7 +23,7 @@ if TYPE_CHECKING: def get_preprocess_and_print_func( data_args: "DataArguments", training_args: "Seq2SeqTrainingArguments", - stage: Literal["pt", "sft", "rm", "kto"], + stage: Literal["pt", "sft", "rm", "ppo", "kto"], template: "Template", tokenizer: "PreTrainedTokenizer", processor: Optional["ProcessorMixin"], diff --git a/src/llamafactory/data/processors/pretrain.py b/src/llamafactory/data/processors/pretrain.py index 3de0d1ac..87727b55 100644 --- a/src/llamafactory/data/processors/pretrain.py +++ b/src/llamafactory/data/processors/pretrain.py @@ -18,7 +18,7 @@ def preprocess_pretrain_dataset( if data_args.template == "gemma": text_examples = [tokenizer.bos_token + example for example in text_examples] - result = tokenizer(text_examples, add_special_tokens=False, max_length=data_args.cutoff_len) + result = tokenizer(text_examples, add_special_tokens=False, max_length=data_args.cutoff_len, truncation=True) else: tokenized_examples = tokenizer(text_examples, add_special_tokens=False) concatenated_examples = {k: list(chain(*tokenized_examples[k])) for k in tokenized_examples.keys()} diff --git a/src/llamafactory/train/ppo/workflow.py b/src/llamafactory/train/ppo/workflow.py index c4e05e57..4383bcdc 100644 --- a/src/llamafactory/train/ppo/workflow.py +++ 
b/src/llamafactory/train/ppo/workflow.py @@ -29,7 +29,7 @@ def run_ppo( ): tokenizer_module = load_tokenizer(model_args) tokenizer = tokenizer_module["tokenizer"] - dataset = get_dataset(model_args, data_args, training_args, stage="pt", **tokenizer_module) + dataset = get_dataset(model_args, data_args, training_args, stage="ppo", **tokenizer_module) model = load_model(tokenizer, model_args, finetuning_args, training_args.do_train, add_valuehead=True) tokenizer.padding_side = "left" # use left-padding in generation while using right-padding in training From d3a378ffea59d9f94e70a4e80d065f0aba1e0305 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Thu, 6 Jun 2024 20:30:25 +0800 Subject: [PATCH 085/162] fix torch gc Former-commit-id: e173799d057598e5692a407601c30d8ce1513461 --- src/llamafactory/extras/misc.py | 11 ++++++++--- src/llamafactory/model/utils/embedding.py | 2 +- src/llamafactory/model/utils/valuehead.py | 9 +++++---- 3 files changed, 14 insertions(+), 8 deletions(-) diff --git a/src/llamafactory/extras/misc.py b/src/llamafactory/extras/misc.py index 78f71847..48476f9c 100644 --- a/src/llamafactory/extras/misc.py +++ b/src/llamafactory/extras/misc.py @@ -212,12 +212,17 @@ def has_tokenized_data(path: os.PathLike) -> bool: def torch_gc() -> None: r""" - Collects GPU memory. + Collects GPU or NPU memory. """ gc.collect() - if torch.cuda.is_available(): + if is_torch_xpu_available(): + torch.xpu.empty_cache() + elif is_torch_npu_available(): + torch.npu.empty_cache() + elif is_torch_mps_available(): + torch.mps.empty_cache() + elif is_torch_cuda_available(): torch.cuda.empty_cache() - torch.cuda.ipc_collect() def try_download_model_from_ms(model_args: "ModelArguments") -> str: diff --git a/src/llamafactory/model/utils/embedding.py b/src/llamafactory/model/utils/embedding.py index 357c9cc0..3d9278e3 100644 --- a/src/llamafactory/model/utils/embedding.py +++ b/src/llamafactory/model/utils/embedding.py @@ -15,7 +15,7 @@ if TYPE_CHECKING: logger = get_logger(__name__) -def _noisy_mean_initialization(embed_weight: torch.Tensor, num_new_tokens: int) -> None: +def _noisy_mean_initialization(embed_weight: "torch.Tensor", num_new_tokens: int) -> None: embedding_dim = embed_weight.size(1) avg_weight = embed_weight[:-num_new_tokens].mean(dim=0, keepdim=True) noise_weight = torch.empty_like(embed_weight[-num_new_tokens:]) diff --git a/src/llamafactory/model/utils/valuehead.py b/src/llamafactory/model/utils/valuehead.py index d813729e..64333688 100644 --- a/src/llamafactory/model/utils/valuehead.py +++ b/src/llamafactory/model/utils/valuehead.py @@ -23,6 +23,7 @@ def load_valuehead_params(path_or_repo_id: str, model_args: "ModelArguments") -> Returns: dict with keys `v_head.summary.weight` and `v_head.summary.bias`. 
""" kwargs = {"path_or_repo_id": path_or_repo_id, "cache_dir": model_args.cache_dir, "token": model_args.hf_hub_token} + err_text = "" try: from safetensors import safe_open @@ -31,16 +32,16 @@ def load_valuehead_params(path_or_repo_id: str, model_args: "ModelArguments") -> with safe_open(vhead_file, framework="pt", device="cpu") as f: return {key: f.get_tensor(key) for key in f.keys()} except Exception as err: - logger.info("Failed to load {}: {}".format(V_HEAD_SAFE_WEIGHTS_NAME, str(err))) + err_text = str(err) try: vhead_file = cached_file(filename=V_HEAD_WEIGHTS_NAME, **kwargs) return torch.load(vhead_file, map_location="cpu") except Exception as err: - logger.info("Failed to load {}: {}".format(V_HEAD_WEIGHTS_NAME, str(err))) + err_text = str(err) - logger.info("Provided path ({}) does not contain value head weights.".format(path_or_repo_id)) - logger.info("Ignore these messages if you are not resuming the training of a value head model.") + logger.info("Provided path ({}) does not contain value head weights: {}.".format(path_or_repo_id, err_text)) + logger.info("Ignore the above message if you are not resuming the training of a value head model.") return None From ee2c3601677f0360c934fc263406810ffaf9037e Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Thu, 6 Jun 2024 23:30:07 +0800 Subject: [PATCH 086/162] fix ppo+zero3 #3108 Former-commit-id: 33a93cc29e3e57bf001515000c0a70c112573dea --- src/llamafactory/train/ppo/trainer.py | 91 ++++++++++++++------------- src/llamafactory/train/ppo/utils.py | 36 ++++++----- 2 files changed, 66 insertions(+), 61 deletions(-) diff --git a/src/llamafactory/train/ppo/trainer.py b/src/llamafactory/train/ppo/trainer.py index 27353c72..b0c7e25d 100644 --- a/src/llamafactory/train/ppo/trainer.py +++ b/src/llamafactory/train/ppo/trainer.py @@ -2,9 +2,10 @@ import math import os import sys from types import MethodType -from typing import TYPE_CHECKING, Dict, List, Optional, Tuple +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple import torch +from accelerate.utils import DistributedDataParallelKwargs from tqdm import tqdm from transformers import GenerationConfig, Trainer, TrainerControl, TrainerState from transformers.optimization import get_scheduler @@ -79,6 +80,13 @@ class CustomPPOTrainer(PPOTrainer, Trainer): project_kwargs={"logging_dir": training_args.logging_dir}, ) + # Add deepspeed config + ppo_config.accelerator_kwargs["kwargs_handlers"] = [ + DistributedDataParallelKwargs(find_unused_parameters=training_args.ddp_find_unused_parameters) + ] + if training_args.deepspeed_plugin is not None: + ppo_config.accelerator_kwargs["deepspeed_plugin"] = training_args.deepspeed_plugin + # Create optimizer and scheduler if training_args.max_steps > 0: num_training_steps = training_args.max_steps @@ -124,6 +132,12 @@ class CustomPPOTrainer(PPOTrainer, Trainer): if self.args.max_steps > 0: logger.info("max_steps is given, it will override any value given in num_train_epochs") + unwrapped_model: "AutoModelForCausalLMWithValueHead" = self.accelerator.unwrap_model(self.model) + self.is_chatglm_model = getattr(unwrapped_model.config, "model_type", None) == "chatglm" + + device_type = unwrapped_model.pretrained_model.device.type + self.amp_context = torch.autocast(device_type, dtype=model_args.compute_dtype) + if finetuning_args.reward_model_type == "full": if self.is_deepspeed_enabled: if not ( @@ -184,7 +198,6 @@ class CustomPPOTrainer(PPOTrainer, Trainer): logger.info(" Total training steps = {}".format(max_steps)) logger.info(" 
Number of trainable parameters = {}".format(count_parameters(self.model)[0])) - unwrapped_model: "AutoModelForCausalLMWithValueHead" = self.accelerator.unwrap_model(self.model) dataiter = iter(self.dataloader) loss_meter = AverageMeter() reward_meter = AverageMeter() @@ -197,29 +210,21 @@ class CustomPPOTrainer(PPOTrainer, Trainer): dataiter = iter(self.dataloader) batch = next(dataiter) - # Cast to inference mode - unwrapped_model.gradient_checkpointing_disable() - unwrapped_model.config.use_cache = True - self.model.eval() - # Get inputs + self.model.eval() self.tokenizer.padding_side = "right" # change padding side queries, responses, rewards = [], [], [] for idx in range(0, self.config.batch_size, self.config.mini_batch_size): mini_batch_queries, mini_batch_responses = self.get_inputs( batch[idx : idx + self.config.mini_batch_size] ) - mini_batch_rewards = self.get_rewards(mini_batch_queries, mini_batch_responses, unwrapped_model) + mini_batch_rewards = self.get_rewards(mini_batch_queries, mini_batch_responses) queries.extend(mini_batch_queries) responses.extend(mini_batch_responses) rewards.extend(mini_batch_rewards) - # Cast to training mode - unwrapped_model.gradient_checkpointing_enable() - unwrapped_model.config.use_cache = False - self.model.train() - # Run PPO step + self.model.train() stats = self.step(queries, responses, rewards) self.tokenizer.padding_side = "left" # restore padding side loss_meter.update(float(stats["ppo/loss/total"]), n=len(rewards)) @@ -311,25 +316,24 @@ class CustomPPOTrainer(PPOTrainer, Trainer): getattr(self.processor, "image_processor").save_pretrained(output_dir) @torch.no_grad() - def get_inputs(self, batch: Dict[str, torch.Tensor]) -> Tuple[List[torch.Tensor], List[torch.Tensor]]: + def get_inputs(self, batch: Dict[str, "torch.Tensor"]) -> Tuple[List["torch.Tensor"], List["torch.Tensor"]]: r""" Generates model's responses given queries. """ - if self.model_args.upcast_layernorm: - layernorm_params = dump_layernorm(self.model) - if batch["input_ids"].size(0) == 1: # handle llama2 ppo with gradient accumulation > 1 start_index = (batch["input_ids"][0] != self.tokenizer.pad_token_id).nonzero()[0].item() for k, v in batch.items(): batch[k] = v[:, start_index:] with unwrap_model_for_generation(self.model, self.accelerator) as unwrapped_model: + if self.model_args.upcast_layernorm: + layernorm_params = dump_layernorm(unwrapped_model) + generate_output: torch.Tensor = unwrapped_model.generate( generation_config=self.generation_config, logits_processor=get_logits_processor(), **batch ) - - if self.model_args.upcast_layernorm: - restore_layernorm(self.model, layernorm_params) + if self.model_args.upcast_layernorm: + restore_layernorm(unwrapped_model, layernorm_params) query = batch["input_ids"].detach().cpu() response = generate_output[:, batch["input_ids"].size(-1) :].detach().cpu() @@ -351,10 +355,9 @@ class CustomPPOTrainer(PPOTrainer, Trainer): @torch.no_grad() def get_rewards( self, - queries: List[torch.Tensor], - responses: List[torch.Tensor], - unwrapped_model: "AutoModelForCausalLMWithValueHead", - ) -> List[torch.Tensor]: + queries: List["torch.Tensor"], + responses: List["torch.Tensor"], + ) -> List["torch.Tensor"]: r""" Computes scores using given reward model. 
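# Illustrative sketch (hypothetical helper, not part of this patch): the reward-scoring
# rule that the surrounding get_rewards() hunks preserve is "read the value-head output
# at each sequence's last non-pad token". Names below (last_token_rewards, pad_token_id)
# are assumptions for illustration only and mirror the loop shown in the next hunk.
import torch


def last_token_rewards(values: torch.Tensor, input_ids: torch.Tensor, pad_token_id: int) -> list:
    """values and input_ids have shape (batch, seq_len); returns one scalar reward per sequence."""
    rewards = []
    for i in range(input_ids.size(0)):
        non_pad = (input_ids[i] != pad_token_id).nonzero()
        end_index = non_pad[-1].item() if len(non_pad) else 0  # last non-pad position
        rewards.append(values[i, end_index].float().detach().cpu())  # use fp32 on CPU
    return rewards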
@@ -365,18 +368,22 @@ class CustomPPOTrainer(PPOTrainer, Trainer): messages = self.tokenizer.batch_decode(token_ids, skip_special_tokens=True) return get_rewards_from_server(self.reward_model, messages) - if self.finetuning_args.reward_model_type == "lora": - replace_model(unwrapped_model, target="reward") - reward_model = self.model - else: - reward_model = self.reward_model - batch = self.prepare_model_inputs(queries, responses) - with torch.cuda.amp.autocast(dtype=self.model_args.compute_dtype): # support bf16 - _, _, values = reward_model(**batch, output_hidden_states=True, return_dict=True, use_cache=False) + with unwrap_model_for_generation(self.model, self.accelerator) as unwrapped_model: + if self.finetuning_args.reward_model_type == "lora": + replace_model(unwrapped_model, target="reward") + reward_model = self.model + else: + reward_model = self.reward_model - if getattr(unwrapped_model.config, "model_type", None) == "chatglm": # assume same architecture + with self.amp_context: # support bf16 + _, _, values = reward_model(**batch, output_hidden_states=True, return_dict=True, use_cache=False) + + if self.finetuning_args.reward_model_type == "lora": + replace_model(unwrapped_model, target="default") + + if self.is_chatglm_model: # assume same architecture values = torch.transpose(values, 0, 1) rewards = [] @@ -385,21 +392,18 @@ class CustomPPOTrainer(PPOTrainer, Trainer): end_index = end_indexes[-1].item() if len(end_indexes) else 0 rewards.append(values[i, end_index].float().detach().cpu()) # use fp32 type - if self.finetuning_args.reward_model_type == "lora": - replace_model(unwrapped_model, target="default") - return rewards @PPODecorators.empty_device_cache() def batched_forward_pass( self, model: "AutoModelForCausalLMWithValueHead", - queries: torch.Tensor, - responses: torch.Tensor, - model_inputs: dict, + queries: "torch.Tensor", + responses: "torch.Tensor", + model_inputs: Dict[str, Any], return_logits: bool = False, - response_masks: Optional[torch.Tensor] = None, - ): + response_masks: Optional["torch.Tensor"] = None, + ) -> Tuple["torch.Tensor", Optional["torch.Tensor"], "torch.Tensor", "torch.Tensor"]: r""" Calculates model outputs in multiple batches. @@ -421,11 +425,10 @@ class CustomPPOTrainer(PPOTrainer, Trainer): input_ids = input_kwargs["input_ids"] attention_mask = input_kwargs["attention_mask"] - with torch.cuda.amp.autocast(dtype=self.model_args.compute_dtype): # support bf16 + with self.amp_context: # support bf16 logits, _, values = model(**input_kwargs) - unwrapped_model: "AutoModelForCausalLMWithValueHead" = self.accelerator.unwrap_model(self.model) - if getattr(unwrapped_model.config, "model_type", None) == "chatglm": + if self.is_chatglm_model: values = torch.transpose(values, 0, 1) logprobs = logprobs_from_logits(logits[:, :-1, :], input_ids[:, 1:]) diff --git a/src/llamafactory/train/ppo/utils.py b/src/llamafactory/train/ppo/utils.py index e5025581..570409f2 100644 --- a/src/llamafactory/train/ppo/utils.py +++ b/src/llamafactory/train/ppo/utils.py @@ -1,9 +1,7 @@ import json -from contextlib import nullcontext from typing import TYPE_CHECKING, Dict, List, Literal, Optional import torch -from transformers.integrations import is_deepspeed_zero3_enabled from ...extras.packages import is_requests_available @@ -18,6 +16,9 @@ if TYPE_CHECKING: def get_rewards_from_server(server_url: str, messages: List[str]) -> List[torch.Tensor]: + r""" + Gets reward scores from the API server. 
+ """ headers = {"Content-Type": "application/json"} payload = {"model": "model", "messages": messages} response = requests.post(server_url, json=payload, headers=headers) @@ -26,25 +27,23 @@ def get_rewards_from_server(server_url: str, messages: List[str]) -> List[torch. def replace_model(model: "AutoModelForCausalLMWithValueHead", target: Literal["default", "reward"]) -> None: - if is_deepspeed_zero3_enabled(): - import deepspeed # type: ignore + r""" + Replaces the default/reward modules in the model. The model is already unwrapped (and gathered). + """ + if target == "reward": # save default head temporarily + setattr(model, "default_head_weight", model.v_head.summary.weight.data.detach().clone()) + setattr(model, "default_head_bias", model.v_head.summary.bias.data.detach().clone()) - params = [model.v_head.summary.weight, model.v_head.summary.bias] - context_maybe_zero3 = deepspeed.zero.GatheredParameters(params, modifier_rank=0) - else: - context_maybe_zero3 = nullcontext() - - with context_maybe_zero3: - if target == "reward": # save default head temporarily - setattr(model, "default_head_weight", model.v_head.summary.weight.data.detach().clone()) - setattr(model, "default_head_bias", model.v_head.summary.bias.data.detach().clone()) - - model.pretrained_model.set_adapter(target) # set the LoRA adapter to be active - model.v_head.summary.weight.data = model.get_buffer("{}_head_weight".format(target)).detach().clone() - model.v_head.summary.bias.data = model.get_buffer("{}_head_bias".format(target)).detach().clone() + model.pretrained_model.set_adapter(target) # set the LoRA adapter to be active + device = model.v_head.summary.weight.device + model.v_head.summary.weight.data = model.get_buffer("{}_head_weight".format(target)).detach().clone().to(device) + model.v_head.summary.bias.data = model.get_buffer("{}_head_bias".format(target)).detach().clone().to(device) def dump_layernorm(model: "PreTrainedModel") -> Dict[str, torch.Tensor]: + r""" + Dumps the layernorm parameters in the model. The model is already unwrapped (and gathered). + """ layer_norm_params = {} for name, param in model.named_parameters(): if param.data.dtype == torch.float32: @@ -55,6 +54,9 @@ def dump_layernorm(model: "PreTrainedModel") -> Dict[str, torch.Tensor]: def restore_layernorm(model: "PreTrainedModel", layernorm_params: Optional[Dict[str, torch.Tensor]] = None) -> None: + r""" + Restores the layernorm parameters in the model. The model is already unwrapped (and gathered). + """ for name, param in model.named_parameters(): if name in layernorm_params: param.data = layernorm_params[name] From 969d0f7cddd4447152ebe4a17791707886bf4102 Mon Sep 17 00:00:00 2001 From: hoshi-hiyouga Date: Thu, 6 Jun 2024 23:38:09 +0800 Subject: [PATCH 087/162] Update cli.py Former-commit-id: 32190507534adf5f505858b3af2b592ca6568ac7 --- src/llamafactory/cli.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/llamafactory/cli.py b/src/llamafactory/cli.py index 8a229a38..19a104bd 100644 --- a/src/llamafactory/cli.py +++ b/src/llamafactory/cli.py @@ -71,10 +71,6 @@ def main(): export_model() elif command == Command.TRAIN: if get_device_count() > 0: - # NOTE (MengqingCao): why use torchrun when only one accelerator is available? 
- # DeepSpeed only warp model with DeepSpeedEngine when launching by distributed launcher, - # e.g., torchrun, causing some feature missing - # sa: https://github.com/huggingface/transformers/issues/24309 master_addr = os.environ.get("MASTER_ADDR", "127.0.0.1") master_port = os.environ.get("MASTER_PORT", str(random.randint(20001, 29999))) logger.info("Initializing distributed tasks at: {}:{}".format(master_addr, master_port)) From 3e274dfb19bfe773c94ad42a68b9ede43af22226 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Thu, 6 Jun 2024 23:44:58 +0800 Subject: [PATCH 088/162] add DISABLE_TORCHRUN option Former-commit-id: bcc574b479c2101438723aadead42743d4378776 --- src/llamafactory/cli.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/llamafactory/cli.py b/src/llamafactory/cli.py index 8ae3d6a8..092f4cf7 100644 --- a/src/llamafactory/cli.py +++ b/src/llamafactory/cli.py @@ -72,7 +72,12 @@ def main(): elif command == Command.EXPORT: export_model() elif command == Command.TRAIN: - if get_device_count() > 0: + disable_torchrun = os.environ.get("DISABLE_TORCHRUN", "0").lower() in ["true", "1"] + if disable_torchrun and get_device_count() > 1: + logger.warning("`torchrun` cannot be disabled when device count > 1.") + disable_torchrun = False + + if (not disable_torchrun) and (get_device_count() > 0): master_addr = os.environ.get("MASTER_ADDR", "127.0.0.1") master_port = os.environ.get("MASTER_PORT", str(random.randint(20001, 29999))) logger.info("Initializing distributed tasks at: {}:{}".format(master_addr, master_port)) From 0b1f4a34f866874707c4689a1569db24125a3bf8 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Fri, 7 Jun 2024 00:09:06 +0800 Subject: [PATCH 089/162] rename files Former-commit-id: e1a8431770fc36c0c9ee7fed4abbc3d7fdcc5efd --- src/llamafactory/chat/vllm_engine.py | 2 +- src/llamafactory/data/__init__.py | 12 ++++++------ src/llamafactory/data/aligner.py | 2 +- .../data/{utils.py => data_utils.py} | 0 src/llamafactory/data/loader.py | 2 +- src/llamafactory/data/processors/feedback.py | 2 +- src/llamafactory/data/processors/pairwise.py | 2 +- .../{mm_utils.py => processor_utils.py} | 0 src/llamafactory/data/processors/supervised.py | 2 +- .../data/processors/unsupervised.py | 4 ++-- src/llamafactory/data/template.py | 10 +++++----- src/llamafactory/model/__init__.py | 6 +++--- src/llamafactory/model/adapter.py | 6 +++--- src/llamafactory/model/loader.py | 8 ++++---- .../model/{utils => model_utils}/__init__.py | 0 .../model/{utils => model_utils}/attention.py | 0 .../{utils => model_utils}/checkpointing.py | 0 .../model/{utils => model_utils}/embedding.py | 0 .../model/{utils => model_utils}/longlora.py | 0 .../model/{utils => model_utils}/misc.py | 0 .../model/{utils => model_utils}/mod.py | 0 .../model/{utils => model_utils}/moe.py | 0 .../{utils => model_utils}/quantization.py | 0 .../model/{utils => model_utils}/rope.py | 0 .../model/{utils => model_utils}/unsloth.py | 0 .../model/{utils => model_utils}/valuehead.py | 0 .../model/{utils => model_utils}/visual.py | 0 src/llamafactory/model/patcher.py | 18 +++++++++--------- src/llamafactory/train/dpo/trainer.py | 2 +- src/llamafactory/train/dpo/workflow.py | 2 +- src/llamafactory/train/kto/trainer.py | 2 +- src/llamafactory/train/kto/workflow.py | 2 +- .../train/ppo/{utils.py => ppo_utils.py} | 0 src/llamafactory/train/ppo/trainer.py | 4 ++-- src/llamafactory/train/ppo/workflow.py | 2 +- src/llamafactory/train/pt/trainer.py | 2 +- src/llamafactory/train/pt/workflow.py | 
2 +- src/llamafactory/train/rm/trainer.py | 2 +- src/llamafactory/train/rm/workflow.py | 2 +- src/llamafactory/train/sft/trainer.py | 2 +- src/llamafactory/train/sft/workflow.py | 2 +- .../train/{utils.py => trainer_utils.py} | 0 src/llamafactory/webui/components/top.py | 4 ++-- 43 files changed, 53 insertions(+), 53 deletions(-) rename src/llamafactory/data/{utils.py => data_utils.py} (100%) rename src/llamafactory/data/processors/{mm_utils.py => processor_utils.py} (100%) rename src/llamafactory/model/{utils => model_utils}/__init__.py (100%) rename src/llamafactory/model/{utils => model_utils}/attention.py (100%) rename src/llamafactory/model/{utils => model_utils}/checkpointing.py (100%) rename src/llamafactory/model/{utils => model_utils}/embedding.py (100%) rename src/llamafactory/model/{utils => model_utils}/longlora.py (100%) rename src/llamafactory/model/{utils => model_utils}/misc.py (100%) rename src/llamafactory/model/{utils => model_utils}/mod.py (100%) rename src/llamafactory/model/{utils => model_utils}/moe.py (100%) rename src/llamafactory/model/{utils => model_utils}/quantization.py (100%) rename src/llamafactory/model/{utils => model_utils}/rope.py (100%) rename src/llamafactory/model/{utils => model_utils}/unsloth.py (100%) rename src/llamafactory/model/{utils => model_utils}/valuehead.py (100%) rename src/llamafactory/model/{utils => model_utils}/visual.py (100%) rename src/llamafactory/train/ppo/{utils.py => ppo_utils.py} (100%) rename src/llamafactory/train/{utils.py => trainer_utils.py} (100%) diff --git a/src/llamafactory/chat/vllm_engine.py b/src/llamafactory/chat/vllm_engine.py index e193704a..87ce8684 100644 --- a/src/llamafactory/chat/vllm_engine.py +++ b/src/llamafactory/chat/vllm_engine.py @@ -6,7 +6,7 @@ from ..extras.logging import get_logger from ..extras.misc import get_device_count from ..extras.packages import is_vllm_available from ..model import load_config, load_tokenizer -from ..model.utils.visual import LlavaMultiModalProjectorForYiVLForVLLM +from ..model.model_utils.visual import LlavaMultiModalProjectorForYiVLForVLLM from .base_engine import BaseEngine, Response diff --git a/src/llamafactory/data/__init__.py b/src/llamafactory/data/__init__.py index 44887d24..b08691d3 100644 --- a/src/llamafactory/data/__init__.py +++ b/src/llamafactory/data/__init__.py @@ -1,16 +1,16 @@ from .collator import KTODataCollatorWithPadding, PairwiseDataCollatorWithPadding +from .data_utils import Role, split_dataset from .loader import get_dataset -from .template import Template, get_template_and_fix_tokenizer, templates -from .utils import Role, split_dataset +from .template import TEMPLATES, Template, get_template_and_fix_tokenizer __all__ = [ "KTODataCollatorWithPadding", "PairwiseDataCollatorWithPadding", - "get_dataset", - "Template", - "get_template_and_fix_tokenizer", - "templates", "Role", "split_dataset", + "get_dataset", + "TEMPLATES", + "Template", + "get_template_and_fix_tokenizer", ] diff --git a/src/llamafactory/data/aligner.py b/src/llamafactory/data/aligner.py index 2a382c60..434956af 100644 --- a/src/llamafactory/data/aligner.py +++ b/src/llamafactory/data/aligner.py @@ -5,7 +5,7 @@ from typing import TYPE_CHECKING, Any, Dict, List, Union from datasets import Features from ..extras.logging import get_logger -from .utils import Role +from .data_utils import Role if TYPE_CHECKING: diff --git a/src/llamafactory/data/utils.py b/src/llamafactory/data/data_utils.py similarity index 100% rename from src/llamafactory/data/utils.py rename to 
src/llamafactory/data/data_utils.py diff --git a/src/llamafactory/data/loader.py b/src/llamafactory/data/loader.py index 859f9a93..2c236c76 100644 --- a/src/llamafactory/data/loader.py +++ b/src/llamafactory/data/loader.py @@ -10,10 +10,10 @@ from ..extras.constants import FILEEXT2TYPE from ..extras.logging import get_logger from ..extras.misc import has_tokenized_data from .aligner import align_dataset +from .data_utils import merge_dataset from .parser import get_dataset_list from .preprocess import get_preprocess_and_print_func from .template import get_template_and_fix_tokenizer -from .utils import merge_dataset if TYPE_CHECKING: diff --git a/src/llamafactory/data/processors/feedback.py b/src/llamafactory/data/processors/feedback.py index 1aaff0ab..dc7d817c 100644 --- a/src/llamafactory/data/processors/feedback.py +++ b/src/llamafactory/data/processors/feedback.py @@ -2,7 +2,7 @@ from typing import TYPE_CHECKING, Any, Dict, List, Optional from ...extras.constants import IGNORE_INDEX from ...extras.logging import get_logger -from .mm_utils import get_paligemma_token_type_ids, get_pixel_values +from .processor_utils import get_paligemma_token_type_ids, get_pixel_values if TYPE_CHECKING: diff --git a/src/llamafactory/data/processors/pairwise.py b/src/llamafactory/data/processors/pairwise.py index 69dab34a..8ad3979f 100644 --- a/src/llamafactory/data/processors/pairwise.py +++ b/src/llamafactory/data/processors/pairwise.py @@ -2,7 +2,7 @@ from typing import TYPE_CHECKING, Any, Dict, List, Optional from ...extras.constants import IGNORE_INDEX from ...extras.logging import get_logger -from .mm_utils import get_paligemma_token_type_ids, get_pixel_values +from .processor_utils import get_paligemma_token_type_ids, get_pixel_values if TYPE_CHECKING: diff --git a/src/llamafactory/data/processors/mm_utils.py b/src/llamafactory/data/processors/processor_utils.py similarity index 100% rename from src/llamafactory/data/processors/mm_utils.py rename to src/llamafactory/data/processors/processor_utils.py diff --git a/src/llamafactory/data/processors/supervised.py b/src/llamafactory/data/processors/supervised.py index b119aa22..d90a32ac 100644 --- a/src/llamafactory/data/processors/supervised.py +++ b/src/llamafactory/data/processors/supervised.py @@ -2,7 +2,7 @@ from typing import TYPE_CHECKING, Any, Dict, List, Optional from ...extras.constants import IGNORE_INDEX from ...extras.logging import get_logger -from .mm_utils import get_paligemma_token_type_ids, get_pixel_values +from .processor_utils import get_paligemma_token_type_ids, get_pixel_values if TYPE_CHECKING: diff --git a/src/llamafactory/data/processors/unsupervised.py b/src/llamafactory/data/processors/unsupervised.py index 6a9f9460..e00bde55 100644 --- a/src/llamafactory/data/processors/unsupervised.py +++ b/src/llamafactory/data/processors/unsupervised.py @@ -1,8 +1,8 @@ from typing import TYPE_CHECKING, Any, Dict, List, Optional from ...extras.logging import get_logger -from ..utils import Role -from .mm_utils import get_paligemma_token_type_ids, get_pixel_values +from ..data_utils import Role +from .processor_utils import get_paligemma_token_type_ids, get_pixel_values if TYPE_CHECKING: diff --git a/src/llamafactory/data/template.py b/src/llamafactory/data/template.py index fe0211c6..3dce5ec6 100644 --- a/src/llamafactory/data/template.py +++ b/src/llamafactory/data/template.py @@ -2,8 +2,8 @@ from dataclasses import dataclass from typing import TYPE_CHECKING, Dict, List, Optional, Sequence, Tuple, Union from ..extras.logging import 
get_logger +from .data_utils import Role, infer_max_len from .formatter import EmptyFormatter, FunctionFormatter, StringFormatter, ToolFormatter -from .utils import Role, infer_max_len if TYPE_CHECKING: @@ -196,7 +196,7 @@ class Llama2Template(Template): return self._make_pairs(encoded_messages, cutoff_len, reserved_label_len) -templates: Dict[str, Template] = {} +TEMPLATES: Dict[str, Template] = {} def _register_template( @@ -248,7 +248,7 @@ def _register_template( default_function_formatter = FunctionFormatter(slots=["Action: {{name}}\nAction Input: {{arguments}}"] + eos_slots) default_tool_formatter = ToolFormatter(tool_format="default") default_separator_formatter = EmptyFormatter() - templates[name] = template_class( + TEMPLATES[name] = template_class( format_user=format_user or default_user_formatter, format_assistant=format_assistant or default_assistant_formatter, format_system=format_system or default_user_formatter, @@ -348,9 +348,9 @@ def get_template_and_fix_tokenizer( name: Optional[str] = None, ) -> Template: if name is None: - template = templates["empty"] # placeholder + template = TEMPLATES["empty"] # placeholder else: - template = templates.get(name, None) + template = TEMPLATES.get(name, None) if template is None: raise ValueError("Template {} does not exist.".format(name)) diff --git a/src/llamafactory/model/__init__.py b/src/llamafactory/model/__init__.py index 88f666c8..9d23d59f 100644 --- a/src/llamafactory/model/__init__.py +++ b/src/llamafactory/model/__init__.py @@ -1,12 +1,12 @@ from .loader import load_config, load_model, load_tokenizer -from .utils.misc import find_all_linear_modules -from .utils.valuehead import load_valuehead_params +from .model_utils.misc import find_all_linear_modules +from .model_utils.valuehead import load_valuehead_params __all__ = [ "load_config", "load_model", "load_tokenizer", - "load_valuehead_params", "find_all_linear_modules", + "load_valuehead_params", ] diff --git a/src/llamafactory/model/adapter.py b/src/llamafactory/model/adapter.py index a9204ef0..1a77d613 100644 --- a/src/llamafactory/model/adapter.py +++ b/src/llamafactory/model/adapter.py @@ -7,9 +7,9 @@ from transformers.integrations import is_deepspeed_zero3_enabled from transformers.modeling_utils import is_fsdp_enabled from ..extras.logging import get_logger -from .utils.misc import find_all_linear_modules, find_expanded_modules -from .utils.quantization import QuantizationMethod -from .utils.unsloth import get_unsloth_peft_model, load_unsloth_peft_model +from .model_utils.misc import find_all_linear_modules, find_expanded_modules +from .model_utils.quantization import QuantizationMethod +from .model_utils.unsloth import get_unsloth_peft_model, load_unsloth_peft_model if TYPE_CHECKING: diff --git a/src/llamafactory/model/loader.py b/src/llamafactory/model/loader.py index 8f3309b3..697a04e7 100644 --- a/src/llamafactory/model/loader.py +++ b/src/llamafactory/model/loader.py @@ -6,11 +6,11 @@ from trl import AutoModelForCausalLMWithValueHead from ..extras.logging import get_logger from ..extras.misc import count_parameters, try_download_model_from_ms from .adapter import init_adapter +from .model_utils.misc import register_autoclass +from .model_utils.mod import convert_pretrained_model_to_mod, load_mod_pretrained_model +from .model_utils.unsloth import load_unsloth_pretrained_model +from .model_utils.valuehead import load_valuehead_params from .patcher import patch_config, patch_model, patch_tokenizer, patch_valuehead_model -from .utils.misc import register_autoclass 
-from .utils.mod import convert_pretrained_model_to_mod, load_mod_pretrained_model -from .utils.unsloth import load_unsloth_pretrained_model -from .utils.valuehead import load_valuehead_params if TYPE_CHECKING: diff --git a/src/llamafactory/model/utils/__init__.py b/src/llamafactory/model/model_utils/__init__.py similarity index 100% rename from src/llamafactory/model/utils/__init__.py rename to src/llamafactory/model/model_utils/__init__.py diff --git a/src/llamafactory/model/utils/attention.py b/src/llamafactory/model/model_utils/attention.py similarity index 100% rename from src/llamafactory/model/utils/attention.py rename to src/llamafactory/model/model_utils/attention.py diff --git a/src/llamafactory/model/utils/checkpointing.py b/src/llamafactory/model/model_utils/checkpointing.py similarity index 100% rename from src/llamafactory/model/utils/checkpointing.py rename to src/llamafactory/model/model_utils/checkpointing.py diff --git a/src/llamafactory/model/utils/embedding.py b/src/llamafactory/model/model_utils/embedding.py similarity index 100% rename from src/llamafactory/model/utils/embedding.py rename to src/llamafactory/model/model_utils/embedding.py diff --git a/src/llamafactory/model/utils/longlora.py b/src/llamafactory/model/model_utils/longlora.py similarity index 100% rename from src/llamafactory/model/utils/longlora.py rename to src/llamafactory/model/model_utils/longlora.py diff --git a/src/llamafactory/model/utils/misc.py b/src/llamafactory/model/model_utils/misc.py similarity index 100% rename from src/llamafactory/model/utils/misc.py rename to src/llamafactory/model/model_utils/misc.py diff --git a/src/llamafactory/model/utils/mod.py b/src/llamafactory/model/model_utils/mod.py similarity index 100% rename from src/llamafactory/model/utils/mod.py rename to src/llamafactory/model/model_utils/mod.py diff --git a/src/llamafactory/model/utils/moe.py b/src/llamafactory/model/model_utils/moe.py similarity index 100% rename from src/llamafactory/model/utils/moe.py rename to src/llamafactory/model/model_utils/moe.py diff --git a/src/llamafactory/model/utils/quantization.py b/src/llamafactory/model/model_utils/quantization.py similarity index 100% rename from src/llamafactory/model/utils/quantization.py rename to src/llamafactory/model/model_utils/quantization.py diff --git a/src/llamafactory/model/utils/rope.py b/src/llamafactory/model/model_utils/rope.py similarity index 100% rename from src/llamafactory/model/utils/rope.py rename to src/llamafactory/model/model_utils/rope.py diff --git a/src/llamafactory/model/utils/unsloth.py b/src/llamafactory/model/model_utils/unsloth.py similarity index 100% rename from src/llamafactory/model/utils/unsloth.py rename to src/llamafactory/model/model_utils/unsloth.py diff --git a/src/llamafactory/model/utils/valuehead.py b/src/llamafactory/model/model_utils/valuehead.py similarity index 100% rename from src/llamafactory/model/utils/valuehead.py rename to src/llamafactory/model/model_utils/valuehead.py diff --git a/src/llamafactory/model/utils/visual.py b/src/llamafactory/model/model_utils/visual.py similarity index 100% rename from src/llamafactory/model/utils/visual.py rename to src/llamafactory/model/model_utils/visual.py diff --git a/src/llamafactory/model/patcher.py b/src/llamafactory/model/patcher.py index 1a8ce607..87c92315 100644 --- a/src/llamafactory/model/patcher.py +++ b/src/llamafactory/model/patcher.py @@ -10,15 +10,15 @@ from transformers.modeling_utils import is_fsdp_enabled from ..extras.logging import get_logger from 
..extras.misc import infer_optim_dtype -from .utils.attention import configure_attn_implementation, print_attn_implementation -from .utils.checkpointing import prepare_model_for_training -from .utils.embedding import resize_embedding_layer -from .utils.longlora import configure_longlora -from .utils.moe import add_z3_leaf_module, configure_moe -from .utils.quantization import configure_quantization -from .utils.rope import configure_rope -from .utils.valuehead import prepare_valuehead_model -from .utils.visual import autocast_projector_dtype, configure_visual_model +from .model_utils.attention import configure_attn_implementation, print_attn_implementation +from .model_utils.checkpointing import prepare_model_for_training +from .model_utils.embedding import resize_embedding_layer +from .model_utils.longlora import configure_longlora +from .model_utils.moe import add_z3_leaf_module, configure_moe +from .model_utils.quantization import configure_quantization +from .model_utils.rope import configure_rope +from .model_utils.valuehead import prepare_valuehead_model +from .model_utils.visual import autocast_projector_dtype, configure_visual_model if TYPE_CHECKING: diff --git a/src/llamafactory/train/dpo/trainer.py b/src/llamafactory/train/dpo/trainer.py index 6f1da34e..f64c287f 100644 --- a/src/llamafactory/train/dpo/trainer.py +++ b/src/llamafactory/train/dpo/trainer.py @@ -10,7 +10,7 @@ from trl import DPOTrainer from trl.trainer import disable_dropout_in_model from ...extras.constants import IGNORE_INDEX -from ..utils import create_custom_optimzer, create_custom_scheduler, get_ref_context +from ..trainer_utils import create_custom_optimzer, create_custom_scheduler, get_ref_context if TYPE_CHECKING: diff --git a/src/llamafactory/train/dpo/workflow.py b/src/llamafactory/train/dpo/workflow.py index 61a3e2f0..992985b0 100644 --- a/src/llamafactory/train/dpo/workflow.py +++ b/src/llamafactory/train/dpo/workflow.py @@ -7,7 +7,7 @@ from ...extras.constants import IGNORE_INDEX from ...extras.ploting import plot_loss from ...hparams import ModelArguments from ...model import load_model, load_tokenizer -from ..utils import create_modelcard_and_push, create_ref_model +from ..trainer_utils import create_modelcard_and_push, create_ref_model from .trainer import CustomDPOTrainer diff --git a/src/llamafactory/train/kto/trainer.py b/src/llamafactory/train/kto/trainer.py index 03cad5a7..1610ccfa 100644 --- a/src/llamafactory/train/kto/trainer.py +++ b/src/llamafactory/train/kto/trainer.py @@ -9,7 +9,7 @@ from trl import KTOTrainer from trl.trainer import disable_dropout_in_model from ...extras.constants import IGNORE_INDEX -from ..utils import create_custom_optimzer, create_custom_scheduler, get_ref_context +from ..trainer_utils import create_custom_optimzer, create_custom_scheduler, get_ref_context if TYPE_CHECKING: diff --git a/src/llamafactory/train/kto/workflow.py b/src/llamafactory/train/kto/workflow.py index 26dc770c..c79b160b 100644 --- a/src/llamafactory/train/kto/workflow.py +++ b/src/llamafactory/train/kto/workflow.py @@ -5,7 +5,7 @@ from ...extras.constants import IGNORE_INDEX from ...extras.ploting import plot_loss from ...hparams import ModelArguments from ...model import load_model, load_tokenizer -from ..utils import create_modelcard_and_push, create_ref_model +from ..trainer_utils import create_modelcard_and_push, create_ref_model from .trainer import CustomKTOTrainer diff --git a/src/llamafactory/train/ppo/utils.py b/src/llamafactory/train/ppo/ppo_utils.py similarity index 100% rename from 
src/llamafactory/train/ppo/utils.py rename to src/llamafactory/train/ppo/ppo_utils.py diff --git a/src/llamafactory/train/ppo/trainer.py b/src/llamafactory/train/ppo/trainer.py index b0c7e25d..7addfc3c 100644 --- a/src/llamafactory/train/ppo/trainer.py +++ b/src/llamafactory/train/ppo/trainer.py @@ -19,8 +19,8 @@ from trl.models.utils import unwrap_model_for_generation from ...extras.callbacks import FixValueHeadModelCallback, LogCallback from ...extras.logging import get_logger from ...extras.misc import AverageMeter, count_parameters, get_current_device, get_logits_processor -from ..utils import create_custom_optimzer, create_custom_scheduler -from .utils import dump_layernorm, get_rewards_from_server, replace_model, restore_layernorm +from ..trainer_utils import create_custom_optimzer, create_custom_scheduler +from .ppo_utils import dump_layernorm, get_rewards_from_server, replace_model, restore_layernorm if TYPE_CHECKING: diff --git a/src/llamafactory/train/ppo/workflow.py b/src/llamafactory/train/ppo/workflow.py index 4383bcdc..111704c6 100644 --- a/src/llamafactory/train/ppo/workflow.py +++ b/src/llamafactory/train/ppo/workflow.py @@ -9,7 +9,7 @@ from ...extras.callbacks import FixValueHeadModelCallback from ...extras.misc import fix_valuehead_checkpoint from ...extras.ploting import plot_loss from ...model import load_model, load_tokenizer -from ..utils import create_ref_model, create_reward_model +from ..trainer_utils import create_ref_model, create_reward_model from .trainer import CustomPPOTrainer diff --git a/src/llamafactory/train/pt/trainer.py b/src/llamafactory/train/pt/trainer.py index b7b80f88..1d96e82f 100644 --- a/src/llamafactory/train/pt/trainer.py +++ b/src/llamafactory/train/pt/trainer.py @@ -4,7 +4,7 @@ from typing import TYPE_CHECKING, Dict, Optional from transformers import Trainer from ...extras.logging import get_logger -from ..utils import create_custom_optimzer, create_custom_scheduler +from ..trainer_utils import create_custom_optimzer, create_custom_scheduler if TYPE_CHECKING: diff --git a/src/llamafactory/train/pt/workflow.py b/src/llamafactory/train/pt/workflow.py index 9f945901..8a635567 100644 --- a/src/llamafactory/train/pt/workflow.py +++ b/src/llamafactory/train/pt/workflow.py @@ -8,7 +8,7 @@ from transformers import DataCollatorForLanguageModeling from ...data import get_dataset, split_dataset from ...extras.ploting import plot_loss from ...model import load_model, load_tokenizer -from ..utils import create_modelcard_and_push +from ..trainer_utils import create_modelcard_and_push from .trainer import CustomTrainer diff --git a/src/llamafactory/train/rm/trainer.py b/src/llamafactory/train/rm/trainer.py index d49dd67b..bfb344dc 100644 --- a/src/llamafactory/train/rm/trainer.py +++ b/src/llamafactory/train/rm/trainer.py @@ -7,7 +7,7 @@ import torch from transformers import Trainer from ...extras.logging import get_logger -from ..utils import create_custom_optimzer, create_custom_scheduler +from ..trainer_utils import create_custom_optimzer, create_custom_scheduler if TYPE_CHECKING: diff --git a/src/llamafactory/train/rm/workflow.py b/src/llamafactory/train/rm/workflow.py index 621d03b7..2e9e194b 100644 --- a/src/llamafactory/train/rm/workflow.py +++ b/src/llamafactory/train/rm/workflow.py @@ -7,7 +7,7 @@ from ...extras.callbacks import FixValueHeadModelCallback from ...extras.misc import fix_valuehead_checkpoint from ...extras.ploting import plot_loss from ...model import load_model, load_tokenizer -from ..utils import create_modelcard_and_push +from 
..trainer_utils import create_modelcard_and_push from .metric import compute_accuracy from .trainer import PairwiseTrainer diff --git a/src/llamafactory/train/sft/trainer.py b/src/llamafactory/train/sft/trainer.py index 35671e1b..c063b214 100644 --- a/src/llamafactory/train/sft/trainer.py +++ b/src/llamafactory/train/sft/trainer.py @@ -9,7 +9,7 @@ from transformers import Seq2SeqTrainer from ...extras.constants import IGNORE_INDEX from ...extras.logging import get_logger -from ..utils import create_custom_optimzer, create_custom_scheduler +from ..trainer_utils import create_custom_optimzer, create_custom_scheduler if TYPE_CHECKING: diff --git a/src/llamafactory/train/sft/workflow.py b/src/llamafactory/train/sft/workflow.py index d9d7c8e9..f09b5173 100644 --- a/src/llamafactory/train/sft/workflow.py +++ b/src/llamafactory/train/sft/workflow.py @@ -9,7 +9,7 @@ from ...extras.constants import IGNORE_INDEX from ...extras.misc import get_logits_processor from ...extras.ploting import plot_loss from ...model import load_model, load_tokenizer -from ..utils import create_modelcard_and_push +from ..trainer_utils import create_modelcard_and_push from .metric import ComputeMetrics from .trainer import CustomSeq2SeqTrainer diff --git a/src/llamafactory/train/utils.py b/src/llamafactory/train/trainer_utils.py similarity index 100% rename from src/llamafactory/train/utils.py rename to src/llamafactory/train/trainer_utils.py diff --git a/src/llamafactory/webui/components/top.py b/src/llamafactory/webui/components/top.py index ca093584..c794d0aa 100644 --- a/src/llamafactory/webui/components/top.py +++ b/src/llamafactory/webui/components/top.py @@ -1,6 +1,6 @@ from typing import TYPE_CHECKING, Dict -from ...data import templates +from ...data import TEMPLATES from ...extras.constants import METHODS, SUPPORTED_MODELS from ...extras.packages import is_gradio_available from ..common import get_model_info, list_checkpoints, save_config @@ -30,7 +30,7 @@ def create_top() -> Dict[str, "Component"]: with gr.Accordion(open=False) as advanced_tab: with gr.Row(): quantization_bit = gr.Dropdown(choices=["none", "8", "4"], value="none", scale=2) - template = gr.Dropdown(choices=list(templates.keys()), value="default", scale=2) + template = gr.Dropdown(choices=list(TEMPLATES.keys()), value="default", scale=2) rope_scaling = gr.Radio(choices=["none", "linear", "dynamic"], value="none", scale=3) booster = gr.Radio(choices=["none", "flashattn2", "unsloth"], value="none", scale=3) visual_inputs = gr.Checkbox(scale=1) From 093abed7ccdb653219778dc8b68fee1e2044e003 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Fri, 7 Jun 2024 00:22:57 +0800 Subject: [PATCH 090/162] add qwen2 models Former-commit-id: 49cb694d02c876e3740a003a8b332349f4310ad3 --- README.md | 7 ++- README_zh.md | 7 ++- src/llamafactory/extras/constants.py | 83 ++++++++++++++++++++++++++++ 3 files changed, 93 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 77684757..1358b6e8 100644 --- a/README.md +++ b/README.md @@ -71,14 +71,16 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/ ## Changelog +[24/06/07] We supported fine-tuning the **[Qwen-2](https://qwenlm.github.io/blog/qwen2/)** series models. + [24/06/05] We supported fine-tuning the **[GLM-4-9B/GLM-4-9B-Chat](https://github.com/THUDM/GLM-4)** models. [24/05/26] We supported **[SimPO](https://arxiv.org/abs/2405.14734)** algorithm for preference learning. See [examples](examples/README.md) for usage. 
-[24/05/20] We supported fine-tuning the **PaliGemma** series models. Note that the PaliGemma models are pre-trained models, you need to fine-tune them with `gemma` template for chat completion. -
Full Changelog +[24/05/20] We supported fine-tuning the **PaliGemma** series models. Note that the PaliGemma models are pre-trained models, you need to fine-tune them with `gemma` template for chat completion. + [24/05/18] We supported **[KTO](https://arxiv.org/abs/2402.01306)** algorithm for preference learning. See [examples](examples/README.md) for usage. [24/05/14] We supported training and inference on the Ascend NPU devices. Check [installation](#installation) section for details. @@ -172,6 +174,7 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/ | [Phi-3](https://huggingface.co/microsoft) | 4B/7B/14B | phi | | [Qwen](https://huggingface.co/Qwen) | 1.8B/7B/14B/72B | qwen | | [Qwen1.5 (Code/MoE)](https://huggingface.co/Qwen) | 0.5B/1.8B/4B/7B/14B/32B/72B/110B | qwen | +| [Qwen2 (MoE)](https://huggingface.co/Qwen) | 0.5B/1.5B/7B/57B/72B | qwen | | [StarCoder2](https://huggingface.co/bigcode) | 3B/7B/15B | - | | [XVERSE](https://huggingface.co/xverse) | 7B/13B/65B | xverse | | [Yi (1/1.5)](https://huggingface.co/01-ai) | 6B/9B/34B | yi | diff --git a/README_zh.md b/README_zh.md index da5ff079..becec988 100644 --- a/README_zh.md +++ b/README_zh.md @@ -71,14 +71,16 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/ec36a9dd-37f4-4f72-81bd ## 更新日志 +[24/06/07] 我们支持了 **[Qwen-2](https://qwenlm.github.io/blog/qwen2/)** 系列模型的微调。 + [24/06/05] 我们支持了 **[GLM-4-9B/GLM-4-9B-Chat](https://github.com/THUDM/GLM-4)** 模型的微调。 [24/05/26] 我们支持了 **[SimPO](https://arxiv.org/abs/2405.14734)** 偏好对齐算法。详细用法请参照 [examples](examples/README_zh.md)。 -[24/05/20] 我们支持了 **PaliGemma** 系列模型的微调。注意 PaliGemma 是预训练模型,你需要使用 `gemma` 模板进行微调使其获得对话能力。 -
展开日志 +[24/05/20] 我们支持了 **PaliGemma** 系列模型的微调。注意 PaliGemma 是预训练模型,你需要使用 `gemma` 模板进行微调使其获得对话能力。 + [24/05/18] 我们支持了 **[KTO](https://arxiv.org/abs/2402.01306)** 偏好对齐算法。详细用法请参照 [examples](examples/README_zh.md)。 [24/05/14] 我们支持了昇腾 NPU 设备的训练和推理。详情请查阅[安装](#安装-llama-factory)部分。 @@ -172,6 +174,7 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/ec36a9dd-37f4-4f72-81bd | [Phi-3](https://huggingface.co/microsoft) | 4B/7B/14B | phi | | [Qwen](https://huggingface.co/Qwen) | 1.8B/7B/14B/72B | qwen | | [Qwen1.5 (Code/MoE)](https://huggingface.co/Qwen) | 0.5B/1.8B/4B/7B/14B/32B/72B/110B | qwen | +| [Qwen2 (MoE)](https://huggingface.co/Qwen) | 0.5B/1.5B/7B/57B/72B | qwen | | [StarCoder2](https://huggingface.co/bigcode) | 3B/7B/15B | - | | [XVERSE](https://huggingface.co/xverse) | 7B/13B/65B | xverse | | [Yi (1/1.5)](https://huggingface.co/01-ai) | 6B/9B/34B | yi | diff --git a/src/llamafactory/extras/constants.py b/src/llamafactory/extras/constants.py index 4d9cb26d..4099fe56 100644 --- a/src/llamafactory/extras/constants.py +++ b/src/llamafactory/extras/constants.py @@ -1078,6 +1078,89 @@ register_model_group( ) +register_model_group( + models={ + "Qwen2-0.5B": { + DownloadSource.DEFAULT: "Qwen/Qwen2-0.5B", + DownloadSource.MODELSCOPE: "qwen/Qwen2-0.5B", + }, + "Qwen2-1.5B": { + DownloadSource.DEFAULT: "Qwen/Qwen2-1.5B", + DownloadSource.MODELSCOPE: "qwen/Qwen2-1.5B", + }, + "Qwen2-7B": { + DownloadSource.DEFAULT: "Qwen/Qwen2-7B", + DownloadSource.MODELSCOPE: "qwen/Qwen2-7B", + }, + "Qwen2-72B": { + DownloadSource.DEFAULT: "Qwen/Qwen2-72B", + DownloadSource.MODELSCOPE: "qwen/Qwen2-72B", + }, + "Qwen2-MoE-57B": { + DownloadSource.DEFAULT: "Qwen/Qwen2-57B-A14B", + DownloadSource.MODELSCOPE: "qwen/Qwen2-57B-A14B", + }, + "Qwen2-0.5B-Chat": { + DownloadSource.DEFAULT: "Qwen/Qwen2-0.5B-Instruct", + DownloadSource.MODELSCOPE: "qwen/Qwen2-0.5B-Instruct", + }, + "Qwen2-1.5B-Chat": { + DownloadSource.DEFAULT: "Qwen/Qwen2-1.5B-Instruct", + DownloadSource.MODELSCOPE: "qwen/Qwen2-1.5B-Instruct", + }, + "Qwen2-7B-Chat": { + DownloadSource.DEFAULT: "Qwen/Qwen2-7B-Instruct", + DownloadSource.MODELSCOPE: "qwen/Qwen2-7B-Instruct", + }, + "Qwen2-72B-Chat": { + DownloadSource.DEFAULT: "Qwen/Qwen2-72B-Instruct", + DownloadSource.MODELSCOPE: "qwen/Qwen2-72B-Instruct", + }, + "Qwen2-MoE-57B-Chat": { + DownloadSource.DEFAULT: "Qwen/Qwen2-57B-A14B-Instruct", + DownloadSource.MODELSCOPE: "qwen/Qwen2-57B-A14B-Instruct", + }, + "Qwen2-0.5B-int8-Chat": { + DownloadSource.DEFAULT: "Qwen/Qwen2-0.5B-Instruct-GPTQ-Int8", + DownloadSource.MODELSCOPE: "qwen/Qwen2-0.5B-Instruct-GPTQ-Int8", + }, + "Qwen2-0.5B-int4-Chat": { + DownloadSource.DEFAULT: "Qwen/Qwen2-0.5B-Instruct-AWQ", + DownloadSource.MODELSCOPE: "qwen/Qwen2-0.5B-Instruct-AWQ", + }, + "Qwen2-1.5B-int8-Chat": { + DownloadSource.DEFAULT: "Qwen/Qwen2-1.5B-Instruct-GPTQ-Int8", + DownloadSource.MODELSCOPE: "qwen/Qwen2-1.5B-Instruct-GPTQ-Int8", + }, + "Qwen2-1.5B-int4-Chat": { + DownloadSource.DEFAULT: "Qwen/Qwen2-1.5B-Instruct-AWQ", + DownloadSource.MODELSCOPE: "qwen/Qwen2-1.5B-Instruct-AWQ", + }, + "Qwen2-7B-int8-Chat": { + DownloadSource.DEFAULT: "Qwen/Qwen2-7B-Instruct-GPTQ-Int8", + DownloadSource.MODELSCOPE: "qwen/Qwen2-7B-Instruct-GPTQ-Int8", + }, + "Qwen2-7B-int4-Chat": { + DownloadSource.DEFAULT: "Qwen/Qwen2-7B-Instruct-AWQ", + DownloadSource.MODELSCOPE: "qwen/Qwen2-7B-Instruct-AWQ", + }, + "Qwen2-72B-int8-Chat": { + DownloadSource.DEFAULT: "Qwen/Qwen2-72B-Instruct-GPTQ-Int8", + DownloadSource.MODELSCOPE: "qwen/Qwen2-72B-Instruct-GPTQ-Int8", + }, + 
"Qwen2-72B-int4-Chat": { + DownloadSource.DEFAULT: "Qwen/Qwen2-72B-Instruct-AWQ", + DownloadSource.MODELSCOPE: "qwen/Qwen2-72B-Instruct-AWQ", + }, + "Qwen2-MoE-57B-int4-Chat": { + DownloadSource.DEFAULT: "Qwen/Qwen2-57B-A14B-Instruct-GPTQ-Int4", + DownloadSource.MODELSCOPE: "qwen/Qwen2-57B-A14B-Instruct-GPTQ-Int4", + }, + }, + template="qwen", +) + + register_model_group( models={ "SOLAR-10.7B": { From b5421ac8ebf5ade217fd4fd82ebe8b7e80f33194 Mon Sep 17 00:00:00 2001 From: hoshi-hiyouga Date: Fri, 7 Jun 2024 03:38:04 +0800 Subject: [PATCH 091/162] Update supervised.py Former-commit-id: 67625b5278a839c12a3e4245f9e90af67d8b11b4 --- .../data/processors/supervised.py | 233 ++++++++---------- 1 file changed, 107 insertions(+), 126 deletions(-) diff --git a/src/llamafactory/data/processors/supervised.py b/src/llamafactory/data/processors/supervised.py index eaceb5b8..cd49fd0c 100644 --- a/src/llamafactory/data/processors/supervised.py +++ b/src/llamafactory/data/processors/supervised.py @@ -1,10 +1,10 @@ -import itertools +import bisect from collections import defaultdict -from typing import TYPE_CHECKING, Any, Dict, List, Optional +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Sequence, Tuple from ...extras.constants import IGNORE_INDEX from ...extras.logging import get_logger -from .mm_utils import get_paligemma_token_type_ids, get_pixel_values +from .mm_utils import get_paligemma_token_type_ids, get_pixel_values, greedy_knapsack if TYPE_CHECKING: @@ -18,29 +18,19 @@ if TYPE_CHECKING: logger = get_logger(__name__) -def binary_search_for_fit(numbers, capacity): +def search_for_fit(numbers: Sequence[int], capacity: int) -> int: + r""" + Finds the index of largest number that fits into the knapsack with the given capacity. """ - Perform binary search to find the largest number that fits into the knapsack with the given capacity. - """ - left, right = 0, len(numbers) - 1 - result = -1 # If no number fits, return -1 - - while left <= right: - mid = (left + right) // 2 - if numbers[mid] <= capacity: - result = mid - left = mid + 1 - else: - right = mid - 1 - - return result + index = bisect.bisect(numbers, capacity) + return -1 if index == 0 else (index - 1) -def efficient_greedy_knapsack(numbers, capacity): - """ +def greedy_knapsack(numbers: List[int], capacity: int) -> List[List[int]]: + r""" An efficient greedy algorithm with binary search for the knapsack problem. 
""" - numbers.sort() # Sort numbers in ascending order for binary search + numbers.sort() # sort numbers in ascending order for binary search knapsacks = [] while numbers: @@ -48,22 +38,60 @@ def efficient_greedy_knapsack(numbers, capacity): remaining_capacity = capacity while True: - index = binary_search_for_fit(numbers, remaining_capacity) + index = search_for_fit(numbers, remaining_capacity) if index == -1: - break # No more numbers fit in this knapsack + break # no more numbers fit in this knapsack - # Add the found number to the knapsack and update the remaining capacity - current_knapsack.append(numbers[index]) - remaining_capacity -= numbers[index] - - # Remove the number from the list - numbers.pop(index) + remaining_capacity -= numbers[index] # update the remaining capacity + current_knapsack.append(numbers.pop(index)) # add the number to knapsack knapsacks.append(current_knapsack) return knapsacks +def _encode_supervised_example( + prompt: Sequence[Dict[str, str]], + response: Sequence[Dict[str, str]], + system: Optional[str], + tools: Optional[str], + template: "Template", + tokenizer: "PreTrainedTokenizer", + processor: Optional["ProcessorMixin"], + data_args: "DataArguments", +) -> Tuple[List[int], List[int]]: + if processor is not None and not hasattr(processor, "image_seq_length"): # llava-like models + prompt[0]["content"] = template.image_token + prompt[0]["content"] + + messages = prompt + response + input_ids, labels = [], [] + + if processor is not None and hasattr(processor, "image_seq_length"): # paligemma models + image_token_id = tokenizer.convert_tokens_to_ids(template.image_token) + input_ids += [image_token_id] * getattr(processor, "image_seq_length") + labels += [IGNORE_INDEX] * getattr(processor, "image_seq_length") + + encoded_pairs = template.encode_multiturn( + tokenizer, messages, system, tools, data_args.cutoff_len, data_args.reserved_label_len + ) + for turn_idx, (source_ids, target_ids) in enumerate(encoded_pairs): + if data_args.train_on_prompt: + source_mask = source_ids + elif turn_idx != 0 and template.efficient_eos: + source_mask = [tokenizer.eos_token_id] + [IGNORE_INDEX] * (len(source_ids) - 1) + else: + source_mask = [IGNORE_INDEX] * len(source_ids) + + input_ids += source_ids + target_ids + labels += source_mask + target_ids + + if template.efficient_eos: + input_ids += [tokenizer.eos_token_id] + labels += [tokenizer.eos_token_id] + + return input_ids, labels + + def preprocess_supervised_dataset( examples: Dict[str, List[Any]], template: "Template", @@ -84,41 +112,16 @@ def preprocess_supervised_dataset( logger.warning("Dropped invalid example: {}".format(examples["prompt"][i] + examples["response"][i])) continue - if processor is not None and not hasattr(processor, "image_seq_length"): # llava-like models - examples["prompt"][i][0]["content"] = template.image_token + examples["prompt"][i][0]["content"] - - messages = examples["prompt"][i] + examples["response"][i] - input_ids, labels = [], [] - - if processor is not None and hasattr(processor, "image_seq_length"): # paligemma models - image_token_id = tokenizer.convert_tokens_to_ids(template.image_token) - input_ids += [image_token_id] * getattr(processor, "image_seq_length") - labels += [IGNORE_INDEX] * getattr(processor, "image_seq_length") - - for turn_idx, (source_ids, target_ids) in enumerate( - template.encode_multiturn( - tokenizer, - messages, - examples["system"][i], - examples["tools"][i], - data_args.cutoff_len, - data_args.reserved_label_len, - ) - ): - if 
data_args.train_on_prompt: - source_mask = source_ids - elif turn_idx != 0 and template.efficient_eos: - source_mask = [tokenizer.eos_token_id] + [IGNORE_INDEX] * (len(source_ids) - 1) - else: - source_mask = [IGNORE_INDEX] * len(source_ids) - - input_ids += source_ids + target_ids - labels += source_mask + target_ids - - if template.efficient_eos: - input_ids += [tokenizer.eos_token_id] - labels += [tokenizer.eos_token_id] - + input_ids, labels = _encode_supervised_example( + prompt=examples["prompt"][i], + response=examples["response"][i], + system=examples["system"][i], + tools=examples["tools"][i], + template=template, + tokenizer=tokenizer, + processor=processor, + data_args=data_args, + ) model_inputs["input_ids"].append(input_ids) model_inputs["attention_mask"].append([1] * len(input_ids)) model_inputs["labels"].append(labels) @@ -138,76 +141,54 @@ def preprocess_packed_supervised_dataset( ) -> Dict[str, List[List[int]]]: # build inputs with format ` X1 Y1 X2 Y2 ` # and labels with format ` ... Y1 ... Y2 ` - model_inputs = {"input_ids": [], "attention_mask": [], "labels": []} - input_ids, labels = [], [] + valid_num = 0 + batch_input_ids, batch_labels = [], [] + lengths = [] + length2indexes = defaultdict(list) for i in range(len(examples["prompt"])): if len(examples["prompt"][i]) % 2 != 1 or len(examples["response"][i]) != 1: logger.warning("Dropped invalid example: {}".format(examples["prompt"][i] + examples["response"][i])) continue - messages = examples["prompt"][i] + examples["response"][i] - for source_ids, target_ids in template.encode_multiturn( - tokenizer, messages, examples["system"][i], examples["tools"][i] - ): - if data_args.train_on_prompt: - source_mask = source_ids - else: - source_mask = [IGNORE_INDEX] * len(source_ids) - - input_ids.append(source_ids + target_ids) - labels.append(source_mask + target_ids) - - # prepare for packing - lengths = [] - length2examples_idx = defaultdict(list) - for idx, example in enumerate(input_ids): - length = len(example) + input_ids, labels = _encode_supervised_example( + prompt=examples["prompt"][i], + response=examples["response"][i], + system=examples["system"][i], + tools=examples["tools"][i], + template=template, + tokenizer=tokenizer, + processor=None, + data_args=data_args, + ) + length = len(input_ids) if length > data_args.cutoff_len: - logger.warning("Dropped example with length {} > cutoff_len {}".format(length, data_args.cutoff_len)) - continue - lengths.append(length) - length2examples_idx[length].append(idx) - - # cutoff_len - 1 for efficient_eos - knapsacks = efficient_greedy_knapsack(lengths, data_args.cutoff_len - int(template.efficient_eos)) - - for knapsack in knapsacks: - packed_input_ids = [] - packed_labels = [] - - total_length = 0 - for length in knapsack: - total_length += length - idx = length2examples_idx[length].pop() - packed_input_ids.append(input_ids[idx]) - packed_labels.append(labels[idx]) - - # padding to cutoff_len - if total_length < data_args.cutoff_len: - pad_length = data_args.cutoff_len - total_length - if template.efficient_eos: - # 确保有 eos - packed_input_ids.append([tokenizer.eos_token_id] * pad_length) - packed_labels.append([tokenizer.eos_token_id] + [IGNORE_INDEX] * (pad_length - 1)) - else: - # 无 eos 的情况下,使用 0 填充? 
- packed_input_ids.append([0] * pad_length) - packed_labels.append([tokenizer.eos_token_id] + [IGNORE_INDEX] * (pad_length - 1)) - - elif total_length == data_args.cutoff_len: - pad_length = 0 + logger.warning("Dropped lengthy example with length {} > {}.".format(length, data_args.cutoff_len)) else: - logger.warning( - "Dropped packed example with total length {} > cutoff_len {}".format( - total_length, data_args.cutoff_len - ) - ) - continue + lengths.append(length) + length2indexes[length].append(valid_num) + batch_input_ids.append(input_ids) + batch_labels.append(labels) + valid_num += 1 - # concat all - model_inputs["input_ids"].append(list(itertools.chain(*packed_input_ids))) - model_inputs["labels"].append(list(itertools.chain(*packed_labels))) - model_inputs["attention_mask"].append([1] * total_length + [0] * pad_length) + model_inputs = {"input_ids": [], "attention_mask": [], "labels": []} + knapsacks = greedy_knapsack(lengths, data_args.cutoff_len) + for knapsack in knapsacks: + packed_input_ids, packed_labels = [], [] + for length in knapsack: + index = length2indexes[length].pop() + packed_input_ids += batch_input_ids[index] + packed_labels += batch_labels[index] + + if len(packed_input_ids) <= data_args.cutoff_len: + pad_length = data_args.cutoff_len - len(packed_input_ids) + packed_input_ids += [tokenizer.pad_token_id] * pad_length + packed_labels += [IGNORE_INDEX] * pad_length + else: + raise ValueError("The length of packed example exceeds the cutoff length.") + + model_inputs["input_ids"].append(packed_input_ids) + model_inputs["attention_mask"].append([1] * len(packed_input_ids)) + model_inputs["labels"].append(packed_labels) return model_inputs From 9de42c4b3a0fa3087287328553d14dfcdd1bbe96 Mon Sep 17 00:00:00 2001 From: hoshi-hiyouga Date: Fri, 7 Jun 2024 03:38:23 +0800 Subject: [PATCH 092/162] Update supervised.py Former-commit-id: 49993c4f4e1f871a22ff0196afe60026b668a4dc --- src/llamafactory/data/processors/supervised.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llamafactory/data/processors/supervised.py b/src/llamafactory/data/processors/supervised.py index cd49fd0c..502b591c 100644 --- a/src/llamafactory/data/processors/supervised.py +++ b/src/llamafactory/data/processors/supervised.py @@ -4,7 +4,7 @@ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Sequence, Tuple from ...extras.constants import IGNORE_INDEX from ...extras.logging import get_logger -from .mm_utils import get_paligemma_token_type_ids, get_pixel_values, greedy_knapsack +from .mm_utils import get_paligemma_token_type_ids, get_pixel_values if TYPE_CHECKING: From 4a4b8d0b399089b46d939aa329c7304d4e1fa442 Mon Sep 17 00:00:00 2001 From: hoshi-hiyouga Date: Fri, 7 Jun 2024 03:42:08 +0800 Subject: [PATCH 093/162] Update supervised.py Former-commit-id: 04b6c2a754e602e0b698cfe6c255c2f2486d8865 --- src/llamafactory/data/processors/supervised.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/llamafactory/data/processors/supervised.py b/src/llamafactory/data/processors/supervised.py index 502b591c..a340a1ab 100644 --- a/src/llamafactory/data/processors/supervised.py +++ b/src/llamafactory/data/processors/supervised.py @@ -179,15 +179,16 @@ def preprocess_packed_supervised_dataset( packed_input_ids += batch_input_ids[index] packed_labels += batch_labels[index] - if len(packed_input_ids) <= data_args.cutoff_len: + if len(packed_input_ids) < data_args.cutoff_len: pad_length = data_args.cutoff_len - len(packed_input_ids) packed_input_ids += 
[tokenizer.pad_token_id] * pad_length packed_labels += [IGNORE_INDEX] * pad_length - else: - raise ValueError("The length of packed example exceeds the cutoff length.") + + if len(packed_input_ids) != data_args.cutoff_len: + raise ValueError("The length of packed example should be identical to the cutoff length.") model_inputs["input_ids"].append(packed_input_ids) - model_inputs["attention_mask"].append([1] * len(packed_input_ids)) + model_inputs["attention_mask"].append([1] * data_args.cutoff_len) model_inputs["labels"].append(packed_labels) return model_inputs From 72c91be9435197edb8ef616ac7d3381ab9e0e803 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Fri, 7 Jun 2024 04:15:40 +0800 Subject: [PATCH 094/162] update data processors Former-commit-id: 04b138cbcb8b9a72e4bbda6c65843bb459e525e7 --- src/llamafactory/cli.py | 2 +- src/llamafactory/data/processors/feedback.py | 104 ++++++++++-------- src/llamafactory/data/processors/pairwise.py | 82 ++++++++------ .../data/processors/processor_utils.py | 41 ++++++- .../data/processors/supervised.py | 35 +----- .../data/processors/unsupervised.py | 65 +++++++---- 6 files changed, 190 insertions(+), 139 deletions(-) diff --git a/src/llamafactory/cli.py b/src/llamafactory/cli.py index 092f4cf7..b9e734e4 100644 --- a/src/llamafactory/cli.py +++ b/src/llamafactory/cli.py @@ -72,7 +72,7 @@ def main(): elif command == Command.EXPORT: export_model() elif command == Command.TRAIN: - disable_torchrun = os.environ.get("DISABLE_TORCHRUN", "0").lower() in ["true", "1"] + disable_torchrun = os.environ.get("TORCHRUN_DISABLED", "0").lower() in ["true", "1"] if disable_torchrun and get_device_count() > 1: logger.warning("`torchrun` cannot be disabled when device count > 1.") disable_torchrun = False diff --git a/src/llamafactory/data/processors/feedback.py b/src/llamafactory/data/processors/feedback.py index dc7d817c..98d83658 100644 --- a/src/llamafactory/data/processors/feedback.py +++ b/src/llamafactory/data/processors/feedback.py @@ -1,4 +1,4 @@ -from typing import TYPE_CHECKING, Any, Dict, List, Optional +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Sequence, Tuple from ...extras.constants import IGNORE_INDEX from ...extras.logging import get_logger @@ -16,6 +16,55 @@ if TYPE_CHECKING: logger = get_logger(__name__) +def _encode_feedback_example( + prompt: Sequence[Dict[str, str]], + response: Sequence[Dict[str, str]], + kl_response: Sequence[Dict[str, str]], + system: Optional[str], + tools: Optional[str], + template: "Template", + tokenizer: "PreTrainedTokenizer", + processor: Optional["ProcessorMixin"], + data_args: "DataArguments", +) -> Tuple[List[int], List[int], List[int], List[int], bool]: + if processor is not None and not hasattr(processor, "image_seq_length"): # llava-like models + prompt[0]["content"] = template.image_token + prompt[0]["content"] + + if response[0]["content"]: # desired example + kto_tag = True + messages = prompt + [response[0]] + else: # undesired example + kto_tag = False + messages = prompt + [response[1]] + + if kl_response[0]["content"]: + kl_messages = prompt + [kl_response[0]] + else: + kl_messages = prompt + [kl_response[1]] + + prompt_ids, response_ids = template.encode_oneturn( + tokenizer, messages, system, tools, data_args.cutoff_len, data_args.reserved_label_len + ) + _, kl_response_ids = template.encode_oneturn( + tokenizer, kl_messages, system, tools, data_args.cutoff_len, data_args.reserved_label_len + ) + + if template.efficient_eos: + response_ids += [tokenizer.eos_token_id] + 
kl_response_ids += [tokenizer.eos_token_id] + + if processor is not None and hasattr(processor, "image_seq_length"): # paligemma models + image_token_id = tokenizer.convert_tokens_to_ids(template.image_token) + prompt_ids = [image_token_id] * getattr(processor, "image_seq_length") + prompt_ids + + input_ids = prompt_ids + response_ids + labels = [IGNORE_INDEX] * len(prompt_ids) + response_ids + kl_input_ids = prompt_ids + kl_response_ids + kl_labels = [IGNORE_INDEX] * len(prompt_ids) + kl_response_ids + + return input_ids, labels, kl_input_ids, kl_labels, kto_tag + + def preprocess_feedback_dataset( examples: Dict[str, List[Any]], template: "Template", @@ -45,50 +94,17 @@ def preprocess_feedback_dataset( logger.warning("Dropped invalid example: {}".format(examples["prompt"][i] + examples["response"][i])) continue - if processor is not None and not hasattr(processor, "image_seq_length"): # llava-like models - examples["prompt"][i][0]["content"] = template.image_token + examples["prompt"][i][0]["content"] - - if examples["response"][i][0]["content"]: # desired example - kto_tag = True - messages = examples["prompt"][i] + [examples["response"][i][0]] - else: # undesired example - kto_tag = False - messages = examples["prompt"][i] + [examples["response"][i][1]] - - if kl_response[i][0]["content"]: - kl_messages = examples["prompt"][i] + [kl_response[i][0]] - else: - kl_messages = examples["prompt"][i] + [kl_response[i][1]] - - prompt_ids, response_ids = template.encode_oneturn( - tokenizer, - messages, - examples["system"][i], - examples["tools"][i], - data_args.cutoff_len, - data_args.reserved_label_len, + input_ids, labels, kl_input_ids, kl_labels, kto_tag = _encode_feedback_example( + prompt=examples["prompt"][i], + response=examples["response"][i], + kl_response=kl_response[i], + system=examples["system"][i], + tools=examples["tools"][i], + template=template, + tokenizer=tokenizer, + processor=processor, + data_args=data_args, ) - _, kl_response_ids = template.encode_oneturn( - tokenizer, - kl_messages, - examples["system"][i], - examples["tools"][i], - data_args.cutoff_len, - data_args.reserved_label_len, - ) - - if template.efficient_eos: - response_ids += [tokenizer.eos_token_id] - kl_response_ids += [tokenizer.eos_token_id] - - if processor is not None and hasattr(processor, "image_seq_length"): # paligemma models - image_token_id = tokenizer.convert_tokens_to_ids(template.image_token) - prompt_ids = [image_token_id] * getattr(processor, "image_seq_length") + prompt_ids - - input_ids = prompt_ids + response_ids - labels = [IGNORE_INDEX] * len(prompt_ids) + response_ids - kl_input_ids = prompt_ids + kl_response_ids - kl_labels = [IGNORE_INDEX] * len(prompt_ids) + kl_response_ids model_inputs["input_ids"].append(input_ids) model_inputs["attention_mask"].append([1] * len(input_ids)) model_inputs["labels"].append(labels) diff --git a/src/llamafactory/data/processors/pairwise.py b/src/llamafactory/data/processors/pairwise.py index 8ad3979f..fe984efa 100644 --- a/src/llamafactory/data/processors/pairwise.py +++ b/src/llamafactory/data/processors/pairwise.py @@ -1,4 +1,4 @@ -from typing import TYPE_CHECKING, Any, Dict, List, Optional +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Sequence, Tuple from ...extras.constants import IGNORE_INDEX from ...extras.logging import get_logger @@ -16,6 +16,44 @@ if TYPE_CHECKING: logger = get_logger(__name__) +def _encode_pairwise_example( + prompt: Sequence[Dict[str, str]], + response: Sequence[Dict[str, str]], + system: Optional[str], + 
tools: Optional[str], + template: "Template", + tokenizer: "PreTrainedTokenizer", + processor: Optional["ProcessorMixin"], + data_args: "DataArguments", +) -> Tuple[List[int], List[int], List[int], List[int]]: + if processor is not None and not hasattr(processor, "image_seq_length"): # llava-like models + prompt[0]["content"] = template.image_token + prompt[0]["content"] + + chosen_messages = prompt + [response[0]] + rejected_messages = prompt + [response[1]] + prompt_ids, chosen_ids = template.encode_oneturn( + tokenizer, chosen_messages, system, tools, data_args.cutoff_len, data_args.reserved_label_len + ) + _, rejected_ids = template.encode_oneturn( + tokenizer, rejected_messages, system, tools, data_args.cutoff_len, data_args.reserved_label_len + ) + + if template.efficient_eos: + chosen_ids += [tokenizer.eos_token_id] + rejected_ids += [tokenizer.eos_token_id] + + if processor is not None and hasattr(processor, "image_seq_length"): # paligemma models + image_token_id = tokenizer.convert_tokens_to_ids(template.image_token) + prompt_ids = [image_token_id] * getattr(processor, "image_seq_length") + prompt_ids + + chosen_input_ids = prompt_ids + chosen_ids + chosen_labels = [IGNORE_INDEX] * len(prompt_ids) + chosen_ids + rejected_input_ids = prompt_ids + rejected_ids + rejected_labels = [IGNORE_INDEX] * len(prompt_ids) + rejected_ids + + return chosen_input_ids, chosen_labels, rejected_input_ids, rejected_labels + + def preprocess_pairwise_dataset( examples: Dict[str, List[Any]], template: "Template", @@ -43,40 +81,16 @@ def preprocess_pairwise_dataset( logger.warning("Dropped invalid example: {}".format(examples["prompt"][i] + examples["response"][i])) continue - if processor is not None and not hasattr(processor, "image_seq_length"): # llava-like models - examples["prompt"][i][0]["content"] = template.image_token + examples["prompt"][i][0]["content"] - - chosen_messages = examples["prompt"][i] + [examples["response"][i][0]] - rejected_messages = examples["prompt"][i] + [examples["response"][i][1]] - prompt_ids, chosen_ids = template.encode_oneturn( - tokenizer, - chosen_messages, - examples["system"][i], - examples["tools"][i], - data_args.cutoff_len, - data_args.reserved_label_len, + chosen_input_ids, chosen_labels, rejected_input_ids, rejected_labels = _encode_pairwise_example( + prompt=examples["prompt"][i], + response=examples["response"][i], + system=examples["system"][i], + tools=examples["tools"][i], + template=template, + tokenizer=tokenizer, + processor=processor, + data_args=data_args, ) - _, rejected_ids = template.encode_oneturn( - tokenizer, - rejected_messages, - examples["system"][i], - examples["tools"][i], - data_args.cutoff_len, - data_args.reserved_label_len, - ) - - if template.efficient_eos: - chosen_ids += [tokenizer.eos_token_id] - rejected_ids += [tokenizer.eos_token_id] - - if processor is not None and hasattr(processor, "image_seq_length"): # paligemma models - image_token_id = tokenizer.convert_tokens_to_ids(template.image_token) - prompt_ids = [image_token_id] * getattr(processor, "image_seq_length") + prompt_ids - - chosen_input_ids = prompt_ids + chosen_ids - chosen_labels = [IGNORE_INDEX] * len(prompt_ids) + chosen_ids - rejected_input_ids = prompt_ids + rejected_ids - rejected_labels = [IGNORE_INDEX] * len(prompt_ids) + rejected_ids model_inputs["chosen_input_ids"].append(chosen_input_ids) model_inputs["chosen_attention_mask"].append([1] * len(chosen_input_ids)) model_inputs["chosen_labels"].append(chosen_labels) diff --git 
a/src/llamafactory/data/processors/processor_utils.py b/src/llamafactory/data/processors/processor_utils.py index abc7c4b2..9903a053 100644 --- a/src/llamafactory/data/processors/processor_utils.py +++ b/src/llamafactory/data/processors/processor_utils.py @@ -1,3 +1,4 @@ +import bisect from typing import TYPE_CHECKING, List, Sequence from ...extras.packages import is_pillow_available @@ -14,14 +15,50 @@ if TYPE_CHECKING: from transformers.image_processing_utils import BaseImageProcessor +def search_for_fit(numbers: Sequence[int], capacity: int) -> int: + r""" + Finds the index of largest number that fits into the knapsack with the given capacity. + """ + index = bisect.bisect(numbers, capacity) + return -1 if index == 0 else (index - 1) + + +def greedy_knapsack(numbers: List[int], capacity: int) -> List[List[int]]: + r""" + An efficient greedy algorithm with binary search for the knapsack problem. + """ + numbers.sort() # sort numbers in ascending order for binary search + knapsacks = [] + + while numbers: + current_knapsack = [] + remaining_capacity = capacity + + while True: + index = search_for_fit(numbers, remaining_capacity) + if index == -1: + break # no more numbers fit in this knapsack + + remaining_capacity -= numbers[index] # update the remaining capacity + current_knapsack.append(numbers.pop(index)) # add the number to knapsack + + knapsacks.append(current_knapsack) + + return knapsacks + + def get_pixel_values(images: Sequence["ImageObject"], processor: "ProcessorMixin") -> "NDArray": - # process visual inputs (currently only supports a single image) + r""" + Processes visual inputs. (currently only supports a single image) + """ image_processor: "BaseImageProcessor" = getattr(processor, "image_processor") image = images[0] if len(images) != 0 else Image.new("RGB", (100, 100), (255, 255, 255)) return image_processor(image, return_tensors="pt")["pixel_values"][0] # shape (C, H, W) def get_paligemma_token_type_ids(input_len: int, processor: "ProcessorMixin") -> List[int]: - # get paligemma token type ids for computing loss + r""" + Gets paligemma token type ids for computing loss. + """ image_seq_length = getattr(processor, "image_seq_length") return [0] * image_seq_length + [1] * (input_len - image_seq_length) diff --git a/src/llamafactory/data/processors/supervised.py b/src/llamafactory/data/processors/supervised.py index 188c9f80..19d60280 100644 --- a/src/llamafactory/data/processors/supervised.py +++ b/src/llamafactory/data/processors/supervised.py @@ -1,10 +1,9 @@ -import bisect from collections import defaultdict from typing import TYPE_CHECKING, Any, Dict, List, Optional, Sequence, Tuple from ...extras.constants import IGNORE_INDEX from ...extras.logging import get_logger -from .processor_utils import get_paligemma_token_type_ids, get_pixel_values +from .processor_utils import get_paligemma_token_type_ids, get_pixel_values, greedy_knapsack if TYPE_CHECKING: @@ -18,38 +17,6 @@ if TYPE_CHECKING: logger = get_logger(__name__) -def search_for_fit(numbers: Sequence[int], capacity: int) -> int: - r""" - Finds the index of largest number that fits into the knapsack with the given capacity. - """ - index = bisect.bisect(numbers, capacity) - return -1 if index == 0 else (index - 1) - - -def greedy_knapsack(numbers: List[int], capacity: int) -> List[List[int]]: - r""" - An efficient greedy algorithm with binary search for the knapsack problem. 
- """ - numbers.sort() # sort numbers in ascending order for binary search - knapsacks = [] - - while numbers: - current_knapsack = [] - remaining_capacity = capacity - - while True: - index = search_for_fit(numbers, remaining_capacity) - if index == -1: - break # no more numbers fit in this knapsack - - remaining_capacity -= numbers[index] # update the remaining capacity - current_knapsack.append(numbers.pop(index)) # add the number to knapsack - - knapsacks.append(current_knapsack) - - return knapsacks - - def _encode_supervised_example( prompt: Sequence[Dict[str, str]], response: Sequence[Dict[str, str]], diff --git a/src/llamafactory/data/processors/unsupervised.py b/src/llamafactory/data/processors/unsupervised.py index e00bde55..f711eeac 100644 --- a/src/llamafactory/data/processors/unsupervised.py +++ b/src/llamafactory/data/processors/unsupervised.py @@ -1,4 +1,4 @@ -from typing import TYPE_CHECKING, Any, Dict, List, Optional +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Sequence, Tuple from ...extras.logging import get_logger from ..data_utils import Role @@ -16,6 +16,37 @@ if TYPE_CHECKING: logger = get_logger(__name__) +def _encode_unsupervised_example( + prompt: Sequence[Dict[str, str]], + response: Sequence[Dict[str, str]], + system: Optional[str], + tools: Optional[str], + template: "Template", + tokenizer: "PreTrainedTokenizer", + processor: Optional["ProcessorMixin"], + data_args: "DataArguments", +) -> Tuple[List[int], List[int]]: + if processor is not None and not hasattr(processor, "image_seq_length"): # llava-like models + prompt[0]["content"] = template.image_token + prompt[0]["content"] + + if len(response) == 1: + messages = prompt + response + else: + messages = prompt + [{"role": Role.ASSISTANT.value, "content": ""}] + + input_ids, labels = template.encode_oneturn( + tokenizer, messages, system, tools, data_args.cutoff_len, data_args.reserved_label_len + ) + if template.efficient_eos: + labels += [tokenizer.eos_token_id] + + if processor is not None and hasattr(processor, "image_seq_length"): # paligemma models + image_token_id = tokenizer.convert_tokens_to_ids(template.image_token) + input_ids = [image_token_id] * getattr(processor, "image_seq_length") + input_ids + + return input_ids, labels + + def preprocess_unsupervised_dataset( examples: Dict[str, List[Any]], template: "Template", @@ -35,30 +66,16 @@ def preprocess_unsupervised_dataset( logger.warning("Dropped invalid example: {}".format(examples["prompt"][i] + examples["response"][i])) continue - if processor is not None and not hasattr(processor, "image_seq_length"): # llava-like models - examples["prompt"][i][0]["content"] = template.image_token + examples["prompt"][i][0]["content"] - - if len(examples["response"][i]) == 1: - messages = examples["prompt"][i] + examples["response"][i] - else: - messages = examples["prompt"][i] + [{"role": Role.ASSISTANT.value, "content": ""}] - - input_ids, labels = template.encode_oneturn( - tokenizer, - messages, - examples["system"][i], - examples["tools"][i], - data_args.cutoff_len, - data_args.reserved_label_len, + input_ids, labels = _encode_unsupervised_example( + prompt=examples["prompt"][i], + response=examples["response"][i], + system=examples["system"][i], + tools=examples["tools"][i], + template=template, + tokenizer=tokenizer, + processor=processor, + data_args=data_args, ) - - if template.efficient_eos: - labels += [tokenizer.eos_token_id] - - if processor is not None and hasattr(processor, "image_seq_length"): # paligemma models - 
image_token_id = tokenizer.convert_tokens_to_ids(template.image_token) - input_ids = [image_token_id] * getattr(processor, "image_seq_length") + input_ids - model_inputs["input_ids"].append(input_ids) model_inputs["attention_mask"].append([1] * len(input_ids)) model_inputs["labels"].append(labels) From 8cc3bbdc62fdd1a6aca2d34410f71c0779b69337 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Fri, 7 Jun 2024 04:18:05 +0800 Subject: [PATCH 095/162] fix #4120 Former-commit-id: 2a44da678a5e360a9c0f9056397ac9e801329321 --- README.md | 2 +- README_zh.md | 2 +- requirements.txt | 2 +- src/llamafactory/extras/misc.py | 2 +- src/llamafactory/train/dpo/trainer.py | 9 +--- src/llamafactory/train/kto/trainer.py | 60 ++++++++----------------- src/llamafactory/train/trainer_utils.py | 24 +++++++++- 7 files changed, 47 insertions(+), 54 deletions(-) diff --git a/README.md b/README.md index 1358b6e8..036ee648 100644 --- a/README.md +++ b/README.md @@ -298,7 +298,7 @@ huggingface-cli login | datasets | 2.16.0 | 2.19.2 | | accelerate | 0.30.1 | 0.30.1 | | peft | 0.11.1 | 0.11.1 | -| trl | 0.9.3 | 0.9.3 | +| trl | 0.8.6 | 0.9.3 | | Optional | Minimum | Recommend | | ------------ | ------- | --------- | diff --git a/README_zh.md b/README_zh.md index becec988..fb0d790e 100644 --- a/README_zh.md +++ b/README_zh.md @@ -298,7 +298,7 @@ huggingface-cli login | datasets | 2.16.0 | 2.19.2 | | accelerate | 0.30.1 | 0.30.1 | | peft | 0.11.1 | 0.11.1 | -| trl | 0.9.3 | 0.9.3 | +| trl | 0.8.6 | 0.9.3 | | 可选项 | 至少 | 推荐 | | ------------ | ------- | --------- | diff --git a/requirements.txt b/requirements.txt index 7b6cbee9..9e00555e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,7 +2,7 @@ transformers>=4.41.2 datasets>=2.16.0 accelerate>=0.30.1 peft>=0.11.1 -trl>=0.9.3 +trl>=0.8.6 gradio>=4.0.0 scipy einops diff --git a/src/llamafactory/extras/misc.py b/src/llamafactory/extras/misc.py index 48476f9c..fc33f77e 100644 --- a/src/llamafactory/extras/misc.py +++ b/src/llamafactory/extras/misc.py @@ -65,7 +65,7 @@ def check_dependencies() -> None: require_version("datasets>=2.16.0", "To fix: pip install datasets>=2.16.0") require_version("accelerate>=0.30.1", "To fix: pip install accelerate>=0.30.1") require_version("peft>=0.11.1", "To fix: pip install peft>=0.11.1") - require_version("trl>=0.9.3", "To fix: pip install trl>=0.9.3") + require_version("trl>=0.8.6", "To fix: pip install trl>=0.8.6") def count_parameters(model: torch.nn.Module) -> Tuple[int, int]: diff --git a/src/llamafactory/train/dpo/trainer.py b/src/llamafactory/train/dpo/trainer.py index f64c287f..d860b29a 100644 --- a/src/llamafactory/train/dpo/trainer.py +++ b/src/llamafactory/train/dpo/trainer.py @@ -10,7 +10,7 @@ from trl import DPOTrainer from trl.trainer import disable_dropout_in_model from ...extras.constants import IGNORE_INDEX -from ..trainer_utils import create_custom_optimzer, create_custom_scheduler, get_ref_context +from ..trainer_utils import create_custom_optimzer, create_custom_scheduler, get_batch_logps, get_ref_context if TYPE_CHECKING: @@ -155,12 +155,7 @@ class CustomDPOTrainer(DPOTrainer): all_logits: "torch.Tensor" = model(**batch, return_dict=True, use_cache=False).logits.to(torch.float32) - all_logps, valid_length = self.get_batch_logps( - logits=all_logits, - labels=batch["labels"], - is_encoder_decoder=self.is_encoder_decoder, - label_pad_token_id=self.label_pad_token_id, - ) + all_logps, valid_length = get_batch_logps(logits=all_logits, labels=batch["labels"]) if self.loss_type in ["ipo", "orpo", "simpo"]: 
all_logps = all_logps / valid_length diff --git a/src/llamafactory/train/kto/trainer.py b/src/llamafactory/train/kto/trainer.py index 1610ccfa..22a84e4a 100644 --- a/src/llamafactory/train/kto/trainer.py +++ b/src/llamafactory/train/kto/trainer.py @@ -9,7 +9,7 @@ from trl import KTOTrainer from trl.trainer import disable_dropout_in_model from ...extras.constants import IGNORE_INDEX -from ..trainer_utils import create_custom_optimzer, create_custom_scheduler, get_ref_context +from ..trainer_utils import create_custom_optimzer, create_custom_scheduler, get_batch_logps, get_ref_context if TYPE_CHECKING: @@ -98,16 +98,6 @@ class CustomKTOTrainer(KTOTrainer): output_dir = output_dir if output_dir is not None else self.args.output_dir getattr(self.processor, "image_processor").save_pretrained(output_dir) - def sft_loss(self, chosen_logits: "torch.FloatTensor", chosen_labels: "torch.LongTensor") -> "torch.Tensor": - r""" - Computes supervised cross-entropy loss of given labels under the given logits. - - Returns: - A tensor of shape (batch_size,) containing the cross-entropy loss of each samples. - """ - all_logps = self.get_batch_logps(chosen_logits, chosen_labels, average_log_prob=True) - return -all_logps - def forward( self, model: "PreTrainedModel", batch: Dict[str, "torch.Tensor"], prefix: Literal["", "kl_"] = "" ) -> Tuple["torch.Tensor", "torch.Tensor"]: @@ -127,28 +117,23 @@ class CustomKTOTrainer(KTOTrainer): logits = model(**model_inputs, return_dict=True, use_cache=False).logits.to(torch.float32) - logps = self.get_batch_logps( - logits=logits, - labels=batch["{}labels".format(prefix)], - average_log_prob=False, - is_encoder_decoder=self.is_encoder_decoder, - label_pad_token_id=self.label_pad_token_id, - ) - return logits, logps + logps, valid_length = get_batch_logps(logits=logits, labels=batch["{}labels".format(prefix)]) + return logps, logps / valid_length def concatenated_forward( self, model: "PreTrainedModel", batch: Dict[str, "torch.Tensor"] - ) -> Tuple["torch.Tensor", "torch.Tensor", "torch.Tensor", "torch.Tensor", "torch.Tensor"]: - target_logits, target_logps = self.forward(model, batch) + ) -> Tuple["torch.Tensor", "torch.Tensor", "torch.Tensor", "torch.Tensor"]: + target_logps, target_logps_avg = self.forward(model, batch) with torch.no_grad(): - _, kl_logps = self.forward(model, batch, prefix="kl_") + kl_logps, _ = self.forward(model, batch, prefix="kl_") if len(target_logps) != len(batch["kto_tags"]): raise ValueError("Mismatched shape of inputs and labels.") - chosen_logps, rejected_logps = target_logps[batch["kto_tags"]], target_logps[~batch["kto_tags"]] - chosen_logits, rejected_logits = target_logits[batch["kto_tags"]], target_logits[~batch["kto_tags"]] - return chosen_logps, rejected_logps, chosen_logits, rejected_logits, kl_logps + chosen_logps = target_logps[batch["kto_tags"]] + rejected_logps = target_logps[~batch["kto_tags"]] + chosen_logps_avg = target_logps_avg[batch["kto_tags"]] + return chosen_logps, rejected_logps, kl_logps, chosen_logps_avg def compute_reference_log_probs( self, model: "PreTrainedModel", batch: Dict[str, "torch.Tensor"] @@ -164,13 +149,9 @@ class CustomKTOTrainer(KTOTrainer): ref_context = nullcontext() with torch.no_grad(), ref_context: - ( - reference_chosen_logps, - reference_rejected_logps, - _, - _, - reference_kl_logps, - ) = self.concatenated_forward(ref_model, batch) + reference_chosen_logps, reference_rejected_logps, reference_kl_logps, _ = self.concatenated_forward( + ref_model, batch + ) return reference_chosen_logps, 
reference_rejected_logps, reference_kl_logps @@ -183,14 +164,9 @@ class CustomKTOTrainer(KTOTrainer): Computes the DPO loss and other metrics for the given batch of inputs for train or test. """ metrics = {} - ( - policy_chosen_logps, - policy_rejected_logps, - policy_chosen_logits, - _, - policy_kl_logps, - ) = self.concatenated_forward(model, batch) - + policy_chosen_logps, policy_rejected_logps, policy_kl_logps, policy_chosen_logps_avg = ( + self.concatenated_forward(model, batch) + ) reference_chosen_logps, reference_rejected_logps, reference_kl_logps = self.compute_reference_log_probs( model, batch ) @@ -205,8 +181,8 @@ class CustomKTOTrainer(KTOTrainer): losses = losses.nanmean() if self.ftx_gamma > 1e-6 and len(policy_chosen_logps) > 0: # remember to rescale - sft_loss = self.sft_loss(policy_chosen_logits, batch["labels"][batch["kto_tags"]]) - losses += self.ftx_gamma * sft_loss.nanmean() / len(policy_chosen_logits) * len(batch["labels"]) + sft_loss = -policy_chosen_logps_avg + losses += self.ftx_gamma * sft_loss.nanmean() / len(policy_chosen_logps) * len(batch["labels"]) num_chosen = torch.Tensor([len(chosen_rewards)]).to(self.accelerator.device) num_rejected = torch.Tensor([len(rejected_rewards)]).to(self.accelerator.device) diff --git a/src/llamafactory/train/trainer_utils.py b/src/llamafactory/train/trainer_utils.py index 2b33af1c..0ddcdb11 100644 --- a/src/llamafactory/train/trainer_utils.py +++ b/src/llamafactory/train/trainer_utils.py @@ -1,5 +1,5 @@ from contextlib import contextmanager -from typing import TYPE_CHECKING, Callable, Dict, List, Optional, Union +from typing import TYPE_CHECKING, Callable, Dict, List, Optional, Tuple, Union import torch from transformers import Trainer @@ -7,6 +7,7 @@ from transformers.optimization import get_scheduler from transformers.pytorch_utils import ALL_LAYERNORM_LAYERS from transformers.trainer_pt_utils import get_parameter_names +from ..extras.constants import IGNORE_INDEX from ..extras.logging import get_logger from ..extras.packages import is_galore_available from ..hparams import FinetuningArguments, ModelArguments @@ -399,3 +400,24 @@ def create_custom_scheduler( for param in optimizer_dict.keys(): param.register_post_accumulate_grad_hook(scheduler_hook) + + +def get_batch_logps( + logits: "torch.Tensor", labels: "torch.Tensor", label_pad_token_id: int = IGNORE_INDEX +) -> Tuple["torch.Tensor", "torch.Tensor"]: + r""" + Computes the log probabilities of the given labels under the given logits. + + Returns: + logps: A tensor of shape (batch_size,) containing the sum of log probabilities. + valid_length: A tensor of shape (batch_size,) containing the number of non-masked tokens. 
+ """ + if logits.shape[:-1] != labels.shape: + raise ValueError("Logits (batchsize x seqlen) and labels must have the same shape.") + + labels = labels[:, 1:].clone() + logits = logits[:, :-1, :] + loss_mask = labels != label_pad_token_id + labels[labels == label_pad_token_id] = 0 # dummy token + per_token_logps = torch.gather(logits.log_softmax(-1), dim=2, index=labels.unsqueeze(2)).squeeze(2) + return (per_token_logps * loss_mask).sum(-1), loss_mask.sum(-1) From 7b7dc2f42390893f524f62f1e528b3390e905982 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Fri, 7 Jun 2024 04:48:29 +0800 Subject: [PATCH 096/162] fix ppo in trl 0.8.6 Former-commit-id: 5e0d66a0d80b4bd4a8506e2317209d8fb9d25ff6 --- README.md | 2 +- README_zh.md | 2 +- src/llamafactory/train/ppo/ppo_utils.py | 27 +++++++++++++++------ src/llamafactory/train/ppo/trainer.py | 31 ++++++++++++------------- 4 files changed, 37 insertions(+), 25 deletions(-) diff --git a/README.md b/README.md index 036ee648..77d9c709 100644 --- a/README.md +++ b/README.md @@ -298,7 +298,7 @@ huggingface-cli login | datasets | 2.16.0 | 2.19.2 | | accelerate | 0.30.1 | 0.30.1 | | peft | 0.11.1 | 0.11.1 | -| trl | 0.8.6 | 0.9.3 | +| trl | 0.8.6 | 0.9.4 | | Optional | Minimum | Recommend | | ------------ | ------- | --------- | diff --git a/README_zh.md b/README_zh.md index fb0d790e..9a52a963 100644 --- a/README_zh.md +++ b/README_zh.md @@ -298,7 +298,7 @@ huggingface-cli login | datasets | 2.16.0 | 2.19.2 | | accelerate | 0.30.1 | 0.30.1 | | peft | 0.11.1 | 0.11.1 | -| trl | 0.8.6 | 0.9.3 | +| trl | 0.8.6 | 0.9.4 | | 可选项 | 至少 | 推荐 | | ------------ | ------- | --------- | diff --git a/src/llamafactory/train/ppo/ppo_utils.py b/src/llamafactory/train/ppo/ppo_utils.py index 570409f2..cfda0b3d 100644 --- a/src/llamafactory/train/ppo/ppo_utils.py +++ b/src/llamafactory/train/ppo/ppo_utils.py @@ -1,7 +1,9 @@ import json +from contextlib import nullcontext from typing import TYPE_CHECKING, Dict, List, Literal, Optional import torch +from transformers.integrations import is_deepspeed_zero3_enabled from ...extras.packages import is_requests_available @@ -28,16 +30,27 @@ def get_rewards_from_server(server_url: str, messages: List[str]) -> List[torch. def replace_model(model: "AutoModelForCausalLMWithValueHead", target: Literal["default", "reward"]) -> None: r""" - Replaces the default/reward modules in the model. The model is already unwrapped (and gathered). + Replaces the default/reward modules in the model. The model is already unwrapped. 
""" - if target == "reward": # save default head temporarily - setattr(model, "default_head_weight", model.v_head.summary.weight.data.detach().clone()) - setattr(model, "default_head_bias", model.v_head.summary.bias.data.detach().clone()) + if is_deepspeed_zero3_enabled(): + import deepspeed # type: ignore + + params = [model.v_head.summary.weight, model.v_head.summary.bias] + context_maybe_zero3 = deepspeed.zero.GatheredParameters(params, modifier_rank=0) + else: + context_maybe_zero3 = nullcontext() model.pretrained_model.set_adapter(target) # set the LoRA adapter to be active - device = model.v_head.summary.weight.device - model.v_head.summary.weight.data = model.get_buffer("{}_head_weight".format(target)).detach().clone().to(device) - model.v_head.summary.bias.data = model.get_buffer("{}_head_bias".format(target)).detach().clone().to(device) + with context_maybe_zero3: + if target == "reward": # save default head temporarily + setattr(model, "default_head_weight", model.v_head.summary.weight.data.detach().clone()) + setattr(model, "default_head_bias", model.v_head.summary.bias.data.detach().clone()) + + device = model.v_head.summary.weight.device + model.v_head.summary.weight.data = ( + model.get_buffer("{}_head_weight".format(target)).detach().clone().to(device) + ) + model.v_head.summary.bias.data = model.get_buffer("{}_head_bias".format(target)).detach().clone().to(device) def dump_layernorm(model: "PreTrainedModel") -> Dict[str, torch.Tensor]: diff --git a/src/llamafactory/train/ppo/trainer.py b/src/llamafactory/train/ppo/trainer.py index 7addfc3c..0ad1b8e8 100644 --- a/src/llamafactory/train/ppo/trainer.py +++ b/src/llamafactory/train/ppo/trainer.py @@ -309,12 +309,6 @@ class CustomPPOTrainer(PPOTrainer, Trainer): ) return lr_scheduler - def _save(self, output_dir: Optional[str] = None, state_dict: Optional[Dict[str, "torch.Tensor"]] = None) -> None: - super()._save(output_dir, state_dict) - if self.processor is not None: - output_dir = output_dir if output_dir is not None else self.args.output_dir - getattr(self.processor, "image_processor").save_pretrained(output_dir) - @torch.no_grad() def get_inputs(self, batch: Dict[str, "torch.Tensor"]) -> Tuple[List["torch.Tensor"], List["torch.Tensor"]]: r""" @@ -326,6 +320,7 @@ class CustomPPOTrainer(PPOTrainer, Trainer): batch[k] = v[:, start_index:] with unwrap_model_for_generation(self.model, self.accelerator) as unwrapped_model: + unwrapped_model = self.accelerator.unwrap_model(self.model) # issue in trl v0.8.6 if self.model_args.upcast_layernorm: layernorm_params = dump_layernorm(unwrapped_model) @@ -369,19 +364,19 @@ class CustomPPOTrainer(PPOTrainer, Trainer): return get_rewards_from_server(self.reward_model, messages) batch = self.prepare_model_inputs(queries, responses) + unwrapped_model: "AutoModelForCausalLMWithValueHead" = self.accelerator.unwrap_model(self.model) - with unwrap_model_for_generation(self.model, self.accelerator) as unwrapped_model: - if self.finetuning_args.reward_model_type == "lora": - replace_model(unwrapped_model, target="reward") - reward_model = self.model - else: - reward_model = self.reward_model + if self.finetuning_args.reward_model_type == "lora": + replace_model(unwrapped_model, target="reward") + reward_model = self.model + else: + reward_model = self.reward_model - with self.amp_context: # support bf16 - _, _, values = reward_model(**batch, output_hidden_states=True, return_dict=True, use_cache=False) + with unwrap_model_for_generation(reward_model, self.accelerator), self.amp_context: # support 
bf16 + _, _, values = reward_model(**batch, output_hidden_states=True, return_dict=True, use_cache=False) - if self.finetuning_args.reward_model_type == "lora": - replace_model(unwrapped_model, target="default") + if self.finetuning_args.reward_model_type == "lora": + replace_model(unwrapped_model, target="default") if self.is_chatglm_model: # assume same architecture values = torch.transpose(values, 0, 1) @@ -482,3 +477,7 @@ class CustomPPOTrainer(PPOTrainer, Trainer): self._save(output_dir, state_dict={}) remove_dummy_checkpoint(True, output_dir, [WEIGHTS_NAME, SAFE_WEIGHTS_NAME]) self.model.save_checkpoint(output_dir) + + if self.processor is not None: + output_dir = output_dir if output_dir is not None else self.args.output_dir + getattr(self.processor, "image_processor").save_pretrained(output_dir) From 90f706c05bc4dfeb64229bcdb1cf4834c498e354 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Fri, 7 Jun 2024 05:14:19 +0800 Subject: [PATCH 097/162] fix ppo trainer save zero3 model accelerator.get_state_dict(ds_model) should be called at all ranks Former-commit-id: 3a0f60f0aa072531e4ae5819ec00c8fa42aa0913 --- src/llamafactory/train/ppo/trainer.py | 29 ++++++++++++++++++--------- src/llamafactory/train/sft/metric.py | 3 +++ 2 files changed, 22 insertions(+), 10 deletions(-) diff --git a/src/llamafactory/train/ppo/trainer.py b/src/llamafactory/train/ppo/trainer.py index 0ad1b8e8..2e1288e4 100644 --- a/src/llamafactory/train/ppo/trainer.py +++ b/src/llamafactory/train/ppo/trainer.py @@ -123,9 +123,8 @@ class CustomPPOTrainer(PPOTrainer, Trainer): self.state = TrainerState() self.control = TrainerControl() - self.is_deepspeed_enabled = self.accelerator.distributed_type == "DEEPSPEED" and hasattr( - self.accelerator.state, "deepspeed_plugin" - ) + self.is_deepspeed_enabled = getattr(self.accelerator.state, "deepspeed_plugin", None) is not None + self.is_fsdp_enabled = getattr(self.accelerator.state, "fsdp_plugin", None) is not None self.log_callback, self.save_callback = callbacks[0], callbacks[1] assert isinstance(self.log_callback, LogCallback) and isinstance(self.save_callback, FixValueHeadModelCallback) @@ -466,18 +465,28 @@ class CustomPPOTrainer(PPOTrainer, Trainer): Subclass and override to inject custom behavior. """ - if self.args.should_save: + if output_dir is None: + output_dir = self.args.output_dir + + if self.is_fsdp_enabled or self.is_deepspeed_enabled: try: - self._save(output_dir, state_dict=self.accelerator.get_state_dict(self.model)) + state_dict = self.accelerator.get_state_dict(self.model) # must be called at all ranks + if self.args.should_save: + self._save(output_dir, state_dict=state_dict) except ValueError: logger.warning( " stage3_gather_16bit_weights_on_model_save=false. 
Saving the full checkpoint instead," " use zero_to_fp32.py to recover weights" ) - self._save(output_dir, state_dict={}) - remove_dummy_checkpoint(True, output_dir, [WEIGHTS_NAME, SAFE_WEIGHTS_NAME]) + if self.args.should_save: + self._save(output_dir, state_dict={}) + # remove the dummy state_dict + remove_dummy_checkpoint(self.args.should_save, output_dir, [WEIGHTS_NAME, SAFE_WEIGHTS_NAME]) self.model.save_checkpoint(output_dir) - if self.processor is not None: - output_dir = output_dir if output_dir is not None else self.args.output_dir - getattr(self.processor, "image_processor").save_pretrained(output_dir) + elif self.args.should_save: + self._save(output_dir) + + if self.processor is not None and self.args.should_save: + output_dir = output_dir if output_dir is not None else self.args.output_dir + getattr(self.processor, "image_processor").save_pretrained(output_dir) diff --git a/src/llamafactory/train/sft/metric.py b/src/llamafactory/train/sft/metric.py index d1af4c17..b135fcfb 100644 --- a/src/llamafactory/train/sft/metric.py +++ b/src/llamafactory/train/sft/metric.py @@ -10,12 +10,15 @@ from ...extras.packages import is_jieba_available, is_nltk_available, is_rouge_a if TYPE_CHECKING: from transformers.tokenization_utils import PreTrainedTokenizer + if is_jieba_available(): import jieba # type: ignore + if is_nltk_available(): from nltk.translate.bleu_score import SmoothingFunction, sentence_bleu + if is_rouge_available(): from rouge_chinese import Rouge From 4785c723f84e992d396581d7bb40568fb14604f5 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Fri, 7 Jun 2024 05:19:21 +0800 Subject: [PATCH 098/162] tiny fix Former-commit-id: 0621bcad1dfbe8ce2464f741d4256c5df2a8d1b6 --- src/llamafactory/train/ppo/ppo_utils.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/src/llamafactory/train/ppo/ppo_utils.py b/src/llamafactory/train/ppo/ppo_utils.py index cfda0b3d..fec3fc1e 100644 --- a/src/llamafactory/train/ppo/ppo_utils.py +++ b/src/llamafactory/train/ppo/ppo_utils.py @@ -32,10 +32,11 @@ def replace_model(model: "AutoModelForCausalLMWithValueHead", target: Literal["d r""" Replaces the default/reward modules in the model. The model is already unwrapped. 
""" + v_head_layer = model.v_head.summary if is_deepspeed_zero3_enabled(): import deepspeed # type: ignore - params = [model.v_head.summary.weight, model.v_head.summary.bias] + params = [v_head_layer.weight, v_head_layer.bias] context_maybe_zero3 = deepspeed.zero.GatheredParameters(params, modifier_rank=0) else: context_maybe_zero3 = nullcontext() @@ -43,14 +44,12 @@ def replace_model(model: "AutoModelForCausalLMWithValueHead", target: Literal["d model.pretrained_model.set_adapter(target) # set the LoRA adapter to be active with context_maybe_zero3: if target == "reward": # save default head temporarily - setattr(model, "default_head_weight", model.v_head.summary.weight.data.detach().clone()) - setattr(model, "default_head_bias", model.v_head.summary.bias.data.detach().clone()) + setattr(model, "default_head_weight", v_head_layer.weight.data.detach().clone()) + setattr(model, "default_head_bias", v_head_layer.bias.data.detach().clone()) - device = model.v_head.summary.weight.device - model.v_head.summary.weight.data = ( - model.get_buffer("{}_head_weight".format(target)).detach().clone().to(device) - ) - model.v_head.summary.bias.data = model.get_buffer("{}_head_bias".format(target)).detach().clone().to(device) + device = v_head_layer.weight.device + v_head_layer.weight.data = model.get_buffer("{}_head_weight".format(target)).detach().clone().to(device) + v_head_layer.bias.data = model.get_buffer("{}_head_bias".format(target)).detach().clone().to(device) def dump_layernorm(model: "PreTrainedModel") -> Dict[str, torch.Tensor]: From 4377ad4391754fbdbf6d8fc95bd677a184a53ad1 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Fri, 7 Jun 2024 19:16:06 +0800 Subject: [PATCH 099/162] fix #4137 Former-commit-id: cdc0d6f5a2e5040e145c82c4801f37bd76529047 --- src/llamafactory/cli.py | 8 ++------ src/llamafactory/webui/runner.py | 3 +++ 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/src/llamafactory/cli.py b/src/llamafactory/cli.py index b9e734e4..5042e53c 100644 --- a/src/llamafactory/cli.py +++ b/src/llamafactory/cli.py @@ -72,12 +72,8 @@ def main(): elif command == Command.EXPORT: export_model() elif command == Command.TRAIN: - disable_torchrun = os.environ.get("TORCHRUN_DISABLED", "0").lower() in ["true", "1"] - if disable_torchrun and get_device_count() > 1: - logger.warning("`torchrun` cannot be disabled when device count > 1.") - disable_torchrun = False - - if (not disable_torchrun) and (get_device_count() > 0): + force_torchrun = os.environ.get("FORCE_TORCHRUN", "0").lower() in ["true", "1"] + if force_torchrun or get_device_count() > 1: master_addr = os.environ.get("MASTER_ADDR", "127.0.0.1") master_port = os.environ.get("MASTER_PORT", str(random.randint(20001, 29999))) logger.info("Initializing distributed tasks at: {}:{}".format(master_addr, master_port)) diff --git a/src/llamafactory/webui/runner.py b/src/llamafactory/webui/runner.py index e8fdd129..c046152c 100644 --- a/src/llamafactory/webui/runner.py +++ b/src/llamafactory/webui/runner.py @@ -278,6 +278,9 @@ class Runner: args = self._parse_train_args(data) if do_train else self._parse_eval_args(data) env = deepcopy(os.environ) env["LLAMABOARD_ENABLED"] = "1" + if args.get("deepspeed", None) is not None: + env["FORCE_TORCHRUN"] = "1" + self.trainer = Popen("llamafactory-cli train {}".format(save_cmd(args)), env=env, shell=True) yield from self.monitor() From 5606780ab69d4a0d9c58cab501a7599ef7e4ad27 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sat, 8 Jun 2024 00:22:16 +0800 Subject: [PATCH 
100/162] add resume args in webui Former-commit-id: 1d86ad768b1f36e54b4c2a9f18f6ea5a7df04c90 --- src/llamafactory/extras/constants.py | 6 ++- src/llamafactory/model/adapter.py | 4 +- src/llamafactory/webui/common.py | 17 ++++---- src/llamafactory/webui/components/top.py | 3 +- src/llamafactory/webui/components/train.py | 10 ++++- src/llamafactory/webui/runner.py | 48 ++++++++++++++++------ src/llamafactory/webui/utils.py | 29 ++++--------- 7 files changed, 68 insertions(+), 49 deletions(-) diff --git a/src/llamafactory/extras/constants.py b/src/llamafactory/extras/constants.py index 4099fe56..7d96fb5f 100644 --- a/src/llamafactory/extras/constants.py +++ b/src/llamafactory/extras/constants.py @@ -35,6 +35,8 @@ IGNORE_INDEX = -100 LAYERNORM_NAMES = {"norm", "ln"} +LLAMABOARD_CONFIG = "llamaboard_config.yaml" + METHODS = ["full", "freeze", "lora"] MOD_SUPPORTED_MODELS = {"bloom", "falcon", "gemma", "llama", "mistral", "mixtral", "phi", "starcoder2"} @@ -47,10 +49,10 @@ SUBJECTS = ["Average", "STEM", "Social Sciences", "Humanities", "Other"] SUPPORTED_MODELS = OrderedDict() -TRAINER_CONFIG = "trainer_config.yaml" - TRAINER_LOG = "trainer_log.jsonl" +TRAINING_ARGS = "training_args.yaml" + TRAINING_STAGES = { "Supervised Fine-Tuning": "sft", "Reward Modeling": "rm", diff --git a/src/llamafactory/model/adapter.py b/src/llamafactory/model/adapter.py index 1a77d613..d17873f7 100644 --- a/src/llamafactory/model/adapter.py +++ b/src/llamafactory/model/adapter.py @@ -50,7 +50,7 @@ def init_adapter( logger.info("Upcasting trainable params to float32.") cast_trainable_params_to_fp32 = True - if finetuning_args.finetuning_type == "full" and is_trainable: + if is_trainable and finetuning_args.finetuning_type == "full": logger.info("Fine-tuning method: Full") forbidden_modules = set() @@ -67,7 +67,7 @@ def init_adapter( else: param.requires_grad_(False) - if finetuning_args.finetuning_type == "freeze" and is_trainable: + if is_trainable and finetuning_args.finetuning_type == "freeze": logger.info("Fine-tuning method: Freeze") if model_args.visual_inputs: diff --git a/src/llamafactory/webui/common.py b/src/llamafactory/webui/common.py index 304b56a5..37b38df0 100644 --- a/src/llamafactory/webui/common.py +++ b/src/llamafactory/webui/common.py @@ -50,13 +50,6 @@ def get_config_path() -> os.PathLike: return os.path.join(DEFAULT_CACHE_DIR, USER_CONFIG) -def get_arg_save_path(config_path: str) -> os.PathLike: - r""" - Gets the path to saved arguments. - """ - return os.path.join(DEFAULT_CONFIG_DIR, config_path) - - def load_config() -> Dict[str, Any]: r""" Loads user config if exists. @@ -77,24 +70,28 @@ def save_config(lang: str, model_name: Optional[str] = None, model_path: Optiona user_config["lang"] = lang or user_config["lang"] if model_name: user_config["last_model"] = model_name + + if model_name and model_path: user_config["path_dict"][model_name] = model_path + with open(get_config_path(), "w", encoding="utf-8") as f: safe_dump(user_config, f) -def get_model_path(model_name: str) -> Optional[str]: +def get_model_path(model_name: str) -> str: r""" Gets the model path according to the model name. 
""" user_config = load_config() - path_dict: Dict[DownloadSource, str] = SUPPORTED_MODELS.get(model_name, defaultdict(str)) - model_path = user_config["path_dict"].get(model_name, None) or path_dict.get(DownloadSource.DEFAULT, None) + path_dict: Dict["DownloadSource", str] = SUPPORTED_MODELS.get(model_name, defaultdict(str)) + model_path = user_config["path_dict"].get(model_name, "") or path_dict.get(DownloadSource.DEFAULT, "") if ( use_modelscope() and path_dict.get(DownloadSource.MODELSCOPE) and model_path == path_dict.get(DownloadSource.DEFAULT) ): # replace path model_path = path_dict.get(DownloadSource.MODELSCOPE) + return model_path diff --git a/src/llamafactory/webui/components/top.py b/src/llamafactory/webui/components/top.py index c794d0aa..fd0ead3d 100644 --- a/src/llamafactory/webui/components/top.py +++ b/src/llamafactory/webui/components/top.py @@ -36,7 +36,8 @@ def create_top() -> Dict[str, "Component"]: visual_inputs = gr.Checkbox(scale=1) model_name.change(get_model_info, [model_name], [model_path, template, visual_inputs], queue=False) - model_path.change(save_config, inputs=[lang, model_name, model_path], queue=False) + model_name.input(save_config, inputs=[lang, model_name], queue=False) + model_path.input(save_config, inputs=[lang, model_name, model_path], queue=False) finetuning_type.change(can_quantize, [finetuning_type], [quantization_bit], queue=False) checkpoint_path.focus(list_checkpoints, [model_name, finetuning_type], [checkpoint_path], queue=False) diff --git a/src/llamafactory/webui/components/train.py b/src/llamafactory/webui/components/train.py index 74f8ef2a..72dfc858 100644 --- a/src/llamafactory/webui/components/train.py +++ b/src/llamafactory/webui/components/train.py @@ -6,7 +6,7 @@ from ...extras.constants import TRAINING_STAGES from ...extras.misc import get_device_count from ...extras.packages import is_gradio_available from ..common import DEFAULT_DATA_DIR, list_checkpoints, list_datasets -from ..utils import change_stage, check_output_dir, list_config_paths, list_output_dirs +from ..utils import change_stage, list_config_paths, list_output_dirs from .data import create_preview_box @@ -319,7 +319,13 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]: finetuning_type.change(list_output_dirs, [model_name, finetuning_type, current_time], [output_dir], queue=False) output_dir.change( list_output_dirs, [model_name, finetuning_type, current_time], [output_dir], concurrency_limit=None - ).then(check_output_dir, inputs=[lang, model_name, finetuning_type, output_dir], concurrency_limit=None) + ) + output_dir.input( + engine.runner.check_output_dir, + [lang, model_name, finetuning_type, output_dir], + list(input_elems) + [output_box], + concurrency_limit=None, + ) config_path.change(list_config_paths, [current_time], [config_path], queue=False) return elem_dict diff --git a/src/llamafactory/webui/runner.py b/src/llamafactory/webui/runner.py index c046152c..35014628 100644 --- a/src/llamafactory/webui/runner.py +++ b/src/llamafactory/webui/runner.py @@ -5,11 +5,11 @@ from typing import TYPE_CHECKING, Any, Dict, Generator, Optional from transformers.trainer import TRAINING_ARGS_NAME -from ..extras.constants import PEFT_METHODS, TRAINING_STAGES +from ..extras.constants import LLAMABOARD_CONFIG, PEFT_METHODS, TRAINING_STAGES from ..extras.misc import is_gpu_or_npu_available, torch_gc from ..extras.packages import is_gradio_available -from .common import DEFAULT_CACHE_DIR, get_save_dir, load_config -from .locales import ALERTS +from .common import 
DEFAULT_CACHE_DIR, DEFAULT_CONFIG_DIR, get_save_dir, load_config +from .locales import ALERTS, LOCALES from .utils import abort_leaf_process, gen_cmd, get_eval_results, get_trainer_info, load_args, save_args, save_cmd @@ -276,6 +276,10 @@ class Runner: else: self.do_train, self.running_data = do_train, data args = self._parse_train_args(data) if do_train else self._parse_eval_args(data) + + os.makedirs(args["output_dir"], exist_ok=True) + save_args(os.path.join(args["output_dir"], LLAMABOARD_CONFIG), self._form_config_dict(data)) + env = deepcopy(os.environ) env["LLAMABOARD_ENABLED"] = "1" if args.get("deepspeed", None) is not None: @@ -284,6 +288,16 @@ class Runner: self.trainer = Popen("llamafactory-cli train {}".format(save_cmd(args)), env=env, shell=True) yield from self.monitor() + def _form_config_dict(self, data: Dict["Component", Any]) -> Dict[str, Any]: + config_dict = {} + skip_ids = ["top.lang", "top.model_path", "train.output_dir", "train.config_path", "train.device_count"] + for elem, value in data.items(): + elem_id = self.manager.get_id_by_elem(elem) + if elem_id not in skip_ids: + config_dict[elem_id] = value + + return config_dict + def preview_train(self, data): yield from self._preview(data, do_train=True) @@ -349,28 +363,24 @@ class Runner: } yield return_dict - def save_args(self, data: dict): + def save_args(self, data): output_box = self.manager.get_elem_by_id("train.output_box") error = self._initialize(data, do_train=True, from_preview=True) if error: gr.Warning(error) return {output_box: error} - config_dict: Dict[str, Any] = {} lang = data[self.manager.get_elem_by_id("top.lang")] config_path = data[self.manager.get_elem_by_id("train.config_path")] - skip_ids = ["top.lang", "top.model_path", "train.output_dir", "train.config_path", "train.device_count"] - for elem, value in data.items(): - elem_id = self.manager.get_id_by_elem(elem) - if elem_id not in skip_ids: - config_dict[elem_id] = value + os.makedirs(DEFAULT_CONFIG_DIR, exist_ok=True) + save_path = os.path.join(DEFAULT_CONFIG_DIR, config_path) - save_path = save_args(config_path, config_dict) + save_args(save_path, self._form_config_dict(data)) return {output_box: ALERTS["info_config_saved"][lang] + save_path} def load_args(self, lang: str, config_path: str): output_box = self.manager.get_elem_by_id("train.output_box") - config_dict = load_args(config_path) + config_dict = load_args(os.path.join(DEFAULT_CONFIG_DIR, config_path)) if config_dict is None: gr.Warning(ALERTS["err_config_not_found"][lang]) return {output_box: ALERTS["err_config_not_found"][lang]} @@ -380,3 +390,17 @@ class Runner: output_dict[self.manager.get_elem_by_id(elem_id)] = value return output_dict + + def check_output_dir(self, lang: str, model_name: str, finetuning_type: str, output_dir: str): + output_box = self.manager.get_elem_by_id("train.output_box") + output_dict: Dict["Component", Any] = {output_box: LOCALES["output_box"][lang]["value"]} + if model_name and output_dir and os.path.isdir(get_save_dir(model_name, finetuning_type, output_dir)): + gr.Warning(ALERTS["warn_output_dir_exists"][lang]) + output_dict[output_box] = ALERTS["warn_output_dir_exists"][lang] + + output_dir = get_save_dir(model_name, finetuning_type, output_dir) + config_dict = load_args(os.path.join(output_dir, LLAMABOARD_CONFIG)) # load llamaboard config + for elem_id, value in config_dict.items(): + output_dict[self.manager.get_elem_by_id(elem_id)] = value + + return output_dict diff --git a/src/llamafactory/webui/utils.py b/src/llamafactory/webui/utils.py index 
23e62dca..e39f2aa4 100644 --- a/src/llamafactory/webui/utils.py +++ b/src/llamafactory/webui/utils.py @@ -8,10 +8,10 @@ import psutil from transformers.trainer_utils import get_last_checkpoint from yaml import safe_dump, safe_load -from ..extras.constants import PEFT_METHODS, RUNNING_LOG, TRAINER_CONFIG, TRAINER_LOG, TRAINING_STAGES +from ..extras.constants import PEFT_METHODS, RUNNING_LOG, TRAINER_LOG, TRAINING_ARGS, TRAINING_STAGES from ..extras.packages import is_gradio_available, is_matplotlib_available from ..extras.ploting import gen_loss_plot -from .common import DEFAULT_CACHE_DIR, DEFAULT_CONFIG_DIR, get_arg_save_path, get_save_dir +from .common import DEFAULT_CACHE_DIR, DEFAULT_CONFIG_DIR, get_save_dir from .locales import ALERTS @@ -93,10 +93,10 @@ def save_cmd(args: Dict[str, Any]) -> str: output_dir = args["output_dir"] os.makedirs(output_dir, exist_ok=True) - with open(os.path.join(output_dir, TRAINER_CONFIG), "w", encoding="utf-8") as f: + with open(os.path.join(output_dir, TRAINING_ARGS), "w", encoding="utf-8") as f: safe_dump(clean_cmd(args), f) - return os.path.join(output_dir, TRAINER_CONFIG) + return os.path.join(output_dir, TRAINING_ARGS) def get_eval_results(path: os.PathLike) -> str: @@ -157,22 +157,19 @@ def load_args(config_path: str) -> Optional[Dict[str, Any]]: Loads saved arguments. """ try: - with open(get_arg_save_path(config_path), "r", encoding="utf-8") as f: + with open(config_path, "r", encoding="utf-8") as f: return safe_load(f) except Exception: return None -def save_args(config_path: str, config_dict: Dict[str, Any]) -> str: +def save_args(config_path: str, config_dict: Dict[str, Any]): r""" Saves arguments. """ - os.makedirs(DEFAULT_CONFIG_DIR, exist_ok=True) - with open(get_arg_save_path(config_path), "w", encoding="utf-8") as f: + with open(config_path, "w", encoding="utf-8") as f: safe_dump(config_dict, f) - return str(get_arg_save_path(config_path)) - def list_config_paths(current_time: str) -> "gr.Dropdown": r""" @@ -181,13 +178,13 @@ def list_config_paths(current_time: str) -> "gr.Dropdown": config_files = ["{}.yaml".format(current_time)] if os.path.isdir(DEFAULT_CONFIG_DIR): for file_name in os.listdir(DEFAULT_CONFIG_DIR): - if file_name.endswith(".yaml"): + if file_name.endswith(".yaml") and file_name not in config_files: config_files.append(file_name) return gr.Dropdown(choices=config_files) -def list_output_dirs(model_name: str, finetuning_type: str, current_time: str) -> "gr.Dropdown": +def list_output_dirs(model_name: Optional[str], finetuning_type: str, current_time: str) -> "gr.Dropdown": r""" Lists all the directories that can resume from. """ @@ -203,14 +200,6 @@ def list_output_dirs(model_name: str, finetuning_type: str, current_time: str) - return gr.Dropdown(choices=output_dirs) -def check_output_dir(lang: str, model_name: str, finetuning_type: str, output_dir: str) -> None: - r""" - Check if output dir exists. - """ - if model_name and output_dir and os.path.isdir(get_save_dir(model_name, finetuning_type, output_dir)): - gr.Warning(ALERTS["warn_output_dir_exists"][lang]) - - def create_ds_config() -> None: r""" Creates deepspeed config. 
From 17c66e9502d5e4e63c4859be395134075bb24bde Mon Sep 17 00:00:00 2001 From: hoshi-hiyouga Date: Sat, 8 Jun 2024 00:45:02 +0800 Subject: [PATCH 101/162] fix #4139 Former-commit-id: c025a4d74f293c14c2705e68af20a82a84608520 --- src/llamafactory/model/adapter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llamafactory/model/adapter.py b/src/llamafactory/model/adapter.py index d17873f7..bd14a52f 100644 --- a/src/llamafactory/model/adapter.py +++ b/src/llamafactory/model/adapter.py @@ -239,7 +239,7 @@ def init_adapter( ) model = get_peft_model(model, lora_config) - if cast_trainable_params_to_fp32: + if is_trainable and cast_trainable_params_to_fp32: for param in filter(lambda p: p.requires_grad, model.parameters()): param.data = param.data.to(torch.float32) From 35a36d96e546e72a8ea99f76436dd3366321744c Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sat, 8 Jun 2024 00:47:23 +0800 Subject: [PATCH 102/162] reorganize adapter code Former-commit-id: b26c2df9d97f4efffccbf7d28de13619b43f10dd --- src/llamafactory/hparams/model_args.py | 9 +- src/llamafactory/model/adapter.py | 408 +++++++++++++------------ 2 files changed, 224 insertions(+), 193 deletions(-) diff --git a/src/llamafactory/hparams/model_args.py b/src/llamafactory/hparams/model_args.py index 20271173..6352a420 100644 --- a/src/llamafactory/hparams/model_args.py +++ b/src/llamafactory/hparams/model_args.py @@ -15,7 +15,12 @@ class ModelArguments: ) adapter_name_or_path: Optional[str] = field( default=None, - metadata={"help": "Path to the adapter weight or identifier from huggingface.co/models."}, + metadata={ + "help": ( + "Path to the adapter weight or identifier from huggingface.co/models. " + "Use commas to separate multiple adapters." + ) + }, ) cache_dir: Optional[str] = field( default=None, @@ -35,7 +40,7 @@ class ModelArguments: ) new_special_tokens: Optional[str] = field( default=None, - metadata={"help": "Special tokens to be added into the tokenizer."}, + metadata={"help": "Special tokens to be added into the tokenizer. 
Use commas to separate multiple tokens."}, ) model_revision: str = field( default="main", diff --git a/src/llamafactory/model/adapter.py b/src/llamafactory/model/adapter.py index bd14a52f..f4e501a7 100644 --- a/src/llamafactory/model/adapter.py +++ b/src/llamafactory/model/adapter.py @@ -21,6 +21,218 @@ if TYPE_CHECKING: logger = get_logger(__name__) +def _setup_full_tuning( + model: "PreTrainedModel", + model_args: "ModelArguments", + finetuning_args: "FinetuningArguments", + cast_trainable_params_to_fp32: bool, +) -> None: + logger.info("Fine-tuning method: Full") + forbidden_modules = set() + if model_args.visual_inputs and finetuning_args.freeze_vision_tower: + forbidden_modules.add("vision_tower") + + if model_args.visual_inputs and finetuning_args.train_mm_proj_only: + forbidden_modules.add("language_model") + + for name, param in model.named_parameters(): + if not any(forbidden_module in name for forbidden_module in forbidden_modules): + if cast_trainable_params_to_fp32: + param.data = param.data.to(torch.float32) + else: + param.requires_grad_(False) + + +def _setup_freeze_tuning( + model: "PreTrainedModel", + model_args: "ModelArguments", + finetuning_args: "FinetuningArguments", + cast_trainable_params_to_fp32: bool, +) -> None: + logger.info("Fine-tuning method: Freeze") + if model_args.visual_inputs: + config = model.config.text_config + else: + config = model.config + + num_layers = ( + getattr(config, "num_hidden_layers", None) + or getattr(config, "num_layers", None) + or getattr(config, "n_layer", None) + ) + if not num_layers: + raise ValueError("Current model does not support freeze tuning.") + + if finetuning_args.use_llama_pro: + if num_layers % finetuning_args.freeze_trainable_layers != 0: + raise ValueError( + "`num_layers` {} should be divisible by `num_layer_trainable` {}.".format( + num_layers, finetuning_args.freeze_trainable_layers + ) + ) + + stride = num_layers // finetuning_args.freeze_trainable_layers + trainable_layer_ids = range(stride - 1, num_layers + stride - 1, stride) + elif finetuning_args.freeze_trainable_layers > 0: # fine-tuning the last n layers if num_layer_trainable > 0 + trainable_layer_ids = range(max(0, num_layers - finetuning_args.freeze_trainable_layers), num_layers) + else: # fine-tuning the first n layers if num_layer_trainable < 0 + trainable_layer_ids = range(min(-finetuning_args.freeze_trainable_layers, num_layers)) + + hidden_modules = set() + non_hidden_modules = set() + for name, _ in model.named_parameters(): + if ".0." in name: + hidden_modules.add(name.split(".0.")[-1].split(".")[0]) + elif ".1." 
in name: # MoD starts from layer 1 + hidden_modules.add(name.split(".1.")[-1].split(".")[0]) + + if re.search(r"\.\d+\.", name) is None: + non_hidden_modules.add(name.split(".")[-2]) + + trainable_layers = [] + for module_name in finetuning_args.freeze_trainable_modules: + if module_name != "all" and module_name not in hidden_modules: + raise ValueError( + "Module {} is not found, please choose from {}".format(module_name, ", ".join(hidden_modules)) + ) + + for idx in trainable_layer_ids: + trainable_layers.append(".{:d}.{}".format(idx, module_name if module_name != "all" else "")) + + if finetuning_args.freeze_extra_modules: + for module_name in finetuning_args.freeze_extra_modules: + if module_name not in non_hidden_modules: + raise ValueError( + "Module {} is not found, please choose from {}".format(module_name, ", ".join(non_hidden_modules)) + ) + + trainable_layers.append(module_name) + + forbidden_modules = set() + if model_args.visual_inputs and finetuning_args.freeze_vision_tower: + forbidden_modules.add("vision_tower") + + for name, param in model.named_parameters(): + if any(trainable_layer in name for trainable_layer in trainable_layers) and not any( + forbidden_module in name for forbidden_module in forbidden_modules + ): + if cast_trainable_params_to_fp32: + param.data = param.data.to(torch.float32) + else: + param.requires_grad_(False) + + logger.info("Set trainable layers: {}".format(",".join(trainable_layers))) + + +def _setup_lora_tuning( + config: "PretrainedConfig", + model: "PreTrainedModel", + model_args: "ModelArguments", + finetuning_args: "FinetuningArguments", + is_trainable: bool, + cast_trainable_params_to_fp32: bool, +) -> "PeftModel": + logger.info("Fine-tuning method: {}".format("DoRA" if finetuning_args.use_dora else "LoRA")) + adapter_to_resume = None + + if model_args.adapter_name_or_path is not None: + is_mergeable = True + if getattr(model, "quantization_method", None): # merge lora in quantized model is unstable + assert len(model_args.adapter_name_or_path) == 1, "Quantized model only accepts a single adapter." + is_mergeable = False + + if is_deepspeed_zero3_enabled(): + assert len(model_args.adapter_name_or_path) == 1, "Cannot use multiple adapters in DeepSpeed ZeRO-3." + is_mergeable = False + + if model_args.use_unsloth: + assert len(model_args.adapter_name_or_path) == 1, "Unsloth model only accepts a single adapter." 
+ is_mergeable = False + + if (is_trainable and not finetuning_args.create_new_adapter) or (not is_mergeable): + adapter_to_merge = model_args.adapter_name_or_path[:-1] + adapter_to_resume = model_args.adapter_name_or_path[-1] + else: + adapter_to_merge = model_args.adapter_name_or_path + + for adapter in adapter_to_merge: + model: "LoraModel" = PeftModel.from_pretrained(model, adapter, offload_folder=model_args.offload_folder) + model = model.merge_and_unload() + + if len(adapter_to_merge) > 0: + logger.info("Merged {} adapter(s).".format(len(adapter_to_merge))) + + if adapter_to_resume is not None: # resume lora training + if model_args.use_unsloth: + model = load_unsloth_peft_model(config, model_args, is_trainable=is_trainable) + else: + model = PeftModel.from_pretrained( + model, + adapter_to_resume, + is_trainable=is_trainable, + offload_folder=model_args.offload_folder, + ) + + if is_trainable and adapter_to_resume is None: # create new lora weights while training + if len(finetuning_args.lora_target) == 1 and finetuning_args.lora_target[0] == "all": + target_modules = find_all_linear_modules(model, finetuning_args.freeze_vision_tower) + else: + target_modules = finetuning_args.lora_target + + if finetuning_args.use_llama_pro: + target_modules = find_expanded_modules(model, target_modules, finetuning_args.freeze_trainable_layers) + + if model_args.visual_inputs and finetuning_args.freeze_vision_tower: + target_modules = "^(?!.*vision_tower).*(?:{}).*".format("|".join(target_modules)) + + if ( + finetuning_args.use_dora + and getattr(model, "quantization_method", None) is not None + and getattr(model, "quantization_method", None) != QuantizationMethod.BITS_AND_BYTES + ): + raise ValueError("DoRA is not compatible with PTQ-quantized models.") + + if model_args.resize_vocab and finetuning_args.additional_target is None: + input_embeddings = model.get_input_embeddings() + output_embeddings = model.get_output_embeddings() + module_names = set() + for name, module in model.named_modules(): + if module in [input_embeddings, output_embeddings]: + module_names.add(name.split(".")[-1]) + + finetuning_args.additional_target = module_names + logger.warning("Vocab has been resized, add {} to trainable params.".format(",".join(module_names))) + + peft_kwargs = { + "r": finetuning_args.lora_rank, + "target_modules": target_modules, + "lora_alpha": finetuning_args.lora_alpha, + "lora_dropout": finetuning_args.lora_dropout, + "use_rslora": finetuning_args.use_rslora, + "modules_to_save": finetuning_args.additional_target, + } + + if model_args.use_unsloth: + model = get_unsloth_peft_model(model, model_args, peft_kwargs) + else: + lora_config = LoraConfig( + task_type=TaskType.CAUSAL_LM, + inference_mode=False, + use_dora=finetuning_args.use_dora, + **peft_kwargs, + ) + model = get_peft_model(model, lora_config) + + if is_trainable and cast_trainable_params_to_fp32: + for param in filter(lambda p: p.requires_grad, model.parameters()): + param.data = param.data.to(torch.float32) + + if model_args.adapter_name_or_path is not None: + logger.info("Loaded adapter(s): {}".format(",".join(model_args.adapter_name_or_path))) + + return model + + def init_adapter( config: "PretrainedConfig", model: "PreTrainedModel", @@ -35,7 +247,6 @@ def init_adapter( Note that the trainable parameters must be cast to float32. 
""" - if (not is_trainable) and model_args.adapter_name_or_path is None: logger.info("Adapter is not found at evaluation, load the base model.") return model @@ -51,199 +262,14 @@ def init_adapter( cast_trainable_params_to_fp32 = True if is_trainable and finetuning_args.finetuning_type == "full": - logger.info("Fine-tuning method: Full") - - forbidden_modules = set() - if model_args.visual_inputs and finetuning_args.freeze_vision_tower: - forbidden_modules.add("vision_tower") - - if model_args.visual_inputs and finetuning_args.train_mm_proj_only: - forbidden_modules.add("language_model") - - for name, param in model.named_parameters(): - if not any(forbidden_module in name for forbidden_module in forbidden_modules): - if cast_trainable_params_to_fp32: - param.data = param.data.to(torch.float32) - else: - param.requires_grad_(False) + _setup_full_tuning(model, model_args, finetuning_args, cast_trainable_params_to_fp32) if is_trainable and finetuning_args.finetuning_type == "freeze": - logger.info("Fine-tuning method: Freeze") - - if model_args.visual_inputs: - config = model.config.text_config - else: - config = model.config - - num_layers = ( - getattr(config, "num_hidden_layers", None) - or getattr(config, "num_layers", None) - or getattr(config, "n_layer", None) - ) - if not num_layers: - raise ValueError("Current model does not support freeze tuning.") - - if finetuning_args.use_llama_pro: - if num_layers % finetuning_args.freeze_trainable_layers != 0: - raise ValueError( - "`num_layers` {} should be divisible by `num_layer_trainable` {}.".format( - num_layers, finetuning_args.freeze_trainable_layers - ) - ) - - stride = num_layers // finetuning_args.freeze_trainable_layers - trainable_layer_ids = range(stride - 1, num_layers + stride - 1, stride) - elif finetuning_args.freeze_trainable_layers > 0: # fine-tuning the last n layers if num_layer_trainable > 0 - trainable_layer_ids = range(max(0, num_layers - finetuning_args.freeze_trainable_layers), num_layers) - else: # fine-tuning the first n layers if num_layer_trainable < 0 - trainable_layer_ids = range(min(-finetuning_args.freeze_trainable_layers, num_layers)) - - hidden_modules = set() - non_hidden_modules = set() - for name, _ in model.named_parameters(): - if ".0." in name: - hidden_modules.add(name.split(".0.")[-1].split(".")[0]) - elif ".1." 
in name: # MoD starts from layer 1 - hidden_modules.add(name.split(".1.")[-1].split(".")[0]) - - if re.search(r"\.\d+\.", name) is None: - non_hidden_modules.add(name.split(".")[-2]) - - trainable_layers = [] - for module_name in finetuning_args.freeze_trainable_modules: - if module_name != "all" and module_name not in hidden_modules: - raise ValueError( - "Module {} is not found, please choose from {}".format(module_name, ", ".join(hidden_modules)) - ) - - for idx in trainable_layer_ids: - trainable_layers.append(".{:d}.{}".format(idx, module_name if module_name != "all" else "")) - - if finetuning_args.freeze_extra_modules: - for module_name in finetuning_args.freeze_extra_modules: - if module_name not in non_hidden_modules: - raise ValueError( - "Module {} is not found, please choose from {}".format( - module_name, ", ".join(non_hidden_modules) - ) - ) - - trainable_layers.append(module_name) - - forbidden_modules = set() - if model_args.visual_inputs and finetuning_args.freeze_vision_tower: - forbidden_modules.add("vision_tower") - - for name, param in model.named_parameters(): - if any(trainable_layer in name for trainable_layer in trainable_layers) and not any( - forbidden_module in name for forbidden_module in forbidden_modules - ): - if cast_trainable_params_to_fp32: - param.data = param.data.to(torch.float32) - else: - param.requires_grad_(False) - - logger.info("Set trainable layers: {}".format(",".join(map(str, trainable_layer_ids)))) + _setup_freeze_tuning(model, model_args, finetuning_args, cast_trainable_params_to_fp32) if finetuning_args.finetuning_type == "lora": - logger.info("Fine-tuning method: {}".format("DoRA" if finetuning_args.use_dora else "LoRA")) - adapter_to_resume = None - - if model_args.adapter_name_or_path is not None: - is_mergeable = True - if getattr(model, "quantization_method", None): # merge lora in quantized model is unstable - assert len(model_args.adapter_name_or_path) == 1, "Quantized model only accepts a single adapter." - is_mergeable = False - - if is_deepspeed_zero3_enabled(): - assert len(model_args.adapter_name_or_path) == 1, "Cannot use multiple adapters in DeepSpeed ZeRO-3." - is_mergeable = False - - if model_args.use_unsloth: - assert len(model_args.adapter_name_or_path) == 1, "Unsloth model only accepts a single adapter." 
- is_mergeable = False - - if (is_trainable and not finetuning_args.create_new_adapter) or (not is_mergeable): - adapter_to_merge = model_args.adapter_name_or_path[:-1] - adapter_to_resume = model_args.adapter_name_or_path[-1] - else: - adapter_to_merge = model_args.adapter_name_or_path - - for adapter in adapter_to_merge: - model: "LoraModel" = PeftModel.from_pretrained( - model, adapter, offload_folder=model_args.offload_folder - ) - model = model.merge_and_unload() - - if len(adapter_to_merge) > 0: - logger.info("Merged {} adapter(s).".format(len(adapter_to_merge))) - - if adapter_to_resume is not None: # resume lora training - if model_args.use_unsloth: - model = load_unsloth_peft_model(config, model_args, is_trainable=is_trainable) - else: - model = PeftModel.from_pretrained( - model, - adapter_to_resume, - is_trainable=is_trainable, - offload_folder=model_args.offload_folder, - ) - - if is_trainable and adapter_to_resume is None: # create new lora weights while training - if len(finetuning_args.lora_target) == 1 and finetuning_args.lora_target[0] == "all": - target_modules = find_all_linear_modules(model, finetuning_args.freeze_vision_tower) - else: - target_modules = finetuning_args.lora_target - - if finetuning_args.use_llama_pro: - target_modules = find_expanded_modules(model, target_modules, finetuning_args.freeze_trainable_layers) - - if model_args.visual_inputs and finetuning_args.freeze_vision_tower: - target_modules = "^(?!.*vision_tower).*(?:{}).*".format("|".join(target_modules)) - - if ( - finetuning_args.use_dora - and getattr(model, "quantization_method", None) is not None - and getattr(model, "quantization_method", None) != QuantizationMethod.BITS_AND_BYTES - ): - raise ValueError("DoRA is not compatible with PTQ-quantized models.") - - if model_args.resize_vocab and finetuning_args.additional_target is None: - input_embeddings = model.get_input_embeddings() - output_embeddings = model.get_output_embeddings() - module_names = set() - for name, module in model.named_modules(): - if module in [input_embeddings, output_embeddings]: - module_names.add(name.split(".")[-1]) - - finetuning_args.additional_target = module_names - logger.warning("Vocab has been resized, add {} to trainable params.".format(",".join(module_names))) - - peft_kwargs = { - "r": finetuning_args.lora_rank, - "target_modules": target_modules, - "lora_alpha": finetuning_args.lora_alpha, - "lora_dropout": finetuning_args.lora_dropout, - "use_rslora": finetuning_args.use_rslora, - "modules_to_save": finetuning_args.additional_target, - } - - if model_args.use_unsloth: - model = get_unsloth_peft_model(model, model_args, peft_kwargs) - else: - lora_config = LoraConfig( - task_type=TaskType.CAUSAL_LM, - inference_mode=False, - use_dora=finetuning_args.use_dora, - **peft_kwargs, - ) - model = get_peft_model(model, lora_config) - - if is_trainable and cast_trainable_params_to_fp32: - for param in filter(lambda p: p.requires_grad, model.parameters()): - param.data = param.data.to(torch.float32) - - if model_args.adapter_name_or_path is not None: - logger.info("Loaded adapter(s): {}".format(",".join(model_args.adapter_name_or_path))) + model = _setup_lora_tuning( + config, model, model_args, finetuning_args, is_trainable, cast_trainable_params_to_fp32 + ) return model From 9ba1f5958cfbccbc2ecd2421710c3266f961e223 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sat, 8 Jun 2024 00:58:10 +0800 Subject: [PATCH 103/162] Delete .readthedocs.yaml Former-commit-id: dd3ee514216a9a329519c58d79208040adcad126 
--- .readthedocs.yaml | 19 ------------------- 1 file changed, 19 deletions(-) delete mode 100644 .readthedocs.yaml diff --git a/.readthedocs.yaml b/.readthedocs.yaml deleted file mode 100644 index 3a9eaea1..00000000 --- a/.readthedocs.yaml +++ /dev/null @@ -1,19 +0,0 @@ -# Read the Docs configuration file -# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details - -version: 2 - -build: - os: ubuntu-22.04 - tools: - python: "3.8" - -sphinx: - configuration: docs/source/conf.py - -formats: - - pdf - -python: - install: - - requirements: docs/requirements-docs.txt From cf2afe5db0f1d30d0a6376a86ed918d4751f1ec7 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sat, 8 Jun 2024 01:35:58 +0800 Subject: [PATCH 104/162] init unittest Former-commit-id: 1c6f21cb8878ced043fe0b27c72cad2ef6ee990e --- README.md | 1 - README_zh.md | 1 - docker-compose.yml | 2 -- {tests => scripts}/test_toolcall.py | 2 +- tests/model/test_attn.py | 35 +++++++++++++++++++++++++++++ tests/test_throughput.py | 30 ------------------------- 6 files changed, 36 insertions(+), 35 deletions(-) rename {tests => scripts}/test_toolcall.py (97%) create mode 100644 tests/model/test_attn.py delete mode 100644 tests/test_throughput.py diff --git a/README.md b/README.md index 77d9c709..44897420 100644 --- a/README.md +++ b/README.md @@ -430,7 +430,6 @@ docker run --gpus=all \ -v ./hf_cache:/root/.cache/huggingface/ \ -v ./data:/app/data \ -v ./output:/app/output \ - -e CUDA_VISIBLE_DEVICES=0 \ -p 7860:7860 \ --shm-size 16G \ --name llama_factory \ diff --git a/README_zh.md b/README_zh.md index 9a52a963..8321d202 100644 --- a/README_zh.md +++ b/README_zh.md @@ -428,7 +428,6 @@ docker run --gpus=all \ -v ./hf_cache:/root/.cache/huggingface/ \ -v ./data:/app/data \ -v ./output:/app/output \ - -e CUDA_VISIBLE_DEVICES=0 \ -p 7860:7860 \ --shm-size 16G \ --name llama_factory \ diff --git a/docker-compose.yml b/docker-compose.yml index 333dc51e..9602a3e3 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -10,8 +10,6 @@ services: - ./hf_cache:/root/.cache/huggingface/ - ./data:/app/data - ./output:/app/output - environment: - - CUDA_VISIBLE_DEVICES=0 ports: - "7860:7860" ipc: host diff --git a/tests/test_toolcall.py b/scripts/test_toolcall.py similarity index 97% rename from tests/test_toolcall.py rename to scripts/test_toolcall.py index d36e7fec..7e460017 100644 --- a/tests/test_toolcall.py +++ b/scripts/test_toolcall.py @@ -20,7 +20,7 @@ def calculate_gpa(grades: Sequence[str], hours: Sequence[int]) -> float: def main(): client = OpenAI( - api_key="0", + api_key="{}".format(os.environ.get("API_KEY", "0")), base_url="http://localhost:{}/v1".format(os.environ.get("API_PORT", 8000)), ) tools = [ diff --git a/tests/model/test_attn.py b/tests/model/test_attn.py new file mode 100644 index 00000000..12d920ef --- /dev/null +++ b/tests/model/test_attn.py @@ -0,0 +1,35 @@ +import os + +from transformers.utils import is_flash_attn_2_available, is_torch_sdpa_available + +from llamafactory.hparams import get_infer_args +from llamafactory.model import load_model, load_tokenizer + + +TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-LlamaForCausalLM") + + +def test_attention(): + attention_available = ["off"] + if is_torch_sdpa_available(): + attention_available.append("sdpa") + + if is_flash_attn_2_available(): + attention_available.append("fa2") + + llama_attention_classes = { + "off": "LlamaAttention", + "sdpa": "LlamaSdpaAttention", + "fa2": "LlamaFlashAttention2", + } + for requested_attention 
in attention_available: + model_args, _, finetuning_args, _ = get_infer_args({ + "model_name_or_path": TINY_LLAMA, + "template": "llama2", + "flash_attn": requested_attention, + }) + tokenizer = load_tokenizer(model_args) + model = load_model(tokenizer["tokenizer"], model_args, finetuning_args) + for module in model.modules(): + if "Attention" in module.__class__.__name__: + assert module.__class__.__name__ == llama_attention_classes[requested_attention] diff --git a/tests/test_throughput.py b/tests/test_throughput.py deleted file mode 100644 index e8048910..00000000 --- a/tests/test_throughput.py +++ /dev/null @@ -1,30 +0,0 @@ -import os -import time - -from openai import OpenAI -from transformers.utils.versions import require_version - - -require_version("openai>=1.5.0", "To fix: pip install openai>=1.5.0") - - -def main(): - client = OpenAI( - api_key="0", - base_url="http://localhost:{}/v1".format(os.environ.get("API_PORT", 8000)), - ) - messages = [{"role": "user", "content": "Write a long essay about environment protection as long as possible."}] - num_tokens = 0 - start_time = time.time() - for _ in range(8): - result = client.chat.completions.create(messages=messages, model="test") - num_tokens += result.usage.completion_tokens - - elapsed_time = time.time() - start_time - print("Throughput: {:.2f} tokens/s".format(num_tokens / elapsed_time)) - # --infer_backend hf: 27.22 tokens/s (1.0x) - # --infer_backend vllm: 73.03 tokens/s (2.7x) - - -if __name__ == "__main__": - main() From 9e7d1315ad4ddf045251462b04923e51dc349077 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sat, 8 Jun 2024 01:48:30 +0800 Subject: [PATCH 105/162] add ci Former-commit-id: 3ea3acdadaa54abe33d93538580196cfdd91ee56 --- .github/workflows/tests.yml | 46 ++++++++++++++++++++++++++++++------- Makefile | 5 +++- 2 files changed, 42 insertions(+), 9 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index f891f711..a8246986 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -2,28 +2,58 @@ name: tests on: push: - branches: [ "main" ] + branches: + - main + paths: + - "**.py" + - "requirements.txt" + - ".github/workflows/*.yml" pull_request: - branches: [ "main" ] + branches: + - main + paths: + - "**.py" + - "requirements.txt" + - ".github/workflows/*.yml" jobs: check_code_quality: - runs-on: ubuntu-latest - steps: - uses: actions/checkout@v4 - - name: Set up Python uses: actions/setup-python@v5 with: python-version: "3.8" - + cache: "pip" + cache-dependency-path: "setup.py" - name: Install dependencies run: | python -m pip install --upgrade pip - python -m pip install ruff - + python -m pip install .[torch,metrics,quality] - name: Check quality run: | make style && make quality + + pytest: + needs: check_code_quality + strategy: + matrix: + python-version: ["3.8", "3.9"] + os: ["ubuntu-latest", "windows-latest"] + runs-on: ${{ matrix.os }} + steps: + - uses: actions/checkout@v4 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + cache: "pip" + cache-dependency-path: "setup.py" + - name: Install dependencies + run: | + python -m pip install --upgrade pip + python -m pip install .[torch,metrics,quality] + - name: Test with pytest + run: | + make test diff --git a/Makefile b/Makefile index 3a4a12c9..65be047b 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -.PHONY: quality style +.PHONY: quality style test check_dirs := scripts src tests @@ -9,3 +9,6 @@ 
quality: style: ruff check $(check_dirs) --fix ruff format $(check_dirs) + +test: + pytest tests/ From ec97bee94415a0ef1a5a225d9a430bcb3ceb9c28 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sat, 8 Jun 2024 01:57:36 +0800 Subject: [PATCH 106/162] fix ci Former-commit-id: 95aceebd61d195be5c980a919c12c59b56722898 --- .github/workflows/tests.yml | 6 +++--- setup.py | 2 +- tests/model/{test_attn.py => test_attention.py} | 14 ++++++++------ 3 files changed, 12 insertions(+), 10 deletions(-) rename tests/model/{test_attn.py => test_attention.py} (73%) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index a8246986..a66b579b 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -30,10 +30,10 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - python -m pip install .[torch,metrics,quality] + python -m pip install .[torch,dev] - name: Check quality run: | - make style && make quality + make style && make quality pytest: needs: check_code_quality @@ -53,7 +53,7 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - python -m pip install .[torch,metrics,quality] + python -m pip install .[torch,dev] - name: Test with pytest run: | make test diff --git a/setup.py b/setup.py index c32be8af..405ac46e 100644 --- a/setup.py +++ b/setup.py @@ -33,7 +33,7 @@ extra_require = { "aqlm": ["aqlm[gpu]>=1.1.0"], "qwen": ["transformers_stream_generator"], "modelscope": ["modelscope"], - "quality": ["ruff"], + "dev": ["ruff", "pytest"], } diff --git a/tests/model/test_attn.py b/tests/model/test_attention.py similarity index 73% rename from tests/model/test_attn.py rename to tests/model/test_attention.py index 12d920ef..6dd46050 100644 --- a/tests/model/test_attn.py +++ b/tests/model/test_attention.py @@ -23,13 +23,15 @@ def test_attention(): "fa2": "LlamaFlashAttention2", } for requested_attention in attention_available: - model_args, _, finetuning_args, _ = get_infer_args({ - "model_name_or_path": TINY_LLAMA, - "template": "llama2", - "flash_attn": requested_attention, - }) + model_args, _, finetuning_args, _ = get_infer_args( + { + "model_name_or_path": TINY_LLAMA, + "template": "llama2", + "flash_attn": requested_attention, + } + ) tokenizer = load_tokenizer(model_args) model = load_model(tokenizer["tokenizer"], model_args, finetuning_args) for module in model.modules(): if "Attention" in module.__class__.__name__: - assert module.__class__.__name__ == llama_attention_classes[requested_attention] + assert module.__class__.__name__ == llama_attention_classes[requested_attention] From 71d048adb4139836bbbf660e2ad95da1aea3f0f0 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sat, 8 Jun 2024 02:00:44 +0800 Subject: [PATCH 107/162] fix ci Former-commit-id: 3f4d293fd861d765edb2040f80d16f99a5e1e3c6 --- .github/workflows/tests.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index a66b579b..818d58fc 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -39,8 +39,10 @@ jobs: needs: check_code_quality strategy: matrix: - python-version: ["3.8", "3.9"] - os: ["ubuntu-latest", "windows-latest"] + python-version: + - "3.8" + os: + - "ubuntu-latest" runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@v4 From d9aa226c0858c82f9f3c4aa95db1197f061469d4 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sat, 8 Jun 2024 02:42:34 +0800 Subject: [PATCH 108/162] add 
ultrafeedback and fineweb #4085 #4132 Former-commit-id: 968e4992e2f2a3ccba73e8668f1654ddc6eb0034 --- .github/workflows/tests.yml | 24 +----------------------- README.md | 3 +++ README_zh.md | 3 +++ data/dataset_info.json | 31 +++++++++++++++++++++++++++++++ 4 files changed, 38 insertions(+), 23 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 818d58fc..32edf6a8 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -17,7 +17,7 @@ on: - ".github/workflows/*.yml" jobs: - check_code_quality: + tests: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 @@ -34,28 +34,6 @@ jobs: - name: Check quality run: | make style && make quality - - pytest: - needs: check_code_quality - strategy: - matrix: - python-version: - - "3.8" - os: - - "ubuntu-latest" - runs-on: ${{ matrix.os }} - steps: - - uses: actions/checkout@v4 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - cache: "pip" - cache-dependency-path: "setup.py" - - name: Install dependencies - run: | - python -m pip install --upgrade pip - python -m pip install .[torch,dev] - name: Test with pytest run: | make test diff --git a/README.md b/README.md index 44897420..fb6c5782 100644 --- a/README.md +++ b/README.md @@ -214,6 +214,8 @@ You also can add a custom chat template to [template.py](src/llamafactory/data/t - [Wikipedia (zh)](https://huggingface.co/datasets/pleisto/wikipedia-cn-20230720-filtered) - [Pile (en)](https://huggingface.co/datasets/EleutherAI/pile) - [SkyPile (zh)](https://huggingface.co/datasets/Skywork/SkyPile-150B) +- [FineWeb (en)](https://huggingface.co/datasets/HuggingFaceFW/fineweb) +- [FineWeb-Edu (en)](https://huggingface.co/datasets/HuggingFaceFW/fineweb-edu) - [The Stack (en)](https://huggingface.co/datasets/bigcode/the-stack) - [StarCoder (en)](https://huggingface.co/datasets/bigcode/starcoderdata) @@ -273,6 +275,7 @@ You also can add a custom chat template to [template.py](src/llamafactory/data/t
Preference datasets - [DPO mixed (en&zh)](https://huggingface.co/datasets/hiyouga/DPO-En-Zh-20k) +- [UltraFeedback (en)](https://huggingface.co/datasets/HuggingFaceH4/ultrafeedback_binarized) - [Orca DPO Pairs (en)](https://huggingface.co/datasets/Intel/orca_dpo_pairs) - [HH-RLHF (en)](https://huggingface.co/datasets/Anthropic/hh-rlhf) - [Nectar (en)](https://huggingface.co/datasets/berkeley-nest/Nectar) diff --git a/README_zh.md b/README_zh.md index 8321d202..142254df 100644 --- a/README_zh.md +++ b/README_zh.md @@ -214,6 +214,8 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/ec36a9dd-37f4-4f72-81bd - [Wikipedia (zh)](https://huggingface.co/datasets/pleisto/wikipedia-cn-20230720-filtered) - [Pile (en)](https://huggingface.co/datasets/EleutherAI/pile) - [SkyPile (zh)](https://huggingface.co/datasets/Skywork/SkyPile-150B) +- [FineWeb (en)](https://huggingface.co/datasets/HuggingFaceFW/fineweb) +- [FineWeb-Edu (en)](https://huggingface.co/datasets/HuggingFaceFW/fineweb-edu) - [The Stack (en)](https://huggingface.co/datasets/bigcode/the-stack) - [StarCoder (en)](https://huggingface.co/datasets/bigcode/starcoderdata) @@ -273,6 +275,7 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/ec36a9dd-37f4-4f72-81bd
偏好数据集 - [DPO mixed (en&zh)](https://huggingface.co/datasets/hiyouga/DPO-En-Zh-20k) +- [UltraFeedback (en)](https://huggingface.co/datasets/HuggingFaceH4/ultrafeedback_binarized) - [Orca DPO Pairs (en)](https://huggingface.co/datasets/Intel/orca_dpo_pairs) - [HH-RLHF (en)](https://huggingface.co/datasets/Anthropic/hh-rlhf) - [Nectar (en)](https://huggingface.co/datasets/berkeley-nest/Nectar) diff --git a/data/dataset_info.json b/data/dataset_info.json index 2d9b0c83..8c5cbb45 100644 --- a/data/dataset_info.json +++ b/data/dataset_info.json @@ -391,6 +391,16 @@ "rejected": "rejected" } }, + "ultrafeedback": { + "hf_hub_url": "llamafactory/ultrafeedback_binarized", + "ms_hub_url": "llamafactory/ultrafeedback_binarized", + "ranking": true, + "columns": { + "prompt": "instruction", + "chosen": "chosen", + "rejected": "rejected" + } + }, "orca_pairs": { "hf_hub_url": "Intel/orca_dpo_pairs", "ranking": true, @@ -448,6 +458,15 @@ "assistant_tag": "assistant" } }, + "ultrafeedback_kto": { + "hf_hub_url": "argilla/ultrafeedback-binarized-preferences-cleaned-kto", + "ms_hub_url": "AI-ModelScope/ultrafeedback-binarized-preferences-cleaned-kto", + "columns": { + "prompt": "prompt", + "response": "completion", + "kto_tag": "label" + } + }, "wiki_demo": { "file_name": "wiki_demo.txt", "columns": { @@ -501,6 +520,18 @@ "prompt": "text" } }, + "fileweb": { + "hf_hub_url": "HuggingFaceFW/fineweb", + "columns": { + "prompt": "text" + } + }, + "fileweb_edu": { + "hf_hub_url": "HuggingFaceFW/fineweb-edu", + "columns": { + "prompt": "text" + } + }, "the_stack": { "hf_hub_url": "bigcode/the-stack", "ms_hub_url": "AI-ModelScope/the-stack", From cabe5ca7d0513a1c52ea322aaff7312bce19e322 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sat, 8 Jun 2024 05:20:54 +0800 Subject: [PATCH 109/162] release v0.8.0 Former-commit-id: 004db680b9e3996ec511ee818df6c0c02bf13603 --- src/llamafactory/data/template.py | 11 +--- src/llamafactory/extras/env.py | 2 +- tests/data/test_supervised.py | 44 +++++++++++++ .../model/{ => model_utils}/test_attention.py | 4 +- tests/model/test_freeze.py | 61 +++++++++++++++++++ tests/model/test_full.py | 33 ++++++++++ 6 files changed, 142 insertions(+), 13 deletions(-) create mode 100644 tests/data/test_supervised.py rename tests/model/{ => model_utils}/test_attention.py (88%) create mode 100644 tests/model/test_freeze.py create mode 100644 tests/model/test_full.py diff --git a/src/llamafactory/data/template.py b/src/llamafactory/data/template.py index 3dce5ec6..b600c567 100644 --- a/src/llamafactory/data/template.py +++ b/src/llamafactory/data/template.py @@ -700,17 +700,8 @@ _register_template( _register_template( name="llama2", format_user=StringFormatter(slots=[{"bos_token"}, "[INST] {{content}} [/INST]"]), + format_assistant=StringFormatter(slots=[" {{content}} ", {"eos_token"}]), format_system=StringFormatter(slots=["<>\n{{content}}\n<>\n\n"]), - default_system=( - "You are a helpful, respectful and honest assistant. " - "Always answer as helpfully as possible, while being safe. " - "Your answers should not include any harmful, unethical, " - "racist, sexist, toxic, dangerous, or illegal content. " - "Please ensure that your responses are socially unbiased and positive in nature.\n\n" - "If a question does not make any sense, or is not factually coherent, " - "explain why instead of answering something not correct. " - "If you don't know the answer to a question, please don't share false information." 
- ), ) diff --git a/src/llamafactory/extras/env.py b/src/llamafactory/extras/env.py index fdccf86b..cd81442d 100644 --- a/src/llamafactory/extras/env.py +++ b/src/llamafactory/extras/env.py @@ -12,7 +12,7 @@ from transformers.utils import is_bitsandbytes_available, is_torch_cuda_availabl from .packages import is_vllm_available -VERSION = "0.7.2.dev0" +VERSION = "0.8.0" def print_env() -> None: diff --git a/tests/data/test_supervised.py b/tests/data/test_supervised.py new file mode 100644 index 00000000..bb7f71df --- /dev/null +++ b/tests/data/test_supervised.py @@ -0,0 +1,44 @@ +import os + +import pytest +from datasets import load_dataset + +from llamafactory.data import get_dataset +from llamafactory.hparams import get_train_args +from llamafactory.model import load_tokenizer + + +TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-LlamaForCausalLM") + +TRAINING_ARGS = { + "model_name_or_path": TINY_LLAMA, + "stage": "sft", + "do_train": True, + "finetuning_type": "full", + "dataset": "llamafactory/tiny_dataset", + "dataset_dir": "ONLINE", + "template": "llama3", + "cutoff_len": 1024, + "overwrite_cache": True, + "output_dir": "dummy_dir", + "overwrite_output_dir": True, + "fp16": True, +} + + +@pytest.mark.parametrize("test_num", [5]) +def test_supervised(test_num: int): + model_args, data_args, training_args, _, _ = get_train_args(TRAINING_ARGS) + tokenizer_module = load_tokenizer(model_args) + tokenizer = tokenizer_module["tokenizer"] + tokenized_data = get_dataset(model_args, data_args, training_args, stage="sft", **tokenizer_module) + + original_data = load_dataset(TRAINING_ARGS["dataset"], split="train") + for test_idx in range(test_num): + decode_result = tokenizer.decode(tokenized_data["input_ids"][test_idx]) + messages = [ + {"role": "user", "content": original_data[test_idx]["instruction"]}, + {"role": "assistant", "content": original_data[test_idx]["output"]}, + ] + templated_result = tokenizer.apply_chat_template(messages, tokenize=False) + assert decode_result == templated_result diff --git a/tests/model/test_attention.py b/tests/model/model_utils/test_attention.py similarity index 88% rename from tests/model/test_attention.py rename to tests/model/model_utils/test_attention.py index 6dd46050..4d414289 100644 --- a/tests/model/test_attention.py +++ b/tests/model/model_utils/test_attention.py @@ -30,8 +30,8 @@ def test_attention(): "flash_attn": requested_attention, } ) - tokenizer = load_tokenizer(model_args) - model = load_model(tokenizer["tokenizer"], model_args, finetuning_args) + tokenizer_module = load_tokenizer(model_args) + model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args) for module in model.modules(): if "Attention" in module.__class__.__name__: assert module.__class__.__name__ == llama_attention_classes[requested_attention] diff --git a/tests/model/test_freeze.py b/tests/model/test_freeze.py new file mode 100644 index 00000000..c6cdec78 --- /dev/null +++ b/tests/model/test_freeze.py @@ -0,0 +1,61 @@ +import os + +import torch + +from llamafactory.hparams import get_train_args +from llamafactory.model import load_model, load_tokenizer + + +TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-LlamaForCausalLM") + +TRAINING_ARGS = { + "model_name_or_path": TINY_LLAMA, + "stage": "sft", + "do_train": True, + "finetuning_type": "freeze", + "dataset": "llamafactory/tiny_dataset", + "dataset_dir": "ONLINE", + "template": "llama3", + "cutoff_len": 1024, + "overwrite_cache": True, + "output_dir": "dummy_dir", + 
"overwrite_output_dir": True, + "fp16": True, +} + + +def test_freeze_all_modules(): + model_args, _, _, finetuning_args, _ = get_train_args( + { + "freeze_trainable_layers": 1, + **TRAINING_ARGS, + } + ) + tokenizer_module = load_tokenizer(model_args) + model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) + for name, param in model.named_parameters(): + if name.startswith("model.layers.1."): + assert param.requires_grad is True + assert param.dtype == torch.float32 + else: + assert param.requires_grad is False + assert param.dtype == torch.float16 + + +def test_freeze_extra_modules(): + model_args, _, _, finetuning_args, _ = get_train_args( + { + "freeze_trainable_layers": 1, + "freeze_extra_modules": "embed_tokens,lm_head", + **TRAINING_ARGS, + } + ) + tokenizer_module = load_tokenizer(model_args) + model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) + for name, param in model.named_parameters(): + if name.startswith("model.layers.1.") or any(module in name for module in ["embed_tokens", "lm_head"]): + assert param.requires_grad is True + assert param.dtype == torch.float32 + else: + assert param.requires_grad is False + assert param.dtype == torch.float16 diff --git a/tests/model/test_full.py b/tests/model/test_full.py new file mode 100644 index 00000000..ef57a980 --- /dev/null +++ b/tests/model/test_full.py @@ -0,0 +1,33 @@ +import os + +import torch + +from llamafactory.hparams import get_train_args +from llamafactory.model import load_model, load_tokenizer + + +TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-LlamaForCausalLM") + +TRAINING_ARGS = { + "model_name_or_path": TINY_LLAMA, + "stage": "sft", + "do_train": True, + "finetuning_type": "full", + "dataset": "llamafactory/tiny_dataset", + "dataset_dir": "ONLINE", + "template": "llama3", + "cutoff_len": 1024, + "overwrite_cache": True, + "output_dir": "dummy_dir", + "overwrite_output_dir": True, + "fp16": True, +} + + +def test_full(): + model_args, _, _, finetuning_args, _ = get_train_args(TRAINING_ARGS) + tokenizer_module = load_tokenizer(model_args) + model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) + for param in model.parameters(): + assert param.requires_grad is True + assert param.dtype == torch.float32 From 088292e84a65b4c27750761e84178a5b59ef9024 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sat, 8 Jun 2024 06:46:09 +0800 Subject: [PATCH 110/162] set dev version Former-commit-id: 08b7fe1c452cc99264ff0312e310b579590c6a45 --- src/llamafactory/extras/env.py | 2 +- tests/model/test_lora.py | 72 ++++++++++++++++++++++++++++++++++ 2 files changed, 73 insertions(+), 1 deletion(-) create mode 100644 tests/model/test_lora.py diff --git a/src/llamafactory/extras/env.py b/src/llamafactory/extras/env.py index cd81442d..2b9c6458 100644 --- a/src/llamafactory/extras/env.py +++ b/src/llamafactory/extras/env.py @@ -12,7 +12,7 @@ from transformers.utils import is_bitsandbytes_available, is_torch_cuda_availabl from .packages import is_vllm_available -VERSION = "0.8.0" +VERSION = "0.8.1.dev0" def print_env() -> None: diff --git a/tests/model/test_lora.py b/tests/model/test_lora.py new file mode 100644 index 00000000..1f2c02ae --- /dev/null +++ b/tests/model/test_lora.py @@ -0,0 +1,72 @@ +import os + +import torch + +from llamafactory.hparams import get_train_args +from llamafactory.model import load_model, load_tokenizer + + +TINY_LLAMA = os.environ.get("TINY_LLAMA", 
"llamafactory/tiny-random-LlamaForCausalLM") + +TRAINING_ARGS = { + "model_name_or_path": TINY_LLAMA, + "stage": "sft", + "do_train": True, + "finetuning_type": "lora", + "dataset": "llamafactory/tiny_dataset", + "dataset_dir": "ONLINE", + "template": "llama3", + "cutoff_len": 1024, + "overwrite_cache": True, + "output_dir": "dummy_dir", + "overwrite_output_dir": True, + "fp16": True, +} + + +def test_lora_all_modules(): + model_args, _, _, finetuning_args, _ = get_train_args( + { + "lora_target": "all", + **TRAINING_ARGS, + } + ) + tokenizer_module = load_tokenizer(model_args) + model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) + linear_modules = set() + for name, param in model.named_parameters(): + if any(module in name for module in ["lora_A", "lora_B"]): + linear_modules.add(name.split(".lora_", maxsplit=1)[0].split(".")[-1]) + assert param.requires_grad is True + assert param.dtype == torch.float32 + else: + assert param.requires_grad is False + assert param.dtype == torch.float16 + + assert linear_modules == {"q_proj", "k_proj", "v_proj", "o_proj", "up_proj", "gate_proj", "down_proj"} + + +def test_lora_extra_modules(): + model_args, _, _, finetuning_args, _ = get_train_args( + { + "lora_target": "all", + "additional_target": "embed_tokens,lm_head", + **TRAINING_ARGS, + } + ) + tokenizer_module = load_tokenizer(model_args) + model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) + extra_modules = set() + for name, param in model.named_parameters(): + if any(module in name for module in ["lora_A", "lora_B"]): + assert param.requires_grad is True + assert param.dtype == torch.float32 + elif "modules_to_save" in name: + extra_modules.add(name.split(".modules_to_save", maxsplit=1)[0].split(".")[-1]) + assert param.requires_grad is True + assert param.dtype == torch.float32 + else: + assert param.requires_grad is False + assert param.dtype == torch.float16 + + assert extra_modules == {"embed_tokens", "lm_head"} From a2acefea6eef0603aff2639e29dcafeaf4a93c10 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sat, 8 Jun 2024 07:15:45 +0800 Subject: [PATCH 111/162] fix llamafactory-cli env Former-commit-id: b0515e5f42831b67d1f4d049999ecb68756e66db --- src/llamafactory/extras/env.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/src/llamafactory/extras/env.py b/src/llamafactory/extras/env.py index 2b9c6458..1d4e43f1 100644 --- a/src/llamafactory/extras/env.py +++ b/src/llamafactory/extras/env.py @@ -6,10 +6,7 @@ import peft import torch import transformers import trl -from transformers.integrations import is_deepspeed_available -from transformers.utils import is_bitsandbytes_available, is_torch_cuda_available, is_torch_npu_available - -from .packages import is_vllm_available +from transformers.utils import is_torch_cuda_available, is_torch_npu_available VERSION = "0.8.1.dev0" @@ -37,19 +34,25 @@ def print_env() -> None: info["NPU type"] = torch.npu.get_device_name() info["CANN version"] = torch.version.cann - if is_deepspeed_available(): + try: import deepspeed # type: ignore info["DeepSpeed version"] = deepspeed.__version__ + except Exception: + pass - if is_bitsandbytes_available(): + try: import bitsandbytes info["Bitsandbytes version"] = bitsandbytes.__version__ + except Exception: + pass - if is_vllm_available(): + try: import vllm info["vLLM version"] = vllm.__version__ + except Exception: + pass print("\n" + "\n".join(["- {}: {}".format(key, 
value) for key, value in info.items()]) + "\n") From 9d88d0bcd861b1383dcbaedde4bcbe20452816fc Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sat, 8 Jun 2024 21:11:32 +0800 Subject: [PATCH 112/162] update git workflows Former-commit-id: 5a3f26bc53433caa98b2a66294becaf156280a4c --- .github/workflows/label_issue.yml | 17 +++++++++++++++++ .github/workflows/tests.yml | 17 ++++++++--------- 2 files changed, 25 insertions(+), 9 deletions(-) create mode 100644 .github/workflows/label_issue.yml diff --git a/.github/workflows/label_issue.yml b/.github/workflows/label_issue.yml new file mode 100644 index 00000000..b9a5543c --- /dev/null +++ b/.github/workflows/label_issue.yml @@ -0,0 +1,17 @@ +name: label_issue + +on: + issues: + types: + - opened + +jobs: + label_issue: + runs-on: ubuntu-latest + + steps: + - env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + ISSUE_URL: ${{ github.event.issue.html_url }} + run: | + gh issue edit $ISSUE_URL --add-label "pending" diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 32edf6a8..6ddcbc05 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -3,14 +3,7 @@ name: tests on: push: branches: - - main - paths: - - "**.py" - - "requirements.txt" - - ".github/workflows/*.yml" - pull_request: - branches: - - main + - $default-branch paths: - "**.py" - "requirements.txt" @@ -19,21 +12,27 @@ on: jobs: tests: runs-on: ubuntu-latest + steps: - - uses: actions/checkout@v4 + - name: Checkout + uses: actions/checkout@v4 + - name: Set up Python uses: actions/setup-python@v5 with: python-version: "3.8" cache: "pip" cache-dependency-path: "setup.py" + - name: Install dependencies run: | python -m pip install --upgrade pip python -m pip install .[torch,dev] + - name: Check quality run: | make style && make quality + - name: Test with pytest run: | make test From d999691d4fba74bc0d7f5474fb3c037ba81a804a Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sat, 8 Jun 2024 21:15:36 +0800 Subject: [PATCH 113/162] Update tests.yml Former-commit-id: e90f0cc30d6bb819246ccc08935c39e714c179a1 --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 6ddcbc05..f3ac96db 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -3,7 +3,7 @@ name: tests on: push: branches: - - $default-branch + - main paths: - "**.py" - "requirements.txt" From fcd42d8e3adeca9b3d4e65e178834d620ce3c451 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sat, 8 Jun 2024 21:25:35 +0800 Subject: [PATCH 114/162] add pr ci Former-commit-id: 9b05bb8540b946d0c74bf804bcafc4a785d22c47 --- .github/workflows/tests.yml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index f3ac96db..96092662 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -8,6 +8,15 @@ on: - "**.py" - "requirements.txt" - ".github/workflows/*.yml" + pull_request: + types: + - review_requested + branches: + - main + paths: + - "**.py" + - "requirements.txt" + - ".github/workflows/*.yml" jobs: tests: From 74764be45affef2b2d1145dd36b26df8ad744b8a Mon Sep 17 00:00:00 2001 From: "-.-" Date: Sat, 8 Jun 2024 23:51:56 +0800 Subject: [PATCH 115/162] fix README Former-commit-id: fa30028c0b83c38610b596209493a748b8ca0928 --- README.md | 2 +- README_zh.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 
fb6c5782..4dea65b9 100644 --- a/README.md +++ b/README.md @@ -335,7 +335,7 @@ huggingface-cli login ```bash git clone --depth 1 https://github.com/hiyouga/LLaMA-Factory.git cd LLaMA-Factory -pip install -e '.[torch,metrics]' +pip install -e ".[torch,metrics]" ``` Extra dependencies available: torch, torch_npu, metrics, deepspeed, bitsandbytes, vllm, galore, badam, gptq, awq, aqlm, qwen, modelscope, quality diff --git a/README_zh.md b/README_zh.md index 142254df..ab0e8cb7 100644 --- a/README_zh.md +++ b/README_zh.md @@ -335,7 +335,7 @@ huggingface-cli login ```bash git clone --depth 1 https://github.com/hiyouga/LLaMA-Factory.git cd LLaMA-Factory -pip install -e '.[torch,metrics]' +pip install -e ".[torch,metrics]" ``` 可选的额外依赖项:torch、torch_npu、metrics、deepspeed、bitsandbytes、vllm、galore、badam、gptq、awq、aqlm、qwen、modelscope、quality From 7474e8035fcdbb08a3a047d0e81354229c785dbc Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Mon, 10 Jun 2024 21:24:15 +0800 Subject: [PATCH 116/162] fix #2666 Former-commit-id: f121d5c4f94af9f165132c4309cb9bdc8217d985 --- src/llamafactory/model/adapter.py | 2 +- tests/data/test_supervised.py | 32 ++++++++++++++--------- tests/model/model_utils/test_attention.py | 15 +++++------ tests/model/test_freeze.py | 19 ++++---------- tests/model/test_full.py | 8 +++--- tests/model/test_lora.py | 19 ++++---------- 6 files changed, 41 insertions(+), 54 deletions(-) diff --git a/src/llamafactory/model/adapter.py b/src/llamafactory/model/adapter.py index f4e501a7..34b9eda6 100644 --- a/src/llamafactory/model/adapter.py +++ b/src/llamafactory/model/adapter.py @@ -209,6 +209,7 @@ def _setup_lora_tuning( "lora_alpha": finetuning_args.lora_alpha, "lora_dropout": finetuning_args.lora_dropout, "use_rslora": finetuning_args.use_rslora, + "use_dora": finetuning_args.use_dora, "modules_to_save": finetuning_args.additional_target, } @@ -218,7 +219,6 @@ def _setup_lora_tuning( lora_config = LoraConfig( task_type=TaskType.CAUSAL_LM, inference_mode=False, - use_dora=finetuning_args.use_dora, **peft_kwargs, ) model = get_peft_model(model, lora_config) diff --git a/tests/data/test_supervised.py b/tests/data/test_supervised.py index bb7f71df..63a3453f 100644 --- a/tests/data/test_supervised.py +++ b/tests/data/test_supervised.py @@ -1,4 +1,5 @@ import os +import random import pytest from datasets import load_dataset @@ -8,17 +9,17 @@ from llamafactory.hparams import get_train_args from llamafactory.model import load_tokenizer -TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-LlamaForCausalLM") +TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-Llama-3") -TRAINING_ARGS = { +TRAIN_ARGS = { "model_name_or_path": TINY_LLAMA, "stage": "sft", "do_train": True, "finetuning_type": "full", - "dataset": "llamafactory/tiny_dataset", + "dataset": "llamafactory/tiny-supervised-dataset", "dataset_dir": "ONLINE", "template": "llama3", - "cutoff_len": 1024, + "cutoff_len": 8192, "overwrite_cache": True, "output_dir": "dummy_dir", "overwrite_output_dir": True, @@ -26,19 +27,24 @@ TRAINING_ARGS = { } -@pytest.mark.parametrize("test_num", [5]) -def test_supervised(test_num: int): - model_args, data_args, training_args, _, _ = get_train_args(TRAINING_ARGS) +@pytest.mark.parametrize("num_samples", [10]) +def test_supervised(num_samples: int): + model_args, data_args, training_args, _, _ = get_train_args(TRAIN_ARGS) tokenizer_module = load_tokenizer(model_args) tokenizer = tokenizer_module["tokenizer"] tokenized_data = get_dataset(model_args, data_args, 
training_args, stage="sft", **tokenizer_module) - original_data = load_dataset(TRAINING_ARGS["dataset"], split="train") - for test_idx in range(test_num): - decode_result = tokenizer.decode(tokenized_data["input_ids"][test_idx]) + original_data = load_dataset(TRAIN_ARGS["dataset"], split="train") + indexes = random.choices(range(len(original_data)), k=num_samples) + for index in indexes: + decoded_result = tokenizer.decode(tokenized_data["input_ids"][index]) + prompt = original_data[index]["instruction"] + if original_data[index]["input"]: + prompt += "\n" + original_data[index]["input"] + messages = [ - {"role": "user", "content": original_data[test_idx]["instruction"]}, - {"role": "assistant", "content": original_data[test_idx]["output"]}, + {"role": "user", "content": prompt}, + {"role": "assistant", "content": original_data[index]["output"]}, ] templated_result = tokenizer.apply_chat_template(messages, tokenize=False) - assert decode_result == templated_result + assert decoded_result == templated_result diff --git a/tests/model/model_utils/test_attention.py b/tests/model/model_utils/test_attention.py index 4d414289..751adda4 100644 --- a/tests/model/model_utils/test_attention.py +++ b/tests/model/model_utils/test_attention.py @@ -6,7 +6,12 @@ from llamafactory.hparams import get_infer_args from llamafactory.model import load_model, load_tokenizer -TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-LlamaForCausalLM") +TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-Llama-3") + +INFER_ARGS = { + "model_name_or_path": TINY_LLAMA, + "template": "llama3", +} def test_attention(): @@ -23,13 +28,7 @@ def test_attention(): "fa2": "LlamaFlashAttention2", } for requested_attention in attention_available: - model_args, _, finetuning_args, _ = get_infer_args( - { - "model_name_or_path": TINY_LLAMA, - "template": "llama2", - "flash_attn": requested_attention, - } - ) + model_args, _, finetuning_args, _ = get_infer_args({"flash_attn": requested_attention, **INFER_ARGS}) tokenizer_module = load_tokenizer(model_args) model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args) for module in model.modules(): diff --git a/tests/model/test_freeze.py b/tests/model/test_freeze.py index c6cdec78..97800696 100644 --- a/tests/model/test_freeze.py +++ b/tests/model/test_freeze.py @@ -6,14 +6,14 @@ from llamafactory.hparams import get_train_args from llamafactory.model import load_model, load_tokenizer -TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-LlamaForCausalLM") +TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-Llama-3") -TRAINING_ARGS = { +TRAIN_ARGS = { "model_name_or_path": TINY_LLAMA, "stage": "sft", "do_train": True, "finetuning_type": "freeze", - "dataset": "llamafactory/tiny_dataset", + "dataset": "llamafactory/tiny-supervised-dataset", "dataset_dir": "ONLINE", "template": "llama3", "cutoff_len": 1024, @@ -25,12 +25,7 @@ TRAINING_ARGS = { def test_freeze_all_modules(): - model_args, _, _, finetuning_args, _ = get_train_args( - { - "freeze_trainable_layers": 1, - **TRAINING_ARGS, - } - ) + model_args, _, _, finetuning_args, _ = get_train_args({"freeze_trainable_layers": 1, **TRAIN_ARGS}) tokenizer_module = load_tokenizer(model_args) model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) for name, param in model.named_parameters(): @@ -44,11 +39,7 @@ def test_freeze_all_modules(): def test_freeze_extra_modules(): model_args, _, _, finetuning_args, _ = 
get_train_args( - { - "freeze_trainable_layers": 1, - "freeze_extra_modules": "embed_tokens,lm_head", - **TRAINING_ARGS, - } + {"freeze_trainable_layers": 1, "freeze_extra_modules": "embed_tokens,lm_head", **TRAIN_ARGS} ) tokenizer_module = load_tokenizer(model_args) model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) diff --git a/tests/model/test_full.py b/tests/model/test_full.py index ef57a980..6cb78f37 100644 --- a/tests/model/test_full.py +++ b/tests/model/test_full.py @@ -6,14 +6,14 @@ from llamafactory.hparams import get_train_args from llamafactory.model import load_model, load_tokenizer -TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-LlamaForCausalLM") +TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-Llama-3") -TRAINING_ARGS = { +TRAIN_ARGS = { "model_name_or_path": TINY_LLAMA, "stage": "sft", "do_train": True, "finetuning_type": "full", - "dataset": "llamafactory/tiny_dataset", + "dataset": "llamafactory/tiny-supervised-dataset", "dataset_dir": "ONLINE", "template": "llama3", "cutoff_len": 1024, @@ -25,7 +25,7 @@ TRAINING_ARGS = { def test_full(): - model_args, _, _, finetuning_args, _ = get_train_args(TRAINING_ARGS) + model_args, _, _, finetuning_args, _ = get_train_args(TRAIN_ARGS) tokenizer_module = load_tokenizer(model_args) model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) for param in model.parameters(): diff --git a/tests/model/test_lora.py b/tests/model/test_lora.py index 1f2c02ae..2e2b89d9 100644 --- a/tests/model/test_lora.py +++ b/tests/model/test_lora.py @@ -6,14 +6,14 @@ from llamafactory.hparams import get_train_args from llamafactory.model import load_model, load_tokenizer -TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-LlamaForCausalLM") +TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-Llama-3") -TRAINING_ARGS = { +TRAIN_ARGS = { "model_name_or_path": TINY_LLAMA, "stage": "sft", "do_train": True, "finetuning_type": "lora", - "dataset": "llamafactory/tiny_dataset", + "dataset": "llamafactory/tiny-supervised-dataset", "dataset_dir": "ONLINE", "template": "llama3", "cutoff_len": 1024, @@ -25,12 +25,7 @@ TRAINING_ARGS = { def test_lora_all_modules(): - model_args, _, _, finetuning_args, _ = get_train_args( - { - "lora_target": "all", - **TRAINING_ARGS, - } - ) + model_args, _, _, finetuning_args, _ = get_train_args({"lora_target": "all", **TRAIN_ARGS}) tokenizer_module = load_tokenizer(model_args) model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) linear_modules = set() @@ -48,11 +43,7 @@ def test_lora_all_modules(): def test_lora_extra_modules(): model_args, _, _, finetuning_args, _ = get_train_args( - { - "lora_target": "all", - "additional_target": "embed_tokens,lm_head", - **TRAINING_ARGS, - } + {"lora_target": "all", "additional_target": "embed_tokens,lm_head", **TRAIN_ARGS} ) tokenizer_module = load_tokenizer(model_args) model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) From bc86e70af9b30c3aa96d0162b21b292ca79e252e Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Mon, 10 Jun 2024 23:56:00 +0800 Subject: [PATCH 117/162] update evaluator Former-commit-id: bb8661e62481ff7027b8969f3d8a6a17290c9da3 --- src/llamafactory/eval/evaluator.py | 4 +- src/llamafactory/eval/template.py | 9 ++-- tests/eval/test_eval_template.py | 77 ++++++++++++++++++++++++++++++ 3 files changed, 81 
insertions(+), 9 deletions(-) create mode 100644 tests/eval/test_eval_template.py diff --git a/src/llamafactory/eval/evaluator.py b/src/llamafactory/eval/evaluator.py index 192f4815..5c6fb104 100644 --- a/src/llamafactory/eval/evaluator.py +++ b/src/llamafactory/eval/evaluator.py @@ -26,9 +26,7 @@ class Evaluator: self.template = get_template_and_fix_tokenizer(self.tokenizer, self.data_args.template) self.model = load_model(self.tokenizer, self.model_args, finetuning_args) self.eval_template = get_eval_template(self.eval_args.lang) - self.choice_inputs = [ - self.tokenizer.encode(self.eval_template.prefix + ch, add_special_tokens=False)[-1] for ch in CHOICES - ] + self.choice_inputs = [self.tokenizer.encode(ch, add_special_tokens=False)[-1] for ch in CHOICES] @torch.inference_mode() def batch_inference(self, batch_input: Dict[str, torch.Tensor]) -> List[str]: diff --git a/src/llamafactory/eval/template.py b/src/llamafactory/eval/template.py index a4a6ef0e..2cbb5aaf 100644 --- a/src/llamafactory/eval/template.py +++ b/src/llamafactory/eval/template.py @@ -10,7 +10,6 @@ class EvalTemplate: system: str choice: str answer: str - prefix: str def _parse_example(self, example: Dict[str, str]) -> Tuple[str, str]: r""" @@ -42,8 +41,8 @@ class EvalTemplate: eval_templates: Dict[str, "EvalTemplate"] = {} -def _register_eval_template(name: str, system: str, choice: str, answer: str, prefix: str) -> None: - eval_templates[name] = EvalTemplate(system=system, choice=choice, answer=answer, prefix=prefix) +def _register_eval_template(name: str, system: str, choice: str, answer: str) -> None: + eval_templates[name] = EvalTemplate(system=system, choice=choice, answer=answer) def get_eval_template(name: str) -> "EvalTemplate": @@ -56,8 +55,7 @@ _register_eval_template( name="en", system="The following are multiple choice questions (with answers) about {subject}.\n\n", choice="\n{choice}. {content}", - answer="\nAnswer: ", - prefix=" ", + answer="\nAnswer:", ) @@ -66,5 +64,4 @@ _register_eval_template( system="以下是中国关于{subject}考试的单项选择题,请选出其中的正确答案。\n\n", choice="\n{choice}. {content}", answer="\n答案:", - prefix=" ", ) diff --git a/tests/eval/test_eval_template.py b/tests/eval/test_eval_template.py new file mode 100644 index 00000000..f6a91a67 --- /dev/null +++ b/tests/eval/test_eval_template.py @@ -0,0 +1,77 @@ +from llamafactory.eval.template import get_eval_template + + +def test_eval_template_en(): + support_set = [ + { + "question": "Fewshot question", + "A": "Fewshot1", + "B": "Fewshot2", + "C": "Fewshot3", + "D": "Fewshot4", + "answer": "B", + } + ] + example = { + "question": "Target question", + "A": "Target1", + "B": "Target2", + "C": "Target3", + "D": "Target4", + "answer": "C", + } + template = get_eval_template(name="en") + messages = template.format_example(example, support_set=support_set, subject_name="SubName") + assert messages == [ + { + "role": "user", + "content": ( + "The following are multiple choice questions (with answers) about SubName.\n\n" + "Fewshot question\nA. Fewshot1\nB. Fewshot2\nC. Fewshot3\nD. Fewshot4\nAnswer:" + ), + }, + {"role": "assistant", "content": "B"}, + { + "role": "user", + "content": "Target question\nA. Target1\nB. Target2\nC. Target3\nD. 
Target4\nAnswer:", + }, + {"role": "assistant", "content": "C"}, + ] + + +def test_eval_template_zh(): + support_set = [ + { + "question": "示例问题", + "A": "示例答案1", + "B": "示例答案2", + "C": "示例答案3", + "D": "示例答案4", + "answer": "B", + } + ] + example = { + "question": "目标问题", + "A": "目标答案1", + "B": "目标答案2", + "C": "目标答案3", + "D": "目标答案4", + "answer": "C", + } + template = get_eval_template(name="zh") + messages = template.format_example(example, support_set=support_set, subject_name="主题") + assert messages == [ + { + "role": "user", + "content": ( + "以下是中国关于主题考试的单项选择题,请选出其中的正确答案。\n\n" + "示例问题\nA. 示例答案1\nB. 示例答案2\nC. 示例答案3\nD. 示例答案4\n答案:" + ), + }, + {"role": "assistant", "content": "B"}, + { + "role": "user", + "content": "目标问题\nA. 目标答案1\nB. 目标答案2\nC. 目标答案3\nD. 目标答案4\n答案:", + }, + {"role": "assistant", "content": "C"}, + ] From 0e7c15d2bd422dc952ce782fbd61b726ab828f3e Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Tue, 11 Jun 2024 00:19:17 +0800 Subject: [PATCH 118/162] fix #4145 Fix the docker image Former-commit-id: a9838281156fe870bfcde5d1f7afc15264fd4aad --- Dockerfile | 38 ++++++++++++++++++++++++++++++++++---- README.md | 36 ++++++++++++++++++------------------ README_zh.md | 34 ++++++++++++++++++---------------- docker-compose.yml | 10 ++++++++-- 4 files changed, 78 insertions(+), 40 deletions(-) diff --git a/Dockerfile b/Dockerfile index 0a35e355..45849601 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,14 +1,44 @@ -FROM nvcr.io/nvidia/pytorch:24.01-py3 +# Use the NVIDIA official image with PyTorch 2.3.0 +# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-24-02.html +FROM nvcr.io/nvidia/pytorch:24.02-py3 +# Define installation arguments +ARG INSTALL_BNB=false +ARG INSTALL_VLLM=false +ARG INSTALL_DEEPSPEED=false +ARG PIP_INDEX=https://pypi.org/simple + +# Set the working directory WORKDIR /app +# Install the requirements COPY requirements.txt /app/ -RUN pip install -r requirements.txt +RUN pip config set global.index-url $PIP_INDEX +RUN python -m pip install --upgrade pip +RUN python -m pip install -r requirements.txt +# Copy the rest of the application into the image COPY . /app/ -RUN pip install -e .[metrics,bitsandbytes,qwen] +# Install the LLaMA Factory +RUN EXTRA_PACKAGES="metrics"; \ + if [ "$INSTALL_BNB" = "true" ]; then \ + EXTRA_PACKAGES="${EXTRA_PACKAGES},bitsandbytes"; \ + fi; \ + if [ "$INSTALL_VLLM" = "true" ]; then \ + EXTRA_PACKAGES="${EXTRA_PACKAGES},vllm"; \ + fi; \ + if [ "$INSTALL_DEEPSPEED" = "true" ]; then \ + EXTRA_PACKAGES="${EXTRA_PACKAGES},deepspeed"; \ + fi; \ + pip install -e .[$EXTRA_PACKAGES] && \ + pip uninstall -y transformer-engine + +# Set up volumes VOLUME [ "/root/.cache/huggingface/", "/app/data", "/app/output" ] + +# Expose port 7860 for the LLaMA Board EXPOSE 7860 -CMD [ "llamafactory-cli", "webui" ] +# Expose port 8000 for the API service +EXPOSE 8000 diff --git a/README.md b/README.md index 4dea65b9..35dacd2e 100644 --- a/README.md +++ b/README.md @@ -405,9 +405,9 @@ Please refer to [data/README.md](data/README.md) for checking the details about Use the following 3 commands to run LoRA **fine-tuning**, **inference** and **merging** of the Llama3-8B-Instruct model, respectively. 
```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_sft.yaml -CUDA_VISIBLE_DEVICES=0 llamafactory-cli chat examples/inference/llama3_lora_sft.yaml -CUDA_VISIBLE_DEVICES=0 llamafactory-cli export examples/merge_lora/llama3_lora_sft.yaml +llamafactory-cli train examples/lora_single_gpu/llama3_lora_sft.yaml +llamafactory-cli chat examples/inference/llama3_lora_sft.yaml +llamafactory-cli export examples/merge_lora/llama3_lora_sft.yaml ``` See [examples/README.md](examples/README.md) for advanced usage (including distributed training). @@ -417,33 +417,33 @@ See [examples/README.md](examples/README.md) for advanced usage (including distr ### Fine-Tuning with LLaMA Board GUI (powered by [Gradio](https://github.com/gradio-app/gradio)) -#### Use local environment - ```bash -CUDA_VISIBLE_DEVICES=0 GRADIO_SHARE=1 llamafactory-cli webui +llamafactory-cli webui ``` -
- -#### Use Docker +### Build Docker ```bash -docker build -f ./Dockerfile -t llama-factory:latest . -docker run --gpus=all \ +docker build -f ./Dockerfile \ + --build-arg INSTALL_BNB=false \ + --build-arg INSTALL_VLLM=false \ + --build-arg INSTALL_DEEPSPEED=false \ + --build-arg PIP_INDEX=https://pypi.org/simple \ + -t llamafactory:latest . + +docker run -it --gpus=all \ -v ./hf_cache:/root/.cache/huggingface/ \ -v ./data:/app/data \ -v ./output:/app/output \ -p 7860:7860 \ + -p 8000:8000 \ --shm-size 16G \ - --name llama_factory \ - -d llama-factory:latest + --name llamafactory \ + llamafactory:latest ``` -#### Use Docker Compose - -```bash -docker compose -f ./docker-compose.yml up -d -``` +> [!TIP] +> Use Docker Compose to build image via `docker compose up -d`.
Details about volume diff --git a/README_zh.md b/README_zh.md index ab0e8cb7..0ddb8b19 100644 --- a/README_zh.md +++ b/README_zh.md @@ -405,9 +405,9 @@ Docker 镜像: 下面三行命令分别对 Llama3-8B-Instruct 模型进行 LoRA **微调**、**推理**和**合并**。 ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_sft.yaml -CUDA_VISIBLE_DEVICES=0 llamafactory-cli chat examples/inference/llama3_lora_sft.yaml -CUDA_VISIBLE_DEVICES=0 llamafactory-cli export examples/merge_lora/llama3_lora_sft.yaml +llamafactory-cli train examples/lora_single_gpu/llama3_lora_sft.yaml +llamafactory-cli chat examples/inference/llama3_lora_sft.yaml +llamafactory-cli export examples/merge_lora/llama3_lora_sft.yaml ``` 高级用法请参考 [examples/README_zh.md](examples/README_zh.md)(包括多 GPU 微调)。 @@ -417,31 +417,33 @@ CUDA_VISIBLE_DEVICES=0 llamafactory-cli export examples/merge_lora/llama3_lora_s ### LLaMA Board 可视化微调(由 [Gradio](https://github.com/gradio-app/gradio) 驱动) -#### 使用本地环境 - ```bash -CUDA_VISIBLE_DEVICES=0 GRADIO_SHARE=1 llamafactory-cli webui +llamafactory-cli webui ``` -#### 使用 Docker +### 构建 Docker ```bash -docker build -f ./Dockerfile -t llama-factory:latest . -docker run --gpus=all \ +docker build -f ./Dockerfile \ + --build-arg INSTALL_BNB=false \ + --build-arg INSTALL_VLLM=false \ + --build-arg INSTALL_DEEPSPEED=false \ + --build-arg PIP_INDEX=https://pypi.org/simple \ + -t llamafactory:latest . + +docker run -it --gpus=all \ -v ./hf_cache:/root/.cache/huggingface/ \ -v ./data:/app/data \ -v ./output:/app/output \ -p 7860:7860 \ + -p 8000:8000 \ --shm-size 16G \ - --name llama_factory \ - -d llama-factory:latest + --name llamafactory \ + llamafactory:latest ``` -#### 使用 Docker Compose - -```bash -docker compose -f ./docker-compose.yml up -d -``` +> [!TIP] +> 通过 `docker compose up -d` 使用 Docker Compose 构建镜像。
数据卷详情 diff --git a/docker-compose.yml b/docker-compose.yml index 9602a3e3..b3e4a34d 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,17 +1,23 @@ version: '3.8' services: - llama-factory: + llamafactory: build: dockerfile: Dockerfile context: . - container_name: llama_factory + args: + INSTALL_BNB: false + INSTALL_VLLM: false + INSTALL_DEEPSPEED: false + PIP_INDEX: https://pypi.org/simple + container_name: llamafactory volumes: - ./hf_cache:/root/.cache/huggingface/ - ./data:/app/data - ./output:/app/output ports: - "7860:7860" + - "8000:8000" ipc: host deploy: resources: From 68df064c1fd485ef4ce64a0162b93b76eb1a8061 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Tue, 11 Jun 2024 00:37:17 +0800 Subject: [PATCH 119/162] fix #4160 The split heads should be concatenated in dim=2 Former-commit-id: 4b3f247f270d44df9fe226cfe0dabfb7fcd2deda --- src/llamafactory/model/model_utils/longlora.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/llamafactory/model/model_utils/longlora.py b/src/llamafactory/model/model_utils/longlora.py index c8dc52f5..cd468979 100644 --- a/src/llamafactory/model/model_utils/longlora.py +++ b/src/llamafactory/model/model_utils/longlora.py @@ -96,7 +96,8 @@ def llama_attention_forward( ( attn_output[:, :, : self.num_heads // 2], attn_output[:, :, self.num_heads // 2 :].roll(groupsz // 2, dims=1), - ) + ), + dim=2, ) attn_output = attn_output.reshape(bsz, q_len, self.hidden_size) @@ -194,7 +195,8 @@ def llama_flash_attention_2_forward( ( attn_output[:, :, : self.num_heads // 2], attn_output[:, :, self.num_heads // 2 :].roll(groupsz // 2, dims=1), - ) + ), + dim=2, ) attn_output = attn_output.reshape(bsz, q_len, self.hidden_size).contiguous() @@ -293,7 +295,8 @@ def llama_sdpa_attention_forward( ( attn_output[:, :, : self.num_heads // 2], attn_output[:, :, self.num_heads // 2 :].roll(groupsz // 2, dims=1), - ) + ), + dim=2, ) attn_output = attn_output.reshape(bsz, q_len, self.hidden_size) @@ -303,7 +306,7 @@ def llama_sdpa_attention_forward( def _apply_llama_patch() -> None: - require_version("transformers==4.40.2", "To fix: pip install transformers==4.40.2") + require_version("transformers==4.41.2", "To fix: pip install transformers==4.41.2") LlamaAttention.forward = llama_attention_forward LlamaFlashAttention2.forward = llama_flash_attention_2_forward LlamaSdpaAttention.forward = llama_sdpa_attention_forward From 41eadf54599323b8c64c54cfd848f60972cf0dd0 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Tue, 11 Jun 2024 00:44:26 +0800 Subject: [PATCH 120/162] release v0.8.1 Former-commit-id: 875a34f492701d1c644facbe9ede411af2931513 --- src/llamafactory/extras/env.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llamafactory/extras/env.py b/src/llamafactory/extras/env.py index 1d4e43f1..8c34fd96 100644 --- a/src/llamafactory/extras/env.py +++ b/src/llamafactory/extras/env.py @@ -9,7 +9,7 @@ import trl from transformers.utils import is_torch_cuda_available, is_torch_npu_available -VERSION = "0.8.1.dev0" +VERSION = "0.8.1" def print_env() -> None: From e540759f4f31756dd35497b2956cc8c729ebc6ab Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Tue, 11 Jun 2024 00:50:53 +0800 Subject: [PATCH 121/162] set dev version Former-commit-id: 16c47cc15226119e33e46ba0f2f6ccb37072257f --- src/llamafactory/extras/env.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llamafactory/extras/env.py b/src/llamafactory/extras/env.py index 8c34fd96..a8cb799d 
100644 --- a/src/llamafactory/extras/env.py +++ b/src/llamafactory/extras/env.py @@ -9,7 +9,7 @@ import trl from transformers.utils import is_torch_cuda_available, is_torch_npu_available -VERSION = "0.8.1" +VERSION = "0.8.2.dev0" def print_env() -> None: From 8c7943c4de5c6a4d695407304b2fdf37798caf00 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Tue, 11 Jun 2024 01:04:16 +0800 Subject: [PATCH 122/162] tiny fix Former-commit-id: b5e9711ef375cc323fc083e742cccfc974550416 --- src/llamafactory/model/model_utils/longlora.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/llamafactory/model/model_utils/longlora.py b/src/llamafactory/model/model_utils/longlora.py index cd468979..4a8c562a 100644 --- a/src/llamafactory/model/model_utils/longlora.py +++ b/src/llamafactory/model/model_utils/longlora.py @@ -182,11 +182,9 @@ def llama_flash_attention_2_forward( query_states, key_states, value_states = shift(query_states), shift(key_states), shift(value_states) if attention_mask is not None: attention_mask = attention_mask[:, :groupsz].repeat(num_groups, 1) - else: - groupsz = q_len attn_output: torch.Tensor = self._flash_attention_forward( - query_states, key_states, value_states, attention_mask, groupsz, dropout=dropout_rate + query_states, key_states, value_states, attention_mask, query_states.size(1), dropout=dropout_rate ) if getattr(self.config, "group_size_ratio", None) and self.training: # shift back From 4f33de245c5e00ea4f4457676ea92779d1ab3533 Mon Sep 17 00:00:00 2001 From: Alfredo Luque Date: Tue, 11 Jun 2024 00:07:06 +0000 Subject: [PATCH 123/162] add manifest so requirements.txt in sdist Former-commit-id: b501a3c56c51786c3006a2aca15a145641a4556c --- MANIFEST.in | 1 + 1 file changed, 1 insertion(+) create mode 100644 MANIFEST.in diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 00000000..82c51f63 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1 @@ +include LICENSE requirements.txt From b7458a24b6adbc958839b4a27803069055c3fbf4 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Tue, 11 Jun 2024 12:48:53 +0800 Subject: [PATCH 124/162] tiny fix Former-commit-id: c4b2e263d9cefbad0fbc5de72422e4ef8edbcb54 --- src/llamafactory/hparams/parser.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/llamafactory/hparams/parser.py b/src/llamafactory/hparams/parser.py index ff1fbf5d..ec5dd62c 100644 --- a/src/llamafactory/hparams/parser.py +++ b/src/llamafactory/hparams/parser.py @@ -171,9 +171,6 @@ def get_train_args(args: Optional[Dict[str, Any]] = None) -> _TRAIN_CLS: if training_args.do_train and model_args.quantization_device_map == "auto": raise ValueError("Cannot use device map for quantized models in training.") - if finetuning_args.use_dora and model_args.use_unsloth: - raise ValueError("Unsloth does not support DoRA.") - if finetuning_args.pure_bf16: if not is_torch_bf16_gpu_available(): raise ValueError("This device does not support `pure_bf16`.") From 95f95bef609cf3f44bd4c939ee9d7201ae4749cb Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Tue, 11 Jun 2024 15:38:38 +0800 Subject: [PATCH 125/162] fix #4198 Former-commit-id: 945d2c6cc73542adf9272ebd9aa332ea2c1c7361 --- src/llamafactory/hparams/model_args.py | 12 ++++++++++ src/llamafactory/model/patcher.py | 2 +- src/llamafactory/train/trainer_utils.py | 32 +++++++++++-------------- 3 files changed, 27 insertions(+), 19 deletions(-) diff --git a/src/llamafactory/hparams/model_args.py b/src/llamafactory/hparams/model_args.py index 6352a420..71467770 100644 --- 
a/src/llamafactory/hparams/model_args.py +++ b/src/llamafactory/hparams/model_args.py @@ -1,6 +1,8 @@ from dataclasses import asdict, dataclass, field from typing import Any, Dict, Literal, Optional +from typing_extensions import Self + @dataclass class ModelArguments: @@ -216,3 +218,13 @@ class ModelArguments: def to_dict(self) -> Dict[str, Any]: return asdict(self) + + @classmethod + def copyfrom(cls, old_arg: Self, **kwargs) -> Self: + arg_dict = old_arg.to_dict() + arg_dict.update(**kwargs) + new_arg = cls(**arg_dict) + new_arg.compute_dtype = old_arg.compute_dtype + new_arg.device_map = old_arg.device_map + new_arg.model_max_length = old_arg.model_max_length + return new_arg diff --git a/src/llamafactory/model/patcher.py b/src/llamafactory/model/patcher.py index 87c92315..18221a10 100644 --- a/src/llamafactory/model/patcher.py +++ b/src/llamafactory/model/patcher.py @@ -79,7 +79,7 @@ def patch_config( if "device_map" not in init_kwargs and model_args.device_map: init_kwargs["device_map"] = model_args.device_map - if init_kwargs["device_map"] == "auto": + if init_kwargs.get("device_map", None) == "auto": init_kwargs["offload_folder"] = model_args.offload_folder diff --git a/src/llamafactory/train/trainer_utils.py b/src/llamafactory/train/trainer_utils.py index 0ddcdb11..7e9cc881 100644 --- a/src/llamafactory/train/trainer_utils.py +++ b/src/llamafactory/train/trainer_utils.py @@ -83,15 +83,12 @@ def create_ref_model( The valuehead parameter is randomly initialized since it is useless for PPO training. """ if finetuning_args.ref_model is not None: - ref_model_args_dict = model_args.to_dict() - ref_model_args_dict.update( - dict( - model_name_or_path=finetuning_args.ref_model, - adapter_name_or_path=finetuning_args.ref_model_adapters, - quantization_bit=finetuning_args.ref_model_quantization_bit, - ) + ref_model_args = ModelArguments.copyfrom( + model_args, + model_name_or_path=finetuning_args.ref_model, + adapter_name_or_path=finetuning_args.ref_model_adapters, + quantization_bit=finetuning_args.ref_model_quantization_bit, ) - ref_model_args = ModelArguments(**ref_model_args_dict) ref_finetuning_args = FinetuningArguments() tokenizer = load_tokenizer(ref_model_args)["tokenizer"] ref_model = load_model( @@ -102,9 +99,11 @@ def create_ref_model( if finetuning_args.finetuning_type == "lora": ref_model = None else: - tokenizer = load_tokenizer(model_args)["tokenizer"] + ref_model_args = ModelArguments.copyfrom(model_args) + ref_finetuning_args = FinetuningArguments() + tokenizer = load_tokenizer(ref_model_args)["tokenizer"] ref_model = load_model( - tokenizer, model_args, finetuning_args, is_trainable=False, add_valuehead=add_valuehead + tokenizer, ref_model_args, ref_finetuning_args, is_trainable=False, add_valuehead=add_valuehead ) logger.info("Created reference model from the model itself.") @@ -139,15 +138,12 @@ def create_reward_model( logger.info("Loaded adapter weights of reward model from {}".format(finetuning_args.reward_model)) return None else: - reward_model_args_dict = model_args.to_dict() - reward_model_args_dict.update( - dict( - model_name_or_path=finetuning_args.reward_model, - adapter_name_or_path=finetuning_args.reward_model_adapters, - quantization_bit=finetuning_args.reward_model_quantization_bit, - ) + reward_model_args = ModelArguments.copyfrom( + model_args, + model_name_or_path=finetuning_args.reward_model, + adapter_name_or_path=finetuning_args.reward_model_adapters, + quantization_bit=finetuning_args.reward_model_quantization_bit, ) - reward_model_args = 
ModelArguments(**reward_model_args_dict) reward_finetuning_args = FinetuningArguments() tokenizer = load_tokenizer(reward_model_args)["tokenizer"] reward_model = load_model( From 3f52c233b1b1c2502e199d4f3a1204215d1bd0cc Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Tue, 11 Jun 2024 15:40:21 +0800 Subject: [PATCH 126/162] Update bug-report.yml Former-commit-id: bb022cd867ebf2593e40fc6ba43b768603b129a3 --- .github/ISSUE_TEMPLATE/bug-report.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/ISSUE_TEMPLATE/bug-report.yml b/.github/ISSUE_TEMPLATE/bug-report.yml index 1d962200..768adea6 100644 --- a/.github/ISSUE_TEMPLATE/bug-report.yml +++ b/.github/ISSUE_TEMPLATE/bug-report.yml @@ -38,7 +38,9 @@ body: 请合理使用 Markdown 标签来格式化您的文本。 placeholder: | + ```bash llamafactory-cli train ... + ``` - type: textarea id: expected-behavior From dfac202c7d87551e9eb13fb1a40d47d3c904eb96 Mon Sep 17 00:00:00 2001 From: d <913015993@qq.com> Date: Tue, 11 Jun 2024 16:21:48 +0800 Subject: [PATCH 127/162] =?UTF-8?q?=E7=BB=8F=E8=BF=87=E5=A4=A7=E9=87=8F?= =?UTF-8?q?=E7=9A=84=E5=A2=9E=E9=87=8F=E9=A2=84=E8=AE=AD=E7=BB=83=EF=BC=8C?= =?UTF-8?q?=E8=BF=9B=E8=A1=8C=E5=AF=B9=E6=AF=94=E8=AF=95=E9=AA=8C=EF=BC=8C?= =?UTF-8?q?=E5=8F=91=E7=8E=B0=E8=BF=99=E4=B8=AAbug=EF=BC=9Allama3=E5=9C=A8?= =?UTF-8?q?=E9=A2=84=E8=AE=AD=E7=BB=83=E6=97=B6=E4=BD=BF=E7=94=A8=E7=9A=84?= =?UTF-8?q?tokenizer.eos=5Ftoke=E6=98=AF'<|end=5Fof=5Ftext|>'=20=EF=BC=8C?= =?UTF-8?q?=E8=BF=99=E9=87=8C=E5=9C=A8=E6=AF=8F=E6=9D=A1=E6=95=B0=E6=8D=AE?= =?UTF-8?q?=E5=90=8E=E9=9D=A2=E4=B9=9F=E5=BE=97=E7=94=A8=E8=BF=99=E4=B8=AA?= =?UTF-8?q?=EF=BC=8C=E8=80=8C=E4=B8=8D=E6=98=AF'<|eot=5Fid|>'=EF=BC=8C?= =?UTF-8?q?=E5=90=A6=E5=88=99=E5=BE=88=E5=AE=B9=E6=98=93=E5=AF=BC=E8=87=B4?= =?UTF-8?q?=E4=B8=A5=E9=87=8D=E7=9A=84=E6=80=A7=E8=83=BD=E4=B8=8B=E9=99=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Former-commit-id: ef470561f742b16eaa0f99c4cadecd7c84ce6bd2 --- src/llamafactory/data/processors/pretrain.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/llamafactory/data/processors/pretrain.py b/src/llamafactory/data/processors/pretrain.py index 87727b55..4050f74c 100644 --- a/src/llamafactory/data/processors/pretrain.py +++ b/src/llamafactory/data/processors/pretrain.py @@ -12,7 +12,8 @@ def preprocess_pretrain_dataset( examples: Dict[str, List[Any]], tokenizer: "PreTrainedTokenizer", data_args: "DataArguments" ) -> Dict[str, List[List[int]]]: # build grouped texts with format `X1 X2 X3 ...` if packing is enabled - text_examples = [messages[0]["content"] + tokenizer.eos_token for messages in examples["prompt"]] + eos_token = '<|end_of_text|>' if data_args.template == 'llama3' else tokenizer.eos_token + text_examples = [messages[0]["content"] + eos_token for messages in examples["prompt"]] if not data_args.packing: if data_args.template == "gemma": From a7233181f28bb6e9008c8c67654c04621e8bc8ea Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Tue, 11 Jun 2024 16:52:36 +0800 Subject: [PATCH 128/162] fix deepspeed version Former-commit-id: 938a69bb07d4de7d82928ff01c582032162c1480 --- src/llamafactory/model/model_utils/moe.py | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/src/llamafactory/model/model_utils/moe.py b/src/llamafactory/model/model_utils/moe.py index e554e45a..8a73c844 100644 --- a/src/llamafactory/model/model_utils/moe.py +++ b/src/llamafactory/model/model_utils/moe.py @@ -1,5 +1,6 @@ -from typing import 
TYPE_CHECKING +from typing import TYPE_CHECKING, Sequence +import torch from transformers.integrations import is_deepspeed_zero3_enabled from transformers.utils.versions import require_version @@ -10,6 +11,13 @@ if TYPE_CHECKING: from ...hparams import ModelArguments +def _set_z3_leaf_modules(model: "PreTrainedModel", leaf_modules: Sequence["torch.nn.Module"]) -> None: + require_version("deepspeed>=0.13.0", "To fix: pip install deepspeed>=0.13.0") + from deepspeed.utils import set_z3_leaf_modules # type: ignore + + set_z3_leaf_modules(model, leaf_modules) + + def add_z3_leaf_module(model: "PreTrainedModel") -> None: r""" Sets module as a leaf module to skip partitioning in deepspeed zero3. @@ -17,33 +25,30 @@ def add_z3_leaf_module(model: "PreTrainedModel") -> None: if not is_deepspeed_zero3_enabled(): return - require_version("deepspeed>=0.13.0", "To fix: pip install deepspeed>=0.13.0") - from deepspeed.utils import set_z3_leaf_modules # type: ignore - if getattr(model.config, "model_type", None) == "dbrx": from transformers.models.dbrx.modeling_dbrx import DbrxFFN - set_z3_leaf_modules(model, [DbrxFFN]) + _set_z3_leaf_modules(model, [DbrxFFN]) if getattr(model.config, "model_type", None) == "jamba": from transformers.models.jamba.modeling_jamba import JambaSparseMoeBlock - set_z3_leaf_modules(model, [JambaSparseMoeBlock]) + _set_z3_leaf_modules(model, [JambaSparseMoeBlock]) if getattr(model.config, "model_type", None) == "jetmoe": from transformers.models.jetmoe.modeling_jetmoe import JetMoeMoA, JetMoeMoE - set_z3_leaf_modules(model, [JetMoeMoA, JetMoeMoE]) + _set_z3_leaf_modules(model, [JetMoeMoA, JetMoeMoE]) if getattr(model.config, "model_type", None) == "mixtral": from transformers.models.mixtral.modeling_mixtral import MixtralSparseMoeBlock - set_z3_leaf_modules(model, [MixtralSparseMoeBlock]) + _set_z3_leaf_modules(model, [MixtralSparseMoeBlock]) if getattr(model.config, "model_type", None) == "qwen2moe": from transformers.models.qwen2_moe.modeling_qwen2_moe import Qwen2MoeSparseMoeBlock - set_z3_leaf_modules(model, [Qwen2MoeSparseMoeBlock]) + _set_z3_leaf_modules(model, [Qwen2MoeSparseMoeBlock]) def configure_moe(config: "PretrainedConfig", model_args: "ModelArguments", is_trainable: bool) -> None: From 6625bf6b332a52d09be21c0de3b989468d1ecf6c Mon Sep 17 00:00:00 2001 From: hoshi-hiyouga Date: Tue, 11 Jun 2024 17:02:14 +0800 Subject: [PATCH 129/162] Update pretrain.py Former-commit-id: e2317b2a84149e39fddfd6366be3de23dfb71f82 --- src/llamafactory/data/processors/pretrain.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llamafactory/data/processors/pretrain.py b/src/llamafactory/data/processors/pretrain.py index 4050f74c..832c987e 100644 --- a/src/llamafactory/data/processors/pretrain.py +++ b/src/llamafactory/data/processors/pretrain.py @@ -12,7 +12,7 @@ def preprocess_pretrain_dataset( examples: Dict[str, List[Any]], tokenizer: "PreTrainedTokenizer", data_args: "DataArguments" ) -> Dict[str, List[List[int]]]: # build grouped texts with format `X1 X2 X3 ...` if packing is enabled - eos_token = '<|end_of_text|>' if data_args.template == 'llama3' else tokenizer.eos_token + eos_token = "<|end_of_text|>" if data_args.template == "llama3" else tokenizer.eos_token text_examples = [messages[0]["content"] + eos_token for messages in examples["prompt"]] if not data_args.packing: From 45712c6251414024413eb2f669214e93c693f8c6 Mon Sep 17 00:00:00 2001 From: Arthur Kim Date: Wed, 12 Jun 2024 16:49:12 +0900 Subject: [PATCH 130/162] Support vllm==0.5.0 Former-commit-id: 
e7a8ffd7af21bc3759f055033ba2209fa7a1be0e --- src/llamafactory/chat/vllm_engine.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/llamafactory/chat/vllm_engine.py b/src/llamafactory/chat/vllm_engine.py index 87ce8684..d096f6eb 100644 --- a/src/llamafactory/chat/vllm_engine.py +++ b/src/llamafactory/chat/vllm_engine.py @@ -13,7 +13,10 @@ from .base_engine import BaseEngine, Response if is_vllm_available(): from vllm import AsyncEngineArgs, AsyncLLMEngine, RequestOutput, SamplingParams from vllm.lora.request import LoRARequest - from vllm.sequence import MultiModalData + try: + from vllm.multimodal import MultiModalData # vllm==0.5.0 + except ImportError: + from vllm.sequence import MultiModalData # vllm<0.5.0 if TYPE_CHECKING: From f7b66af7aff84177d6310b01e15bf58f2d0ca170 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Wed, 12 Jun 2024 16:50:11 +0800 Subject: [PATCH 131/162] fix #4242 Former-commit-id: cf260e7af03f49aa5e3d6daf3b27738ff9b9bcb8 --- Dockerfile | 2 +- src/llamafactory/chat/vllm_engine.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index 45849601..3932ff30 100644 --- a/Dockerfile +++ b/Dockerfile @@ -32,7 +32,7 @@ RUN EXTRA_PACKAGES="metrics"; \ EXTRA_PACKAGES="${EXTRA_PACKAGES},deepspeed"; \ fi; \ pip install -e .[$EXTRA_PACKAGES] && \ - pip uninstall -y transformer-engine + pip uninstall -y transformer-engine flash-attn # Set up volumes VOLUME [ "/root/.cache/huggingface/", "/app/data", "/app/output" ] diff --git a/src/llamafactory/chat/vllm_engine.py b/src/llamafactory/chat/vllm_engine.py index d096f6eb..d2850a6e 100644 --- a/src/llamafactory/chat/vllm_engine.py +++ b/src/llamafactory/chat/vllm_engine.py @@ -13,10 +13,11 @@ from .base_engine import BaseEngine, Response if is_vllm_available(): from vllm import AsyncEngineArgs, AsyncLLMEngine, RequestOutput, SamplingParams from vllm.lora.request import LoRARequest + try: - from vllm.multimodal import MultiModalData # vllm==0.5.0 + from vllm.multimodal import MultiModalData # type: ignore (for vllm>=0.5.0) except ImportError: - from vllm.sequence import MultiModalData # vllm<0.5.0 + from vllm.sequence import MultiModalData # for vllm<0.5.0 if TYPE_CHECKING: From 8ad6a70a57f2892a748a94db80c7185a3ba8b7d8 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Wed, 12 Jun 2024 17:39:12 +0800 Subject: [PATCH 132/162] update readme Former-commit-id: a436aaa83f0cf12c8f404459e5486f9369d538ec --- README.md | 2 +- README_zh.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 35dacd2e..a773a5f4 100644 --- a/README.md +++ b/README.md @@ -443,7 +443,7 @@ docker run -it --gpus=all \ ``` > [!TIP] -> Use Docker Compose to build image via `docker compose up -d`. +> Use Docker Compose to build image via `docker-compose up -d`.
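A minimal, self-contained sketch of the idea behind the pretrain.py changes in PATCH 127 and PATCH 129 above: for the llama3 template, packed pre-training documents should be terminated with `<|end_of_text|>` rather than the chat `eos_token` (`<|eot_id|>`). The helper name and sample strings below are hypothetical; only the token choice follows the patch.

```python
from typing import List


def terminate_documents(docs: List[str], template: str, chat_eos_token: str) -> List[str]:
    # Pick the pre-training EOS per template, mirroring the pretrain.py change above.
    eos_token = "<|end_of_text|>" if template == "llama3" else chat_eos_token
    return [doc + eos_token for doc in docs]


print(terminate_documents(["doc one", "doc two"], template="llama3", chat_eos_token="<|eot_id|>"))
# -> ['doc one<|end_of_text|>', 'doc two<|end_of_text|>']
```

The commit message of PATCH 127 reports that appending the wrong terminator during continued pre-training easily leads to a severe performance drop in comparison experiments, which is why the template-specific token is used here.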
Details about volume diff --git a/README_zh.md b/README_zh.md index 0ddb8b19..7a9cb159 100644 --- a/README_zh.md +++ b/README_zh.md @@ -443,7 +443,7 @@ docker run -it --gpus=all \ ``` > [!TIP] -> 通过 `docker compose up -d` 使用 Docker Compose 构建镜像。 +> 通过 `docker-compose up -d` 使用 Docker Compose 构建镜像。
数据卷详情 From d98900d804b94bc0a972483c2d91c6883547c18d Mon Sep 17 00:00:00 2001 From: hzhaoy Date: Wed, 12 Jun 2024 18:29:03 +0800 Subject: [PATCH 133/162] adapt vllm==0.5.0 Former-commit-id: 02afd9ff64f23e6707ac739ae1269f41bd70c340 --- src/llamafactory/chat/vllm_engine.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/src/llamafactory/chat/vllm_engine.py b/src/llamafactory/chat/vllm_engine.py index d2850a6e..34126adf 100644 --- a/src/llamafactory/chat/vllm_engine.py +++ b/src/llamafactory/chat/vllm_engine.py @@ -1,10 +1,12 @@ import uuid from typing import TYPE_CHECKING, AsyncGenerator, AsyncIterator, Dict, List, Optional, Sequence, Union +from packaging import version + from ..data import get_template_and_fix_tokenizer from ..extras.logging import get_logger from ..extras.misc import get_device_count -from ..extras.packages import is_vllm_available +from ..extras.packages import is_vllm_available, _get_package_version from ..model import load_config, load_tokenizer from ..model.model_utils.visual import LlavaMultiModalProjectorForYiVLForVLLM from .base_engine import BaseEngine, Response @@ -14,10 +16,10 @@ if is_vllm_available(): from vllm import AsyncEngineArgs, AsyncLLMEngine, RequestOutput, SamplingParams from vllm.lora.request import LoRARequest - try: - from vllm.multimodal import MultiModalData # type: ignore (for vllm>=0.5.0) - except ImportError: - from vllm.sequence import MultiModalData # for vllm<0.5.0 + if _get_package_version("vllm") >= version.parse("0.5.0"): + from vllm.multimodal.image import ImagePixelData + else: + from vllm.sequence import MultiModalData if TYPE_CHECKING: @@ -110,7 +112,10 @@ class VllmEngine(BaseEngine): if self.processor is not None and image is not None: # add image features image_processor: "BaseImageProcessor" = getattr(self.processor, "image_processor") pixel_values = image_processor(image, return_tensors="pt")["pixel_values"] - multi_modal_data = MultiModalData(type=MultiModalData.Type.IMAGE, data=pixel_values) + if _get_package_version("vllm") >= version.parse("0.5.0"): + multi_modal_data = ImagePixelData(pixel_values) + else: + multi_modal_data = MultiModalData(type=MultiModalData.Type.IMAGE, data=pixel_values) else: multi_modal_data = None From 7d3a9b10b70d1fc1af3c218bb21f7c3fe42b59b9 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Thu, 13 Jun 2024 00:07:48 +0800 Subject: [PATCH 134/162] fix docker compose usage Former-commit-id: 59a5bd5d5c8d2a44e2dad26b74e77a45e109c8d6 --- README.md | 10 ++++++++-- README_zh.md | 10 ++++++++-- docker-compose.yml | 5 +++-- 3 files changed, 19 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index a773a5f4..65964560 100644 --- a/README.md +++ b/README.md @@ -423,6 +423,8 @@ llamafactory-cli webui ### Build Docker +#### Use Docker + ```bash docker build -f ./Dockerfile \ --build-arg INSTALL_BNB=false \ @@ -442,8 +444,12 @@ docker run -it --gpus=all \ llamafactory:latest ``` -> [!TIP] -> Use Docker Compose to build image via `docker-compose up -d`. +#### Use Docker Compose + +```bash +docker-compose up -d +docker-compose exec -it llamafactory bash +```
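The vllm_engine.py change in PATCH 133 above replaces the ImportError fallback introduced in PATCH 130/131 with an explicit version check before choosing the import path. A standalone sketch of that pattern, assuming vLLM and `packaging` are installed (the helper name is hypothetical; the module paths are the ones used in the patch):

```python
import importlib.metadata

from packaging import version


def vllm_version_at_least(spec: str) -> bool:
    # Compare the installed vllm version against a required minimum.
    return version.parse(importlib.metadata.version("vllm")) >= version.parse(spec)


if vllm_version_at_least("0.5.0"):
    from vllm.multimodal.image import ImagePixelData  # image inputs moved here in vllm 0.5.0
else:
    from vllm.sequence import MultiModalData  # location in vllm < 0.5.0
```

Checking the version explicitly keeps the two code paths unambiguous, whereas the earlier try/except treats any ImportError, including one caused by a broken install, as "old vLLM".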
Details about volume diff --git a/README_zh.md b/README_zh.md index 7a9cb159..7962a6d1 100644 --- a/README_zh.md +++ b/README_zh.md @@ -423,6 +423,8 @@ llamafactory-cli webui ### 构建 Docker +#### 使用 Docker + ```bash docker build -f ./Dockerfile \ --build-arg INSTALL_BNB=false \ @@ -442,8 +444,12 @@ docker run -it --gpus=all \ llamafactory:latest ``` -> [!TIP] -> 通过 `docker-compose up -d` 使用 Docker Compose 构建镜像。 +#### 使用 Docker Compose + +```bash +docker-compose up -d +docker-compose exec -it llamafactory bash +```
数据卷详情 diff --git a/docker-compose.yml b/docker-compose.yml index b3e4a34d..c5dc34e9 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,5 +1,3 @@ -version: '3.8' - services: llamafactory: build: @@ -19,6 +17,9 @@ services: - "7860:7860" - "8000:8000" ipc: host + tty: true + stdin_open: true + command: bash deploy: resources: reservations: From f4c95557609699ff36d483beb49d0f792fbff146 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Thu, 13 Jun 2024 00:48:44 +0800 Subject: [PATCH 135/162] fix lint Former-commit-id: b170165679317af2b3f03633afac27661b3deb06 --- README.md | 2 +- README_zh.md | 2 +- src/llamafactory/hparams/finetuning_args.py | 17 ++++++++--------- src/llamafactory/hparams/model_args.py | 12 ++++++++---- 4 files changed, 18 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 65964560..994a62c6 100644 --- a/README.md +++ b/README.md @@ -448,7 +448,7 @@ docker run -it --gpus=all \ ```bash docker-compose up -d -docker-compose exec -it llamafactory bash +docker-compose exec llamafactory bash ```
Details about volume diff --git a/README_zh.md b/README_zh.md index 7962a6d1..fa395c6b 100644 --- a/README_zh.md +++ b/README_zh.md @@ -448,7 +448,7 @@ docker run -it --gpus=all \ ```bash docker-compose up -d -docker-compose exec -it llamafactory bash +docker-compose exec llamafactory bash ```
数据卷详情 diff --git a/src/llamafactory/hparams/finetuning_args.py b/src/llamafactory/hparams/finetuning_args.py index 08af31e4..facbe792 100644 --- a/src/llamafactory/hparams/finetuning_args.py +++ b/src/llamafactory/hparams/finetuning_args.py @@ -1,5 +1,5 @@ from dataclasses import dataclass, field -from typing import Literal, Optional +from typing import List, Literal, Optional @dataclass @@ -319,20 +319,19 @@ class FinetuningArguments(FreezeArguments, LoraArguments, RLHFArguments, GaloreA return [item.strip() for item in arg.split(",")] return arg - self.freeze_trainable_modules = split_arg(self.freeze_trainable_modules) - self.freeze_extra_modules = split_arg(self.freeze_extra_modules) - self.lora_alpha = self.lora_alpha or self.lora_rank * 2 - self.lora_target = split_arg(self.lora_target) - self.additional_target = split_arg(self.additional_target) - self.galore_target = split_arg(self.galore_target) + self.freeze_trainable_modules: List[str] = split_arg(self.freeze_trainable_modules) + self.freeze_extra_modules: Optional[List[str]] = split_arg(self.freeze_extra_modules) + self.lora_alpha: int = self.lora_alpha or self.lora_rank * 2 + self.lora_target: List[str] = split_arg(self.lora_target) + self.additional_target: Optional[List[str]] = split_arg(self.additional_target) + self.galore_target: List[str] = split_arg(self.galore_target) self.freeze_vision_tower = self.freeze_vision_tower or self.train_mm_proj_only + self.use_ref_model = self.pref_loss not in ["orpo", "simpo"] assert self.finetuning_type in ["lora", "freeze", "full"], "Invalid fine-tuning method." assert self.ref_model_quantization_bit in [None, 8, 4], "We only accept 4-bit or 8-bit quantization." assert self.reward_model_quantization_bit in [None, 8, 4], "We only accept 4-bit or 8-bit quantization." 
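The `__post_init__` hunk above passes several arguments through a `split_arg` helper before the now type-annotated assignments. A self-contained sketch of that normalization, reconstructed from the context lines shown here rather than copied from the repository:

```python
from typing import List, Optional, Union


def split_arg(arg: Optional[Union[str, List[str]]]) -> Optional[List[str]]:
    # Accept either a comma-separated string (as given on the CLI or in YAML)
    # or an already-parsed list, and normalize to a list of stripped names.
    if isinstance(arg, str):
        return [item.strip() for item in arg.split(",")]
    return arg


print(split_arg("q_proj, v_proj"))       # ['q_proj', 'v_proj']
print(split_arg(["q_proj", "v_proj"]))   # returned unchanged
print(split_arg(None))                   # None passes through for optional fields
```

This lets fields such as `lora_target` or `freeze_extra_modules` be written as a single string in config files while the rest of the code always sees a list (or `None`).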
- self.use_ref_model = self.pref_loss not in ["orpo", "simpo"] - if self.stage == "ppo" and self.reward_model is None: raise ValueError("`reward_model` is necessary for PPO training.") diff --git a/src/llamafactory/hparams/model_args.py b/src/llamafactory/hparams/model_args.py index 71467770..359beafd 100644 --- a/src/llamafactory/hparams/model_args.py +++ b/src/llamafactory/hparams/model_args.py @@ -1,9 +1,13 @@ from dataclasses import asdict, dataclass, field -from typing import Any, Dict, Literal, Optional +from typing import TYPE_CHECKING, Any, Dict, Literal, Optional, Union from typing_extensions import Self +if TYPE_CHECKING: + import torch + + @dataclass class ModelArguments: r""" @@ -194,9 +198,9 @@ class ModelArguments: ) def __post_init__(self): - self.compute_dtype = None - self.device_map = None - self.model_max_length = None + self.compute_dtype: Optional["torch.dtype"] = None + self.device_map: Optional[Union[str, Dict[str, Any]]] = None + self.model_max_length: Optional[int] = None if self.split_special_tokens and self.use_fast_tokenizer: raise ValueError("`split_special_tokens` is only supported for slow tokenizers.") From 39e3d3fed63a5185806802e0b7de2eaf22451c22 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Thu, 13 Jun 2024 01:00:56 +0800 Subject: [PATCH 136/162] add neo-sft dataset Former-commit-id: 34863fa7cb641ceca92e3a2eec914126db537b62 --- README.md | 1 + README_zh.md | 1 + data/dataset_info.json | 4 ++++ 3 files changed, 6 insertions(+) diff --git a/README.md b/README.md index 994a62c6..5bbaf2d7 100644 --- a/README.md +++ b/README.md @@ -259,6 +259,7 @@ You also can add a custom chat template to [template.py](src/llamafactory/data/t - [Cosmopedia (en)](https://huggingface.co/datasets/HuggingFaceTB/cosmopedia) - [STEM (zh)](https://huggingface.co/datasets/hfl/stem_zh_instruction) - [Ruozhiba (zh)](https://huggingface.co/datasets/hfl/ruozhiba_gpt4_turbo) +- [Neo-sft (zh)](https://huggingface.co/datasets/m-a-p/neo_sft_phase2) - [LLaVA mixed (en&zh)](https://huggingface.co/datasets/BUAADreamer/llava-en-zh-300k) - [Open Assistant (de)](https://huggingface.co/datasets/mayflowergmbh/oasst_de) - [Dolly 15k (de)](https://huggingface.co/datasets/mayflowergmbh/dolly-15k_de) diff --git a/README_zh.md b/README_zh.md index fa395c6b..fb616909 100644 --- a/README_zh.md +++ b/README_zh.md @@ -259,6 +259,7 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/ec36a9dd-37f4-4f72-81bd - [Cosmopedia (en)](https://huggingface.co/datasets/HuggingFaceTB/cosmopedia) - [STEM (zh)](https://huggingface.co/datasets/hfl/stem_zh_instruction) - [Ruozhiba (zh)](https://huggingface.co/datasets/hfl/ruozhiba_gpt4_turbo) +- [Neo-sft (zh)](https://huggingface.co/datasets/m-a-p/neo_sft_phase2) - [LLaVA mixed (en&zh)](https://huggingface.co/datasets/BUAADreamer/llava-en-zh-300k) - [Open Assistant (de)](https://huggingface.co/datasets/mayflowergmbh/oasst_de) - [Dolly 15k (de)](https://huggingface.co/datasets/mayflowergmbh/dolly-15k_de) diff --git a/data/dataset_info.json b/data/dataset_info.json index 8c5cbb45..1d226b3a 100644 --- a/data/dataset_info.json +++ b/data/dataset_info.json @@ -248,6 +248,10 @@ "ruozhiba_gpt4": { "hf_hub_url": "hfl/ruozhiba_gpt4_turbo" }, + "neo_sft": { + "hf_hub_url": "m-a-p/neo_sft_phase2", + "formatting": "sharegpt" + }, "llava_1k_en": { "hf_hub_url": "BUAADreamer/llava-en-zh-2k", "subset": "en", From 344d1192acfbfb2aba7cbb8bbe45900546618e20 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Thu, 13 Jun 2024 01:58:16 +0800 Subject: [PATCH 
137/162] clean code Former-commit-id: f54cafd5c7f0383370d1a2f357834a61a97397ce --- src/llamafactory/chat/vllm_engine.py | 12 +++++----- src/llamafactory/extras/packages.py | 22 +++++-------------- .../model/model_utils/attention.py | 7 +++--- src/llamafactory/train/sft/metric.py | 3 ++- 4 files changed, 17 insertions(+), 27 deletions(-) diff --git a/src/llamafactory/chat/vllm_engine.py b/src/llamafactory/chat/vllm_engine.py index 34126adf..e4c05478 100644 --- a/src/llamafactory/chat/vllm_engine.py +++ b/src/llamafactory/chat/vllm_engine.py @@ -1,12 +1,10 @@ import uuid from typing import TYPE_CHECKING, AsyncGenerator, AsyncIterator, Dict, List, Optional, Sequence, Union -from packaging import version - from ..data import get_template_and_fix_tokenizer from ..extras.logging import get_logger from ..extras.misc import get_device_count -from ..extras.packages import is_vllm_available, _get_package_version +from ..extras.packages import is_vllm_available, is_vllm_version_greater_than_0_5 from ..model import load_config, load_tokenizer from ..model.model_utils.visual import LlavaMultiModalProjectorForYiVLForVLLM from .base_engine import BaseEngine, Response @@ -16,7 +14,7 @@ if is_vllm_available(): from vllm import AsyncEngineArgs, AsyncLLMEngine, RequestOutput, SamplingParams from vllm.lora.request import LoRARequest - if _get_package_version("vllm") >= version.parse("0.5.0"): + if is_vllm_version_greater_than_0_5(): from vllm.multimodal.image import ImagePixelData else: from vllm.sequence import MultiModalData @@ -112,9 +110,9 @@ class VllmEngine(BaseEngine): if self.processor is not None and image is not None: # add image features image_processor: "BaseImageProcessor" = getattr(self.processor, "image_processor") pixel_values = image_processor(image, return_tensors="pt")["pixel_values"] - if _get_package_version("vllm") >= version.parse("0.5.0"): - multi_modal_data = ImagePixelData(pixel_values) - else: + if is_vllm_version_greater_than_0_5(): + multi_modal_data = ImagePixelData(image=pixel_values) + else: # TODO: remove vllm 0.4.3 support multi_modal_data = MultiModalData(type=MultiModalData.Type.IMAGE, data=pixel_values) else: multi_modal_data = None diff --git a/src/llamafactory/extras/packages.py b/src/llamafactory/extras/packages.py index 4c9e6492..0746bb4f 100644 --- a/src/llamafactory/extras/packages.py +++ b/src/llamafactory/extras/packages.py @@ -1,5 +1,6 @@ import importlib.metadata import importlib.util +from functools import lru_cache from typing import TYPE_CHECKING from packaging import version @@ -24,10 +25,6 @@ def is_fastapi_available(): return _is_package_available("fastapi") -def is_flash_attn2_available(): - return _is_package_available("flash_attn") and _get_package_version("flash_attn") > version.parse("2.0.0") - - def is_galore_available(): return _is_package_available("galore_torch") @@ -36,18 +33,10 @@ def is_gradio_available(): return _is_package_available("gradio") -def is_jieba_available(): - return _is_package_available("jieba") - - def is_matplotlib_available(): return _is_package_available("matplotlib") -def is_nltk_available(): - return _is_package_available("nltk") - - def is_pillow_available(): return _is_package_available("PIL") @@ -60,10 +49,6 @@ def is_rouge_available(): return _is_package_available("rouge_chinese") -def is_sdpa_available(): - return _get_package_version("torch") > version.parse("2.1.1") - - def is_starlette_available(): return _is_package_available("sse_starlette") @@ -74,3 +59,8 @@ def is_uvicorn_available(): def is_vllm_available(): 
return _is_package_available("vllm") + + +@lru_cache +def is_vllm_version_greater_than_0_5(): + return _get_package_version("vllm") >= version.parse("0.5.0") diff --git a/src/llamafactory/model/model_utils/attention.py b/src/llamafactory/model/model_utils/attention.py index b52ddc86..2bd36fdc 100644 --- a/src/llamafactory/model/model_utils/attention.py +++ b/src/llamafactory/model/model_utils/attention.py @@ -1,7 +1,8 @@ from typing import TYPE_CHECKING +from transformers.utils import is_flash_attn_2_available, is_torch_sdpa_available + from ...extras.logging import get_logger -from ...extras.packages import is_flash_attn2_available, is_sdpa_available if TYPE_CHECKING: @@ -21,13 +22,13 @@ def configure_attn_implementation(config: "PretrainedConfig", model_args: "Model requested_attn_implementation = "eager" elif model_args.flash_attn == "sdpa": - if not is_sdpa_available(): + if not is_torch_sdpa_available(): logger.warning("torch>=2.1.1 is required for SDPA attention.") return requested_attn_implementation = "sdpa" elif model_args.flash_attn == "fa2": - if not is_flash_attn2_available(): + if not is_flash_attn_2_available(): logger.warning("FlashAttention-2 is not installed.") return diff --git a/src/llamafactory/train/sft/metric.py b/src/llamafactory/train/sft/metric.py index b135fcfb..6ed356c1 100644 --- a/src/llamafactory/train/sft/metric.py +++ b/src/llamafactory/train/sft/metric.py @@ -2,9 +2,10 @@ from dataclasses import dataclass from typing import TYPE_CHECKING, Dict, Sequence, Tuple, Union import numpy as np +from transformers.utils import is_jieba_available, is_nltk_available from ...extras.constants import IGNORE_INDEX -from ...extras.packages import is_jieba_available, is_nltk_available, is_rouge_available +from ...extras.packages import is_rouge_available if TYPE_CHECKING: From 045cef901ea3fb1516f242ae0b9faac9a99185df Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Thu, 13 Jun 2024 02:25:50 +0800 Subject: [PATCH 138/162] fix #4209 DeepSpeed ZeRO3 has inflight param error when calling model.eval() Former-commit-id: 4be013f18ea6a35b5a11db98db5f0670ffb41619 --- src/llamafactory/train/dpo/trainer.py | 7 +++++-- src/llamafactory/train/kto/trainer.py | 7 +++++-- src/llamafactory/train/ppo/trainer.py | 2 ++ src/llamafactory/train/trainer_utils.py | 13 ------------- 4 files changed, 12 insertions(+), 17 deletions(-) diff --git a/src/llamafactory/train/dpo/trainer.py b/src/llamafactory/train/dpo/trainer.py index d860b29a..5bdb9c43 100644 --- a/src/llamafactory/train/dpo/trainer.py +++ b/src/llamafactory/train/dpo/trainer.py @@ -1,3 +1,4 @@ +import warnings from collections import defaultdict from contextlib import nullcontext from types import MethodType @@ -10,7 +11,7 @@ from trl import DPOTrainer from trl.trainer import disable_dropout_in_model from ...extras.constants import IGNORE_INDEX -from ..trainer_utils import create_custom_optimzer, create_custom_scheduler, get_batch_logps, get_ref_context +from ..trainer_utils import create_custom_optimzer, create_custom_scheduler, get_batch_logps if TYPE_CHECKING: @@ -61,6 +62,8 @@ class CustomDPOTrainer(DPOTrainer): if not hasattr(self, "accelerator"): raise AttributeError("Please update `transformers`.") + warnings.simplefilter("ignore") # remove gc warnings on ref model + if ref_model is not None: if self.is_deepspeed_enabled: if not ( @@ -176,7 +179,7 @@ class CustomDPOTrainer(DPOTrainer): if self.ref_model is None: ref_model = model - ref_context = get_ref_context(self.accelerator, model) + ref_context = 
self.accelerator.unwrap_model(model).disable_adapter() else: ref_model = self.ref_model ref_context = nullcontext() diff --git a/src/llamafactory/train/kto/trainer.py b/src/llamafactory/train/kto/trainer.py index 22a84e4a..3b4488fc 100644 --- a/src/llamafactory/train/kto/trainer.py +++ b/src/llamafactory/train/kto/trainer.py @@ -1,3 +1,4 @@ +import warnings from collections import defaultdict from contextlib import nullcontext from types import MethodType @@ -9,7 +10,7 @@ from trl import KTOTrainer from trl.trainer import disable_dropout_in_model from ...extras.constants import IGNORE_INDEX -from ..trainer_utils import create_custom_optimzer, create_custom_scheduler, get_batch_logps, get_ref_context +from ..trainer_utils import create_custom_optimzer, create_custom_scheduler, get_batch_logps if TYPE_CHECKING: @@ -60,6 +61,8 @@ class CustomKTOTrainer(KTOTrainer): if not hasattr(self, "accelerator"): raise AttributeError("Please update `transformers`.") + warnings.simplefilter("ignore") # remove gc warnings on ref model + if ref_model is not None: if self.is_deepspeed_enabled: if not ( @@ -143,7 +146,7 @@ class CustomKTOTrainer(KTOTrainer): """ if self.ref_model is None: ref_model = model - ref_context = get_ref_context(self.accelerator, model) + ref_context = self.accelerator.unwrap_model(model).disable_adapter() else: ref_model = self.ref_model ref_context = nullcontext() diff --git a/src/llamafactory/train/ppo/trainer.py b/src/llamafactory/train/ppo/trainer.py index 2e1288e4..737c45a3 100644 --- a/src/llamafactory/train/ppo/trainer.py +++ b/src/llamafactory/train/ppo/trainer.py @@ -1,6 +1,7 @@ import math import os import sys +import warnings from types import MethodType from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple @@ -136,6 +137,7 @@ class CustomPPOTrainer(PPOTrainer, Trainer): device_type = unwrapped_model.pretrained_model.device.type self.amp_context = torch.autocast(device_type, dtype=model_args.compute_dtype) + warnings.simplefilter("ignore") # remove gc warnings on ref model if finetuning_args.reward_model_type == "full": if self.is_deepspeed_enabled: diff --git a/src/llamafactory/train/trainer_utils.py b/src/llamafactory/train/trainer_utils.py index 7e9cc881..48944a63 100644 --- a/src/llamafactory/train/trainer_utils.py +++ b/src/llamafactory/train/trainer_utils.py @@ -1,4 +1,3 @@ -from contextlib import contextmanager from typing import TYPE_CHECKING, Callable, Dict, List, Optional, Tuple, Union import torch @@ -19,7 +18,6 @@ if is_galore_available(): if TYPE_CHECKING: - from accelerate import Accelerator from transformers import PreTrainedModel, Seq2SeqTrainingArguments from trl import AutoModelForCausalLMWithValueHead @@ -154,17 +152,6 @@ def create_reward_model( return reward_model -@contextmanager -def get_ref_context(accelerator: "Accelerator", model: "PreTrainedModel"): - r""" - Gets adapter context for the reference model. - """ - with accelerator.unwrap_model(model).disable_adapter(): - model.eval() - yield - model.train() - - def _get_decay_parameter_names(model: "PreTrainedModel") -> List[str]: r""" Returns a list of names of parameters with weight decay. 
(weights in non-layernorm layers) From e8885443a9edae3452c9fd26cfc0e1feef1c2fe5 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Thu, 13 Jun 2024 02:48:21 +0800 Subject: [PATCH 139/162] fix #4221 Former-commit-id: 05a3be4853b941909e7d193c31e8d62c8c5f879b --- src/llamafactory/data/aligner.py | 8 ++++++-- src/llamafactory/data/loader.py | 11 ++++++----- src/llamafactory/data/preprocess.py | 3 +-- src/llamafactory/data/processors/feedback.py | 3 +-- src/llamafactory/data/processors/pairwise.py | 3 +-- src/llamafactory/data/processors/pretrain.py | 2 +- src/llamafactory/data/processors/supervised.py | 3 +-- src/llamafactory/data/processors/unsupervised.py | 3 +-- src/llamafactory/train/sft/metric.py | 2 +- 9 files changed, 19 insertions(+), 19 deletions(-) diff --git a/src/llamafactory/data/aligner.py b/src/llamafactory/data/aligner.py index 434956af..3e9d5c46 100644 --- a/src/llamafactory/data/aligner.py +++ b/src/llamafactory/data/aligner.py @@ -10,6 +10,7 @@ from .data_utils import Role if TYPE_CHECKING: from datasets import Dataset, IterableDataset + from transformers import Seq2SeqTrainingArguments from ..hparams import DataArguments from .parser import DatasetAttr @@ -175,7 +176,10 @@ def convert_sharegpt( def align_dataset( - dataset: Union["Dataset", "IterableDataset"], dataset_attr: "DatasetAttr", data_args: "DataArguments" + dataset: Union["Dataset", "IterableDataset"], + dataset_attr: "DatasetAttr", + data_args: "DataArguments", + training_args: "Seq2SeqTrainingArguments", ) -> Union["Dataset", "IterableDataset"]: r""" Aligned dataset: @@ -208,7 +212,7 @@ def align_dataset( if not data_args.streaming: kwargs = dict( num_proc=data_args.preprocessing_num_workers, - load_from_cache_file=(not data_args.overwrite_cache), + load_from_cache_file=(not data_args.overwrite_cache) or (training_args.local_process_index != 0), desc="Converting format of dataset", ) diff --git a/src/llamafactory/data/loader.py b/src/llamafactory/data/loader.py index 2c236c76..ba426f81 100644 --- a/src/llamafactory/data/loader.py +++ b/src/llamafactory/data/loader.py @@ -18,8 +18,7 @@ from .template import get_template_and_fix_tokenizer if TYPE_CHECKING: from datasets import Dataset, IterableDataset - from transformers import ProcessorMixin, Seq2SeqTrainingArguments - from transformers.tokenization_utils import PreTrainedTokenizer + from transformers import PreTrainedTokenizer, ProcessorMixin, Seq2SeqTrainingArguments from ..hparams import DataArguments, ModelArguments from .parser import DatasetAttr @@ -32,6 +31,7 @@ def load_single_dataset( dataset_attr: "DatasetAttr", model_args: "ModelArguments", data_args: "DataArguments", + training_args: "Seq2SeqTrainingArguments", ) -> Union["Dataset", "IterableDataset"]: logger.info("Loading dataset {}...".format(dataset_attr)) data_path, data_name, data_dir, data_files = None, None, None, None @@ -123,7 +123,7 @@ def load_single_dataset( max_samples = min(data_args.max_samples, len(dataset)) dataset = dataset.select(range(max_samples)) - return align_dataset(dataset, dataset_attr, data_args) + return align_dataset(dataset, dataset_attr, data_args, training_args) def get_dataset( @@ -157,7 +157,8 @@ def get_dataset( if (stage == "rm" and dataset_attr.ranking is False) or (stage != "rm" and dataset_attr.ranking is True): raise ValueError("The dataset is not applicable in the current training stage.") - all_datasets.append(load_single_dataset(dataset_attr, model_args, data_args)) + all_datasets.append(load_single_dataset(dataset_attr, model_args, data_args, 
training_args)) + dataset = merge_dataset(all_datasets, data_args, training_args) with training_args.main_process_first(desc="pre-process dataset"): @@ -169,7 +170,7 @@ def get_dataset( if not data_args.streaming: kwargs = dict( num_proc=data_args.preprocessing_num_workers, - load_from_cache_file=(not data_args.overwrite_cache), + load_from_cache_file=(not data_args.overwrite_cache) or (training_args.local_process_index != 0), desc="Running tokenizer on dataset", ) diff --git a/src/llamafactory/data/preprocess.py b/src/llamafactory/data/preprocess.py index 97789c39..875f55d6 100644 --- a/src/llamafactory/data/preprocess.py +++ b/src/llamafactory/data/preprocess.py @@ -13,8 +13,7 @@ from .processors.unsupervised import preprocess_unsupervised_dataset, print_unsu if TYPE_CHECKING: - from transformers import ProcessorMixin, Seq2SeqTrainingArguments - from transformers.tokenization_utils import PreTrainedTokenizer + from transformers import PreTrainedTokenizer, ProcessorMixin, Seq2SeqTrainingArguments from ..hparams import DataArguments from .template import Template diff --git a/src/llamafactory/data/processors/feedback.py b/src/llamafactory/data/processors/feedback.py index 98d83658..5fba452c 100644 --- a/src/llamafactory/data/processors/feedback.py +++ b/src/llamafactory/data/processors/feedback.py @@ -6,8 +6,7 @@ from .processor_utils import get_paligemma_token_type_ids, get_pixel_values if TYPE_CHECKING: - from transformers import ProcessorMixin - from transformers.tokenization_utils import PreTrainedTokenizer + from transformers import PreTrainedTokenizer, ProcessorMixin from ...hparams import DataArguments from ..template import Template diff --git a/src/llamafactory/data/processors/pairwise.py b/src/llamafactory/data/processors/pairwise.py index fe984efa..db52c6a7 100644 --- a/src/llamafactory/data/processors/pairwise.py +++ b/src/llamafactory/data/processors/pairwise.py @@ -6,8 +6,7 @@ from .processor_utils import get_paligemma_token_type_ids, get_pixel_values if TYPE_CHECKING: - from transformers import ProcessorMixin - from transformers.tokenization_utils import PreTrainedTokenizer + from transformers import PreTrainedTokenizer, ProcessorMixin from ...hparams import DataArguments from ..template import Template diff --git a/src/llamafactory/data/processors/pretrain.py b/src/llamafactory/data/processors/pretrain.py index 832c987e..a10ccabd 100644 --- a/src/llamafactory/data/processors/pretrain.py +++ b/src/llamafactory/data/processors/pretrain.py @@ -3,7 +3,7 @@ from typing import TYPE_CHECKING, Any, Dict, List if TYPE_CHECKING: - from transformers.tokenization_utils import PreTrainedTokenizer + from transformers import PreTrainedTokenizer from ...hparams import DataArguments diff --git a/src/llamafactory/data/processors/supervised.py b/src/llamafactory/data/processors/supervised.py index 19d60280..f59f5371 100644 --- a/src/llamafactory/data/processors/supervised.py +++ b/src/llamafactory/data/processors/supervised.py @@ -7,8 +7,7 @@ from .processor_utils import get_paligemma_token_type_ids, get_pixel_values, gre if TYPE_CHECKING: - from transformers import ProcessorMixin - from transformers.tokenization_utils import PreTrainedTokenizer + from transformers import PreTrainedTokenizer, ProcessorMixin from ...hparams import DataArguments from ..template import Template diff --git a/src/llamafactory/data/processors/unsupervised.py b/src/llamafactory/data/processors/unsupervised.py index f711eeac..38497a15 100644 --- a/src/llamafactory/data/processors/unsupervised.py +++ 
b/src/llamafactory/data/processors/unsupervised.py @@ -6,8 +6,7 @@ from .processor_utils import get_paligemma_token_type_ids, get_pixel_values if TYPE_CHECKING: - from transformers import ProcessorMixin - from transformers.tokenization_utils import PreTrainedTokenizer + from transformers import PreTrainedTokenizer, ProcessorMixin from ...hparams import DataArguments from ..template import Template diff --git a/src/llamafactory/train/sft/metric.py b/src/llamafactory/train/sft/metric.py index 6ed356c1..923238d6 100644 --- a/src/llamafactory/train/sft/metric.py +++ b/src/llamafactory/train/sft/metric.py @@ -9,7 +9,7 @@ from ...extras.packages import is_rouge_available if TYPE_CHECKING: - from transformers.tokenization_utils import PreTrainedTokenizer + from transformers import PreTrainedTokenizer if is_jieba_available(): From 554c84f8d3af480336deb0b3649fde8cb5da766f Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Thu, 13 Jun 2024 03:15:06 +0800 Subject: [PATCH 140/162] update examples Former-commit-id: 19681f93db399d695aa8e35f8ec2a9e720875baa --- README.md | 2 +- README_zh.md | 2 +- examples/README.md | 126 ++++++++--------- examples/README_zh.md | 128 ++++++++---------- .../extras/fsdp_qlora/llama3_lora_sft.yaml | 4 +- .../extras/llama_pro/llama3_freeze_sft.yaml | 1 + examples/extras/loraplus/llama3_lora_sft.yaml | 1 + examples/extras/mod/llama3_full_sft.yaml | 1 + examples/lora_multi_gpu/llama3_lora_sft.yaml | 41 ------ .../llama3_full_predict.yaml | 0 .../llama3_full_sft_ds3.yaml} | 0 .../llama3_lora_dpo.yaml | 1 + .../llama3_lora_eval.yaml | 0 .../llama3_lora_kto.yaml | 2 + .../llama3_lora_ppo.yaml | 1 + .../llama3_lora_predict.yaml | 1 + .../llama3_lora_pretrain.yaml | 1 + .../llama3_lora_reward.yaml | 1 + .../llama3_lora_sft.yaml | 1 + .../llama3_lora_sft_ds0.yaml} | 4 +- .../llama3_lora_sft_ds3.yaml} | 4 +- .../llama3_preprocess.yaml | 0 .../llava1_5_lora_sft.yaml | 1 + .../llama3_lora_sft_aqlm.yaml | 1 + .../llama3_lora_sft_awq.yaml | 1 + .../llama3_lora_sft_bitsandbytes.yaml | 1 + .../llama3_lora_sft_gptq.yaml | 1 + 27 files changed, 128 insertions(+), 199 deletions(-) delete mode 100644 examples/lora_multi_gpu/llama3_lora_sft.yaml rename examples/{full_multi_gpu => train_full}/llama3_full_predict.yaml (100%) rename examples/{full_multi_gpu/llama3_full_sft.yaml => train_full/llama3_full_sft_ds3.yaml} (100%) rename examples/{lora_single_gpu => train_lora}/llama3_lora_dpo.yaml (96%) rename examples/{lora_single_gpu => train_lora}/llama3_lora_eval.yaml (100%) rename examples/{lora_single_gpu => train_lora}/llama3_lora_kto.yaml (94%) rename examples/{lora_single_gpu => train_lora}/llama3_lora_ppo.yaml (96%) rename examples/{lora_single_gpu => train_lora}/llama3_lora_predict.yaml (95%) rename examples/{lora_single_gpu => train_lora}/llama3_lora_pretrain.yaml (96%) rename examples/{lora_single_gpu => train_lora}/llama3_lora_reward.yaml (96%) rename examples/{lora_single_gpu => train_lora}/llama3_lora_sft.yaml (96%) rename examples/{lora_multi_npu/llama3_lora_sft_ds.yaml => train_lora/llama3_lora_sft_ds0.yaml} (98%) rename examples/{lora_multi_gpu/llama3_lora_sft_ds.yaml => train_lora/llama3_lora_sft_ds3.yaml} (98%) rename examples/{lora_single_gpu => train_lora}/llama3_preprocess.yaml (100%) rename examples/{lora_single_gpu => train_lora}/llava1_5_lora_sft.yaml (96%) rename examples/{qlora_single_gpu => train_qlora}/llama3_lora_sft_aqlm.yaml (96%) rename examples/{qlora_single_gpu => train_qlora}/llama3_lora_sft_awq.yaml (96%) rename examples/{qlora_single_gpu => 
train_qlora}/llama3_lora_sft_bitsandbytes.yaml (96%) rename examples/{qlora_single_gpu => train_qlora}/llama3_lora_sft_gptq.yaml (96%) diff --git a/README.md b/README.md index 5bbaf2d7..5dd10d5a 100644 --- a/README.md +++ b/README.md @@ -406,7 +406,7 @@ Please refer to [data/README.md](data/README.md) for checking the details about Use the following 3 commands to run LoRA **fine-tuning**, **inference** and **merging** of the Llama3-8B-Instruct model, respectively. ```bash -llamafactory-cli train examples/lora_single_gpu/llama3_lora_sft.yaml +llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml llamafactory-cli chat examples/inference/llama3_lora_sft.yaml llamafactory-cli export examples/merge_lora/llama3_lora_sft.yaml ``` diff --git a/README_zh.md b/README_zh.md index fb616909..76bd2d89 100644 --- a/README_zh.md +++ b/README_zh.md @@ -406,7 +406,7 @@ Docker 镜像: 下面三行命令分别对 Llama3-8B-Instruct 模型进行 LoRA **微调**、**推理**和**合并**。 ```bash -llamafactory-cli train examples/lora_single_gpu/llama3_lora_sft.yaml +llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml llamafactory-cli chat examples/inference/llama3_lora_sft.yaml llamafactory-cli export examples/merge_lora/llama3_lora_sft.yaml ``` diff --git a/examples/README.md b/examples/README.md index f985d552..3372afb9 100644 --- a/examples/README.md +++ b/examples/README.md @@ -4,59 +4,57 @@ Make sure to execute these commands in the `LLaMA-Factory` directory. ## Table of Contents -- [LoRA Fine-Tuning on A Single GPU](#lora-fine-tuning-on-a-single-gpu) -- [QLoRA Fine-Tuning on a Single GPU](#qlora-fine-tuning-on-a-single-gpu) -- [LoRA Fine-Tuning on Multiple GPUs](#lora-fine-tuning-on-multiple-gpus) -- [LoRA Fine-Tuning on Multiple NPUs](#lora-fine-tuning-on-multiple-npus) -- [Full-Parameter Fine-Tuning on Multiple GPUs](#full-parameter-fine-tuning-on-multiple-gpus) +- [LoRA Fine-Tuning](#lora-fine-tuning) +- [QLoRA Fine-Tuning](#qlora-fine-tuning) +- [Full-Parameter Fine-Tuning](#full-parameter-fine-tuning) - [Merging LoRA Adapters and Quantization](#merging-lora-adapters-and-quantization) - [Inferring LoRA Fine-Tuned Models](#inferring-lora-fine-tuned-models) - [Extras](#extras) ## Examples -### LoRA Fine-Tuning on A Single GPU +### LoRA Fine-Tuning #### (Continuous) Pre-Training ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_pretrain.yaml +llamafactory-cli train examples/train_lora/llama3_lora_pretrain.yaml ``` #### Supervised Fine-Tuning ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_sft.yaml +llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml ``` #### Multimodal Supervised Fine-Tuning ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llava1_5_lora_sft.yaml +llamafactory-cli train examples/train_lora/llava1_5_lora_sft.yaml ``` #### Reward Modeling ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_reward.yaml +llamafactory-cli train examples/train_lora/llama3_lora_reward.yaml ``` #### PPO Training ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_ppo.yaml +llamafactory-cli train examples/train_lora/llama3_lora_ppo.yaml ``` #### DPO/ORPO/SimPO Training ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_dpo.yaml +llamafactory-cli train examples/train_lora/llama3_lora_dpo.yaml ``` #### KTO Training ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train 
examples/lora_single_gpu/llama3_lora_kto.yaml +llamafactory-cli train examples/train_lora/llama3_lora_kto.yaml ``` #### Preprocess Dataset @@ -64,95 +62,79 @@ CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lo It is useful for large dataset, use `tokenized_path` in config to load the preprocessed dataset. ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_preprocess.yaml +llamafactory-cli train examples/train_lora/llama3_preprocess.yaml ``` #### Evaluating on MMLU/CMMLU/C-Eval Benchmarks ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli eval examples/lora_single_gpu/llama3_lora_eval.yaml +llamafactory-cli eval examples/train_lora/llama3_lora_eval.yaml ``` #### Batch Predicting and Computing BLEU and ROUGE Scores ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_predict.yaml -``` - -### QLoRA Fine-Tuning on a Single GPU - -#### Supervised Fine-Tuning with 4/8-bit Bitsandbytes Quantization (Recommended) - -```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/qlora_single_gpu/llama3_lora_sft_bitsandbytes.yaml -``` - -#### Supervised Fine-Tuning with 4/8-bit GPTQ Quantization - -```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/qlora_single_gpu/llama3_lora_sft_gptq.yaml -``` - -#### Supervised Fine-Tuning with 4-bit AWQ Quantization - -```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/qlora_single_gpu/llama3_lora_sft_awq.yaml -``` - -#### Supervised Fine-Tuning with 2-bit AQLM Quantization - -```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/qlora_single_gpu/llama3_lora_sft_aqlm.yaml -``` - -### LoRA Fine-Tuning on Multiple GPUs - -#### Supervised Fine-Tuning on Single Node - -```bash -CUDA_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train examples/lora_multi_gpu/llama3_lora_sft.yaml +llamafactory-cli train examples/train_lora/llama3_lora_predict.yaml ``` #### Supervised Fine-Tuning on Multiple Nodes ```bash -CUDA_VISIBLE_DEVICES=0,1,2,3 NNODES=2 RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/lora_multi_gpu/llama3_lora_sft.yaml -CUDA_VISIBLE_DEVICES=0,1,2,3 NNODES=2 RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/lora_multi_gpu/llama3_lora_sft.yaml +FORCE_TORCHRUN=1 NNODES=2 RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml +FORCE_TORCHRUN=1 NNODES=2 RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml ``` #### Supervised Fine-Tuning with DeepSpeed ZeRO-3 (Weight Sharding) ```bash -CUDA_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train examples/lora_multi_gpu/llama3_lora_sft_ds.yaml +FORCE_TORCHRUN=1 llamafactory-cli train examples/train_lora/llama3_lora_sft_ds.yaml ``` -### LoRA Fine-Tuning on Multiple NPUs +### QLoRA Fine-Tuning -#### Supervised Fine-Tuning with DeepSpeed ZeRO-0 +#### Supervised Fine-Tuning with 4/8-bit Bitsandbytes Quantization (Recommended) ```bash -ASCEND_RT_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train examples/lora_multi_npu/llama3_lora_sft_ds.yaml +CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/train_qlora/llama3_lora_sft_bitsandbytes.yaml ``` -### Full-Parameter Fine-Tuning on Multiple GPUs +#### Supervised Fine-Tuning with 4/8-bit GPTQ Quantization + +```bash +CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/train_qlora/llama3_lora_sft_gptq.yaml +``` + +#### Supervised Fine-Tuning with 4-bit AWQ Quantization + 
+```bash +CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/train_qlora/llama3_lora_sft_awq.yaml +``` + +#### Supervised Fine-Tuning with 2-bit AQLM Quantization + +```bash +CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/train_qlora/llama3_lora_sft_aqlm.yaml +``` + +### Full-Parameter Fine-Tuning #### Supervised Fine-Tuning on Single Node ```bash -CUDA_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train examples/full_multi_gpu/llama3_full_sft.yaml +FORCE_TORCHRUN=1 llamafactory-cli train examples/train_full/llama3_full_sft.yaml ``` #### Supervised Fine-Tuning on Multiple Nodes ```bash -CUDA_VISIBLE_DEVICES=0,1,2,3 NNODES=2 RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/full_multi_gpu/llama3_full_sft.yaml -CUDA_VISIBLE_DEVICES=0,1,2,3 NNODES=2 RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/full_multi_gpu/llama3_full_sft.yaml +FORCE_TORCHRUN=1 NNODES=2 RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_full/llama3_full_sft.yaml +FORCE_TORCHRUN=1 NNODES=2 RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_full/llama3_full_sft.yaml ``` #### Batch Predicting and Computing BLEU and ROUGE Scores ```bash -CUDA_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train examples/full_multi_gpu/llama3_full_predict.yaml +llamafactory-cli train examples/train_full/llama3_full_predict.yaml ``` ### Merging LoRA Adapters and Quantization @@ -162,35 +144,33 @@ CUDA_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train examples/full_multi_gpu/llam Note: DO NOT use quantized model or `quantization_bit` when merging LoRA adapters. ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli export examples/merge_lora/llama3_lora_sft.yaml +llamafactory-cli export examples/merge_lora/llama3_lora_sft.yaml ``` #### Quantizing Model using AutoGPTQ ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli export examples/merge_lora/llama3_gptq.yaml +llamafactory-cli export examples/merge_lora/llama3_gptq.yaml ``` ### Inferring LoRA Fine-Tuned Models -Use `CUDA_VISIBLE_DEVICES=0,1` to infer models on multiple devices. 
- #### Use CLI ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli chat examples/inference/llama3_lora_sft.yaml +llamafactory-cli chat examples/inference/llama3_lora_sft.yaml ``` #### Use Web UI ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli webchat examples/inference/llama3_lora_sft.yaml +llamafactory-cli webchat examples/inference/llama3_lora_sft.yaml ``` #### Launch OpenAI-style API ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli api examples/inference/llama3_lora_sft.yaml +llamafactory-cli api examples/inference/llama3_lora_sft.yaml ``` ### Extras @@ -198,32 +178,32 @@ CUDA_VISIBLE_DEVICES=0 llamafactory-cli api examples/inference/llama3_lora_sft.y #### Full-Parameter Fine-Tuning using GaLore ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/extras/galore/llama3_full_sft.yaml +llamafactory-cli train examples/extras/galore/llama3_full_sft.yaml ``` #### Full-Parameter Fine-Tuning using BAdam ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/extras/badam/llama3_full_sft.yaml +llamafactory-cli train examples/extras/badam/llama3_full_sft.yaml ``` #### LoRA+ Fine-Tuning ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/extras/loraplus/llama3_lora_sft.yaml +llamafactory-cli train examples/extras/loraplus/llama3_lora_sft.yaml ``` #### Mixture-of-Depths Fine-Tuning ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/extras/mod/llama3_full_sft.yaml +llamafactory-cli train examples/extras/mod/llama3_full_sft.yaml ``` #### LLaMA-Pro Fine-Tuning ```bash bash examples/extras/llama_pro/expand.sh -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/extras/llama_pro/llama3_freeze_sft.yaml +llamafactory-cli train examples/extras/llama_pro/llama3_freeze_sft.yaml ``` #### FSDP+QLoRA Fine-Tuning diff --git a/examples/README_zh.md b/examples/README_zh.md index cf5bbf49..64c31fbd 100644 --- a/examples/README_zh.md +++ b/examples/README_zh.md @@ -4,59 +4,57 @@ ## 目录 -- [单 GPU LoRA 微调](#单-gpu-lora-微调) -- [单 GPU QLoRA 微调](#单-gpu-qlora-微调) -- [多 GPU LoRA 微调](#多-gpu-lora-微调) -- [多 NPU LoRA 微调](#多-npu-lora-微调) -- [多 GPU 全参数微调](#多-gpu-全参数微调) +- [LoRA 微调](#lora-微调) +- [QLoRA 微调](#qlora-微调) +- [全参数微调](#全参数微调) - [合并 LoRA 适配器与模型量化](#合并-lora-适配器与模型量化) - [推理 LoRA 模型](#推理-lora-模型) - [杂项](#杂项) ## 示例 -### 单 GPU LoRA 微调 +### LoRA 微调 #### (增量)预训练 ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_pretrain.yaml +llamafactory-cli train examples/train_lora/llama3_lora_pretrain.yaml ``` #### 指令监督微调 ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_sft.yaml +llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml ``` #### 多模态指令监督微调 ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llava1_5_lora_sft.yaml +llamafactory-cli train examples/train_lora/llava1_5_lora_sft.yaml ``` #### 奖励模型训练 ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_reward.yaml +llamafactory-cli train examples/train_lora/llama3_lora_reward.yaml ``` #### PPO 训练 ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_ppo.yaml +llamafactory-cli train examples/train_lora/llama3_lora_ppo.yaml ``` #### DPO/ORPO/SimPO 训练 ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_dpo.yaml +llamafactory-cli train examples/train_lora/llama3_lora_dpo.yaml ``` #### KTO 训练 ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_kto.yaml +llamafactory-cli 
train examples/train_lora/llama3_lora_kto.yaml ``` #### 预处理数据集 @@ -64,95 +62,79 @@ CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lo 对于大数据集有帮助,在配置中使用 `tokenized_path` 以加载预处理后的数据集。 ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_preprocess.yaml +llamafactory-cli train examples/train_lora/llama3_preprocess.yaml ``` #### 在 MMLU/CMMLU/C-Eval 上评估 ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli eval examples/lora_single_gpu/llama3_lora_eval.yaml +llamafactory-cli eval examples/train_lora/llama3_lora_eval.yaml ``` #### 批量预测并计算 BLEU 和 ROUGE 分数 ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_predict.yaml +llamafactory-cli train examples/train_lora/llama3_lora_predict.yaml ``` -### 单 GPU QLoRA 微调 - -#### 基于 4/8 比特 Bitsandbytes 量化进行指令监督微调(推荐) +#### 多机指令监督微调 ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/qlora_single_gpu/llama3_lora_sft_bitsandbytes.yaml -``` - -#### 基于 4/8 比特 GPTQ 量化进行指令监督微调 - -```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/qlora_single_gpu/llama3_lora_sft_gptq.yaml -``` - -#### 基于 4 比特 AWQ 量化进行指令监督微调 - -```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/qlora_single_gpu/llama3_lora_sft_awq.yaml -``` - -#### 基于 2 比特 AQLM 量化进行指令监督微调 - -```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/qlora_single_gpu/llama3_lora_sft_aqlm.yaml -``` - -### 多 GPU LoRA 微调 - -#### 在单机上进行指令监督微调 - -```bash -CUDA_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train examples/lora_multi_gpu/llama3_lora_sft.yaml -``` - -#### 在多机上进行指令监督微调 - -```bash -CUDA_VISIBLE_DEVICES=0,1,2,3 NNODES=2 RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/lora_multi_gpu/llama3_lora_sft.yaml -CUDA_VISIBLE_DEVICES=0,1,2,3 NNODES=2 RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/lora_multi_gpu/llama3_lora_sft.yaml +FORCE_TORCHRUN=1 NNODES=2 RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml +FORCE_TORCHRUN=1 NNODES=2 RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml ``` #### 使用 DeepSpeed ZeRO-3 平均分配显存 ```bash -CUDA_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train examples/lora_multi_gpu/llama3_lora_sft_ds.yaml +FORCE_TORCHRUN=1 llamafactory-cli train examples/train_lora/llama3_lora_sft_ds.yaml ``` -### 多 NPU LoRA 微调 +### QLoRA 微调 -#### 使用 DeepSpeed ZeRO-0 进行指令监督微调 +#### 基于 4/8 比特 Bitsandbytes 量化进行指令监督微调(推荐) ```bash -ASCEND_RT_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train examples/lora_multi_npu/llama3_lora_sft_ds.yaml +llamafactory-cli train examples/train_qlora/llama3_lora_sft_bitsandbytes.yaml ``` -### 多 GPU 全参数微调 +#### 基于 4/8 比特 GPTQ 量化进行指令监督微调 + +```bash +llamafactory-cli train examples/train_qlora/llama3_lora_sft_gptq.yaml +``` + +#### 基于 4 比特 AWQ 量化进行指令监督微调 + +```bash +llamafactory-cli train examples/train_qlora/llama3_lora_sft_awq.yaml +``` + +#### 基于 2 比特 AQLM 量化进行指令监督微调 + +```bash +llamafactory-cli train examples/train_qlora/llama3_lora_sft_aqlm.yaml +``` + +### 全参数微调 #### 在单机上进行指令监督微调 ```bash -CUDA_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train examples/full_multi_gpu/llama3_full_sft.yaml +FORCE_TORCHRUN=1 llamafactory-cli train examples/train_full/llama3_full_sft.yaml ``` #### 在多机上进行指令监督微调 ```bash -CUDA_VISIBLE_DEVICES=0,1,2,3 NNODES=2 RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/full_multi_gpu/llama3_full_sft.yaml 
-CUDA_VISIBLE_DEVICES=0,1,2,3 NNODES=2 RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/full_multi_gpu/llama3_full_sft.yaml +FORCE_TORCHRUN=1 NNODES=2 RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_full/llama3_full_sft.yaml +FORCE_TORCHRUN=1 NNODES=2 RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_full/llama3_full_sft.yaml ``` #### 批量预测并计算 BLEU 和 ROUGE 分数 ```bash -CUDA_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train examples/full_multi_gpu/llama3_full_predict.yaml +llamafactory-cli train examples/train_full/llama3_full_predict.yaml ``` ### 合并 LoRA 适配器与模型量化 @@ -162,35 +144,33 @@ CUDA_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train examples/full_multi_gpu/llam 注:请勿使用量化后的模型或 `quantization_bit` 参数来合并 LoRA 适配器。 ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli export examples/merge_lora/llama3_lora_sft.yaml +llamafactory-cli export examples/merge_lora/llama3_lora_sft.yaml ``` #### 使用 AutoGPTQ 量化模型 ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli export examples/merge_lora/llama3_gptq.yaml +llamafactory-cli export examples/merge_lora/llama3_gptq.yaml ``` ### 推理 LoRA 模型 -使用 `CUDA_VISIBLE_DEVICES=0,1` 进行多卡推理。 - #### 使用命令行接口 ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli chat examples/inference/llama3_lora_sft.yaml +llamafactory-cli chat examples/inference/llama3_lora_sft.yaml ``` #### 使用浏览器界面 ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli webchat examples/inference/llama3_lora_sft.yaml +llamafactory-cli webchat examples/inference/llama3_lora_sft.yaml ``` #### 启动 OpenAI 风格 API ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli api examples/inference/llama3_lora_sft.yaml +llamafactory-cli api examples/inference/llama3_lora_sft.yaml ``` ### 杂项 @@ -198,32 +178,32 @@ CUDA_VISIBLE_DEVICES=0 llamafactory-cli api examples/inference/llama3_lora_sft.y #### 使用 GaLore 进行全参数训练 ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/extras/galore/llama3_full_sft.yaml +llamafactory-cli train examples/extras/galore/llama3_full_sft.yaml ``` #### 使用 BAdam 进行全参数训练 ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/extras/badam/llama3_full_sft.yaml +llamafactory-cli train examples/extras/badam/llama3_full_sft.yaml ``` #### LoRA+ 微调 ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/extras/loraplus/llama3_lora_sft.yaml +llamafactory-cli train examples/extras/loraplus/llama3_lora_sft.yaml ``` #### 深度混合微调 ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/extras/mod/llama3_full_sft.yaml +llamafactory-cli train examples/extras/mod/llama3_full_sft.yaml ``` #### LLaMA-Pro 微调 ```bash bash examples/extras/llama_pro/expand.sh -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/extras/llama_pro/llama3_freeze_sft.yaml +llamafactory-cli train examples/extras/llama_pro/llama3_freeze_sft.yaml ``` #### FSDP+QLoRA 微调 diff --git a/examples/extras/fsdp_qlora/llama3_lora_sft.yaml b/examples/extras/fsdp_qlora/llama3_lora_sft.yaml index 084269ef..cc773991 100644 --- a/examples/extras/fsdp_qlora/llama3_lora_sft.yaml +++ b/examples/extras/fsdp_qlora/llama3_lora_sft.yaml @@ -8,9 +8,6 @@ do_train: true finetuning_type: lora lora_target: all -### ddp -ddp_timeout: 180000000 - ### dataset dataset: identity,alpaca_en_demo template: llama3 @@ -34,6 +31,7 @@ num_train_epochs: 3.0 lr_scheduler_type: cosine warmup_ratio: 0.1 fp16: true +ddp_timeout: 180000000 ### eval val_size: 0.1 diff --git a/examples/extras/llama_pro/llama3_freeze_sft.yaml 
b/examples/extras/llama_pro/llama3_freeze_sft.yaml index 444a1113..f92d6945 100644 --- a/examples/extras/llama_pro/llama3_freeze_sft.yaml +++ b/examples/extras/llama_pro/llama3_freeze_sft.yaml @@ -32,6 +32,7 @@ num_train_epochs: 3.0 lr_scheduler_type: cosine warmup_ratio: 0.1 fp16: true +ddp_timeout: 180000000 ### eval val_size: 0.1 diff --git a/examples/extras/loraplus/llama3_lora_sft.yaml b/examples/extras/loraplus/llama3_lora_sft.yaml index 1ba654ec..57383ae0 100644 --- a/examples/extras/loraplus/llama3_lora_sft.yaml +++ b/examples/extras/loraplus/llama3_lora_sft.yaml @@ -31,6 +31,7 @@ num_train_epochs: 3.0 lr_scheduler_type: cosine warmup_ratio: 0.1 fp16: true +ddp_timeout: 180000000 ### eval val_size: 0.1 diff --git a/examples/extras/mod/llama3_full_sft.yaml b/examples/extras/mod/llama3_full_sft.yaml index df03c1e0..085febfc 100644 --- a/examples/extras/mod/llama3_full_sft.yaml +++ b/examples/extras/mod/llama3_full_sft.yaml @@ -31,6 +31,7 @@ num_train_epochs: 3.0 lr_scheduler_type: cosine warmup_ratio: 0.1 pure_bf16: true +ddp_timeout: 180000000 ### eval val_size: 0.1 diff --git a/examples/lora_multi_gpu/llama3_lora_sft.yaml b/examples/lora_multi_gpu/llama3_lora_sft.yaml deleted file mode 100644 index 348e53b9..00000000 --- a/examples/lora_multi_gpu/llama3_lora_sft.yaml +++ /dev/null @@ -1,41 +0,0 @@ -### model -model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct - -### method -stage: sft -do_train: true -finetuning_type: lora -lora_target: all - -### ddp -ddp_timeout: 180000000 - -### dataset -dataset: identity,alpaca_en_demo -template: llama3 -cutoff_len: 1024 -max_samples: 1000 -overwrite_cache: true -preprocessing_num_workers: 16 - -### output -output_dir: saves/llama3-8b/lora/sft -logging_steps: 10 -save_steps: 500 -plot_loss: true -overwrite_output_dir: true - -### train -per_device_train_batch_size: 1 -gradient_accumulation_steps: 2 -learning_rate: 1.0e-4 -num_train_epochs: 3.0 -lr_scheduler_type: cosine -warmup_ratio: 0.1 -fp16: true - -### eval -val_size: 0.1 -per_device_eval_batch_size: 1 -eval_strategy: steps -eval_steps: 500 diff --git a/examples/full_multi_gpu/llama3_full_predict.yaml b/examples/train_full/llama3_full_predict.yaml similarity index 100% rename from examples/full_multi_gpu/llama3_full_predict.yaml rename to examples/train_full/llama3_full_predict.yaml diff --git a/examples/full_multi_gpu/llama3_full_sft.yaml b/examples/train_full/llama3_full_sft_ds3.yaml similarity index 100% rename from examples/full_multi_gpu/llama3_full_sft.yaml rename to examples/train_full/llama3_full_sft_ds3.yaml diff --git a/examples/lora_single_gpu/llama3_lora_dpo.yaml b/examples/train_lora/llama3_lora_dpo.yaml similarity index 96% rename from examples/lora_single_gpu/llama3_lora_dpo.yaml rename to examples/train_lora/llama3_lora_dpo.yaml index 78344330..db25fb51 100644 --- a/examples/lora_single_gpu/llama3_lora_dpo.yaml +++ b/examples/train_lora/llama3_lora_dpo.yaml @@ -32,6 +32,7 @@ num_train_epochs: 3.0 lr_scheduler_type: cosine warmup_ratio: 0.1 fp16: true +ddp_timeout: 180000000 ### eval val_size: 0.1 diff --git a/examples/lora_single_gpu/llama3_lora_eval.yaml b/examples/train_lora/llama3_lora_eval.yaml similarity index 100% rename from examples/lora_single_gpu/llama3_lora_eval.yaml rename to examples/train_lora/llama3_lora_eval.yaml diff --git a/examples/lora_single_gpu/llama3_lora_kto.yaml b/examples/train_lora/llama3_lora_kto.yaml similarity index 94% rename from examples/lora_single_gpu/llama3_lora_kto.yaml rename to examples/train_lora/llama3_lora_kto.yaml index 
d5234c0a..f730c82e 100644 --- a/examples/lora_single_gpu/llama3_lora_kto.yaml +++ b/examples/train_lora/llama3_lora_kto.yaml @@ -6,6 +6,7 @@ stage: kto do_train: true finetuning_type: lora lora_target: all +pref_beta: 0.1 ### dataset dataset: kto_en_demo @@ -30,6 +31,7 @@ num_train_epochs: 3.0 lr_scheduler_type: cosine warmup_ratio: 0.1 fp16: true +ddp_timeout: 180000000 ### eval val_size: 0.1 diff --git a/examples/lora_single_gpu/llama3_lora_ppo.yaml b/examples/train_lora/llama3_lora_ppo.yaml similarity index 96% rename from examples/lora_single_gpu/llama3_lora_ppo.yaml rename to examples/train_lora/llama3_lora_ppo.yaml index 98c842f9..e574014e 100644 --- a/examples/lora_single_gpu/llama3_lora_ppo.yaml +++ b/examples/train_lora/llama3_lora_ppo.yaml @@ -31,6 +31,7 @@ num_train_epochs: 3.0 lr_scheduler_type: cosine warmup_ratio: 0.1 fp16: true +ddp_timeout: 180000000 ### generate max_new_tokens: 512 diff --git a/examples/lora_single_gpu/llama3_lora_predict.yaml b/examples/train_lora/llama3_lora_predict.yaml similarity index 95% rename from examples/lora_single_gpu/llama3_lora_predict.yaml rename to examples/train_lora/llama3_lora_predict.yaml index a127d248..148c8635 100644 --- a/examples/lora_single_gpu/llama3_lora_predict.yaml +++ b/examples/train_lora/llama3_lora_predict.yaml @@ -22,3 +22,4 @@ overwrite_output_dir: true ### eval per_device_eval_batch_size: 1 predict_with_generate: true +ddp_timeout: 180000000 diff --git a/examples/lora_single_gpu/llama3_lora_pretrain.yaml b/examples/train_lora/llama3_lora_pretrain.yaml similarity index 96% rename from examples/lora_single_gpu/llama3_lora_pretrain.yaml rename to examples/train_lora/llama3_lora_pretrain.yaml index db435ca9..839b3e51 100644 --- a/examples/lora_single_gpu/llama3_lora_pretrain.yaml +++ b/examples/train_lora/llama3_lora_pretrain.yaml @@ -29,6 +29,7 @@ num_train_epochs: 3.0 lr_scheduler_type: cosine warmup_ratio: 0.1 fp16: true +ddp_timeout: 180000000 ### eval val_size: 0.1 diff --git a/examples/lora_single_gpu/llama3_lora_reward.yaml b/examples/train_lora/llama3_lora_reward.yaml similarity index 96% rename from examples/lora_single_gpu/llama3_lora_reward.yaml rename to examples/train_lora/llama3_lora_reward.yaml index 1ce42ea4..79559d19 100644 --- a/examples/lora_single_gpu/llama3_lora_reward.yaml +++ b/examples/train_lora/llama3_lora_reward.yaml @@ -30,6 +30,7 @@ num_train_epochs: 3.0 lr_scheduler_type: cosine warmup_ratio: 0.1 fp16: true +ddp_timeout: 180000000 ### eval val_size: 0.1 diff --git a/examples/lora_single_gpu/llama3_lora_sft.yaml b/examples/train_lora/llama3_lora_sft.yaml similarity index 96% rename from examples/lora_single_gpu/llama3_lora_sft.yaml rename to examples/train_lora/llama3_lora_sft.yaml index 651b636f..fe30c575 100644 --- a/examples/lora_single_gpu/llama3_lora_sft.yaml +++ b/examples/train_lora/llama3_lora_sft.yaml @@ -30,6 +30,7 @@ num_train_epochs: 3.0 lr_scheduler_type: cosine warmup_ratio: 0.1 fp16: true +ddp_timeout: 180000000 ### eval val_size: 0.1 diff --git a/examples/lora_multi_npu/llama3_lora_sft_ds.yaml b/examples/train_lora/llama3_lora_sft_ds0.yaml similarity index 98% rename from examples/lora_multi_npu/llama3_lora_sft_ds.yaml rename to examples/train_lora/llama3_lora_sft_ds0.yaml index a0ec8aa1..08b638e6 100644 --- a/examples/lora_multi_npu/llama3_lora_sft_ds.yaml +++ b/examples/train_lora/llama3_lora_sft_ds0.yaml @@ -6,9 +6,6 @@ stage: sft do_train: true finetuning_type: lora lora_target: all - -### ddp -ddp_timeout: 180000000 deepspeed: examples/deepspeed/ds_z0_config.json ### dataset 
@@ -34,6 +31,7 @@ num_train_epochs: 3.0 lr_scheduler_type: cosine warmup_ratio: 0.1 fp16: true +ddp_timeout: 180000000 ### eval val_size: 0.1 diff --git a/examples/lora_multi_gpu/llama3_lora_sft_ds.yaml b/examples/train_lora/llama3_lora_sft_ds3.yaml similarity index 98% rename from examples/lora_multi_gpu/llama3_lora_sft_ds.yaml rename to examples/train_lora/llama3_lora_sft_ds3.yaml index 1c432fa7..b7266d61 100644 --- a/examples/lora_multi_gpu/llama3_lora_sft_ds.yaml +++ b/examples/train_lora/llama3_lora_sft_ds3.yaml @@ -6,9 +6,6 @@ stage: sft do_train: true finetuning_type: lora lora_target: all - -### ddp -ddp_timeout: 180000000 deepspeed: examples/deepspeed/ds_z3_config.json ### dataset @@ -34,6 +31,7 @@ num_train_epochs: 3.0 lr_scheduler_type: cosine warmup_ratio: 0.1 fp16: true +ddp_timeout: 180000000 ### eval val_size: 0.1 diff --git a/examples/lora_single_gpu/llama3_preprocess.yaml b/examples/train_lora/llama3_preprocess.yaml similarity index 100% rename from examples/lora_single_gpu/llama3_preprocess.yaml rename to examples/train_lora/llama3_preprocess.yaml diff --git a/examples/lora_single_gpu/llava1_5_lora_sft.yaml b/examples/train_lora/llava1_5_lora_sft.yaml similarity index 96% rename from examples/lora_single_gpu/llava1_5_lora_sft.yaml rename to examples/train_lora/llava1_5_lora_sft.yaml index df510a93..55ac31fa 100644 --- a/examples/lora_single_gpu/llava1_5_lora_sft.yaml +++ b/examples/train_lora/llava1_5_lora_sft.yaml @@ -31,6 +31,7 @@ num_train_epochs: 3.0 lr_scheduler_type: cosine warmup_ratio: 0.1 fp16: true +ddp_timeout: 180000000 ### eval val_size: 0.1 diff --git a/examples/qlora_single_gpu/llama3_lora_sft_aqlm.yaml b/examples/train_qlora/llama3_lora_sft_aqlm.yaml similarity index 96% rename from examples/qlora_single_gpu/llama3_lora_sft_aqlm.yaml rename to examples/train_qlora/llama3_lora_sft_aqlm.yaml index d54d6af6..7b6767d5 100644 --- a/examples/qlora_single_gpu/llama3_lora_sft_aqlm.yaml +++ b/examples/train_qlora/llama3_lora_sft_aqlm.yaml @@ -30,6 +30,7 @@ num_train_epochs: 3.0 lr_scheduler_type: cosine warmup_ratio: 0.1 fp16: true +ddp_timeout: 180000000 ### eval val_size: 0.1 diff --git a/examples/qlora_single_gpu/llama3_lora_sft_awq.yaml b/examples/train_qlora/llama3_lora_sft_awq.yaml similarity index 96% rename from examples/qlora_single_gpu/llama3_lora_sft_awq.yaml rename to examples/train_qlora/llama3_lora_sft_awq.yaml index 5cef178a..a2a26e4b 100644 --- a/examples/qlora_single_gpu/llama3_lora_sft_awq.yaml +++ b/examples/train_qlora/llama3_lora_sft_awq.yaml @@ -30,6 +30,7 @@ num_train_epochs: 3.0 lr_scheduler_type: cosine warmup_ratio: 0.1 fp16: true +ddp_timeout: 180000000 ### eval val_size: 0.1 diff --git a/examples/qlora_single_gpu/llama3_lora_sft_bitsandbytes.yaml b/examples/train_qlora/llama3_lora_sft_bitsandbytes.yaml similarity index 96% rename from examples/qlora_single_gpu/llama3_lora_sft_bitsandbytes.yaml rename to examples/train_qlora/llama3_lora_sft_bitsandbytes.yaml index b308dcab..cc773991 100644 --- a/examples/qlora_single_gpu/llama3_lora_sft_bitsandbytes.yaml +++ b/examples/train_qlora/llama3_lora_sft_bitsandbytes.yaml @@ -31,6 +31,7 @@ num_train_epochs: 3.0 lr_scheduler_type: cosine warmup_ratio: 0.1 fp16: true +ddp_timeout: 180000000 ### eval val_size: 0.1 diff --git a/examples/qlora_single_gpu/llama3_lora_sft_gptq.yaml b/examples/train_qlora/llama3_lora_sft_gptq.yaml similarity index 96% rename from examples/qlora_single_gpu/llama3_lora_sft_gptq.yaml rename to examples/train_qlora/llama3_lora_sft_gptq.yaml index b950042e..ad3d854c 100644 
--- a/examples/qlora_single_gpu/llama3_lora_sft_gptq.yaml +++ b/examples/train_qlora/llama3_lora_sft_gptq.yaml @@ -30,6 +30,7 @@ num_train_epochs: 3.0 lr_scheduler_type: cosine warmup_ratio: 0.1 fp16: true +ddp_timeout: 180000000 ### eval val_size: 0.1 From e06568de988db0f6e44e663bbc8457e8aa0f0507 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Thu, 13 Jun 2024 03:16:20 +0800 Subject: [PATCH 141/162] Update llama3_full_sft_ds3.yaml Former-commit-id: e715af62d521112d9c155cfa91fbb42fa0e77710 --- examples/train_full/llama3_full_sft_ds3.yaml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/examples/train_full/llama3_full_sft_ds3.yaml b/examples/train_full/llama3_full_sft_ds3.yaml index 40b62f24..40afd2ee 100644 --- a/examples/train_full/llama3_full_sft_ds3.yaml +++ b/examples/train_full/llama3_full_sft_ds3.yaml @@ -5,9 +5,6 @@ model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct stage: sft do_train: true finetuning_type: full - -### ddp -ddp_timeout: 180000000 deepspeed: examples/deepspeed/ds_z3_config.json ### dataset @@ -33,6 +30,7 @@ num_train_epochs: 3.0 lr_scheduler_type: cosine warmup_ratio: 0.1 fp16: true +ddp_timeout: 180000000 ### eval val_size: 0.1 From 48d87a2decf4dd4d687ef0d2eec8b93109ad7d98 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Thu, 13 Jun 2024 03:19:18 +0800 Subject: [PATCH 142/162] add quant check in webui export tab Former-commit-id: 6455ca07061ae9858cd7bc996b28be1fde697a3d --- src/llamafactory/webui/components/export.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/llamafactory/webui/components/export.py b/src/llamafactory/webui/components/export.py index 7e1493c8..9d756a38 100644 --- a/src/llamafactory/webui/components/export.py +++ b/src/llamafactory/webui/components/export.py @@ -21,6 +21,13 @@ if TYPE_CHECKING: GPTQ_BITS = ["8", "4", "3", "2"] +def can_quantize(checkpoint_path: Union[str, List[str]]) -> "gr.Dropdown": + if isinstance(checkpoint_path, list) and len(checkpoint_path) != 0: + return gr.Dropdown(value="none", interactive=False) + else: + return gr.Dropdown(interactive=True) + + def save_model( lang: str, model_name: str, @@ -96,6 +103,9 @@ def create_export_tab(engine: "Engine") -> Dict[str, "Component"]: export_dir = gr.Textbox() export_hub_model_id = gr.Textbox() + checkpoint_path: gr.Dropdown = engine.manager.get_elem_by_id("top.checkpoint_path") + checkpoint_path.change(can_quantize, [checkpoint_path], [export_quantization_bit], queue=False) + export_btn = gr.Button() info_box = gr.Textbox(show_label=False, interactive=False) From db6e74884c633df80aedcd245a2dd08532ae37c3 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Thu, 13 Jun 2024 03:26:10 +0800 Subject: [PATCH 143/162] update examples Former-commit-id: d6bf6231290d79eb3a63e711f18fa711ef18a4f6 --- examples/README.md | 10 ++++++---- examples/README_zh.md | 10 ++++++---- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/examples/README.md b/examples/README.md index 3372afb9..180d5f7b 100644 --- a/examples/README.md +++ b/examples/README.md @@ -11,6 +11,8 @@ Make sure to execute these commands in the `LLaMA-Factory` directory. - [Inferring LoRA Fine-Tuned Models](#inferring-lora-fine-tuned-models) - [Extras](#extras) +Use `CUDA_VISIBLE_DEVICES` (GPU) or `ASCEND_RT_VISIBLE_DEVICES` (NPU) to choose computing devices. 
+ ## Examples ### LoRA Fine-Tuning @@ -87,7 +89,7 @@ FORCE_TORCHRUN=1 NNODES=2 RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llama #### Supervised Fine-Tuning with DeepSpeed ZeRO-3 (Weight Sharding) ```bash -FORCE_TORCHRUN=1 llamafactory-cli train examples/train_lora/llama3_lora_sft_ds.yaml +FORCE_TORCHRUN=1 llamafactory-cli train examples/train_lora/llama3_lora_sft_ds3.yaml ``` ### QLoRA Fine-Tuning @@ -121,14 +123,14 @@ CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/train_qlora/llama3_lora_s #### Supervised Fine-Tuning on Single Node ```bash -FORCE_TORCHRUN=1 llamafactory-cli train examples/train_full/llama3_full_sft.yaml +FORCE_TORCHRUN=1 llamafactory-cli train examples/train_full/llama3_full_sft_ds3.yaml ``` #### Supervised Fine-Tuning on Multiple Nodes ```bash -FORCE_TORCHRUN=1 NNODES=2 RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_full/llama3_full_sft.yaml -FORCE_TORCHRUN=1 NNODES=2 RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_full/llama3_full_sft.yaml +FORCE_TORCHRUN=1 NNODES=2 RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_full/llama3_full_sft_ds3.yaml +FORCE_TORCHRUN=1 NNODES=2 RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_full/llama3_full_sft_ds3.yaml ``` #### Batch Predicting and Computing BLEU and ROUGE Scores diff --git a/examples/README_zh.md b/examples/README_zh.md index 64c31fbd..b6168a95 100644 --- a/examples/README_zh.md +++ b/examples/README_zh.md @@ -11,6 +11,8 @@ - [推理 LoRA 模型](#推理-lora-模型) - [杂项](#杂项) +使用 `CUDA_VISIBLE_DEVICES`(GPU)或 `ASCEND_RT_VISIBLE_DEVICES`(NPU)选择计算设备。 + ## 示例 ### LoRA 微调 @@ -87,7 +89,7 @@ FORCE_TORCHRUN=1 NNODES=2 RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llama #### 使用 DeepSpeed ZeRO-3 平均分配显存 ```bash -FORCE_TORCHRUN=1 llamafactory-cli train examples/train_lora/llama3_lora_sft_ds.yaml +FORCE_TORCHRUN=1 llamafactory-cli train examples/train_lora/llama3_lora_sft_ds3.yaml ``` ### QLoRA 微调 @@ -121,14 +123,14 @@ llamafactory-cli train examples/train_qlora/llama3_lora_sft_aqlm.yaml #### 在单机上进行指令监督微调 ```bash -FORCE_TORCHRUN=1 llamafactory-cli train examples/train_full/llama3_full_sft.yaml +FORCE_TORCHRUN=1 llamafactory-cli train examples/train_full/llama3_full_sft_ds3.yaml ``` #### 在多机上进行指令监督微调 ```bash -FORCE_TORCHRUN=1 NNODES=2 RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_full/llama3_full_sft.yaml -FORCE_TORCHRUN=1 NNODES=2 RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_full/llama3_full_sft.yaml +FORCE_TORCHRUN=1 NNODES=2 RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_full/llama3_full_sft_ds3.yaml +FORCE_TORCHRUN=1 NNODES=2 RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_full/llama3_full_sft_ds3.yaml ``` #### 批量预测并计算 BLEU 和 ROUGE 分数 From 6306f3e2ead0a80b75997e29bf265710a67c2fd2 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Thu, 13 Jun 2024 16:02:21 +0800 Subject: [PATCH 144/162] Update README.md Former-commit-id: f8d701cd3ce2e56f95b4f5439b8b48d5b62e0d2b --- examples/README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/examples/README.md b/examples/README.md index 180d5f7b..a6d78936 100644 --- a/examples/README.md +++ b/examples/README.md @@ -97,25 +97,25 @@ FORCE_TORCHRUN=1 llamafactory-cli train examples/train_lora/llama3_lora_sft_ds3. 
#### Supervised Fine-Tuning with 4/8-bit Bitsandbytes Quantization (Recommended) ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/train_qlora/llama3_lora_sft_bitsandbytes.yaml +llamafactory-cli train examples/train_qlora/llama3_lora_sft_bitsandbytes.yaml ``` #### Supervised Fine-Tuning with 4/8-bit GPTQ Quantization ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/train_qlora/llama3_lora_sft_gptq.yaml +llamafactory-cli train examples/train_qlora/llama3_lora_sft_gptq.yaml ``` #### Supervised Fine-Tuning with 4-bit AWQ Quantization ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/train_qlora/llama3_lora_sft_awq.yaml +llamafactory-cli train examples/train_qlora/llama3_lora_sft_awq.yaml ``` #### Supervised Fine-Tuning with 2-bit AQLM Quantization ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/train_qlora/llama3_lora_sft_aqlm.yaml +llamafactory-cli train examples/train_qlora/llama3_lora_sft_aqlm.yaml ``` ### Full-Parameter Fine-Tuning From bbeb3b10aacd38261ab0e352ffd3a35ee0027a38 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sat, 15 Jun 2024 04:05:54 +0800 Subject: [PATCH 145/162] add test cases Former-commit-id: 731176ff34cdf0cbf6b41c40c69f4ceb54c2daf6 --- src/llamafactory/chat/vllm_engine.py | 2 +- src/llamafactory/hparams/model_args.py | 8 +-- src/llamafactory/model/adapter.py | 43 +++++++------ src/llamafactory/model/patcher.py | 5 +- src/llamafactory/train/ppo/trainer.py | 3 +- tests/model/test_base.py | 32 ++++++++++ tests/model/test_freeze.py | 22 ++++++- tests/model/test_full.py | 20 ++++++- tests/model/test_lora.py | 83 +++++++++++++++++++++++++- 9 files changed, 184 insertions(+), 34 deletions(-) create mode 100644 tests/model/test_base.py diff --git a/src/llamafactory/chat/vllm_engine.py b/src/llamafactory/chat/vllm_engine.py index e4c05478..f0812a99 100644 --- a/src/llamafactory/chat/vllm_engine.py +++ b/src/llamafactory/chat/vllm_engine.py @@ -52,7 +52,7 @@ class VllmEngine(BaseEngine): "model": model_args.model_name_or_path, "trust_remote_code": True, "download_dir": model_args.cache_dir, - "dtype": model_args.vllm_dtype, + "dtype": model_args.infer_dtype, "max_model_len": model_args.vllm_maxlen, "tensor_parallel_size": get_device_count() or 1, "gpu_memory_utilization": model_args.vllm_gpu_util, diff --git a/src/llamafactory/hparams/model_args.py b/src/llamafactory/hparams/model_args.py index 359beafd..bbac2e4b 100644 --- a/src/llamafactory/hparams/model_args.py +++ b/src/llamafactory/hparams/model_args.py @@ -136,10 +136,6 @@ class ModelArguments: default=8, metadata={"help": "Maximum rank of all LoRAs in the vLLM engine."}, ) - vllm_dtype: Literal["auto", "float16", "bfloat16", "float32"] = field( - default="auto", - metadata={"help": "Data type for model weights and activations in the vLLM engine."}, - ) offload_folder: str = field( default="offload", metadata={"help": "Path to offload model weights."}, @@ -148,6 +144,10 @@ class ModelArguments: default=True, metadata={"help": "Whether or not to use KV cache in generation."}, ) + infer_dtype: Literal["auto", "float16", "bfloat16", "float32"] = field( + default="auto", + metadata={"help": "Data type for model weights and activations at inference."} + ) hf_hub_token: Optional[str] = field( default=None, metadata={"help": "Auth token to log in with Hugging Face Hub."}, diff --git a/src/llamafactory/model/adapter.py b/src/llamafactory/model/adapter.py index 34b9eda6..c37f6009 100644 --- a/src/llamafactory/model/adapter.py +++ 
b/src/llamafactory/model/adapter.py @@ -25,8 +25,12 @@ def _setup_full_tuning( model: "PreTrainedModel", model_args: "ModelArguments", finetuning_args: "FinetuningArguments", + is_trainable: bool, cast_trainable_params_to_fp32: bool, ) -> None: + if not is_trainable: + return + logger.info("Fine-tuning method: Full") forbidden_modules = set() if model_args.visual_inputs and finetuning_args.freeze_vision_tower: @@ -47,8 +51,12 @@ def _setup_freeze_tuning( model: "PreTrainedModel", model_args: "ModelArguments", finetuning_args: "FinetuningArguments", + is_trainable: bool, cast_trainable_params_to_fp32: bool, ) -> None: + if not is_trainable: + return + logger.info("Fine-tuning method: Freeze") if model_args.visual_inputs: config = model.config.text_config @@ -132,7 +140,9 @@ def _setup_lora_tuning( is_trainable: bool, cast_trainable_params_to_fp32: bool, ) -> "PeftModel": - logger.info("Fine-tuning method: {}".format("DoRA" if finetuning_args.use_dora else "LoRA")) + if is_trainable: + logger.info("Fine-tuning method: {}".format("DoRA" if finetuning_args.use_dora else "LoRA")) + adapter_to_resume = None if model_args.adapter_name_or_path is not None: @@ -173,6 +183,8 @@ def _setup_lora_tuning( offload_folder=model_args.offload_folder, ) + logger.info("Loaded adapter(s): {}".format(",".join(model_args.adapter_name_or_path))) + if is_trainable and adapter_to_resume is None: # create new lora weights while training if len(finetuning_args.lora_target) == 1 and finetuning_args.lora_target[0] == "all": target_modules = find_all_linear_modules(model, finetuning_args.freeze_vision_tower) @@ -227,9 +239,6 @@ def _setup_lora_tuning( for param in filter(lambda p: p.requires_grad, model.parameters()): param.data = param.data.to(torch.float32) - if model_args.adapter_name_or_path is not None: - logger.info("Loaded adapter(s): {}".format(",".join(model_args.adapter_name_or_path))) - return model @@ -247,29 +256,27 @@ def init_adapter( Note that the trainable parameters must be cast to float32. 
""" - if (not is_trainable) and model_args.adapter_name_or_path is None: - logger.info("Adapter is not found at evaluation, load the base model.") - return model + if is_trainable and getattr(model, "quantization_method", None) and finetuning_args.finetuning_type != "lora": + raise ValueError("Quantized models can only be used for the LoRA tuning.") - if finetuning_args.finetuning_type != "lora" and getattr(model, "quantization_method", None): - raise ValueError("You can only use lora for quantized models.") - - if is_deepspeed_zero3_enabled() or is_fsdp_enabled() or finetuning_args.pure_bf16 or finetuning_args.use_badam: + if not is_trainable: + cast_trainable_params_to_fp32 = False + elif is_deepspeed_zero3_enabled() or is_fsdp_enabled() or finetuning_args.pure_bf16 or finetuning_args.use_badam: logger.info("ZeRO3/FSDP/PureBF16/BAdam detected, remaining trainable params as their original precision.") cast_trainable_params_to_fp32 = False else: logger.info("Upcasting trainable params to float32.") cast_trainable_params_to_fp32 = True - if is_trainable and finetuning_args.finetuning_type == "full": - _setup_full_tuning(model, model_args, finetuning_args, cast_trainable_params_to_fp32) - - if is_trainable and finetuning_args.finetuning_type == "freeze": - _setup_freeze_tuning(model, model_args, finetuning_args, cast_trainable_params_to_fp32) - - if finetuning_args.finetuning_type == "lora": + if finetuning_args.finetuning_type == "full": + _setup_full_tuning(model, model_args, finetuning_args, is_trainable, cast_trainable_params_to_fp32) + elif finetuning_args.finetuning_type == "freeze": + _setup_freeze_tuning(model, model_args, finetuning_args, is_trainable, cast_trainable_params_to_fp32) + elif finetuning_args.finetuning_type == "lora": model = _setup_lora_tuning( config, model, model_args, finetuning_args, is_trainable, cast_trainable_params_to_fp32 ) + else: + raise NotImplementedError("Unknown finetuning type: {}.".format(finetuning_args.finetuning_type)) return model diff --git a/src/llamafactory/model/patcher.py b/src/llamafactory/model/patcher.py index 18221a10..b97ff433 100644 --- a/src/llamafactory/model/patcher.py +++ b/src/llamafactory/model/patcher.py @@ -44,7 +44,10 @@ def patch_config( is_trainable: bool, ) -> None: if model_args.compute_dtype is None: # priority: bf16 > fp16 > fp32 - model_args.compute_dtype = infer_optim_dtype(model_dtype=getattr(config, "torch_dtype", None)) + if model_args.infer_dtype == "auto": + model_args.compute_dtype = infer_optim_dtype(model_dtype=getattr(config, "torch_dtype", None)) + else: + model_args.compute_dtype = getattr(torch, model_args.infer_dtype) if is_torch_npu_available(): use_jit_compile = os.environ.get("JIT_COMPILE", "0").lower() in ["true", "1"] diff --git a/src/llamafactory/train/ppo/trainer.py b/src/llamafactory/train/ppo/trainer.py index 737c45a3..45f47455 100644 --- a/src/llamafactory/train/ppo/trainer.py +++ b/src/llamafactory/train/ppo/trainer.py @@ -135,8 +135,7 @@ class CustomPPOTrainer(PPOTrainer, Trainer): unwrapped_model: "AutoModelForCausalLMWithValueHead" = self.accelerator.unwrap_model(self.model) self.is_chatglm_model = getattr(unwrapped_model.config, "model_type", None) == "chatglm" - device_type = unwrapped_model.pretrained_model.device.type - self.amp_context = torch.autocast(device_type, dtype=model_args.compute_dtype) + self.amp_context = torch.autocast(self.current_device.type, dtype=self.model_args.compute_dtype) warnings.simplefilter("ignore") # remove gc warnings on ref model if 
finetuning_args.reward_model_type == "full": diff --git a/tests/model/test_base.py b/tests/model/test_base.py new file mode 100644 index 00000000..32a3918e --- /dev/null +++ b/tests/model/test_base.py @@ -0,0 +1,32 @@ +import os + +import torch +from transformers import AutoModelForCausalLM + +from llamafactory.hparams import get_infer_args +from llamafactory.model import load_model, load_tokenizer + + +TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-Llama-3") + +INFER_ARGS = { + "model_name_or_path": TINY_LLAMA, + "template": "llama3", + "infer_dtype": "float16", +} + + +def compare_model(model_a: "torch.nn.Module", model_b: "torch.nn.Module"): + state_dict_a = model_a.state_dict() + state_dict_b = model_b.state_dict() + assert set(state_dict_a.keys()) == set(state_dict_b.keys()) + for name in state_dict_a.keys(): + assert torch.allclose(state_dict_a[name], state_dict_b[name]) is True + + +def test_base(): + model_args, _, finetuning_args, _ = get_infer_args(INFER_ARGS) + tokenizer_module = load_tokenizer(model_args) + model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=False) + ref_model = AutoModelForCausalLM.from_pretrained(TINY_LLAMA, torch_dtype=model.dtype, device_map=model.device) + compare_model(model, ref_model) diff --git a/tests/model/test_freeze.py b/tests/model/test_freeze.py index 97800696..a0618315 100644 --- a/tests/model/test_freeze.py +++ b/tests/model/test_freeze.py @@ -2,7 +2,7 @@ import os import torch -from llamafactory.hparams import get_train_args +from llamafactory.hparams import get_infer_args, get_train_args from llamafactory.model import load_model, load_tokenizer @@ -23,8 +23,15 @@ TRAIN_ARGS = { "fp16": True, } +INFER_ARGS = { + "model_name_or_path": TINY_LLAMA, + "finetuning_type": "freeze", + "template": "llama3", + "infer_dtype": "float16", +} -def test_freeze_all_modules(): + +def test_freeze_train_all_modules(): model_args, _, _, finetuning_args, _ = get_train_args({"freeze_trainable_layers": 1, **TRAIN_ARGS}) tokenizer_module = load_tokenizer(model_args) model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) @@ -37,7 +44,7 @@ def test_freeze_all_modules(): assert param.dtype == torch.float16 -def test_freeze_extra_modules(): +def test_freeze_train_extra_modules(): model_args, _, _, finetuning_args, _ = get_train_args( {"freeze_trainable_layers": 1, "freeze_extra_modules": "embed_tokens,lm_head", **TRAIN_ARGS} ) @@ -50,3 +57,12 @@ def test_freeze_extra_modules(): else: assert param.requires_grad is False assert param.dtype == torch.float16 + + +def test_freeze_inference(): + model_args, _, finetuning_args, _ = get_infer_args(INFER_ARGS) + tokenizer_module = load_tokenizer(model_args) + model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=False) + for param in model.parameters(): + assert param.requires_grad is False + assert param.dtype == torch.float16 diff --git a/tests/model/test_full.py b/tests/model/test_full.py index 6cb78f37..802b987c 100644 --- a/tests/model/test_full.py +++ b/tests/model/test_full.py @@ -2,7 +2,7 @@ import os import torch -from llamafactory.hparams import get_train_args +from llamafactory.hparams import get_infer_args, get_train_args from llamafactory.model import load_model, load_tokenizer @@ -23,11 +23,27 @@ TRAIN_ARGS = { "fp16": True, } +INFER_ARGS = { + "model_name_or_path": TINY_LLAMA, + "finetuning_type": "full", + "template": "llama3", + "infer_dtype": "float16", +} -def 
test_full(): + +def test_full_train(): model_args, _, _, finetuning_args, _ = get_train_args(TRAIN_ARGS) tokenizer_module = load_tokenizer(model_args) model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) for param in model.parameters(): assert param.requires_grad is True assert param.dtype == torch.float32 + + +def test_full_inference(): + model_args, _, finetuning_args, _ = get_infer_args(INFER_ARGS) + tokenizer_module = load_tokenizer(model_args) + model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=False) + for param in model.parameters(): + assert param.requires_grad is False + assert param.dtype == torch.float16 diff --git a/tests/model/test_lora.py b/tests/model/test_lora.py index 2e2b89d9..3e2503f1 100644 --- a/tests/model/test_lora.py +++ b/tests/model/test_lora.py @@ -1,13 +1,18 @@ import os +from typing import Sequence import torch +from peft import LoraModel, PeftModel +from transformers import AutoModelForCausalLM -from llamafactory.hparams import get_train_args +from llamafactory.hparams import get_infer_args, get_train_args from llamafactory.model import load_model, load_tokenizer TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-Llama-3") +TINY_LLAMA_ADAPTER = os.environ.get("TINY_LLAMA_ADAPTER", "llamafactory/tiny-random-Llama-3-lora") + TRAIN_ARGS = { "model_name_or_path": TINY_LLAMA, "stage": "sft", @@ -23,8 +28,32 @@ TRAIN_ARGS = { "fp16": True, } +INFER_ARGS = { + "model_name_or_path": TINY_LLAMA, + "adapter_name_or_path": TINY_LLAMA_ADAPTER, + "finetuning_type": "lora", + "template": "llama3", + "infer_dtype": "float16", +} -def test_lora_all_modules(): + +def load_reference_model() -> "torch.nn.Module": + model = AutoModelForCausalLM.from_pretrained(TINY_LLAMA) + return PeftModel.from_pretrained(model, TINY_LLAMA_ADAPTER) + + +def compare_model(model_a: "torch.nn.Module", model_b: "torch.nn.Module", diff_keys: Sequence[str] = []): + state_dict_a = model_a.state_dict() + state_dict_b = model_b.state_dict() + assert set(state_dict_a.keys()) == set(state_dict_b.keys()) + for name in state_dict_a.keys(): + if any(key in name for key in diff_keys): + assert torch.allclose(state_dict_a[name], state_dict_b[name]) is False + else: + assert torch.allclose(state_dict_a[name], state_dict_b[name]) is True + + +def test_lora_train_all_modules(): model_args, _, _, finetuning_args, _ = get_train_args({"lora_target": "all", **TRAIN_ARGS}) tokenizer_module = load_tokenizer(model_args) model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) @@ -41,7 +70,7 @@ def test_lora_all_modules(): assert linear_modules == {"q_proj", "k_proj", "v_proj", "o_proj", "up_proj", "gate_proj", "down_proj"} -def test_lora_extra_modules(): +def test_lora_train_extra_modules(): model_args, _, _, finetuning_args, _ = get_train_args( {"lora_target": "all", "additional_target": "embed_tokens,lm_head", **TRAIN_ARGS} ) @@ -61,3 +90,51 @@ def test_lora_extra_modules(): assert param.dtype == torch.float16 assert extra_modules == {"embed_tokens", "lm_head"} + + +def test_lora_train_old_adapters(): + model_args, _, _, finetuning_args, _ = get_train_args( + {"adapter_name_or_path": TINY_LLAMA_ADAPTER, "create_new_adapter": False, **TRAIN_ARGS} + ) + tokenizer_module = load_tokenizer(model_args) + model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) + + base_model = AutoModelForCausalLM.from_pretrained(TINY_LLAMA, 
torch_dtype=model.dtype, device_map=model.device) + ref_model = PeftModel.from_pretrained(base_model, TINY_LLAMA_ADAPTER, is_trainable=True) + for param in filter(lambda p: p.requires_grad, ref_model.parameters()): + param.data = param.data.to(torch.float32) + + compare_model(model, ref_model) + + +def test_lora_train_new_adapters(): + model_args, _, _, finetuning_args, _ = get_train_args( + {"adapter_name_or_path": TINY_LLAMA_ADAPTER, "create_new_adapter": True, **TRAIN_ARGS} + ) + tokenizer_module = load_tokenizer(model_args) + model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) + + base_model = AutoModelForCausalLM.from_pretrained(TINY_LLAMA, torch_dtype=model.dtype, device_map=model.device) + ref_model = PeftModel.from_pretrained(base_model, TINY_LLAMA_ADAPTER, is_trainable=True) + for param in filter(lambda p: p.requires_grad, ref_model.parameters()): + param.data = param.data.to(torch.float32) + + compare_model( + model, ref_model, diff_keys=["q_proj", "k_proj", "v_proj", "o_proj", "up_proj", "gate_proj", "down_proj"] + ) + + +def test_lora_inference(): + model_args, _, finetuning_args, _ = get_infer_args(INFER_ARGS) + tokenizer_module = load_tokenizer(model_args) + model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=False) + + base_model = AutoModelForCausalLM.from_pretrained(TINY_LLAMA, torch_dtype=model.dtype, device_map=model.device) + ref_model: "LoraModel" = PeftModel.from_pretrained(base_model, TINY_LLAMA_ADAPTER) + ref_model = ref_model.merge_and_unload() + compare_model(model, ref_model) + + for name, param in model.named_parameters(): + assert param.requires_grad is False + assert param.dtype == torch.float16 + assert "lora" not in name From 43cfbde0d181bb6af1abff4e615cc0a034ba5d55 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sat, 15 Jun 2024 04:34:55 +0800 Subject: [PATCH 146/162] fix #4295 Former-commit-id: 08f657868f9d605b837c5d8c2946a25cc05c8735 --- src/llamafactory/train/sft/trainer.py | 10 +++++++--- src/llamafactory/train/sft/workflow.py | 2 +- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/llamafactory/train/sft/trainer.py b/src/llamafactory/train/sft/trainer.py index c063b214..6bf5b7c0 100644 --- a/src/llamafactory/train/sft/trainer.py +++ b/src/llamafactory/train/sft/trainer.py @@ -13,6 +13,7 @@ from ..trainer_utils import create_custom_optimzer, create_custom_scheduler if TYPE_CHECKING: + from torch.utils.data import Dataset from transformers import ProcessorMixin from transformers.trainer import PredictionOutput @@ -94,7 +95,7 @@ class CustomSeq2SeqTrainer(Seq2SeqTrainer): padded_tensor[:, -src_tensor.shape[-1] :] = src_tensor # adopt left-padding return padded_tensor.contiguous() # in contiguous memory - def save_predictions(self, predict_results: "PredictionOutput") -> None: + def save_predictions(self, dataset: "Dataset", predict_results: "PredictionOutput") -> None: r""" Saves model predictions to `output_dir`. 
@@ -120,6 +121,9 @@ class CustomSeq2SeqTrainer(Seq2SeqTrainer): (preds[i][pad_len[0] :], preds[i][: pad_len[0]]), axis=-1 ) # move pad token to last + decoded_inputs = self.tokenizer.batch_decode( + dataset["input_ids"], skip_special_tokens=True, clean_up_tokenization_spaces=False + ) decoded_labels = self.tokenizer.batch_decode( labels, skip_special_tokens=True, clean_up_tokenization_spaces=False ) @@ -127,6 +131,6 @@ class CustomSeq2SeqTrainer(Seq2SeqTrainer): with open(output_prediction_file, "w", encoding="utf-8") as writer: res: List[str] = [] - for label, pred in zip(decoded_labels, decoded_preds): - res.append(json.dumps({"label": label, "predict": pred}, ensure_ascii=False)) + for text, label, pred in zip(decoded_inputs, decoded_labels, decoded_preds): + res.append(json.dumps({"prompt": text, "label": label, "predict": pred}, ensure_ascii=False)) writer.write("\n".join(res)) diff --git a/src/llamafactory/train/sft/workflow.py b/src/llamafactory/train/sft/workflow.py index f09b5173..a989b3f7 100644 --- a/src/llamafactory/train/sft/workflow.py +++ b/src/llamafactory/train/sft/workflow.py @@ -93,7 +93,7 @@ def run_sft( predict_results.metrics.pop("predict_loss", None) trainer.log_metrics("predict", predict_results.metrics) trainer.save_metrics("predict", predict_results.metrics) - trainer.save_predictions(predict_results) + trainer.save_predictions(dataset, predict_results) # Create model card create_modelcard_and_push(trainer, model_args, data_args, training_args, finetuning_args) From d2137c7099436f0ce42b16f2b838de09942602fb Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sat, 15 Jun 2024 04:47:13 +0800 Subject: [PATCH 147/162] fix #4292 Former-commit-id: 4cd4c179d24eab0fcaec2b29b9dd71970f877fe8 --- src/llamafactory/webui/common.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/llamafactory/webui/common.py b/src/llamafactory/webui/common.py index 37b38df0..3b8d5378 100644 --- a/src/llamafactory/webui/common.py +++ b/src/llamafactory/webui/common.py @@ -39,7 +39,11 @@ def get_save_dir(*paths: str) -> os.PathLike: r""" Gets the path to saved model checkpoints. 
""" - paths = (path.replace(os.path.sep, "").replace(" ", "").strip() for path in paths) + if os.path.sep in paths[-1]: + logger.warning("Found complex path, some features may be not available.") + return paths[-1] + + paths = (path.replace(" ", "").strip() for path in paths) return os.path.join(DEFAULT_SAVE_DIR, *paths) From c9959df3c7f7e0867a322654735357df5c10aa48 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sat, 15 Jun 2024 04:57:19 +0800 Subject: [PATCH 148/162] disable DP Former-commit-id: c18fd609d268389f3e65274992045a6c9f8e6c1f --- src/llamafactory/hparams/parser.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/llamafactory/hparams/parser.py b/src/llamafactory/hparams/parser.py index ec5dd62c..3476a726 100644 --- a/src/llamafactory/hparams/parser.py +++ b/src/llamafactory/hparams/parser.py @@ -8,6 +8,7 @@ import transformers from transformers import HfArgumentParser, Seq2SeqTrainingArguments from transformers.integrations import is_deepspeed_zero3_enabled from transformers.trainer_utils import get_last_checkpoint +from transformers.training_args import ParallelMode from transformers.utils import is_torch_bf16_gpu_available from transformers.utils.versions import require_version @@ -162,6 +163,9 @@ def get_train_args(args: Optional[Dict[str, Any]] = None) -> _TRAIN_CLS: ): raise ValueError("PPO only accepts wandb or tensorboard logger.") + if training_args.parallel_mode == ParallelMode.NOT_DISTRIBUTED: + raise ValueError("Please launch distributed training with `llamafactory-cli` or `torchrun`.") + if training_args.max_steps == -1 and data_args.streaming: raise ValueError("Please specify `max_steps` in streaming mode.") @@ -181,14 +185,14 @@ def get_train_args(args: Optional[Dict[str, Any]] = None) -> _TRAIN_CLS: if ( finetuning_args.use_galore and finetuning_args.galore_layerwise - and training_args.parallel_mode.value == "distributed" + and training_args.parallel_mode == ParallelMode.DISTRIBUTED ): raise ValueError("Distributed training does not support layer-wise GaLore.") if ( finetuning_args.use_badam and finetuning_args.badam_mode == "layer" - and training_args.parallel_mode.value == "distributed" + and training_args.parallel_mode == ParallelMode.DISTRIBUTED ): raise ValueError("Layer-wise BAdam does not yet support distributed training, use ratio-wise BAdam.") @@ -230,7 +234,7 @@ def get_train_args(args: Optional[Dict[str, Any]] = None) -> _TRAIN_CLS: # Post-process training arguments if ( - training_args.parallel_mode.value == "distributed" + training_args.parallel_mode == ParallelMode.DISTRIBUTED and training_args.ddp_find_unused_parameters is None and finetuning_args.finetuning_type == "lora" ): @@ -290,7 +294,7 @@ def get_train_args(args: Optional[Dict[str, Any]] = None) -> _TRAIN_CLS: training_args.local_rank, training_args.device, training_args.n_gpu, - training_args.parallel_mode.value == "distributed", + training_args.parallel_mode == ParallelMode.DISTRIBUTED, str(model_args.compute_dtype), ) ) From ab4b3931fd1b6bef0d6ef691ad070b4504f9246a Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sat, 15 Jun 2024 05:11:33 +0800 Subject: [PATCH 149/162] fix #4271 Former-commit-id: 03707e78d29bfcf5d395a64bb38632bdb3ff47ce --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 405ac46e..1756551e 100644 --- a/setup.py +++ b/setup.py @@ -23,7 +23,7 @@ extra_require = { "torch": ["torch>=1.13.1"], "torch-npu": ["torch==2.1.0", "torch-npu==2.1.0.post3", 
"decorator"], "metrics": ["nltk", "jieba", "rouge-chinese"], - "deepspeed": ["deepspeed>=0.10.0,<=0.14.0"], + "deepspeed": ["deepspeed>=0.10.0"], "bitsandbytes": ["bitsandbytes>=0.39.0"], "vllm": ["vllm>=0.4.3"], "galore": ["galore-torch"], From 8b523554d63c86511c64bd68b5ed6c11849e5fc0 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sat, 15 Jun 2024 05:13:16 +0800 Subject: [PATCH 150/162] update readme Former-commit-id: a43d302aa79cbfb9b0606e855b4c1af6865d8e68 --- README.md | 2 +- README_zh.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 5dd10d5a..cae79694 100644 --- a/README.md +++ b/README.md @@ -463,7 +463,7 @@ docker-compose exec llamafactory bash ### Deploy with OpenAI-style API and vLLM ```bash -CUDA_VISIBLE_DEVICES=0,1 API_PORT=8000 llamafactory-cli api examples/inference/llama3_vllm.yaml +API_PORT=8000 llamafactory-cli api examples/inference/llama3_vllm.yaml ``` > [!TIP] diff --git a/README_zh.md b/README_zh.md index 76bd2d89..af3ff8f0 100644 --- a/README_zh.md +++ b/README_zh.md @@ -463,7 +463,7 @@ docker-compose exec llamafactory bash ### 利用 vLLM 部署 OpenAI API ```bash -CUDA_VISIBLE_DEVICES=0,1 API_PORT=8000 llamafactory-cli api examples/inference/llama3_vllm.yaml +API_PORT=8000 llamafactory-cli api examples/inference/llama3_vllm.yaml ``` > [!TIP] From acfae2e677cc6d507671ea241af0236d91b4f7fd Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sat, 15 Jun 2024 17:54:33 +0800 Subject: [PATCH 151/162] add license Former-commit-id: 69cfc98d7c81756a5ab6bf962240e393e449fef0 --- evaluation/ceval/ceval.py | 1 + evaluation/cmmlu/cmmlu.py | 1 + evaluation/mmlu/mmlu.py | 1 + scripts/cal_flops.py | 23 +++++++++-- scripts/cal_lr.py | 23 +++++++++-- scripts/cal_ppl.py | 19 ++++++++- scripts/length_cdf.py | 19 ++++++++- scripts/llama_pro.py | 23 +++++++++-- scripts/llamafy_baichuan2.py | 22 ++++++++-- scripts/llamafy_qwen.py | 21 ++++++++-- scripts/loftq_init.py | 23 +++++++++-- scripts/test_toolcall.py | 15 +++++++ setup.py | 14 +++++++ src/api.py | 14 +++++++ src/llamafactory/__init__.py | 14 +++++++ src/llamafactory/api/app.py | 14 +++++++ src/llamafactory/api/chat.py | 14 +++++++ src/llamafactory/api/common.py | 14 +++++++ src/llamafactory/api/protocol.py | 14 +++++++ src/llamafactory/chat/__init__.py | 14 +++++++ src/llamafactory/chat/base_engine.py | 14 +++++++ src/llamafactory/chat/chat_model.py | 17 ++++++++ src/llamafactory/chat/hf_engine.py | 14 +++++++ src/llamafactory/chat/vllm_engine.py | 14 +++++++ src/llamafactory/cli.py | 14 +++++++ src/llamafactory/data/__init__.py | 14 +++++++ src/llamafactory/data/aligner.py | 14 +++++++ src/llamafactory/data/collator.py | 14 +++++++ src/llamafactory/data/data_utils.py | 14 +++++++ src/llamafactory/data/formatter.py | 14 +++++++ src/llamafactory/data/loader.py | 14 +++++++ src/llamafactory/data/parser.py | 14 +++++++ src/llamafactory/data/preprocess.py | 14 +++++++ src/llamafactory/data/processors/feedback.py | 14 +++++++ src/llamafactory/data/processors/pairwise.py | 14 +++++++ src/llamafactory/data/processors/pretrain.py | 17 ++++++++ .../data/processors/processor_utils.py | 14 +++++++ .../data/processors/supervised.py | 14 +++++++ .../data/processors/unsupervised.py | 14 +++++++ src/llamafactory/data/template.py | 14 +++++++ src/llamafactory/eval/evaluator.py | 39 +++++++++++++++++- src/llamafactory/eval/template.py | 14 +++++++ src/llamafactory/extras/callbacks.py | 14 +++++++ src/llamafactory/extras/constants.py | 14 +++++++ src/llamafactory/extras/env.py | 
14 +++++++ src/llamafactory/extras/logging.py | 14 +++++++ src/llamafactory/extras/misc.py | 14 +++++++ src/llamafactory/extras/packages.py | 17 ++++++++ src/llamafactory/extras/ploting.py | 14 +++++++ src/llamafactory/hparams/__init__.py | 14 +++++++ src/llamafactory/hparams/data_args.py | 17 ++++++++ src/llamafactory/hparams/evaluation_args.py | 14 +++++++ src/llamafactory/hparams/finetuning_args.py | 14 +++++++ src/llamafactory/hparams/generating_args.py | 14 +++++++ src/llamafactory/hparams/model_args.py | 17 ++++++++ src/llamafactory/hparams/parser.py | 17 ++++++++ src/llamafactory/launcher.py | 14 +++++++ src/llamafactory/model/__init__.py | 14 +++++++ src/llamafactory/model/adapter.py | 14 +++++++ src/llamafactory/model/loader.py | 14 +++++++ .../model/model_utils/attention.py | 14 +++++++ .../model/model_utils/checkpointing.py | 19 ++++++++- .../model/model_utils/embedding.py | 14 +++++++ .../model/model_utils/longlora.py | 17 ++++++++ src/llamafactory/model/model_utils/misc.py | 14 +++++++ src/llamafactory/model/model_utils/mod.py | 14 +++++++ src/llamafactory/model/model_utils/moe.py | 14 +++++++ .../model/model_utils/quantization.py | 18 ++++++++- src/llamafactory/model/model_utils/rope.py | 18 +++++++++ src/llamafactory/model/model_utils/unsloth.py | 14 +++++++ .../model/model_utils/valuehead.py | 14 +++++++ src/llamafactory/model/model_utils/visual.py | 17 ++++++++ src/llamafactory/model/patcher.py | 14 +++++++ src/llamafactory/train/dpo/__init__.py | 14 +++++++ src/llamafactory/train/dpo/trainer.py | 17 ++++++++ src/llamafactory/train/dpo/workflow.py | 17 +++++++- src/llamafactory/train/kto/__init__.py | 14 +++++++ src/llamafactory/train/kto/trainer.py | 17 ++++++++ src/llamafactory/train/kto/workflow.py | 17 ++++++++ src/llamafactory/train/ppo/__init__.py | 14 +++++++ src/llamafactory/train/ppo/ppo_utils.py | 14 +++++++ src/llamafactory/train/ppo/trainer.py | 17 ++++++++ src/llamafactory/train/ppo/workflow.py | 17 +++++++- src/llamafactory/train/pt/__init__.py | 14 +++++++ src/llamafactory/train/pt/trainer.py | 14 +++++++ src/llamafactory/train/pt/workflow.py | 17 +++++++- src/llamafactory/train/rm/__init__.py | 14 +++++++ src/llamafactory/train/rm/metric.py | 14 +++++++ src/llamafactory/train/rm/trainer.py | 40 ++++++++++++++++++- src/llamafactory/train/rm/workflow.py | 39 +++++++++++++++++- src/llamafactory/train/sft/__init__.py | 14 +++++++ src/llamafactory/train/sft/metric.py | 18 +++++++++ src/llamafactory/train/sft/trainer.py | 17 ++++++++ src/llamafactory/train/sft/workflow.py | 17 +++++++- src/llamafactory/train/trainer_utils.py | 19 +++++++++ src/llamafactory/train/tuner.py | 14 +++++++ src/llamafactory/webui/chatter.py | 14 +++++++ src/llamafactory/webui/common.py | 14 +++++++ src/llamafactory/webui/components/__init__.py | 14 +++++++ src/llamafactory/webui/components/chatbot.py | 14 +++++++ src/llamafactory/webui/components/data.py | 14 +++++++ src/llamafactory/webui/components/eval.py | 14 +++++++ src/llamafactory/webui/components/export.py | 14 +++++++ src/llamafactory/webui/components/infer.py | 14 +++++++ src/llamafactory/webui/components/top.py | 14 +++++++ src/llamafactory/webui/components/train.py | 14 +++++++ src/llamafactory/webui/css.py | 14 +++++++ src/llamafactory/webui/engine.py | 14 +++++++ src/llamafactory/webui/interface.py | 14 +++++++ src/llamafactory/webui/locales.py | 14 +++++++ src/llamafactory/webui/manager.py | 14 +++++++ src/llamafactory/webui/runner.py | 14 +++++++ src/llamafactory/webui/utils.py | 14 +++++++ src/train.py | 14 +++++++ 
src/webui.py | 14 +++++++ tests/data/test_supervised.py | 14 +++++++ tests/eval/test_eval_template.py | 14 +++++++ tests/model/model_utils/test_attention.py | 14 +++++++ tests/model/test_base.py | 14 +++++++ tests/model/test_freeze.py | 14 +++++++ tests/model/test_full.py | 14 +++++++ tests/model/test_lora.py | 14 +++++++ 122 files changed, 1848 insertions(+), 32 deletions(-) diff --git a/evaluation/ceval/ceval.py b/evaluation/ceval/ceval.py index 4111d6b4..48442d50 100644 --- a/evaluation/ceval/ceval.py +++ b/evaluation/ceval/ceval.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + import os import datasets diff --git a/evaluation/cmmlu/cmmlu.py b/evaluation/cmmlu/cmmlu.py index 37efb328..5ff548a4 100644 --- a/evaluation/cmmlu/cmmlu.py +++ b/evaluation/cmmlu/cmmlu.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + import os import datasets diff --git a/evaluation/mmlu/mmlu.py b/evaluation/mmlu/mmlu.py index a4530250..1065fb31 100644 --- a/evaluation/mmlu/mmlu.py +++ b/evaluation/mmlu/mmlu.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + import os import datasets diff --git a/scripts/cal_flops.py b/scripts/cal_flops.py index ac87e0ab..627b5534 100644 --- a/scripts/cal_flops.py +++ b/scripts/cal_flops.py @@ -1,7 +1,20 @@ # coding=utf-8 -# Calculates the flops of pre-trained models. -# Usage: python cal_flops.py --model_name_or_path path_to_model --batch_size 1 --seq_length 512 -# Inspired by: https://www.deepspeed.ai/tutorials/flops-profiler/ +# Copyright 2024 Microsoft Corporation and the LlamaFactory team. +# +# This code is inspired by Microsoft's DeepSpeed library. +# https://www.deepspeed.ai/tutorials/flops-profiler/ +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import fire import torch @@ -17,6 +30,10 @@ def calculate_flops( seq_length: int = 256, flash_attn: str = "auto", ): + r""" + Calculates the flops of pre-trained models. + Usage: python cal_flops.py --model_name_or_path path_to_model --batch_size 1 --seq_length 512 + """ with get_accelerator().device(0): chat_model = ChatModel(dict(model_name_or_path=model_name_or_path, template="empty", flash_attn=flash_attn)) fake_input = torch.ones((batch_size, seq_length), dtype=torch.long, device=chat_model.model.device) diff --git a/scripts/cal_lr.py b/scripts/cal_lr.py index bfa32cc9..ff21d27c 100644 --- a/scripts/cal_lr.py +++ b/scripts/cal_lr.py @@ -1,7 +1,20 @@ # coding=utf-8 -# Calculates the optimal learning rate for 7B/13B models using LLaMA's hyper-parameters. 
-# Usage: python cal_lr.py --model_name_or_path path_to_model --dataset alpaca_en --cutoff_len 1024 --batch_size 16 -# Inspired by: https://github.com/imoneoi/openchat/blob/master/ochat/training_deepspeed/train.py +# Copyright 2024 imoneoi and the LlamaFactory team. +# +# This code is inspired by imoneoi's OpenChat library. +# https://github.com/imoneoi/openchat/blob/3.6.0/ochat/training_deepspeed/train.py +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import math from typing import Literal @@ -32,6 +45,10 @@ def calculate_lr( cutoff_len: int = 1024, # i.e. maximum input length during training is_mistral: bool = False, # mistral model uses a smaller learning rate, ): + r""" + Calculates the optimal learning rate for 7B/13B models using LLaMA's hyper-parameters. + Usage: python cal_lr.py --model_name_or_path path_to_model --dataset alpaca_en --cutoff_len 1024 --batch_size 16 + """ model_args, data_args, training_args, _, _ = get_train_args( dict( stage=stage, diff --git a/scripts/cal_ppl.py b/scripts/cal_ppl.py index 387b756c..fb503629 100644 --- a/scripts/cal_ppl.py +++ b/scripts/cal_ppl.py @@ -1,6 +1,17 @@ # coding=utf-8 -# Calculates the ppl on the dataset of the pre-trained models. -# Usage: python cal_ppl.py --model_name_or_path path_to_model --save_name ppl.json +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import json from dataclasses import dataclass @@ -56,6 +67,10 @@ def cal_ppl( max_samples: Optional[int] = None, train_on_prompt: bool = False, ): + r""" + Calculates the ppl on the dataset of the pre-trained models. + Usage: python cal_ppl.py --model_name_or_path path_to_model --save_name ppl.json + """ model_args, data_args, training_args, finetuning_args, _ = get_train_args( dict( stage=stage, diff --git a/scripts/length_cdf.py b/scripts/length_cdf.py index 7739dcf0..4cdf01e6 100644 --- a/scripts/length_cdf.py +++ b/scripts/length_cdf.py @@ -1,6 +1,17 @@ # coding=utf-8 -# Calculates the distribution of the input lengths in the dataset. -# Usage: python length_cdf.py --model_name_or_path path_to_model --dataset alpaca_en --template default +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. from collections import defaultdict @@ -19,6 +30,10 @@ def length_cdf( template: str = "default", interval: int = 1000, ): + r""" + Calculates the distribution of the input lengths in the dataset. + Usage: python length_cdf.py --model_name_or_path path_to_model --dataset alpaca_en --template default + """ model_args, data_args, training_args, _, _ = get_train_args( dict( stage="sft", diff --git a/scripts/llama_pro.py b/scripts/llama_pro.py index 727998ae..f315335a 100644 --- a/scripts/llama_pro.py +++ b/scripts/llama_pro.py @@ -1,7 +1,20 @@ # coding=utf-8 -# Performs block expansion for LLaMA, Mistral, Qwen1.5 or Yi models. -# Usage: python llama_pro.py --model_name_or_path meta-llama/Llama-2-7b-hf --output_dir llama2_pro --num_expand 8 -# Inspired by: https://github.com/TencentARC/LLaMA-Pro/blob/main/scripts/block_expansion.py +# Copyright 2024 Tencent Inc. and the LlamaFactory team. +# +# This code is inspired by Tencent's LLaMA-Pro library. +# https://github.com/TencentARC/LLaMA-Pro/blob/main/scripts/block_expansion.py +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import json import os @@ -37,6 +50,10 @@ def block_expansion( shard_size: Optional[str] = "2GB", save_safetensors: Optional[bool] = False, ): + r""" + Performs block expansion for LLaMA, Mistral, Qwen1.5 or Yi models. + Usage: python llama_pro.py --model_name_or_path meta-llama/Llama-2-7b-hf --output_dir llama2_pro --num_expand 8 + """ config: "PretrainedConfig" = AutoConfig.from_pretrained(model_name_or_path) num_layers = getattr(config, "num_hidden_layers") setattr(config, "num_hidden_layers", num_layers + num_expand) diff --git a/scripts/llamafy_baichuan2.py b/scripts/llamafy_baichuan2.py index 1ae58879..19284f5f 100644 --- a/scripts/llamafy_baichuan2.py +++ b/scripts/llamafy_baichuan2.py @@ -1,8 +1,17 @@ # coding=utf-8 -# Converts the Baichuan2-7B model in the same format as LLaMA2-7B. -# Usage: python llamafy_baichuan2.py --input_dir input --output_dir output -# Inspired by: https://huggingface.co/fireballoon/baichuan-llama-7b/blob/main/convert_baichuan_to_llama.py -# Converted model: https://huggingface.co/hiyouga/Baichuan2-7B-Base-LLaMAfied +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
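The llama_pro.py hunk above moves the block-expansion usage note into a docstring. For context, the LLaMA-Pro recipe it points to duplicates every (num_layers // num_expand)-th transformer block and re-initialises each copy as an identity (zeroed output projections) so the expanded model starts from the original model's behaviour. A small sketch of that layer-index bookkeeping follows; the layer counts are illustrative assumptions, not values from the script.

# Hedged sketch of block-expansion layer mapping (LLaMA-Pro style), not the patched script.
num_layers, num_expand = 32, 8          # e.g. a LLaMA-2 7B model expanded by 8 blocks
split = num_layers // num_expand        # duplicate one block in every group of `split`

layer_map = []  # original-layer index feeding each layer of the expanded model
for i in range(num_layers):
    layer_map.append(i)
    if (i + 1) % split == 0:
        layer_map.append(i)             # the copied block; its output projections would be zeroed
print(len(layer_map), layer_map[:10])   # 40 [0, 1, 2, 3, 3, 4, 5, 6, 7, 7]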
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import json import os @@ -79,6 +88,11 @@ def save_config(input_dir: str, output_dir: str): def llamafy_baichuan2( input_dir: str, output_dir: str, shard_size: Optional[str] = "2GB", save_safetensors: Optional[bool] = False ): + r""" + Converts the Baichuan2-7B model in the same format as LLaMA2-7B. + Usage: python llamafy_baichuan2.py --input_dir input --output_dir output + Converted model: https://huggingface.co/hiyouga/Baichuan2-7B-Base-LLaMAfied + """ try: os.makedirs(output_dir, exist_ok=False) except Exception as e: diff --git a/scripts/llamafy_qwen.py b/scripts/llamafy_qwen.py index 69cf3e8e..e5b59483 100644 --- a/scripts/llamafy_qwen.py +++ b/scripts/llamafy_qwen.py @@ -1,7 +1,17 @@ # coding=utf-8 -# Converts the Qwen models in the same format as LLaMA2. -# Usage: python llamafy_qwen.py --input_dir input --output_dir output -# Converted model: https://huggingface.co/hiyouga/Qwen-14B-Chat-LLaMAfied +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import json import os @@ -131,6 +141,11 @@ def save_config(input_dir: str, output_dir: str, torch_dtype: str): def llamafy_qwen( input_dir: str, output_dir: str, shard_size: Optional[str] = "2GB", save_safetensors: Optional[bool] = False ): + r""" + Converts the Qwen models in the same format as LLaMA2. + Usage: python llamafy_qwen.py --input_dir input --output_dir output + Converted model: https://huggingface.co/hiyouga/Qwen-14B-Chat-LLaMAfied + """ try: os.makedirs(output_dir, exist_ok=False) except Exception as e: diff --git a/scripts/loftq_init.py b/scripts/loftq_init.py index 7f244316..159dea06 100644 --- a/scripts/loftq_init.py +++ b/scripts/loftq_init.py @@ -1,7 +1,20 @@ # coding=utf-8 -# Initializes LoRA weights with LoRA-fine-tuning-aware Quantization (LoftQ) -# Usage: python loftq_init.py --model_name_or_path path_to_model --save_dir output_dir -# Inspired by: https://github.com/huggingface/peft/blob/main/examples/loftq_finetuning/quantize_save_load.py +# Copyright 2024 HuggingFace Inc. and the LlamaFactory team. +# +# This code is inspired by HuggingFace's PEFT library. +# https://github.com/huggingface/peft/blob/v0.10.0/examples/loftq_finetuning/quantize_save_load.py +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
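The llamafy_baichuan2.py and llamafy_qwen.py hunks above add license headers and relocate the conversion notes into docstrings. The core of such a conversion is tensor renaming and splitting; as one hedged example, Baichuan2's fused "W_pack" QKV projection has to be split into the separate q/k/v projections that LLaMA-style checkpoints expect. The key names and shapes below are assumptions for illustration.

# Hedged sketch: splitting a fused QKV weight into LLaMA-style q/k/v projections.
import torch

hidden_size = 4096
w_pack = torch.randn(3 * hidden_size, hidden_size)  # stand-in for ...self_attn.W_pack.weight

q_proj, k_proj, v_proj = torch.chunk(w_pack, 3, dim=0)
converted = {
    "model.layers.0.self_attn.q_proj.weight": q_proj,
    "model.layers.0.self_attn.k_proj.weight": k_proj,
    "model.layers.0.self_attn.v_proj.weight": v_proj,
}
print({k: tuple(v.shape) for k, v in converted.items()})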
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import os from typing import TYPE_CHECKING, Optional @@ -49,6 +62,10 @@ def quantize_loftq( lora_target: Optional[str] = "q_proj,v_proj", save_safetensors: Optional[bool] = False, ): + r""" + Initializes LoRA weights with LoRA-fine-tuning-aware Quantization (LoftQ) + Usage: python loftq_init.py --model_name_or_path path_to_model --save_dir output_dir + """ tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, trust_remote_code=True) model = AutoModelForCausalLM.from_pretrained(model_name_or_path, trust_remote_code=True, torch_dtype="auto") loftq_config = LoftQConfig(loftq_bits=loftq_bits, loftq_iter=loftq_iter) diff --git a/scripts/test_toolcall.py b/scripts/test_toolcall.py index 7e460017..6f6fd06c 100644 --- a/scripts/test_toolcall.py +++ b/scripts/test_toolcall.py @@ -1,3 +1,18 @@ +# coding=utf-8 +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import json import os from typing import Sequence diff --git a/setup.py b/setup.py index 1756551e..3d2ac921 100644 --- a/setup.py +++ b/setup.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import os import re diff --git a/src/api.py b/src/api.py index 3655e393..0f925497 100644 --- a/src/api.py +++ b/src/api.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
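The loftq_init.py hunk above documents the LoftQ initialization workflow in its docstring. A hedged sketch of the same idea through PEFT's public API follows; the model name, rank, and target modules are assumptions, not values taken from the script.

# Hedged sketch: LoftQ-initialised LoRA adapters via PEFT.
from peft import LoftQConfig, LoraConfig, get_peft_model
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf", torch_dtype="auto")  # placeholder
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=16,
    lora_alpha=16,
    target_modules=["q_proj", "v_proj"],
    init_lora_weights="loftq",                                # LoRA-fine-tuning-aware quantization
    loftq_config=LoftQConfig(loftq_bits=4, loftq_iter=1),
)
peft_model = get_peft_model(model, lora_config)
peft_model.print_trainable_parameters()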
+ import os import uvicorn diff --git a/src/llamafactory/__init__.py b/src/llamafactory/__init__.py index 78230937..9d732777 100644 --- a/src/llamafactory/__init__.py +++ b/src/llamafactory/__init__.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + # Level: api, webui > chat, eval, train > data, model > hparams > extras from .cli import VERSION diff --git a/src/llamafactory/api/app.py b/src/llamafactory/api/app.py index 21edab2f..c1264617 100644 --- a/src/llamafactory/api/app.py +++ b/src/llamafactory/api/app.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import os from contextlib import asynccontextmanager from typing import Optional diff --git a/src/llamafactory/api/chat.py b/src/llamafactory/api/chat.py index 98957bc1..a2074dbb 100644 --- a/src/llamafactory/api/chat.py +++ b/src/llamafactory/api/chat.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import base64 import io import json diff --git a/src/llamafactory/api/common.py b/src/llamafactory/api/common.py index 5ad9a071..d1ac94de 100644 --- a/src/llamafactory/api/common.py +++ b/src/llamafactory/api/common.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ import json from typing import TYPE_CHECKING, Any, Dict diff --git a/src/llamafactory/api/protocol.py b/src/llamafactory/api/protocol.py index 055fa781..a69132ea 100644 --- a/src/llamafactory/api/protocol.py +++ b/src/llamafactory/api/protocol.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import time from enum import Enum, unique from typing import Any, Dict, List, Optional, Union diff --git a/src/llamafactory/chat/__init__.py b/src/llamafactory/chat/__init__.py index a1a79de6..07276d48 100644 --- a/src/llamafactory/chat/__init__.py +++ b/src/llamafactory/chat/__init__.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from .base_engine import BaseEngine from .chat_model import ChatModel diff --git a/src/llamafactory/chat/base_engine.py b/src/llamafactory/chat/base_engine.py index 65b6c59c..92a51ebe 100644 --- a/src/llamafactory/chat/base_engine.py +++ b/src/llamafactory/chat/base_engine.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from abc import ABC, abstractmethod from dataclasses import dataclass from typing import TYPE_CHECKING, Any, AsyncGenerator, Dict, List, Literal, Optional, Sequence, Union diff --git a/src/llamafactory/chat/chat_model.py b/src/llamafactory/chat/chat_model.py index 281ef0c1..fb800106 100644 --- a/src/llamafactory/chat/chat_model.py +++ b/src/llamafactory/chat/chat_model.py @@ -1,3 +1,20 @@ +# Copyright 2024 THUDM and the LlamaFactory team. +# +# This code is inspired by the THUDM's ChatGLM implementation. +# https://github.com/THUDM/ChatGLM-6B/blob/main/cli_demo.py +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import asyncio from threading import Thread from typing import TYPE_CHECKING, Any, AsyncGenerator, Dict, Generator, List, Optional, Sequence diff --git a/src/llamafactory/chat/hf_engine.py b/src/llamafactory/chat/hf_engine.py index 28e6a409..a7ff7015 100644 --- a/src/llamafactory/chat/hf_engine.py +++ b/src/llamafactory/chat/hf_engine.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import asyncio import concurrent.futures import os diff --git a/src/llamafactory/chat/vllm_engine.py b/src/llamafactory/chat/vllm_engine.py index f0812a99..d488a039 100644 --- a/src/llamafactory/chat/vllm_engine.py +++ b/src/llamafactory/chat/vllm_engine.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import uuid from typing import TYPE_CHECKING, AsyncGenerator, AsyncIterator, Dict, List, Optional, Sequence, Union diff --git a/src/llamafactory/cli.py b/src/llamafactory/cli.py index 5042e53c..c7f136b3 100644 --- a/src/llamafactory/cli.py +++ b/src/llamafactory/cli.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import os import random import subprocess diff --git a/src/llamafactory/data/__init__.py b/src/llamafactory/data/__init__.py index b08691d3..307853bc 100644 --- a/src/llamafactory/data/__init__.py +++ b/src/llamafactory/data/__init__.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from .collator import KTODataCollatorWithPadding, PairwiseDataCollatorWithPadding from .data_utils import Role, split_dataset from .loader import get_dataset diff --git a/src/llamafactory/data/aligner.py b/src/llamafactory/data/aligner.py index 3e9d5c46..299bdca3 100644 --- a/src/llamafactory/data/aligner.py +++ b/src/llamafactory/data/aligner.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import os from functools import partial from typing import TYPE_CHECKING, Any, Dict, List, Union diff --git a/src/llamafactory/data/collator.py b/src/llamafactory/data/collator.py index 1dc8dd8d..e4859ff5 100644 --- a/src/llamafactory/data/collator.py +++ b/src/llamafactory/data/collator.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from dataclasses import dataclass from typing import Any, Dict, Sequence diff --git a/src/llamafactory/data/data_utils.py b/src/llamafactory/data/data_utils.py index 9b313112..cc9761b1 100644 --- a/src/llamafactory/data/data_utils.py +++ b/src/llamafactory/data/data_utils.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ from enum import Enum, unique from typing import TYPE_CHECKING, Dict, List, Tuple, Union diff --git a/src/llamafactory/data/formatter.py b/src/llamafactory/data/formatter.py index 0cd3d6c1..590e682b 100644 --- a/src/llamafactory/data/formatter.py +++ b/src/llamafactory/data/formatter.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import json import re from abc import ABC, abstractmethod diff --git a/src/llamafactory/data/loader.py b/src/llamafactory/data/loader.py index ba426f81..f44ef5de 100644 --- a/src/llamafactory/data/loader.py +++ b/src/llamafactory/data/loader.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import inspect import os import sys diff --git a/src/llamafactory/data/parser.py b/src/llamafactory/data/parser.py index ec97bfc1..4bebcd68 100644 --- a/src/llamafactory/data/parser.py +++ b/src/llamafactory/data/parser.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import json import os from dataclasses import dataclass diff --git a/src/llamafactory/data/preprocess.py b/src/llamafactory/data/preprocess.py index 875f55d6..9a8b97f3 100644 --- a/src/llamafactory/data/preprocess.py +++ b/src/llamafactory/data/preprocess.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ from functools import partial from typing import TYPE_CHECKING, Callable, Literal, Optional, Tuple diff --git a/src/llamafactory/data/processors/feedback.py b/src/llamafactory/data/processors/feedback.py index 5fba452c..219ab353 100644 --- a/src/llamafactory/data/processors/feedback.py +++ b/src/llamafactory/data/processors/feedback.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from typing import TYPE_CHECKING, Any, Dict, List, Optional, Sequence, Tuple from ...extras.constants import IGNORE_INDEX diff --git a/src/llamafactory/data/processors/pairwise.py b/src/llamafactory/data/processors/pairwise.py index db52c6a7..b2939348 100644 --- a/src/llamafactory/data/processors/pairwise.py +++ b/src/llamafactory/data/processors/pairwise.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from typing import TYPE_CHECKING, Any, Dict, List, Optional, Sequence, Tuple from ...extras.constants import IGNORE_INDEX diff --git a/src/llamafactory/data/processors/pretrain.py b/src/llamafactory/data/processors/pretrain.py index a10ccabd..fb4c840c 100644 --- a/src/llamafactory/data/processors/pretrain.py +++ b/src/llamafactory/data/processors/pretrain.py @@ -1,3 +1,20 @@ +# Copyright 2024 HuggingFace Inc. and the LlamaFactory team. +# +# This code is inspired by HuggingFace's transformers library. +# https://github.com/huggingface/transformers/blob/v4.40.0/examples/pytorch/language-modeling/run_clm.py +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from itertools import chain from typing import TYPE_CHECKING, Any, Dict, List diff --git a/src/llamafactory/data/processors/processor_utils.py b/src/llamafactory/data/processors/processor_utils.py index 9903a053..93df0cd5 100644 --- a/src/llamafactory/data/processors/processor_utils.py +++ b/src/llamafactory/data/processors/processor_utils.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import bisect from typing import TYPE_CHECKING, List, Sequence diff --git a/src/llamafactory/data/processors/supervised.py b/src/llamafactory/data/processors/supervised.py index f59f5371..eb5ffb1a 100644 --- a/src/llamafactory/data/processors/supervised.py +++ b/src/llamafactory/data/processors/supervised.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from collections import defaultdict from typing import TYPE_CHECKING, Any, Dict, List, Optional, Sequence, Tuple diff --git a/src/llamafactory/data/processors/unsupervised.py b/src/llamafactory/data/processors/unsupervised.py index 38497a15..75ad4d51 100644 --- a/src/llamafactory/data/processors/unsupervised.py +++ b/src/llamafactory/data/processors/unsupervised.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from typing import TYPE_CHECKING, Any, Dict, List, Optional, Sequence, Tuple from ...extras.logging import get_logger diff --git a/src/llamafactory/data/template.py b/src/llamafactory/data/template.py index b600c567..786c679f 100644 --- a/src/llamafactory/data/template.py +++ b/src/llamafactory/data/template.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ from dataclasses import dataclass from typing import TYPE_CHECKING, Dict, List, Optional, Sequence, Tuple, Union diff --git a/src/llamafactory/eval/evaluator.py b/src/llamafactory/eval/evaluator.py index 5c6fb104..bbd7a44b 100644 --- a/src/llamafactory/eval/evaluator.py +++ b/src/llamafactory/eval/evaluator.py @@ -1,4 +1,41 @@ -# Inspired by: https://github.com/hendrycks/test/blob/master/evaluate_flan.py +# Copyright 2024 the LlamaFactory team. +# +# This code is inspired by Dan's test library. +# https://github.com/hendrycks/test/blob/master/evaluate_flan.py +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# MIT License +# +# Copyright (c) 2020 Dan Hendrycks +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. import inspect import json diff --git a/src/llamafactory/eval/template.py b/src/llamafactory/eval/template.py index 2cbb5aaf..7d524e7c 100644 --- a/src/llamafactory/eval/template.py +++ b/src/llamafactory/eval/template.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from dataclasses import dataclass from typing import Dict, List, Sequence, Tuple diff --git a/src/llamafactory/extras/callbacks.py b/src/llamafactory/extras/callbacks.py index 441ebbfd..0dff6a69 100644 --- a/src/llamafactory/extras/callbacks.py +++ b/src/llamafactory/extras/callbacks.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import json import logging import os diff --git a/src/llamafactory/extras/constants.py b/src/llamafactory/extras/constants.py index 7d96fb5f..e31e7419 100644 --- a/src/llamafactory/extras/constants.py +++ b/src/llamafactory/extras/constants.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from collections import OrderedDict, defaultdict from enum import Enum from typing import Dict, Optional diff --git a/src/llamafactory/extras/env.py b/src/llamafactory/extras/env.py index a8cb799d..586c24c0 100644 --- a/src/llamafactory/extras/env.py +++ b/src/llamafactory/extras/env.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import platform import accelerate diff --git a/src/llamafactory/extras/logging.py b/src/llamafactory/extras/logging.py index 430b8a48..67622212 100644 --- a/src/llamafactory/extras/logging.py +++ b/src/llamafactory/extras/logging.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import logging import os import sys diff --git a/src/llamafactory/extras/misc.py b/src/llamafactory/extras/misc.py index fc33f77e..3d969df1 100644 --- a/src/llamafactory/extras/misc.py +++ b/src/llamafactory/extras/misc.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import gc import os from typing import TYPE_CHECKING, Dict, Tuple diff --git a/src/llamafactory/extras/packages.py b/src/llamafactory/extras/packages.py index 0746bb4f..35f546ab 100644 --- a/src/llamafactory/extras/packages.py +++ b/src/llamafactory/extras/packages.py @@ -1,3 +1,20 @@ +# Copyright 2024 HuggingFace Inc. and the LlamaFactory team. +# +# This code is inspired by HuggingFace's transformers library. +# https://github.com/huggingface/transformers/blob/v4.40.0/src/transformers/utils/import_utils.py +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import importlib.metadata import importlib.util from functools import lru_cache diff --git a/src/llamafactory/extras/ploting.py b/src/llamafactory/extras/ploting.py index dea23bbe..596d55e7 100644 --- a/src/llamafactory/extras/ploting.py +++ b/src/llamafactory/extras/ploting.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import json import math import os diff --git a/src/llamafactory/hparams/__init__.py b/src/llamafactory/hparams/__init__.py index d1ee98dd..cfe448c1 100644 --- a/src/llamafactory/hparams/__init__.py +++ b/src/llamafactory/hparams/__init__.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ from .data_args import DataArguments from .evaluation_args import EvaluationArguments from .finetuning_args import FinetuningArguments diff --git a/src/llamafactory/hparams/data_args.py b/src/llamafactory/hparams/data_args.py index 1e0cd08c..95284766 100644 --- a/src/llamafactory/hparams/data_args.py +++ b/src/llamafactory/hparams/data_args.py @@ -1,3 +1,20 @@ +# Copyright 2024 HuggingFace Inc. and the LlamaFactory team. +# +# This code is inspired by HuggingFace's transformers library. +# https://github.com/huggingface/transformers/blob/v4.40.0/examples/pytorch/language-modeling/run_clm.py +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from dataclasses import dataclass, field from typing import Literal, Optional diff --git a/src/llamafactory/hparams/evaluation_args.py b/src/llamafactory/hparams/evaluation_args.py index 5a05f6f6..a7f221ca 100644 --- a/src/llamafactory/hparams/evaluation_args.py +++ b/src/llamafactory/hparams/evaluation_args.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import os from dataclasses import dataclass, field from typing import Literal, Optional diff --git a/src/llamafactory/hparams/finetuning_args.py b/src/llamafactory/hparams/finetuning_args.py index facbe792..52dc299e 100644 --- a/src/llamafactory/hparams/finetuning_args.py +++ b/src/llamafactory/hparams/finetuning_args.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from dataclasses import dataclass, field from typing import List, Literal, Optional diff --git a/src/llamafactory/hparams/generating_args.py b/src/llamafactory/hparams/generating_args.py index 0ee17d1a..7ebb4eed 100644 --- a/src/llamafactory/hparams/generating_args.py +++ b/src/llamafactory/hparams/generating_args.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from dataclasses import asdict, dataclass, field from typing import Any, Dict, Optional diff --git a/src/llamafactory/hparams/model_args.py b/src/llamafactory/hparams/model_args.py index bbac2e4b..0a91f0fa 100644 --- a/src/llamafactory/hparams/model_args.py +++ b/src/llamafactory/hparams/model_args.py @@ -1,3 +1,20 @@ +# Copyright 2024 HuggingFace Inc. and the LlamaFactory team. +# +# This code is inspired by HuggingFace's transformers library. +# https://github.com/huggingface/transformers/blob/v4.40.0/examples/pytorch/language-modeling/run_clm.py +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from dataclasses import asdict, dataclass, field from typing import TYPE_CHECKING, Any, Dict, Literal, Optional, Union diff --git a/src/llamafactory/hparams/parser.py b/src/llamafactory/hparams/parser.py index 3476a726..1c57567c 100644 --- a/src/llamafactory/hparams/parser.py +++ b/src/llamafactory/hparams/parser.py @@ -1,3 +1,20 @@ +# Copyright 2024 HuggingFace Inc. and the LlamaFactory team. +# +# This code is inspired by HuggingFace's transformers library. +# https://github.com/huggingface/transformers/blob/v4.40.0/examples/pytorch/language-modeling/run_clm.py +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import logging import os import sys diff --git a/src/llamafactory/launcher.py b/src/llamafactory/launcher.py index de154db9..65e0b68f 100644 --- a/src/llamafactory/launcher.py +++ b/src/llamafactory/launcher.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ from llamafactory.train.tuner import run_exp diff --git a/src/llamafactory/model/__init__.py b/src/llamafactory/model/__init__.py index 9d23d59f..4abbaa1b 100644 --- a/src/llamafactory/model/__init__.py +++ b/src/llamafactory/model/__init__.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from .loader import load_config, load_model, load_tokenizer from .model_utils.misc import find_all_linear_modules from .model_utils.valuehead import load_valuehead_params diff --git a/src/llamafactory/model/adapter.py b/src/llamafactory/model/adapter.py index c37f6009..dfa71525 100644 --- a/src/llamafactory/model/adapter.py +++ b/src/llamafactory/model/adapter.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import re from typing import TYPE_CHECKING diff --git a/src/llamafactory/model/loader.py b/src/llamafactory/model/loader.py index 697a04e7..69cccd93 100644 --- a/src/llamafactory/model/loader.py +++ b/src/llamafactory/model/loader.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from typing import TYPE_CHECKING, Any, Dict, Optional, TypedDict from transformers import AutoConfig, AutoModelForCausalLM, AutoModelForVision2Seq, AutoProcessor, AutoTokenizer diff --git a/src/llamafactory/model/model_utils/attention.py b/src/llamafactory/model/model_utils/attention.py index 2bd36fdc..8ff3807b 100644 --- a/src/llamafactory/model/model_utils/attention.py +++ b/src/llamafactory/model/model_utils/attention.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from typing import TYPE_CHECKING from transformers.utils import is_flash_attn_2_available, is_torch_sdpa_available diff --git a/src/llamafactory/model/model_utils/checkpointing.py b/src/llamafactory/model/model_utils/checkpointing.py index e0657be8..e4e84b12 100644 --- a/src/llamafactory/model/model_utils/checkpointing.py +++ b/src/llamafactory/model/model_utils/checkpointing.py @@ -1,3 +1,21 @@ +# Copyright 2024 HuggingFace Inc. and the LlamaFactory team. +# +# This code is inspired by HuggingFace's Transformers and PEFT library. +# https://github.com/huggingface/transformers/blob/v4.40.0/src/transformers/modeling_utils.py +# https://github.com/huggingface/peft/blob/v0.10.0/src/peft/utils/other.py +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import inspect from functools import partial from types import MethodType @@ -68,7 +86,6 @@ def prepare_model_for_training( (1) cast the layernorm in fp32 (2) make output embedding layer require grads (3) add the upcasting of the lm_head in fp32 - Inspired by: https://github.com/huggingface/peft/blob/v0.7.1/src/peft/utils/other.py#L72 """ if model_args.upcast_layernorm: logger.info("Upcasting layernorm weights in float32.") diff --git a/src/llamafactory/model/model_utils/embedding.py b/src/llamafactory/model/model_utils/embedding.py index 3d9278e3..3ff79828 100644 --- a/src/llamafactory/model/model_utils/embedding.py +++ b/src/llamafactory/model/model_utils/embedding.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import math from contextlib import nullcontext from typing import TYPE_CHECKING diff --git a/src/llamafactory/model/model_utils/longlora.py b/src/llamafactory/model/model_utils/longlora.py index 4a8c562a..7af43dcf 100644 --- a/src/llamafactory/model/model_utils/longlora.py +++ b/src/llamafactory/model/model_utils/longlora.py @@ -1,3 +1,20 @@ +# Copyright 2024 EleutherAI, HuggingFace Inc., and the LlamaFactory team. +# +# This code is based on the EleutherAI's GPT-NeoX and HuggingFace's Transformers libraries. 
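The checkpointing.py hunk above keeps prepare_model_for_training's three-step summary (cast layer norms to fp32, make the output embedding require grads, upcast lm_head to fp32) while dropping the inline reference URL. A minimal sketch of the first step is given below; matching layer-norm parameters by name substring is an assumption for illustration.

# Hedged sketch: upcasting layer-norm weights to float32 before mixed-precision training.
import torch

LAYERNORM_SUBSTRINGS = ("norm", "ln_f", "layernorm")  # illustrative name patterns

def upcast_layernorm(model: torch.nn.Module) -> None:
    for name, param in model.named_parameters():
        if param.ndim == 1 and any(s in name.lower() for s in LAYERNORM_SUBSTRINGS):
            param.data = param.data.to(torch.float32)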
+# https://github.com/huggingface/transformers/blob/v4.40.0/src/transformers/models/llama/modeling_llama.py +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import math from typing import TYPE_CHECKING, Optional, Tuple diff --git a/src/llamafactory/model/model_utils/misc.py b/src/llamafactory/model/model_utils/misc.py index 4851bd29..a2812228 100644 --- a/src/llamafactory/model/model_utils/misc.py +++ b/src/llamafactory/model/model_utils/misc.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from typing import TYPE_CHECKING, List from ...extras.logging import get_logger diff --git a/src/llamafactory/model/model_utils/mod.py b/src/llamafactory/model/model_utils/mod.py index 5708a1a8..ec73af00 100644 --- a/src/llamafactory/model/model_utils/mod.py +++ b/src/llamafactory/model/model_utils/mod.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from typing import TYPE_CHECKING from ...extras.constants import MOD_SUPPORTED_MODELS diff --git a/src/llamafactory/model/model_utils/moe.py b/src/llamafactory/model/model_utils/moe.py index 8a73c844..5c7473aa 100644 --- a/src/llamafactory/model/model_utils/moe.py +++ b/src/llamafactory/model/model_utils/moe.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ from typing import TYPE_CHECKING, Sequence import torch diff --git a/src/llamafactory/model/model_utils/quantization.py b/src/llamafactory/model/model_utils/quantization.py index 02a54f07..9e6b9da4 100644 --- a/src/llamafactory/model/model_utils/quantization.py +++ b/src/llamafactory/model/model_utils/quantization.py @@ -1,3 +1,20 @@ +# Copyright 2024 HuggingFace Inc. and the LlamaFactory team. +# +# This code is inspired by HuggingFace's Optimum library. +# https://github.com/huggingface/optimum/blob/v1.20.0/optimum/gptq/data.py +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import os import random from enum import Enum, unique @@ -41,7 +58,6 @@ class QuantizationMethod(str, Enum): def _get_quantization_dataset(tokenizer: "PreTrainedTokenizer", model_args: "ModelArguments") -> List[str]: r""" - Inspired by: https://github.com/huggingface/optimum/blob/v1.16.0/optimum/gptq/data.py#L133 TODO: remove tokenizer.decode() https://github.com/huggingface/optimum/pull/1600 """ if os.path.isfile(model_args.export_quantization_dataset): diff --git a/src/llamafactory/model/model_utils/rope.py b/src/llamafactory/model/model_utils/rope.py index 93ab8929..88303c4d 100644 --- a/src/llamafactory/model/model_utils/rope.py +++ b/src/llamafactory/model/model_utils/rope.py @@ -1,3 +1,21 @@ +# Copyright 2024 LMSYS and the LlamaFactory team. +# Copyright 2023 Rohan Taori, Ishaan Gulrajani, Tianyi Zhang, Yann Dubois, Xuechen Li +# +# This code is inspired by the LMSYS's FastChat library. +# https://github.com/lm-sys/FastChat/blob/v0.2.30/fastchat/train/train.py +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import math from typing import TYPE_CHECKING diff --git a/src/llamafactory/model/model_utils/unsloth.py b/src/llamafactory/model/model_utils/unsloth.py index 8a16409d..9cfaec61 100644 --- a/src/llamafactory/model/model_utils/unsloth.py +++ b/src/llamafactory/model/model_utils/unsloth.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ from typing import TYPE_CHECKING, Any, Dict, Optional from ...extras.logging import get_logger diff --git a/src/llamafactory/model/model_utils/valuehead.py b/src/llamafactory/model/model_utils/valuehead.py index 64333688..9ab3d45a 100644 --- a/src/llamafactory/model/model_utils/valuehead.py +++ b/src/llamafactory/model/model_utils/valuehead.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from typing import TYPE_CHECKING, Dict import torch diff --git a/src/llamafactory/model/model_utils/visual.py b/src/llamafactory/model/model_utils/visual.py index c8260b7f..37237485 100644 --- a/src/llamafactory/model/model_utils/visual.py +++ b/src/llamafactory/model/model_utils/visual.py @@ -1,3 +1,20 @@ +# Copyright 2024 HuggingFace Inc. and the LlamaFactory team. +# +# This code is inspired by HuggingFace's TRL library. +# https://github.com/huggingface/transformers/blob/v4.40.0/src/transformers/models/llava/modeling_llava.py +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from typing import TYPE_CHECKING, Tuple import torch diff --git a/src/llamafactory/model/patcher.py b/src/llamafactory/model/patcher.py index b97ff433..053516e4 100644 --- a/src/llamafactory/model/patcher.py +++ b/src/llamafactory/model/patcher.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import os from types import MethodType from typing import TYPE_CHECKING, Any, Dict diff --git a/src/llamafactory/train/dpo/__init__.py b/src/llamafactory/train/dpo/__init__.py index 43fe9420..9ce0d089 100644 --- a/src/llamafactory/train/dpo/__init__.py +++ b/src/llamafactory/train/dpo/__init__.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from .workflow import run_dpo diff --git a/src/llamafactory/train/dpo/trainer.py b/src/llamafactory/train/dpo/trainer.py index 5bdb9c43..475d08c3 100644 --- a/src/llamafactory/train/dpo/trainer.py +++ b/src/llamafactory/train/dpo/trainer.py @@ -1,3 +1,20 @@ +# Copyright 2024 HuggingFace Inc. and the LlamaFactory team. +# +# This code is inspired by HuggingFace's TRL library. +# https://github.com/huggingface/trl/blob/v0.8.0/trl/trainer/dpo_trainer.py +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import warnings from collections import defaultdict from contextlib import nullcontext diff --git a/src/llamafactory/train/dpo/workflow.py b/src/llamafactory/train/dpo/workflow.py index 992985b0..8c3c2eb1 100644 --- a/src/llamafactory/train/dpo/workflow.py +++ b/src/llamafactory/train/dpo/workflow.py @@ -1,4 +1,19 @@ -# Inspired by: https://github.com/huggingface/trl/blob/main/examples/research_projects/stack_llama_2/scripts/dpo_llama2.py +# Copyright 2024 HuggingFace Inc. and the LlamaFactory team. +# +# This code is inspired by HuggingFace's TRL library. +# https://github.com/huggingface/trl/blob/v0.8.0/examples/scripts/dpo.py +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. from typing import TYPE_CHECKING, List, Optional diff --git a/src/llamafactory/train/kto/__init__.py b/src/llamafactory/train/kto/__init__.py index 34c7905a..a1900368 100644 --- a/src/llamafactory/train/kto/__init__.py +++ b/src/llamafactory/train/kto/__init__.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ from .workflow import run_kto diff --git a/src/llamafactory/train/kto/trainer.py b/src/llamafactory/train/kto/trainer.py index 3b4488fc..6e96fc0c 100644 --- a/src/llamafactory/train/kto/trainer.py +++ b/src/llamafactory/train/kto/trainer.py @@ -1,3 +1,20 @@ +# Copyright 2024 HuggingFace Inc. and the LlamaFactory team. +# +# This code is inspired by HuggingFace's TRL library. +# https://github.com/huggingface/trl/blob/v0.8.0/trl/trainer/kto_trainer.py +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import warnings from collections import defaultdict from contextlib import nullcontext diff --git a/src/llamafactory/train/kto/workflow.py b/src/llamafactory/train/kto/workflow.py index c79b160b..8a7af6d4 100644 --- a/src/llamafactory/train/kto/workflow.py +++ b/src/llamafactory/train/kto/workflow.py @@ -1,3 +1,20 @@ +# Copyright 2024 HuggingFace Inc. and the LlamaFactory team. +# +# This code is inspired by HuggingFace's TRL library. +# https://github.com/huggingface/trl/blob/v0.8.0/examples/scripts/kto.py +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from typing import TYPE_CHECKING, List, Optional from ...data import KTODataCollatorWithPadding, get_dataset, split_dataset diff --git a/src/llamafactory/train/ppo/__init__.py b/src/llamafactory/train/ppo/__init__.py index d17336d5..161f6f5d 100644 --- a/src/llamafactory/train/ppo/__init__.py +++ b/src/llamafactory/train/ppo/__init__.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from .workflow import run_ppo diff --git a/src/llamafactory/train/ppo/ppo_utils.py b/src/llamafactory/train/ppo/ppo_utils.py index fec3fc1e..05c40946 100644 --- a/src/llamafactory/train/ppo/ppo_utils.py +++ b/src/llamafactory/train/ppo/ppo_utils.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import json from contextlib import nullcontext from typing import TYPE_CHECKING, Dict, List, Literal, Optional diff --git a/src/llamafactory/train/ppo/trainer.py b/src/llamafactory/train/ppo/trainer.py index 45f47455..61420f3b 100644 --- a/src/llamafactory/train/ppo/trainer.py +++ b/src/llamafactory/train/ppo/trainer.py @@ -1,3 +1,20 @@ +# Copyright 2024 HuggingFace Inc. and the LlamaFactory team. +# +# This code is inspired by HuggingFace's TRL library. +# https://github.com/huggingface/trl/blob/v0.8.0/trl/trainer/ppo_trainer.py +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import math import os import sys diff --git a/src/llamafactory/train/ppo/workflow.py b/src/llamafactory/train/ppo/workflow.py index 111704c6..891d539a 100644 --- a/src/llamafactory/train/ppo/workflow.py +++ b/src/llamafactory/train/ppo/workflow.py @@ -1,4 +1,19 @@ -# Inspired by: https://github.com/lvwerra/trl/blob/main/examples/research_projects/stack_llama/scripts/rl_training.py +# Copyright 2024 HuggingFace Inc. and the LlamaFactory team. +# +# This code is inspired by HuggingFace's TRL library. +# https://github.com/huggingface/trl/blob/v0.8.0/examples/scripts/ppo.py +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. from typing import TYPE_CHECKING, List, Optional diff --git a/src/llamafactory/train/pt/__init__.py b/src/llamafactory/train/pt/__init__.py index bdf397f6..d80e6f22 100644 --- a/src/llamafactory/train/pt/__init__.py +++ b/src/llamafactory/train/pt/__init__.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ from .workflow import run_pt diff --git a/src/llamafactory/train/pt/trainer.py b/src/llamafactory/train/pt/trainer.py index 1d96e82f..09729f2e 100644 --- a/src/llamafactory/train/pt/trainer.py +++ b/src/llamafactory/train/pt/trainer.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from types import MethodType from typing import TYPE_CHECKING, Dict, Optional diff --git a/src/llamafactory/train/pt/workflow.py b/src/llamafactory/train/pt/workflow.py index 8a635567..f1df314e 100644 --- a/src/llamafactory/train/pt/workflow.py +++ b/src/llamafactory/train/pt/workflow.py @@ -1,4 +1,19 @@ -# Inspired by: https://github.com/huggingface/transformers/blob/v4.34.1/examples/pytorch/language-modeling/run_clm.py +# Copyright 2024 HuggingFace Inc. and the LlamaFactory team. +# +# This code is inspired by HuggingFace's transformers library. +# https://github.com/huggingface/transformers/blob/v4.40.0/examples/pytorch/language-modeling/run_clm.py +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import math from typing import TYPE_CHECKING, List, Optional diff --git a/src/llamafactory/train/rm/__init__.py b/src/llamafactory/train/rm/__init__.py index dedac35f..48278315 100644 --- a/src/llamafactory/train/rm/__init__.py +++ b/src/llamafactory/train/rm/__init__.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from .workflow import run_rm diff --git a/src/llamafactory/train/rm/metric.py b/src/llamafactory/train/rm/metric.py index 99dc6ab8..fb880b1c 100644 --- a/src/llamafactory/train/rm/metric.py +++ b/src/llamafactory/train/rm/metric.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from typing import Dict, Sequence, Tuple, Union import numpy as np diff --git a/src/llamafactory/train/rm/trainer.py b/src/llamafactory/train/rm/trainer.py index bfb344dc..14695d7d 100644 --- a/src/llamafactory/train/rm/trainer.py +++ b/src/llamafactory/train/rm/trainer.py @@ -1,3 +1,42 @@ +# Copyright 2024 the LlamaFactory team. +# +# This code is inspired by CarperAI's trlx library. +# https://github.com/CarperAI/trlx/blob/v0.7.0/examples/summarize_rlhf/reward_model/reward_model.py +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# MIT License +# +# Copyright (c) 2022 CarperAI +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + import json import os from types import MethodType @@ -79,7 +118,6 @@ class PairwiseTrainer(Trainer): chosen_scores, rejected_scores = [], [] # Compute pairwise loss. Only backprop on the different tokens before padding - # Inspired by: https://github.com/CarperAI/trlx/blob/main/examples/summarize_rlhf/reward_model/reward_model.py loss = 0 for i in range(batch_size): chosen_length = (chosen_input_ids[i] != self.tokenizer.pad_token_id).nonzero()[-1] + 1 diff --git a/src/llamafactory/train/rm/workflow.py b/src/llamafactory/train/rm/workflow.py index 2e9e194b..75c0a2bf 100644 --- a/src/llamafactory/train/rm/workflow.py +++ b/src/llamafactory/train/rm/workflow.py @@ -1,4 +1,41 @@ -# Inspired by: https://github.com/CarperAI/trlx/blob/main/examples/summarize_rlhf/reward_model/train_reward_model_gptj.py +# Copyright 2024 the LlamaFactory team. +# +# This code is inspired by CarperAI's trlx library. 
+# https://github.com/CarperAI/trlx/blob/v0.7.0/examples/summarize_rlhf/reward_model/train_reward_model_gptj.py +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# MIT License +# +# Copyright (c) 2022 CarperAI +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. from typing import TYPE_CHECKING, List, Optional diff --git a/src/llamafactory/train/sft/__init__.py b/src/llamafactory/train/sft/__init__.py index f2f84e78..475dfe5f 100644 --- a/src/llamafactory/train/sft/__init__.py +++ b/src/llamafactory/train/sft/__init__.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from .workflow import run_sft diff --git a/src/llamafactory/train/sft/metric.py b/src/llamafactory/train/sft/metric.py index 923238d6..d2147c22 100644 --- a/src/llamafactory/train/sft/metric.py +++ b/src/llamafactory/train/sft/metric.py @@ -1,3 +1,21 @@ +# Copyright 2024 HuggingFace Inc., THUDM, and the LlamaFactory team. +# +# This code is inspired by HuggingFace's transformers library and THUDM's ChatGLM implementation. +# https://github.com/huggingface/transformers/blob/v4.40.0/examples/pytorch/summarization/run_summarization.py +# https://github.com/THUDM/ChatGLM-6B/blob/main/ptuning/main.py +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from dataclasses import dataclass from typing import TYPE_CHECKING, Dict, Sequence, Tuple, Union diff --git a/src/llamafactory/train/sft/trainer.py b/src/llamafactory/train/sft/trainer.py index 6bf5b7c0..6ab6914e 100644 --- a/src/llamafactory/train/sft/trainer.py +++ b/src/llamafactory/train/sft/trainer.py @@ -1,3 +1,20 @@ +# Copyright 2024 HuggingFace Inc. and the LlamaFactory team. +# +# This code is inspired by HuggingFace's transformers library. +# https://github.com/huggingface/transformers/blob/v4.40.0/src/transformers/trainer_seq2seq.py +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import json import os from types import MethodType diff --git a/src/llamafactory/train/sft/workflow.py b/src/llamafactory/train/sft/workflow.py index a989b3f7..dfc71cfb 100644 --- a/src/llamafactory/train/sft/workflow.py +++ b/src/llamafactory/train/sft/workflow.py @@ -1,4 +1,19 @@ -# Inspired by: https://github.com/huggingface/transformers/blob/v4.34.1/examples/pytorch/summarization/run_summarization.py +# Copyright 2024 HuggingFace Inc. and the LlamaFactory team. +# +# This code is inspired by HuggingFace's transformers library. +# https://github.com/huggingface/transformers/blob/v4.40.0/examples/pytorch/summarization/run_summarization.py +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. from typing import TYPE_CHECKING, List, Optional diff --git a/src/llamafactory/train/trainer_utils.py b/src/llamafactory/train/trainer_utils.py index 48944a63..5621d5df 100644 --- a/src/llamafactory/train/trainer_utils.py +++ b/src/llamafactory/train/trainer_utils.py @@ -1,3 +1,22 @@ +# Copyright 2024 HuggingFace Inc. and the LlamaFactory team. +# +# This code is inspired by the GaLore's implementation: https://github.com/jiaweizzhao/GaLore +# and the LoRA+'s implementation: https://github.com/nikhil-ghosh-berkeley/loraplus +# and the BAdam's implementation: https://github.com/Ledzy/BAdam +# and the TRL's implementation: https://github.com/huggingface/trl +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from typing import TYPE_CHECKING, Callable, Dict, List, Optional, Tuple, Union import torch diff --git a/src/llamafactory/train/tuner.py b/src/llamafactory/train/tuner.py index eed875e9..788b4c4f 100644 --- a/src/llamafactory/train/tuner.py +++ b/src/llamafactory/train/tuner.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from typing import TYPE_CHECKING, Any, Dict, List, Optional import torch diff --git a/src/llamafactory/webui/chatter.py b/src/llamafactory/webui/chatter.py index c82710d3..864c41c7 100644 --- a/src/llamafactory/webui/chatter.py +++ b/src/llamafactory/webui/chatter.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import json import os from typing import TYPE_CHECKING, Dict, Generator, List, Optional, Sequence, Tuple diff --git a/src/llamafactory/webui/common.py b/src/llamafactory/webui/common.py index 3b8d5378..980428a4 100644 --- a/src/llamafactory/webui/common.py +++ b/src/llamafactory/webui/common.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import json import os from collections import defaultdict diff --git a/src/llamafactory/webui/components/__init__.py b/src/llamafactory/webui/components/__init__.py index 5c1e21b8..715fb6e4 100644 --- a/src/llamafactory/webui/components/__init__.py +++ b/src/llamafactory/webui/components/__init__.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from .chatbot import create_chat_box from .eval import create_eval_tab from .export import create_export_tab diff --git a/src/llamafactory/webui/components/chatbot.py b/src/llamafactory/webui/components/chatbot.py index f83694b1..ad74114b 100644 --- a/src/llamafactory/webui/components/chatbot.py +++ b/src/llamafactory/webui/components/chatbot.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from typing import TYPE_CHECKING, Dict, Tuple from ...data import Role diff --git a/src/llamafactory/webui/components/data.py b/src/llamafactory/webui/components/data.py index 232b973d..88e500cf 100644 --- a/src/llamafactory/webui/components/data.py +++ b/src/llamafactory/webui/components/data.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import json import os from typing import TYPE_CHECKING, Any, Dict, List, Tuple diff --git a/src/llamafactory/webui/components/eval.py b/src/llamafactory/webui/components/eval.py index 0a7a0f44..b522913e 100644 --- a/src/llamafactory/webui/components/eval.py +++ b/src/llamafactory/webui/components/eval.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ from typing import TYPE_CHECKING, Dict from ...extras.packages import is_gradio_available diff --git a/src/llamafactory/webui/components/export.py b/src/llamafactory/webui/components/export.py index 9d756a38..14257949 100644 --- a/src/llamafactory/webui/components/export.py +++ b/src/llamafactory/webui/components/export.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from typing import TYPE_CHECKING, Dict, Generator, List, Union from ...extras.constants import PEFT_METHODS diff --git a/src/llamafactory/webui/components/infer.py b/src/llamafactory/webui/components/infer.py index 970f4629..03bccd7f 100644 --- a/src/llamafactory/webui/components/infer.py +++ b/src/llamafactory/webui/components/infer.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from typing import TYPE_CHECKING, Dict from ...extras.packages import is_gradio_available diff --git a/src/llamafactory/webui/components/top.py b/src/llamafactory/webui/components/top.py index fd0ead3d..2515a83d 100644 --- a/src/llamafactory/webui/components/top.py +++ b/src/llamafactory/webui/components/top.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from typing import TYPE_CHECKING, Dict from ...data import TEMPLATES diff --git a/src/llamafactory/webui/components/train.py b/src/llamafactory/webui/components/train.py index 72dfc858..673f6bf4 100644 --- a/src/llamafactory/webui/components/train.py +++ b/src/llamafactory/webui/components/train.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from typing import TYPE_CHECKING, Dict from transformers.trainer_utils import SchedulerType diff --git a/src/llamafactory/webui/css.py b/src/llamafactory/webui/css.py index 36e3d4c2..53982119 100644 --- a/src/llamafactory/webui/css.py +++ b/src/llamafactory/webui/css.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + CSS = r""" .duplicate-button { margin: auto !important; diff --git a/src/llamafactory/webui/engine.py b/src/llamafactory/webui/engine.py index eb6142d3..04893215 100644 --- a/src/llamafactory/webui/engine.py +++ b/src/llamafactory/webui/engine.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from typing import TYPE_CHECKING, Any, Dict from .chatter import WebChatModel diff --git a/src/llamafactory/webui/interface.py b/src/llamafactory/webui/interface.py index bae3ba76..d25f4d38 100644 --- a/src/llamafactory/webui/interface.py +++ b/src/llamafactory/webui/interface.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import os from ..extras.packages import is_gradio_available diff --git a/src/llamafactory/webui/locales.py b/src/llamafactory/webui/locales.py index e30feab2..427f01b8 100644 --- a/src/llamafactory/webui/locales.py +++ b/src/llamafactory/webui/locales.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + LOCALES = { "lang": { "en": { diff --git a/src/llamafactory/webui/manager.py b/src/llamafactory/webui/manager.py index 326fdb8d..7e9b801a 100644 --- a/src/llamafactory/webui/manager.py +++ b/src/llamafactory/webui/manager.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from typing import TYPE_CHECKING, Dict, Generator, List, Set, Tuple diff --git a/src/llamafactory/webui/runner.py b/src/llamafactory/webui/runner.py index 35014628..76982934 100644 --- a/src/llamafactory/webui/runner.py +++ b/src/llamafactory/webui/runner.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import os from copy import deepcopy from subprocess import Popen, TimeoutExpired diff --git a/src/llamafactory/webui/utils.py b/src/llamafactory/webui/utils.py index e39f2aa4..6ce2a8e7 100644 --- a/src/llamafactory/webui/utils.py +++ b/src/llamafactory/webui/utils.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import json import os import signal diff --git a/src/train.py b/src/train.py index b20aa9d2..6703ffdb 100644 --- a/src/train.py +++ b/src/train.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from llamafactory.train.tuner import run_exp diff --git a/src/webui.py b/src/webui.py index bbefb54e..99370af2 100644 --- a/src/webui.py +++ b/src/webui.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import os from llamafactory.webui.interface import create_ui diff --git a/tests/data/test_supervised.py b/tests/data/test_supervised.py index 63a3453f..a72800d2 100644 --- a/tests/data/test_supervised.py +++ b/tests/data/test_supervised.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import os import random diff --git a/tests/eval/test_eval_template.py b/tests/eval/test_eval_template.py index f6a91a67..f85d9d57 100644 --- a/tests/eval/test_eval_template.py +++ b/tests/eval/test_eval_template.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from llamafactory.eval.template import get_eval_template diff --git a/tests/model/model_utils/test_attention.py b/tests/model/model_utils/test_attention.py index 751adda4..97ac9dcc 100644 --- a/tests/model/model_utils/test_attention.py +++ b/tests/model/model_utils/test_attention.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import os from transformers.utils import is_flash_attn_2_available, is_torch_sdpa_available diff --git a/tests/model/test_base.py b/tests/model/test_base.py index 32a3918e..462e8cfa 100644 --- a/tests/model/test_base.py +++ b/tests/model/test_base.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import os import torch diff --git a/tests/model/test_freeze.py b/tests/model/test_freeze.py index a0618315..ac5a023c 100644 --- a/tests/model/test_freeze.py +++ b/tests/model/test_freeze.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import os import torch diff --git a/tests/model/test_full.py b/tests/model/test_full.py index 802b987c..bcd6480f 100644 --- a/tests/model/test_full.py +++ b/tests/model/test_full.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import os import torch diff --git a/tests/model/test_lora.py b/tests/model/test_lora.py index 3e2503f1..e49c026c 100644 --- a/tests/model/test_lora.py +++ b/tests/model/test_lora.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import os from typing import Sequence From 61aaab22c93fe89145f1bea926b00004352bd7fc Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sat, 15 Jun 2024 17:58:52 +0800 Subject: [PATCH 152/162] add minicpm #4227 Former-commit-id: e1bb18ce60be9a1b203989def30f1b9194286325 --- src/llamafactory/extras/constants.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/llamafactory/extras/constants.py b/src/llamafactory/extras/constants.py index e31e7419..73a9969d 100644 --- a/src/llamafactory/extras/constants.py +++ b/src/llamafactory/extras/constants.py @@ -682,6 +682,21 @@ register_model_group( ) +register_model_group( + models={ + "MiniCPM-2B-SFT-Chat": { + DownloadSource.DEFAULT: "openbmb/MiniCPM-2B-sft-bf16", + DownloadSource.MODELSCOPE: "OpenBMB/miniCPM-bf16", + }, + "MiniCPM-2B-DPO-Chat": { + DownloadSource.DEFAULT: "openbmb/MiniCPM-2B-dpo-bf16", + DownloadSource.MODELSCOPE: "OpenBMB/MiniCPM-2B-dpo-bf16", + }, + }, + template="cpm", +) + + register_model_group( models={ "Mistral-7B-v0.1": { From 4851ef85b795f2c03d1d90f9fb57caa2d1f59258 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sat, 15 Jun 2024 19:51:20 +0800 Subject: [PATCH 153/162] add tests Former-commit-id: 484634ee9c982e82e919ff67d507e0210345182d --- Makefile | 2 +- src/llamafactory/extras/misc.py | 9 ++- tests/data/test_supervised.py | 2 +- tests/model/model_utils/test_checkpointing.py | 74 +++++++++++++++++++ tests/model/test_base.py | 30 +++++++- tests/model/test_freeze.py | 3 + tests/model/test_full.py | 2 + tests/model/test_lora.py | 58 +++++++++++++-- 8 files changed, 166 insertions(+), 14 deletions(-) create mode 100644 tests/model/model_utils/test_checkpointing.py diff --git a/Makefile b/Makefile index 65be047b..3f13b215 100644 --- a/Makefile +++ b/Makefile @@ -11,4 +11,4 @@ style: ruff format $(check_dirs) test: - pytest tests/ + CUDA_VISIBLE_DEVICES= pytest tests/ diff --git a/src/llamafactory/extras/misc.py b/src/llamafactory/extras/misc.py index 3d969df1..93153b3e 100644 --- a/src/llamafactory/extras/misc.py +++ b/src/llamafactory/extras/misc.py @@ -22,6 +22,7 @@ from transformers import InfNanRemoveLogitsProcessor, LogitsProcessorList, PreTr from transformers.utils import ( SAFE_WEIGHTS_NAME, WEIGHTS_NAME, + is_safetensors_available, is_torch_bf16_gpu_available, is_torch_cuda_available, is_torch_mps_available, @@ -34,6 +35,11 @@ from .constants import V_HEAD_SAFE_WEIGHTS_NAME, V_HEAD_WEIGHTS_NAME from .logging import get_logger +if is_safetensors_available(): + from safetensors import safe_open + from safetensors.torch import save_file + + _is_fp16_available = is_torch_npu_available() or is_torch_cuda_available() try: _is_bf16_available = is_torch_bf16_gpu_available() @@ -128,9 +134,6 @@ def fix_valuehead_checkpoint( return if safe_serialization: - from safetensors import safe_open - from safetensors.torch import save_file - path_to_checkpoint = os.path.join(output_dir, SAFE_WEIGHTS_NAME) with safe_open(path_to_checkpoint, framework="pt", device="cpu") as f: state_dict: Dict[str, torch.Tensor] = {key: f.get_tensor(key) for key in f.keys()} diff --git 
a/tests/data/test_supervised.py b/tests/data/test_supervised.py index a72800d2..9f7b2dbf 100644 --- a/tests/data/test_supervised.py +++ b/tests/data/test_supervised.py @@ -41,7 +41,7 @@ TRAIN_ARGS = { } -@pytest.mark.parametrize("num_samples", [10]) +@pytest.mark.parametrize("num_samples", [16]) def test_supervised(num_samples: int): model_args, data_args, training_args, _, _ = get_train_args(TRAIN_ARGS) tokenizer_module = load_tokenizer(model_args) diff --git a/tests/model/model_utils/test_checkpointing.py b/tests/model/model_utils/test_checkpointing.py new file mode 100644 index 00000000..670e693d --- /dev/null +++ b/tests/model/model_utils/test_checkpointing.py @@ -0,0 +1,74 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +import torch + +from llamafactory.extras.misc import get_current_device +from llamafactory.hparams import get_train_args +from llamafactory.model import load_model, load_tokenizer + + +TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-Llama-3") + +TRAIN_ARGS = { + "model_name_or_path": TINY_LLAMA, + "stage": "sft", + "do_train": True, + "finetuning_type": "lora", + "lora_target": "all", + "dataset": "llamafactory/tiny-supervised-dataset", + "dataset_dir": "ONLINE", + "template": "llama3", + "cutoff_len": 1024, + "overwrite_cache": True, + "output_dir": "dummy_dir", + "overwrite_output_dir": True, + "fp16": True, +} + + +def test_checkpointing_enable(): + model_args, _, _, finetuning_args, _ = get_train_args({"disable_gradient_checkpointing": False, **TRAIN_ARGS}) + tokenizer_module = load_tokenizer(model_args) + model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) + for module in filter(lambda m: hasattr(m, "gradient_checkpointing"), model.modules()): + assert getattr(module, "gradient_checkpointing") is True + + +def test_checkpointing_disable(): + model_args, _, _, finetuning_args, _ = get_train_args({"disable_gradient_checkpointing": True, **TRAIN_ARGS}) + tokenizer_module = load_tokenizer(model_args) + model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) + for module in filter(lambda m: hasattr(m, "gradient_checkpointing"), model.modules()): + assert getattr(module, "gradient_checkpointing") is False + + +def test_upcast_layernorm(): + model_args, _, _, finetuning_args, _ = get_train_args({"upcast_layernorm": True, **TRAIN_ARGS}) + tokenizer_module = load_tokenizer(model_args) + model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) + for name, param in model.named_parameters(): + if param.ndim == 1 and "norm" in name: + assert param.dtype == torch.float32 + + +def test_upcast_lmhead_output(): + model_args, _, _, finetuning_args, _ = get_train_args({"upcast_lmhead_output": True, **TRAIN_ARGS}) + tokenizer_module = load_tokenizer(model_args) + model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) + inputs = 
torch.randn((1, 16), dtype=torch.float16, device=get_current_device()) + outputs: "torch.Tensor" = model.lm_head(inputs) + assert outputs.dtype == torch.float32 diff --git a/tests/model/test_base.py b/tests/model/test_base.py index 462e8cfa..ee0b2886 100644 --- a/tests/model/test_base.py +++ b/tests/model/test_base.py @@ -13,16 +13,21 @@ # limitations under the License. import os +from typing import Dict import torch from transformers import AutoModelForCausalLM +from trl import AutoModelForCausalLMWithValueHead +from llamafactory.extras.misc import get_current_device from llamafactory.hparams import get_infer_args from llamafactory.model import load_model, load_tokenizer TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-Llama-3") +TINY_LLAMA_VALUEHEAD = os.environ.get("TINY_LLAMA_VALUEHEAD", "llamafactory/tiny-random-Llama-3-valuehead") + INFER_ARGS = { "model_name_or_path": TINY_LLAMA, "template": "llama3", @@ -38,9 +43,32 @@ def compare_model(model_a: "torch.nn.Module", model_b: "torch.nn.Module"): assert torch.allclose(state_dict_a[name], state_dict_b[name]) is True +def post_init(self: "AutoModelForCausalLMWithValueHead", state_dict: Dict[str, "torch.Tensor"]): + state_dict = {k[7:]: state_dict[k] for k in state_dict.keys() if k.startswith("v_head.")} + self.v_head.load_state_dict(state_dict, strict=False) + del state_dict + + def test_base(): model_args, _, finetuning_args, _ = get_infer_args(INFER_ARGS) tokenizer_module = load_tokenizer(model_args) model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=False) - ref_model = AutoModelForCausalLM.from_pretrained(TINY_LLAMA, torch_dtype=model.dtype, device_map=model.device) + + ref_model = AutoModelForCausalLM.from_pretrained( + TINY_LLAMA, torch_dtype=torch.float16, device_map=get_current_device() + ) + compare_model(model, ref_model) + + +def test_valuehead(): + AutoModelForCausalLMWithValueHead.post_init = post_init # patch for CPU test + model_args, _, finetuning_args, _ = get_infer_args(INFER_ARGS) + tokenizer_module = load_tokenizer(model_args) + model = load_model( + tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=False, add_valuehead=True + ) + + ref_model = AutoModelForCausalLMWithValueHead.from_pretrained( + TINY_LLAMA_VALUEHEAD, torch_dtype=torch.float16, device_map=get_current_device() + ) compare_model(model, ref_model) diff --git a/tests/model/test_freeze.py b/tests/model/test_freeze.py index ac5a023c..5f478af6 100644 --- a/tests/model/test_freeze.py +++ b/tests/model/test_freeze.py @@ -49,6 +49,7 @@ def test_freeze_train_all_modules(): model_args, _, _, finetuning_args, _ = get_train_args({"freeze_trainable_layers": 1, **TRAIN_ARGS}) tokenizer_module = load_tokenizer(model_args) model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) + for name, param in model.named_parameters(): if name.startswith("model.layers.1."): assert param.requires_grad is True @@ -64,6 +65,7 @@ def test_freeze_train_extra_modules(): ) tokenizer_module = load_tokenizer(model_args) model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) + for name, param in model.named_parameters(): if name.startswith("model.layers.1.") or any(module in name for module in ["embed_tokens", "lm_head"]): assert param.requires_grad is True @@ -77,6 +79,7 @@ def test_freeze_inference(): model_args, _, finetuning_args, _ = get_infer_args(INFER_ARGS) tokenizer_module = load_tokenizer(model_args) model = 
load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=False) + for param in model.parameters(): assert param.requires_grad is False assert param.dtype == torch.float16 diff --git a/tests/model/test_full.py b/tests/model/test_full.py index bcd6480f..0a6e0743 100644 --- a/tests/model/test_full.py +++ b/tests/model/test_full.py @@ -49,6 +49,7 @@ def test_full_train(): model_args, _, _, finetuning_args, _ = get_train_args(TRAIN_ARGS) tokenizer_module = load_tokenizer(model_args) model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) + for param in model.parameters(): assert param.requires_grad is True assert param.dtype == torch.float32 @@ -58,6 +59,7 @@ def test_full_inference(): model_args, _, finetuning_args, _ = get_infer_args(INFER_ARGS) tokenizer_module = load_tokenizer(model_args) model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=False) + for param in model.parameters(): assert param.requires_grad is False assert param.dtype == torch.float16 diff --git a/tests/model/test_lora.py b/tests/model/test_lora.py index e49c026c..4923c8ad 100644 --- a/tests/model/test_lora.py +++ b/tests/model/test_lora.py @@ -18,7 +18,9 @@ from typing import Sequence import torch from peft import LoraModel, PeftModel from transformers import AutoModelForCausalLM +from trl import AutoModelForCausalLMWithValueHead +from llamafactory.extras.misc import get_current_device from llamafactory.hparams import get_infer_args, get_train_args from llamafactory.model import load_model, load_tokenizer @@ -27,6 +29,8 @@ TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-Llama-3") TINY_LLAMA_ADAPTER = os.environ.get("TINY_LLAMA_ADAPTER", "llamafactory/tiny-random-Llama-3-lora") +TINY_LLAMA_VALUEHEAD = os.environ.get("TINY_LLAMA_VALUEHEAD", "llamafactory/tiny-random-Llama-3-valuehead") + TRAIN_ARGS = { "model_name_or_path": TINY_LLAMA, "stage": "sft", @@ -67,10 +71,29 @@ def compare_model(model_a: "torch.nn.Module", model_b: "torch.nn.Module", diff_k assert torch.allclose(state_dict_a[name], state_dict_b[name]) is True +def test_lora_train_qv_modules(): + model_args, _, _, finetuning_args, _ = get_train_args({"lora_target": "q_proj,v_proj", **TRAIN_ARGS}) + tokenizer_module = load_tokenizer(model_args) + model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) + + linear_modules = set() + for name, param in model.named_parameters(): + if any(module in name for module in ["lora_A", "lora_B"]): + linear_modules.add(name.split(".lora_", maxsplit=1)[0].split(".")[-1]) + assert param.requires_grad is True + assert param.dtype == torch.float32 + else: + assert param.requires_grad is False + assert param.dtype == torch.float16 + + assert linear_modules == {"q_proj", "v_proj"} + + def test_lora_train_all_modules(): model_args, _, _, finetuning_args, _ = get_train_args({"lora_target": "all", **TRAIN_ARGS}) tokenizer_module = load_tokenizer(model_args) model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) + linear_modules = set() for name, param in model.named_parameters(): if any(module in name for module in ["lora_A", "lora_B"]): @@ -90,6 +113,7 @@ def test_lora_train_extra_modules(): ) tokenizer_module = load_tokenizer(model_args) model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) + extra_modules = set() for name, param in model.named_parameters(): if any(module in name for 
module in ["lora_A", "lora_B"]): @@ -113,7 +137,9 @@ def test_lora_train_old_adapters(): tokenizer_module = load_tokenizer(model_args) model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) - base_model = AutoModelForCausalLM.from_pretrained(TINY_LLAMA, torch_dtype=model.dtype, device_map=model.device) + base_model = AutoModelForCausalLM.from_pretrained( + TINY_LLAMA, torch_dtype=torch.float16, device_map=get_current_device() + ) ref_model = PeftModel.from_pretrained(base_model, TINY_LLAMA_ADAPTER, is_trainable=True) for param in filter(lambda p: p.requires_grad, ref_model.parameters()): param.data = param.data.to(torch.float32) @@ -128,7 +154,9 @@ def test_lora_train_new_adapters(): tokenizer_module = load_tokenizer(model_args) model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) - base_model = AutoModelForCausalLM.from_pretrained(TINY_LLAMA, torch_dtype=model.dtype, device_map=model.device) + base_model = AutoModelForCausalLM.from_pretrained( + TINY_LLAMA, torch_dtype=torch.float16, device_map=get_current_device() + ) ref_model = PeftModel.from_pretrained(base_model, TINY_LLAMA_ADAPTER, is_trainable=True) for param in filter(lambda p: p.requires_grad, ref_model.parameters()): param.data = param.data.to(torch.float32) @@ -138,17 +166,31 @@ def test_lora_train_new_adapters(): ) +def test_lora_train_valuehead(): + model_args, _, finetuning_args, _ = get_infer_args(INFER_ARGS) + tokenizer_module = load_tokenizer(model_args) + model = load_model( + tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True, add_valuehead=True + ) + + ref_model: "AutoModelForCausalLMWithValueHead" = AutoModelForCausalLMWithValueHead.from_pretrained( + TINY_LLAMA_VALUEHEAD, torch_dtype=torch.float16, device_map=get_current_device() + ) + state_dict = model.state_dict() + ref_state_dict = ref_model.state_dict() + + assert torch.allclose(state_dict["v_head.summary.weight"], ref_state_dict["v_head.summary.weight"]) + assert torch.allclose(state_dict["v_head.summary.bias"], ref_state_dict["v_head.summary.bias"]) + + def test_lora_inference(): model_args, _, finetuning_args, _ = get_infer_args(INFER_ARGS) tokenizer_module = load_tokenizer(model_args) model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=False) - base_model = AutoModelForCausalLM.from_pretrained(TINY_LLAMA, torch_dtype=model.dtype, device_map=model.device) + base_model = AutoModelForCausalLM.from_pretrained( + TINY_LLAMA, torch_dtype=torch.float16, device_map=get_current_device() + ) ref_model: "LoraModel" = PeftModel.from_pretrained(base_model, TINY_LLAMA_ADAPTER) ref_model = ref_model.merge_and_unload() compare_model(model, ref_model) - - for name, param in model.named_parameters(): - assert param.requires_grad is False - assert param.dtype == torch.float16 - assert "lora" not in name From f9653ac29c36aa44284d07c102d2fe979b796391 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sat, 15 Jun 2024 20:06:17 +0800 Subject: [PATCH 154/162] use fixture Former-commit-id: 10761985691b9f934f7689c1f82aa6dd68febcca --- src/llamafactory/hparams/model_args.py | 2 +- tests/model/test_base.py | 15 ++++++++++----- tests/model/test_lora.py | 14 +++++++++++++- 3 files changed, 24 insertions(+), 7 deletions(-) diff --git a/src/llamafactory/hparams/model_args.py b/src/llamafactory/hparams/model_args.py index 0a91f0fa..53bdbdf2 100644 --- a/src/llamafactory/hparams/model_args.py +++ 
b/src/llamafactory/hparams/model_args.py @@ -163,7 +163,7 @@ class ModelArguments: ) infer_dtype: Literal["auto", "float16", "bfloat16", "float32"] = field( default="auto", - metadata={"help": "Data type for model weights and activations at inference."} + metadata={"help": "Data type for model weights and activations at inference."}, ) hf_hub_token: Optional[str] = field( default=None, diff --git a/tests/model/test_base.py b/tests/model/test_base.py index ee0b2886..2deedde2 100644 --- a/tests/model/test_base.py +++ b/tests/model/test_base.py @@ -15,6 +15,7 @@ import os from typing import Dict +import pytest import torch from transformers import AutoModelForCausalLM from trl import AutoModelForCausalLMWithValueHead @@ -43,10 +44,14 @@ def compare_model(model_a: "torch.nn.Module", model_b: "torch.nn.Module"): assert torch.allclose(state_dict_a[name], state_dict_b[name]) is True -def post_init(self: "AutoModelForCausalLMWithValueHead", state_dict: Dict[str, "torch.Tensor"]): - state_dict = {k[7:]: state_dict[k] for k in state_dict.keys() if k.startswith("v_head.")} - self.v_head.load_state_dict(state_dict, strict=False) - del state_dict +@pytest.fixture +def fix_valuehead_cpu_loading(): + def post_init(self: "AutoModelForCausalLMWithValueHead", state_dict: Dict[str, "torch.Tensor"]): + state_dict = {k[7:]: state_dict[k] for k in state_dict.keys() if k.startswith("v_head.")} + self.v_head.load_state_dict(state_dict, strict=False) + del state_dict + + AutoModelForCausalLMWithValueHead.post_init = post_init def test_base(): @@ -60,8 +65,8 @@ def test_base(): compare_model(model, ref_model) +@pytest.mark.usefixtures("fix_valuehead_cpu_loading") def test_valuehead(): - AutoModelForCausalLMWithValueHead.post_init = post_init # patch for CPU test model_args, _, finetuning_args, _ = get_infer_args(INFER_ARGS) tokenizer_module = load_tokenizer(model_args) model = load_model( diff --git a/tests/model/test_lora.py b/tests/model/test_lora.py index 4923c8ad..fe032332 100644 --- a/tests/model/test_lora.py +++ b/tests/model/test_lora.py @@ -13,8 +13,9 @@ # limitations under the License. 
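+# These tests load tiny LoRA models through LLaMA-Factory's `load_model` and compare
+# their weights and trainable parameters against reference adapters built directly
+# with PEFT and TRL.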
import os -from typing import Sequence +from typing import Dict, Sequence +import pytest import torch from peft import LoraModel, PeftModel from transformers import AutoModelForCausalLM @@ -71,6 +72,16 @@ def compare_model(model_a: "torch.nn.Module", model_b: "torch.nn.Module", diff_k assert torch.allclose(state_dict_a[name], state_dict_b[name]) is True +@pytest.fixture +def fix_valuehead_cpu_loading(): + def post_init(self: "AutoModelForCausalLMWithValueHead", state_dict: Dict[str, "torch.Tensor"]): + state_dict = {k[7:]: state_dict[k] for k in state_dict.keys() if k.startswith("v_head.")} + self.v_head.load_state_dict(state_dict, strict=False) + del state_dict + + AutoModelForCausalLMWithValueHead.post_init = post_init + + def test_lora_train_qv_modules(): model_args, _, _, finetuning_args, _ = get_train_args({"lora_target": "q_proj,v_proj", **TRAIN_ARGS}) tokenizer_module = load_tokenizer(model_args) @@ -166,6 +177,7 @@ def test_lora_train_new_adapters(): ) +@pytest.mark.usefixtures("fix_valuehead_cpu_loading") def test_lora_train_valuehead(): model_args, _, finetuning_args, _ = get_infer_args(INFER_ARGS) tokenizer_module = load_tokenizer(model_args) From 640372cb66f0c06e5bb3cd02f322fdaf0f67219b Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sun, 16 Jun 2024 01:06:41 +0800 Subject: [PATCH 155/162] tiny fix Former-commit-id: f7f440986b0ae3b38ea9f2da80789629d4f79ea1 --- scripts/cal_flops.py | 2 +- scripts/cal_lr.py | 2 +- scripts/llama_pro.py | 2 +- src/llamafactory/data/processors/pretrain.py | 2 +- src/llamafactory/eval/evaluator.py | 2 +- src/llamafactory/extras/packages.py | 2 +- src/llamafactory/hparams/data_args.py | 2 +- src/llamafactory/model/model_utils/checkpointing.py | 2 +- src/llamafactory/model/model_utils/longlora.py | 6 ++++-- src/llamafactory/model/model_utils/quantization.py | 2 +- src/llamafactory/model/model_utils/visual.py | 2 +- src/llamafactory/train/dpo/workflow.py | 2 +- src/llamafactory/train/kto/trainer.py | 4 ++-- src/llamafactory/train/kto/workflow.py | 2 +- src/llamafactory/train/ppo/trainer.py | 2 +- src/llamafactory/train/ppo/workflow.py | 2 +- src/llamafactory/train/pt/workflow.py | 2 +- src/llamafactory/train/rm/trainer.py | 4 ++-- src/llamafactory/train/rm/workflow.py | 2 +- src/llamafactory/train/sft/metric.py | 2 +- src/llamafactory/train/sft/workflow.py | 2 +- tests/model/test_base.py | 2 +- 22 files changed, 27 insertions(+), 25 deletions(-) diff --git a/scripts/cal_flops.py b/scripts/cal_flops.py index 627b5534..32526d89 100644 --- a/scripts/cal_flops.py +++ b/scripts/cal_flops.py @@ -1,7 +1,7 @@ # coding=utf-8 # Copyright 2024 Microsoft Corporation and the LlamaFactory team. # -# This code is inspired by Microsoft's DeepSpeed library. +# This code is inspired by the Microsoft's DeepSpeed library. # https://www.deepspeed.ai/tutorials/flops-profiler/ # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/scripts/cal_lr.py b/scripts/cal_lr.py index ff21d27c..ad6992cb 100644 --- a/scripts/cal_lr.py +++ b/scripts/cal_lr.py @@ -1,7 +1,7 @@ # coding=utf-8 # Copyright 2024 imoneoi and the LlamaFactory team. # -# This code is inspired by imoneoi's OpenChat library. +# This code is inspired by the imoneoi's OpenChat library. 
# https://github.com/imoneoi/openchat/blob/3.6.0/ochat/training_deepspeed/train.py # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/scripts/llama_pro.py b/scripts/llama_pro.py index f315335a..395375ef 100644 --- a/scripts/llama_pro.py +++ b/scripts/llama_pro.py @@ -1,7 +1,7 @@ # coding=utf-8 # Copyright 2024 Tencent Inc. and the LlamaFactory team. # -# This code is inspired by Tencent's LLaMA-Pro library. +# This code is inspired by the Tencent's LLaMA-Pro library. # https://github.com/TencentARC/LLaMA-Pro/blob/main/scripts/block_expansion.py # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/src/llamafactory/data/processors/pretrain.py b/src/llamafactory/data/processors/pretrain.py index fb4c840c..67d6009b 100644 --- a/src/llamafactory/data/processors/pretrain.py +++ b/src/llamafactory/data/processors/pretrain.py @@ -1,6 +1,6 @@ # Copyright 2024 HuggingFace Inc. and the LlamaFactory team. # -# This code is inspired by HuggingFace's transformers library. +# This code is inspired by the HuggingFace's transformers library. # https://github.com/huggingface/transformers/blob/v4.40.0/examples/pytorch/language-modeling/run_clm.py # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/src/llamafactory/eval/evaluator.py b/src/llamafactory/eval/evaluator.py index bbd7a44b..d3140793 100644 --- a/src/llamafactory/eval/evaluator.py +++ b/src/llamafactory/eval/evaluator.py @@ -1,6 +1,6 @@ # Copyright 2024 the LlamaFactory team. # -# This code is inspired by Dan's test library. +# This code is inspired by the Dan's test library. # https://github.com/hendrycks/test/blob/master/evaluate_flan.py # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/src/llamafactory/extras/packages.py b/src/llamafactory/extras/packages.py index 35f546ab..0a84a293 100644 --- a/src/llamafactory/extras/packages.py +++ b/src/llamafactory/extras/packages.py @@ -1,6 +1,6 @@ # Copyright 2024 HuggingFace Inc. and the LlamaFactory team. # -# This code is inspired by HuggingFace's transformers library. +# This code is inspired by the HuggingFace's transformers library. # https://github.com/huggingface/transformers/blob/v4.40.0/src/transformers/utils/import_utils.py # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/src/llamafactory/hparams/data_args.py b/src/llamafactory/hparams/data_args.py index 95284766..39290e21 100644 --- a/src/llamafactory/hparams/data_args.py +++ b/src/llamafactory/hparams/data_args.py @@ -1,6 +1,6 @@ # Copyright 2024 HuggingFace Inc. and the LlamaFactory team. # -# This code is inspired by HuggingFace's transformers library. +# This code is inspired by the HuggingFace's transformers library. # https://github.com/huggingface/transformers/blob/v4.40.0/examples/pytorch/language-modeling/run_clm.py # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/src/llamafactory/model/model_utils/checkpointing.py b/src/llamafactory/model/model_utils/checkpointing.py index e4e84b12..f5314125 100644 --- a/src/llamafactory/model/model_utils/checkpointing.py +++ b/src/llamafactory/model/model_utils/checkpointing.py @@ -1,6 +1,6 @@ # Copyright 2024 HuggingFace Inc. and the LlamaFactory team. # -# This code is inspired by HuggingFace's Transformers and PEFT library. +# This code is inspired by the HuggingFace's Transformers and PEFT library. 
# https://github.com/huggingface/transformers/blob/v4.40.0/src/transformers/modeling_utils.py # https://github.com/huggingface/peft/blob/v0.10.0/src/peft/utils/other.py # diff --git a/src/llamafactory/model/model_utils/longlora.py b/src/llamafactory/model/model_utils/longlora.py index 7af43dcf..af30bd50 100644 --- a/src/llamafactory/model/model_utils/longlora.py +++ b/src/llamafactory/model/model_utils/longlora.py @@ -1,7 +1,9 @@ -# Copyright 2024 EleutherAI, HuggingFace Inc., and the LlamaFactory team. +# Copyright 2024 EleutherAI, HuggingFace Inc., Yukang Chen, and the LlamaFactory team. # -# This code is based on the EleutherAI's GPT-NeoX and HuggingFace's Transformers libraries. +# This code is based on the EleutherAI's GPT-NeoX and the HuggingFace's Transformers libraries. # https://github.com/huggingface/transformers/blob/v4.40.0/src/transformers/models/llama/modeling_llama.py +# This code is also inspired by the original LongLoRA implementation. +# https://github.com/dvlab-research/LongLoRA/blob/main/llama_attn_replace.py # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/model/model_utils/quantization.py b/src/llamafactory/model/model_utils/quantization.py index 9e6b9da4..0a0fca34 100644 --- a/src/llamafactory/model/model_utils/quantization.py +++ b/src/llamafactory/model/model_utils/quantization.py @@ -1,6 +1,6 @@ # Copyright 2024 HuggingFace Inc. and the LlamaFactory team. # -# This code is inspired by HuggingFace's Optimum library. +# This code is inspired by the HuggingFace's Optimum library. # https://github.com/huggingface/optimum/blob/v1.20.0/optimum/gptq/data.py # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/src/llamafactory/model/model_utils/visual.py b/src/llamafactory/model/model_utils/visual.py index 37237485..700bf470 100644 --- a/src/llamafactory/model/model_utils/visual.py +++ b/src/llamafactory/model/model_utils/visual.py @@ -1,6 +1,6 @@ # Copyright 2024 HuggingFace Inc. and the LlamaFactory team. # -# This code is inspired by HuggingFace's TRL library. +# This code is inspired by the HuggingFace's Transformers library. # https://github.com/huggingface/transformers/blob/v4.40.0/src/transformers/models/llava/modeling_llava.py # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/src/llamafactory/train/dpo/workflow.py b/src/llamafactory/train/dpo/workflow.py index 8c3c2eb1..431b5285 100644 --- a/src/llamafactory/train/dpo/workflow.py +++ b/src/llamafactory/train/dpo/workflow.py @@ -1,6 +1,6 @@ # Copyright 2024 HuggingFace Inc. and the LlamaFactory team. # -# This code is inspired by HuggingFace's TRL library. +# This code is inspired by the HuggingFace's TRL library. # https://github.com/huggingface/trl/blob/v0.8.0/examples/scripts/dpo.py # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/src/llamafactory/train/kto/trainer.py b/src/llamafactory/train/kto/trainer.py index 6e96fc0c..91d68975 100644 --- a/src/llamafactory/train/kto/trainer.py +++ b/src/llamafactory/train/kto/trainer.py @@ -1,6 +1,6 @@ # Copyright 2024 HuggingFace Inc. and the LlamaFactory team. # -# This code is inspired by HuggingFace's TRL library. +# This code is inspired by the HuggingFace's TRL library. 
# https://github.com/huggingface/trl/blob/v0.8.0/trl/trainer/kto_trainer.py # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -114,8 +114,8 @@ class CustomKTOTrainer(KTOTrainer): def _save(self, output_dir: Optional[str] = None, state_dict: Optional[Dict[str, "torch.Tensor"]] = None) -> None: super()._save(output_dir, state_dict) + output_dir = output_dir if output_dir is not None else self.args.output_dir if self.processor is not None: - output_dir = output_dir if output_dir is not None else self.args.output_dir getattr(self.processor, "image_processor").save_pretrained(output_dir) def forward( diff --git a/src/llamafactory/train/kto/workflow.py b/src/llamafactory/train/kto/workflow.py index 8a7af6d4..8182a184 100644 --- a/src/llamafactory/train/kto/workflow.py +++ b/src/llamafactory/train/kto/workflow.py @@ -1,6 +1,6 @@ # Copyright 2024 HuggingFace Inc. and the LlamaFactory team. # -# This code is inspired by HuggingFace's TRL library. +# This code is inspired by the HuggingFace's TRL library. # https://github.com/huggingface/trl/blob/v0.8.0/examples/scripts/kto.py # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/src/llamafactory/train/ppo/trainer.py b/src/llamafactory/train/ppo/trainer.py index 61420f3b..df4a37be 100644 --- a/src/llamafactory/train/ppo/trainer.py +++ b/src/llamafactory/train/ppo/trainer.py @@ -1,6 +1,6 @@ # Copyright 2024 HuggingFace Inc. and the LlamaFactory team. # -# This code is inspired by HuggingFace's TRL library. +# This code is inspired by the HuggingFace's TRL library. # https://github.com/huggingface/trl/blob/v0.8.0/trl/trainer/ppo_trainer.py # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/src/llamafactory/train/ppo/workflow.py b/src/llamafactory/train/ppo/workflow.py index 891d539a..4f4d2820 100644 --- a/src/llamafactory/train/ppo/workflow.py +++ b/src/llamafactory/train/ppo/workflow.py @@ -1,6 +1,6 @@ # Copyright 2024 HuggingFace Inc. and the LlamaFactory team. # -# This code is inspired by HuggingFace's TRL library. +# This code is inspired by the HuggingFace's TRL library. # https://github.com/huggingface/trl/blob/v0.8.0/examples/scripts/ppo.py # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/src/llamafactory/train/pt/workflow.py b/src/llamafactory/train/pt/workflow.py index f1df314e..b84a0e7d 100644 --- a/src/llamafactory/train/pt/workflow.py +++ b/src/llamafactory/train/pt/workflow.py @@ -1,6 +1,6 @@ # Copyright 2024 HuggingFace Inc. and the LlamaFactory team. # -# This code is inspired by HuggingFace's transformers library. +# This code is inspired by the HuggingFace's transformers library. # https://github.com/huggingface/transformers/blob/v4.40.0/examples/pytorch/language-modeling/run_clm.py # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/src/llamafactory/train/rm/trainer.py b/src/llamafactory/train/rm/trainer.py index 14695d7d..7f91e5f5 100644 --- a/src/llamafactory/train/rm/trainer.py +++ b/src/llamafactory/train/rm/trainer.py @@ -1,6 +1,6 @@ # Copyright 2024 the LlamaFactory team. # -# This code is inspired by CarperAI's trlx library. +# This code is inspired by the CarperAI's trlx library. 
# https://github.com/CarperAI/trlx/blob/v0.7.0/examples/summarize_rlhf/reward_model/reward_model.py # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -89,8 +89,8 @@ class PairwiseTrainer(Trainer): def _save(self, output_dir: Optional[str] = None, state_dict: Optional[Dict[str, "torch.Tensor"]] = None) -> None: super()._save(output_dir, state_dict) + output_dir = output_dir if output_dir is not None else self.args.output_dir if self.processor is not None: - output_dir = output_dir if output_dir is not None else self.args.output_dir getattr(self.processor, "image_processor").save_pretrained(output_dir) def compute_loss( diff --git a/src/llamafactory/train/rm/workflow.py b/src/llamafactory/train/rm/workflow.py index 75c0a2bf..6f24e964 100644 --- a/src/llamafactory/train/rm/workflow.py +++ b/src/llamafactory/train/rm/workflow.py @@ -1,6 +1,6 @@ # Copyright 2024 the LlamaFactory team. # -# This code is inspired by CarperAI's trlx library. +# This code is inspired by the CarperAI's trlx library. # https://github.com/CarperAI/trlx/blob/v0.7.0/examples/summarize_rlhf/reward_model/train_reward_model_gptj.py # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/src/llamafactory/train/sft/metric.py b/src/llamafactory/train/sft/metric.py index d2147c22..95bfcb69 100644 --- a/src/llamafactory/train/sft/metric.py +++ b/src/llamafactory/train/sft/metric.py @@ -1,6 +1,6 @@ # Copyright 2024 HuggingFace Inc., THUDM, and the LlamaFactory team. # -# This code is inspired by HuggingFace's transformers library and THUDM's ChatGLM implementation. +# This code is inspired by the HuggingFace's transformers library and the THUDM's ChatGLM implementation. # https://github.com/huggingface/transformers/blob/v4.40.0/examples/pytorch/summarization/run_summarization.py # https://github.com/THUDM/ChatGLM-6B/blob/main/ptuning/main.py # diff --git a/src/llamafactory/train/sft/workflow.py b/src/llamafactory/train/sft/workflow.py index dfc71cfb..885bc7ac 100644 --- a/src/llamafactory/train/sft/workflow.py +++ b/src/llamafactory/train/sft/workflow.py @@ -1,6 +1,6 @@ # Copyright 2024 HuggingFace Inc. and the LlamaFactory team. # -# This code is inspired by HuggingFace's transformers library. +# This code is inspired by the HuggingFace's transformers library. 
# https://github.com/huggingface/transformers/blob/v4.40.0/examples/pytorch/summarization/run_summarization.py # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tests/model/test_base.py b/tests/model/test_base.py index 2deedde2..954492ef 100644 --- a/tests/model/test_base.py +++ b/tests/model/test_base.py @@ -41,7 +41,7 @@ def compare_model(model_a: "torch.nn.Module", model_b: "torch.nn.Module"): state_dict_b = model_b.state_dict() assert set(state_dict_a.keys()) == set(state_dict_b.keys()) for name in state_dict_a.keys(): - assert torch.allclose(state_dict_a[name], state_dict_b[name]) is True + assert torch.allclose(state_dict_a[name], state_dict_b[name]) @pytest.fixture From 0b571f84b457bc6dd35fffa55e5f4658a210f743 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sun, 16 Jun 2024 01:08:12 +0800 Subject: [PATCH 156/162] support pissa Former-commit-id: ef8e45f2eaf466c54e9a671512a2974575677b08 --- README.md | 6 +- README_zh.md | 6 +- examples/README.md | 6 ++ examples/README_zh.md | 6 ++ examples/extras/pissa/llama3_lora_sft.yaml | 42 ++++++++++ scripts/loftq_init.py | 72 +++++++---------- scripts/pissa_init.py | 79 ++++++++++++++++++ src/llamafactory/hparams/finetuning_args.py | 20 ++++- src/llamafactory/hparams/model_args.py | 8 +- src/llamafactory/hparams/parser.py | 5 +- src/llamafactory/model/adapter.py | 25 ++++-- src/llamafactory/train/dpo/trainer.py | 13 ++- src/llamafactory/train/pt/trainer.py | 12 ++- src/llamafactory/train/sft/trainer.py | 13 ++- src/llamafactory/train/trainer_utils.py | 54 ++++++++++++- src/llamafactory/webui/components/train.py | 9 ++- src/llamafactory/webui/locales.py | 14 ++++ src/llamafactory/webui/runner.py | 2 + tests/model/test_pissa.py | 90 +++++++++++++++++++++ 19 files changed, 406 insertions(+), 76 deletions(-) create mode 100644 examples/extras/pissa/llama3_lora_sft.yaml create mode 100644 scripts/pissa_init.py create mode 100644 tests/model/test_pissa.py diff --git a/README.md b/README.md index cae79694..cb9a7222 100644 --- a/README.md +++ b/README.md @@ -49,7 +49,7 @@ Choose your path: - **Various models**: LLaMA, LLaVA, Mistral, Mixtral-MoE, Qwen, Yi, Gemma, Baichuan, ChatGLM, Phi, etc. - **Integrated methods**: (Continuous) pre-training, (multimodal) supervised fine-tuning, reward modeling, PPO, DPO, KTO, ORPO, etc. - **Scalable resources**: 32-bit full-tuning, 16-bit freeze-tuning, 16-bit LoRA and 2/4/8-bit QLoRA via AQLM/AWQ/GPTQ/LLM.int8. -- **Advanced algorithms**: GaLore, BAdam, DoRA, LongLoRA, LLaMA Pro, Mixture-of-Depths, LoRA+, LoftQ and Agent tuning. +- **Advanced algorithms**: GaLore, BAdam, DoRA, LongLoRA, LLaMA Pro, Mixture-of-Depths, LoRA+, LoftQ, PiSSA and Agent tuning. - **Practical tricks**: FlashAttention-2, Unsloth, RoPE scaling, NEFTune and rsLoRA. - **Experiment monitors**: LlamaBoard, TensorBoard, Wandb, MLflow, etc. - **Faster inference**: OpenAI-style API, Gradio UI and CLI with vLLM worker. @@ -71,9 +71,9 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/ ## Changelog -[24/06/07] We supported fine-tuning the **[Qwen-2](https://qwenlm.github.io/blog/qwen2/)** series models. +[24/06/16] We support **[PiSSA](https://arxiv.org/abs/2404.02948)** algorithm. See [examples](examples/README.md) for usage. -[24/06/05] We supported fine-tuning the **[GLM-4-9B/GLM-4-9B-Chat](https://github.com/THUDM/GLM-4)** models. 
+[24/06/07] We supported fine-tuning the **[Qwen2](https://qwenlm.github.io/blog/qwen2/)** and **[GLM-4](https://github.com/THUDM/GLM-4)** models. [24/05/26] We supported **[SimPO](https://arxiv.org/abs/2405.14734)** algorithm for preference learning. See [examples](examples/README.md) for usage. diff --git a/README_zh.md b/README_zh.md index af3ff8f0..5c005f30 100644 --- a/README_zh.md +++ b/README_zh.md @@ -49,7 +49,7 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/ec36a9dd-37f4-4f72-81bd - **多种模型**:LLaMA、LLaVA、Mistral、Mixtral-MoE、Qwen、Yi、Gemma、Baichuan、ChatGLM、Phi 等等。 - **集成方法**:(增量)预训练、(多模态)指令监督微调、奖励模型训练、PPO 训练、DPO 训练、KTO 训练、ORPO 训练等等。 - **多种精度**:32 比特全参数微调、16 比特冻结微调、16 比特 LoRA 微调和基于 AQLM/AWQ/GPTQ/LLM.int8 的 2/4/8 比特 QLoRA 微调。 -- **先进算法**:GaLore、BAdam、DoRA、LongLoRA、LLaMA Pro、Mixture-of-Depths、LoRA+、LoftQ 和 Agent 微调。 +- **先进算法**:GaLore、BAdam、DoRA、LongLoRA、LLaMA Pro、Mixture-of-Depths、LoRA+、LoftQ、PiSSA 和 Agent 微调。 - **实用技巧**:FlashAttention-2、Unsloth、RoPE scaling、NEFTune 和 rsLoRA。 - **实验监控**:LlamaBoard、TensorBoard、Wandb、MLflow 等等。 - **极速推理**:基于 vLLM 的 OpenAI 风格 API、浏览器界面和命令行接口。 @@ -71,9 +71,9 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/ec36a9dd-37f4-4f72-81bd ## 更新日志 -[24/06/07] 我们支持了 **[Qwen-2](https://qwenlm.github.io/blog/qwen2/)** 系列模型的微调。 +[24/06/16] 我们支持了 **[PiSSA](https://arxiv.org/abs/2404.02948)** 算法。详细用法请参照 [examples](examples/README_zh.md)。 -[24/06/05] 我们支持了 **[GLM-4-9B/GLM-4-9B-Chat](https://github.com/THUDM/GLM-4)** 模型的微调。 +[24/06/07] 我们支持了 **[Qwen2](https://qwenlm.github.io/blog/qwen2/)** 和 **[GLM-4](https://github.com/THUDM/GLM-4)** 模型的微调。 [24/05/26] 我们支持了 **[SimPO](https://arxiv.org/abs/2405.14734)** 偏好对齐算法。详细用法请参照 [examples](examples/README_zh.md)。 diff --git a/examples/README.md b/examples/README.md index a6d78936..902d26b1 100644 --- a/examples/README.md +++ b/examples/README.md @@ -213,3 +213,9 @@ llamafactory-cli train examples/extras/llama_pro/llama3_freeze_sft.yaml ```bash bash examples/extras/fsdp_qlora/single_node.sh ``` + +#### PiSSA Fine-Tuning + +```bash +llamafactory-cli train examples/extras/pissa/llama3_lora_sft.yaml +``` diff --git a/examples/README_zh.md b/examples/README_zh.md index b6168a95..586e498c 100644 --- a/examples/README_zh.md +++ b/examples/README_zh.md @@ -213,3 +213,9 @@ llamafactory-cli train examples/extras/llama_pro/llama3_freeze_sft.yaml ```bash bash examples/extras/fsdp_qlora/single_node.sh ``` + +#### PiSSA 微调 + +```bash +llamafactory-cli train examples/extras/pissa/llama3_lora_sft.yaml +``` diff --git a/examples/extras/pissa/llama3_lora_sft.yaml b/examples/extras/pissa/llama3_lora_sft.yaml new file mode 100644 index 00000000..fd4b9f1d --- /dev/null +++ b/examples/extras/pissa/llama3_lora_sft.yaml @@ -0,0 +1,42 @@ +### model +model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct + +### method +stage: sft +do_train: true +finetuning_type: lora +lora_target: all +pissa_init: true +pissa_iter: 4 +pissa_convert: true + +### dataset +dataset: identity,alpaca_en_demo +template: llama3 +cutoff_len: 1024 +max_samples: 1000 +overwrite_cache: true +preprocessing_num_workers: 16 + +### output +output_dir: saves/llama3-8b/lora/sft +logging_steps: 10 +save_steps: 500 +plot_loss: true +overwrite_output_dir: true + +### train +per_device_train_batch_size: 1 +gradient_accumulation_steps: 8 +learning_rate: 1.0e-4 +num_train_epochs: 3.0 +lr_scheduler_type: cosine +warmup_ratio: 0.1 +fp16: true +ddp_timeout: 180000000 + +### eval +val_size: 0.1 +per_device_eval_batch_size: 1 +eval_strategy: steps +eval_steps: 500 diff --git 
a/scripts/loftq_init.py b/scripts/loftq_init.py index 159dea06..556f342c 100644
--- a/scripts/loftq_init.py
+++ b/scripts/loftq_init.py
@@ -1,7 +1,7 @@
# coding=utf-8
# Copyright 2024 HuggingFace Inc. and the LlamaFactory team.
#
-# This code is inspired by HuggingFace's PEFT library.
+# This code is based on the HuggingFace's PEFT library.
# https://github.com/huggingface/peft/blob/v0.10.0/examples/loftq_finetuning/quantize_save_load.py
#
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -17,11 +17,9 @@
# limitations under the License.

import os
-from typing import TYPE_CHECKING, Optional
+from typing import TYPE_CHECKING

import fire
-import torch
-import torch.nn as nn
from peft import LoftQConfig, LoraConfig, TaskType, get_peft_model
from transformers import AutoModelForCausalLM, AutoTokenizer


@@ -30,41 +28,20 @@
if TYPE_CHECKING:
    from transformers import PreTrainedModel


-class Shell(nn.Module):
-    def __init__(self, weight: torch.Tensor, bias: Optional[torch.Tensor] = None):
-        super().__init__()
-        self.weight = nn.Parameter(weight, requires_grad=False)
-        if bias is not None:
-            self.bias = nn.Parameter(bias, requires_grad=False)
-
-
-def unwrap_model(model: nn.Module, pattern=".base_layer") -> None:
-    for name in {k.split(pattern)[0] for k, _ in model.named_modules() if pattern in k}:
-        parent_name = ".".join(name.split(".")[:-1])
-        child_name = name.split(".")[-1]
-        parent_module = model.get_submodule(parent_name)
-        child_module = getattr(parent_module, child_name)
-        base_layer = getattr(child_module, "base_layer")
-        weight = getattr(base_layer, "weight", None)
-        bias = getattr(base_layer, "bias", None)
-        setattr(parent_module, child_name, Shell(weight, bias))
-
-    print("Model unwrapped.")
-
-
def quantize_loftq(
    model_name_or_path: str,
-    save_dir: str,
-    loftq_bits: Optional[int] = 4,
-    loftq_iter: Optional[int] = 1,
-    lora_alpha: Optional[int] = None,
-    lora_rank: Optional[int] = 16,
-    lora_target: Optional[str] = "q_proj,v_proj",
-    save_safetensors: Optional[bool] = False,
+    output_dir: str,
+    loftq_bits: int = 4,
+    loftq_iter: int = 4,
+    lora_alpha: int = None,
+    lora_rank: int = 16,
+    lora_dropout: float = 0,
+    lora_target: str = "q_proj,v_proj",
+    save_safetensors: bool = True,
):
    r"""
    Initializes LoRA weights with LoRA-fine-tuning-aware Quantization (LoftQ)
-    Usage: python loftq_init.py --model_name_or_path path_to_model --save_dir output_dir
+    Usage: python loftq_init.py --model_name_or_path path_to_model --output_dir output_dir
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(model_name_or_path, trust_remote_code=True, torch_dtype="auto")
@@ -74,25 +51,34 @@ def quantize_loftq(
        inference_mode=True,
        r=lora_rank,
        lora_alpha=lora_alpha if lora_alpha is not None else lora_rank * 2,
-        lora_dropout=0.1,
+        lora_dropout=lora_dropout,
        target_modules=[name.strip() for name in lora_target.split(",")],
        init_lora_weights="loftq",
        loftq_config=loftq_config,
    )

    # Init LoftQ model
-    lora_model = get_peft_model(model, lora_config)
-    base_model: "PreTrainedModel" = lora_model.get_base_model()
+    print("Initializing LoftQ weights, it may take several minutes, please wait patiently.")
+    peft_model = get_peft_model(model, lora_config)
+    loftq_dir = os.path.join(output_dir, "loftq_init")

    # Save LoftQ model
-    setattr(lora_model.base_model.peft_config["default"], "base_model_name_or_path", save_dir)
-    setattr(lora_model.base_model.peft_config["default"], "init_lora_weights", True)
-
lora_model.save_pretrained(os.path.join(save_dir, "adapters"), safe_serialization=save_safetensors) + setattr(peft_model.peft_config["default"], "base_model_name_or_path", output_dir) + setattr(peft_model.peft_config["default"], "init_lora_weights", True) # don't apply loftq again + peft_model.save_pretrained(loftq_dir, safe_serialization=save_safetensors) + print("Adapter weights saved in {}".format(loftq_dir)) # Save base model - unwrap_model(base_model) - base_model.save_pretrained(save_dir, safe_serialization=save_safetensors) - tokenizer.save_pretrained(save_dir) + base_model: "PreTrainedModel" = peft_model.unload() + base_model.save_pretrained(output_dir, safe_serialization=save_safetensors) + tokenizer.save_pretrained(output_dir) + print("Model weights saved in {}".format(output_dir)) + + print("Fine-tune this model with:") + print("model_name_or_path: {}".format(output_dir)) + print("adapter_name_or_path: {}".format(loftq_dir)) + print("finetuning_type: lora") + print("quantization_bit: {}".format(loftq_bits)) if __name__ == "__main__": diff --git a/scripts/pissa_init.py b/scripts/pissa_init.py new file mode 100644 index 00000000..1b673c45 --- /dev/null +++ b/scripts/pissa_init.py @@ -0,0 +1,79 @@ +# coding=utf-8 +# Copyright 2024 HuggingFace Inc. and the LlamaFactory team. +# +# This code is based on the HuggingFace's PEFT library. +# https://github.com/huggingface/peft/blob/v0.11.0/examples/pissa_finetuning/preprocess.py +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
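+
+# PiSSA (Principal Singular values and Singular vectors Adaptation) initializes the
+# LoRA matrices from the top singular components of each target weight matrix, so
+# fine-tuning starts from the most informative directions of the pretrained weights
+# rather than from a random adapter. The script below keeps the residual weights as
+# the new base model in `output_dir` and saves the initial adapter to
+# `output_dir/pissa_init`.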
+ +import os +from typing import TYPE_CHECKING + +import fire +from peft import LoraConfig, TaskType, get_peft_model +from transformers import AutoModelForCausalLM, AutoTokenizer + + +if TYPE_CHECKING: + from transformers import PreTrainedModel + + +def quantize_pissa( + model_name_or_path: str, + output_dir: str, + pissa_iter: int = 4, + lora_alpha: int = None, + lora_rank: int = 16, + lora_dropout: float = 0, + lora_target: str = "q_proj,v_proj", + save_safetensors: bool = True, +): + r""" + Initializes LoRA weights with Principal Singular values and Singular vectors Adaptation (PiSSA) + Usage: python pissa_init.py --model_name_or_path path_to_model --output_dir output_dir + """ + tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, trust_remote_code=True) + model = AutoModelForCausalLM.from_pretrained(model_name_or_path, trust_remote_code=True, torch_dtype="auto") + lora_config = LoraConfig( + task_type=TaskType.CAUSAL_LM, + r=lora_rank, + lora_alpha=lora_alpha if lora_alpha is not None else lora_rank * 2, + lora_dropout=lora_dropout, + target_modules=[name.strip() for name in lora_target.split(",")], + init_lora_weights="pissa" if pissa_iter == -1 else "pissa_niter_{}".format(pissa_iter) + ) + + # Init PiSSA model + peft_model = get_peft_model(model, lora_config) + pissa_dir = os.path.join(output_dir, "pissa_init") + + # Save PiSSA model + setattr(peft_model.peft_config["default"], "init_lora_weights", True) # don't apply pissa again + peft_model.save_pretrained(pissa_dir, safe_serialization=save_safetensors) + print("Adapter weights saved in {}".format(pissa_dir)) + + # Save base model + base_model: "PreTrainedModel" = peft_model.unload() + base_model.save_pretrained(output_dir, safe_serialization=save_safetensors) + tokenizer.save_pretrained(output_dir) + print("Model weights saved in {}".format(output_dir)) + + print("Fine-tune this model with:") + print("model_name_or_path: {}".format(output_dir)) + print("adapter_name_or_path: {}".format(pissa_dir)) + print("finetuning_type: lora") + print("pissa_convert: true") + + +if __name__ == "__main__": + fire.Fire(quantize_pissa) diff --git a/src/llamafactory/hparams/finetuning_args.py b/src/llamafactory/hparams/finetuning_args.py index 52dc299e..1ef46eca 100644 --- a/src/llamafactory/hparams/finetuning_args.py +++ b/src/llamafactory/hparams/finetuning_args.py @@ -108,6 +108,18 @@ class LoraArguments: default=False, metadata={"help": "Whether or not to use the weight-decomposed lora method (DoRA)."}, ) + pissa_init: bool = field( + default=False, + metadata={"help": "Whether or not to initialize a PiSSA adapter."}, + ) + pissa_iter: int = field( + default=4, + metadata={"help": "The number of iteration steps performed by FSVD in PiSSA. 
Use -1 to disable it."}, + ) + pissa_convert: bool = field( + default=False, + metadata={"help": "Whether or not to convert the PiSSA adapter to a normal LoRA adapter."}, + ) create_new_adapter: bool = field( default=False, metadata={"help": "Whether or not to create a new adapter with randomly initialized weight."}, @@ -340,7 +352,7 @@ class FinetuningArguments(FreezeArguments, LoraArguments, RLHFArguments, GaloreA self.additional_target: Optional[List[str]] = split_arg(self.additional_target) self.galore_target: List[str] = split_arg(self.galore_target) self.freeze_vision_tower = self.freeze_vision_tower or self.train_mm_proj_only - self.use_ref_model = self.pref_loss not in ["orpo", "simpo"] + self.use_ref_model = (self.stage == "dpo" and self.pref_loss not in ["orpo", "simpo"]) assert self.finetuning_type in ["lora", "freeze", "full"], "Invalid fine-tuning method." assert self.ref_model_quantization_bit in [None, 8, 4], "We only accept 4-bit or 8-bit quantization." @@ -367,5 +379,11 @@ class FinetuningArguments(FreezeArguments, LoraArguments, RLHFArguments, GaloreA if self.loraplus_lr_ratio is not None and self.finetuning_type != "lora": raise ValueError("`loraplus_lr_ratio` is only valid for LoRA training.") + if self.pissa_convert and self.finetuning_type != "lora": + raise ValueError("`pissa_convert` is only valid for LoRA training.") + + if self.pissa_convert and (self.stage in ["rm", "ppo", "kto"] or self.use_ref_model): + raise ValueError("Cannot use PiSSA for current training stage.") + if self.train_mm_proj_only and self.finetuning_type != "full": raise ValueError("`train_mm_proj_only` is only valid for full training.") diff --git a/src/llamafactory/hparams/model_args.py b/src/llamafactory/hparams/model_args.py index 53bdbdf2..996e9130 100644 --- a/src/llamafactory/hparams/model_args.py +++ b/src/llamafactory/hparams/model_args.py @@ -1,6 +1,6 @@ # Copyright 2024 HuggingFace Inc. and the LlamaFactory team. # -# This code is inspired by HuggingFace's transformers library. +# This code is inspired by the HuggingFace's transformers library. # https://github.com/huggingface/transformers/blob/v4.40.0/examples/pytorch/language-modeling/run_clm.py # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -45,6 +45,10 @@ class ModelArguments: ) }, ) + adapter_folder: Optional[str] = field( + default=None, + metadata={"help": "The folder containing the adapter weights to load."}, + ) cache_dir: Optional[str] = field( default=None, metadata={"help": "Where to store the pre-trained models downloaded from huggingface.co or modelscope.cn."}, @@ -150,7 +154,7 @@ class ModelArguments: metadata={"help": "Whether or not to disable CUDA graph in the vLLM engine."}, ) vllm_max_lora_rank: int = field( - default=8, + default=32, metadata={"help": "Maximum rank of all LoRAs in the vLLM engine."}, ) offload_folder: str = field( diff --git a/src/llamafactory/hparams/parser.py b/src/llamafactory/hparams/parser.py index 1c57567c..31a805f6 100644 --- a/src/llamafactory/hparams/parser.py +++ b/src/llamafactory/hparams/parser.py @@ -1,6 +1,6 @@ # Copyright 2024 HuggingFace Inc. and the LlamaFactory team. # -# This code is inspired by HuggingFace's transformers library. +# This code is inspired by the HuggingFace's transformers library. 
# https://github.com/huggingface/transformers/blob/v4.40.0/examples/pytorch/language-modeling/run_clm.py
#
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -90,6 +90,9 @@ def _verify_model_args(model_args: "ModelArguments", finetuning_args: "Finetunin
        if finetuning_args.finetuning_type != "lora":
            raise ValueError("Quantization is only compatible with the LoRA method.")

+        if finetuning_args.pissa_init:
+            raise ValueError("Please use scripts/pissa_init.py for quantized PiSSA.")
+
        if model_args.resize_vocab:
            raise ValueError("Cannot resize embedding layers of a quantized model.")

diff --git a/src/llamafactory/model/adapter.py b/src/llamafactory/model/adapter.py
index dfa71525..a8f3a256 100644
--- a/src/llamafactory/model/adapter.py
+++ b/src/llamafactory/model/adapter.py
@@ -179,8 +179,16 @@ def _setup_lora_tuning(
    else:
        adapter_to_merge = model_args.adapter_name_or_path

+    init_kwargs = {
+        "subfolder": model_args.adapter_folder,
+        "offload_folder": model_args.offload_folder,
+        "cache_dir": model_args.cache_dir,
+        "revision": model_args.model_revision,
+        "token": model_args.hf_hub_token,
+    }
+
    for adapter in adapter_to_merge:
-        model: "LoraModel" = PeftModel.from_pretrained(model, adapter, offload_folder=model_args.offload_folder)
+        model: "LoraModel" = PeftModel.from_pretrained(model, adapter, **init_kwargs)
        model = model.merge_and_unload()

    if len(adapter_to_merge) > 0:
@@ -190,12 +198,7 @@ def _setup_lora_tuning(
        if model_args.use_unsloth:
            model = load_unsloth_peft_model(config, model_args, is_trainable=is_trainable)
        else:
-            model = PeftModel.from_pretrained(
-                model,
-                adapter_to_resume,
-                is_trainable=is_trainable,
-                offload_folder=model_args.offload_folder,
-            )
+            model = PeftModel.from_pretrained(model, adapter_to_resume, is_trainable=is_trainable, **init_kwargs)

        logger.info("Loaded adapter(s): {}".format(",".join(model_args.adapter_name_or_path)))

@@ -242,6 +245,14 @@ def _setup_lora_tuning(
        if model_args.use_unsloth:
            model = get_unsloth_peft_model(model, model_args, peft_kwargs)
        else:
+            if finetuning_args.pissa_init:
+                if finetuning_args.pissa_iter == -1:
+                    logger.info("Using PiSSA initialization.")
+                    peft_kwargs["init_lora_weights"] = "pissa"
+                else:
+                    logger.info("Using PiSSA initialization with FSVD steps {}.".format(finetuning_args.pissa_iter))
+                    peft_kwargs["init_lora_weights"] = "pissa_niter_{}".format(finetuning_args.pissa_iter)
+
            lora_config = LoraConfig(
                task_type=TaskType.CAUSAL_LM,
                inference_mode=False,
diff --git a/src/llamafactory/train/dpo/trainer.py b/src/llamafactory/train/dpo/trainer.py
index 475d08c3..9928d0bc 100644
--- a/src/llamafactory/train/dpo/trainer.py
+++ b/src/llamafactory/train/dpo/trainer.py
@@ -1,6 +1,6 @@
# Copyright 2024 HuggingFace Inc. and the LlamaFactory team.
#
-# This code is inspired by HuggingFace's TRL library.
+# This code is inspired by the HuggingFace's TRL library.
# https://github.com/huggingface/trl/blob/v0.8.0/trl/trainer/dpo_trainer.py
#
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -15,6 +15,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
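+# When `pissa_convert` is enabled, the trainer snapshots the initial PiSSA adapter to
+# `output_dir/pissa_init` before training and converts the trained adapter back into a
+# standard LoRA adapter each time the model is saved.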
+import os import warnings from collections import defaultdict from contextlib import nullcontext @@ -28,7 +29,7 @@ from trl import DPOTrainer from trl.trainer import disable_dropout_in_model from ...extras.constants import IGNORE_INDEX -from ..trainer_utils import create_custom_optimzer, create_custom_scheduler, get_batch_logps +from ..trainer_utils import convert_pissa_adapter, create_custom_optimzer, create_custom_scheduler, get_batch_logps if TYPE_CHECKING: @@ -91,6 +92,9 @@ class CustomDPOTrainer(DPOTrainer): self.ref_model = self.accelerator.prepare_model(self.ref_model, evaluation_mode=True) self.ref_model.eval() + if finetuning_args.pissa_convert: + self.save_model(os.path.join(self.args.output_dir, "pissa_init")) + if finetuning_args.use_badam: from badam import clip_grad_norm_for_sparse_tensor @@ -109,8 +113,11 @@ class CustomDPOTrainer(DPOTrainer): def _save(self, output_dir: Optional[str] = None, state_dict: Optional[Dict[str, "torch.Tensor"]] = None) -> None: super()._save(output_dir, state_dict) + output_dir = output_dir if output_dir is not None else self.args.output_dir + if self.finetuning_args.pissa_convert: + convert_pissa_adapter(output_dir, state_dict, self.accelerator, self.model, self.args) + if self.processor is not None: - output_dir = output_dir if output_dir is not None else self.args.output_dir getattr(self.processor, "image_processor").save_pretrained(output_dir) def odds_ratio_loss(self, chosen_logps: "torch.Tensor", rejected_logps: "torch.Tensor") -> "torch.Tensor": diff --git a/src/llamafactory/train/pt/trainer.py b/src/llamafactory/train/pt/trainer.py index 09729f2e..f9e04cb5 100644 --- a/src/llamafactory/train/pt/trainer.py +++ b/src/llamafactory/train/pt/trainer.py @@ -12,13 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os from types import MethodType from typing import TYPE_CHECKING, Dict, Optional from transformers import Trainer from ...extras.logging import get_logger -from ..trainer_utils import create_custom_optimzer, create_custom_scheduler +from ..trainer_utils import convert_pissa_adapter, create_custom_optimzer, create_custom_scheduler if TYPE_CHECKING: @@ -42,6 +43,10 @@ class CustomTrainer(Trainer): super().__init__(**kwargs) self.finetuning_args = finetuning_args self.processor = processor + + if finetuning_args.pissa_convert: + self.save_model(os.path.join(self.args.output_dir, "pissa_init")) + if finetuning_args.use_badam: from badam import clip_grad_norm_for_sparse_tensor @@ -60,6 +65,9 @@ class CustomTrainer(Trainer): def _save(self, output_dir: Optional[str] = None, state_dict: Optional[Dict[str, "torch.Tensor"]] = None) -> None: super()._save(output_dir, state_dict) + output_dir = output_dir if output_dir is not None else self.args.output_dir + if self.finetuning_args.pissa_convert: + convert_pissa_adapter(output_dir, state_dict, self.accelerator, self.model, self.args) + if self.processor is not None: - output_dir = output_dir if output_dir is not None else self.args.output_dir getattr(self.processor, "image_processor").save_pretrained(output_dir) diff --git a/src/llamafactory/train/sft/trainer.py b/src/llamafactory/train/sft/trainer.py index 6ab6914e..921e49ab 100644 --- a/src/llamafactory/train/sft/trainer.py +++ b/src/llamafactory/train/sft/trainer.py @@ -1,6 +1,6 @@ # Copyright 2024 HuggingFace Inc. and the LlamaFactory team. # -# This code is inspired by HuggingFace's transformers library. +# This code is inspired by the HuggingFace's transformers library. 
# https://github.com/huggingface/transformers/blob/v4.40.0/src/transformers/trainer_seq2seq.py # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -26,7 +26,7 @@ from transformers import Seq2SeqTrainer from ...extras.constants import IGNORE_INDEX from ...extras.logging import get_logger -from ..trainer_utils import create_custom_optimzer, create_custom_scheduler +from ..trainer_utils import convert_pissa_adapter, create_custom_optimzer, create_custom_scheduler if TYPE_CHECKING: @@ -51,6 +51,10 @@ class CustomSeq2SeqTrainer(Seq2SeqTrainer): super().__init__(**kwargs) self.finetuning_args = finetuning_args self.processor = processor + + if finetuning_args.pissa_convert: + self.save_model(os.path.join(self.args.output_dir, "pissa_init")) + if finetuning_args.use_badam: from badam import clip_grad_norm_for_sparse_tensor @@ -69,8 +73,11 @@ class CustomSeq2SeqTrainer(Seq2SeqTrainer): def _save(self, output_dir: Optional[str] = None, state_dict: Optional[Dict[str, "torch.Tensor"]] = None) -> None: super()._save(output_dir, state_dict) + output_dir = output_dir if output_dir is not None else self.args.output_dir + if self.finetuning_args.pissa_convert: + convert_pissa_adapter(output_dir, state_dict, self.accelerator, self.model, self.args) + if self.processor is not None: - output_dir = output_dir if output_dir is not None else self.args.output_dir getattr(self.processor, "image_processor").save_pretrained(output_dir) def prediction_step( diff --git a/src/llamafactory/train/trainer_utils.py b/src/llamafactory/train/trainer_utils.py index 5621d5df..2d6bab24 100644 --- a/src/llamafactory/train/trainer_utils.py +++ b/src/llamafactory/train/trainer_utils.py @@ -1,9 +1,9 @@ # Copyright 2024 HuggingFace Inc. and the LlamaFactory team. # -# This code is inspired by the GaLore's implementation: https://github.com/jiaweizzhao/GaLore -# and the LoRA+'s implementation: https://github.com/nikhil-ghosh-berkeley/loraplus -# and the BAdam's implementation: https://github.com/Ledzy/BAdam -# and the TRL's implementation: https://github.com/huggingface/trl +# This code is inspired by the original GaLore's implementation: https://github.com/jiaweizzhao/GaLore +# and the original LoRA+'s implementation: https://github.com/nikhil-ghosh-berkeley/loraplus +# and the original BAdam's implementation: https://github.com/Ledzy/BAdam +# and the HuggingFace's TRL library: https://github.com/huggingface/trl # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -17,9 +17,11 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os from typing import TYPE_CHECKING, Callable, Dict, List, Optional, Tuple, Union import torch +from peft import PeftModel from transformers import Trainer from transformers.optimization import get_scheduler from transformers.pytorch_utils import ALL_LAYERNORM_LAYERS @@ -37,6 +39,7 @@ if is_galore_available(): if TYPE_CHECKING: + from accelerate import Accelerator from transformers import PreTrainedModel, Seq2SeqTrainingArguments from trl import AutoModelForCausalLMWithValueHead @@ -171,6 +174,49 @@ def create_reward_model( return reward_model +def convert_pissa_adapter( + output_dir: str, + state_dict: Dict[str, "torch.Tensor"], + accelerator: "Accelerator", + model: "PreTrainedModel", + training_args: "Seq2SeqTrainingArguments", +) -> None: + r""" + Converts the PiSSA adapter to a LoRA adapter. 
+    """
+    pissa_init_dir = os.path.join(training_args.output_dir, "pissa_init")
+    pissa_backup_dir = os.path.join(output_dir, "pissa_backup")
+    if output_dir == pissa_init_dir:
+        logger.info("Initial PiSSA adapter will be saved at: {}.".format(pissa_init_dir))
+        unwrapped_model = accelerator.unwrap_model(model)
+        if isinstance(unwrapped_model, PeftModel):
+            init_lora_weights = getattr(unwrapped_model.peft_config["default"], "init_lora_weights")
+            setattr(unwrapped_model.peft_config["default"], "init_lora_weights", True)
+            unwrapped_model.save_pretrained(
+                output_dir,
+                state_dict=state_dict,
+                safe_serialization=training_args.save_safetensors,
+            )
+            setattr(unwrapped_model.peft_config["default"], "init_lora_weights", init_lora_weights)
+    elif output_dir == training_args.output_dir:  # at the end of training
+        logger.info("Converted PiSSA adapter will be saved at: {}.".format(output_dir))
+        unwrapped_model = accelerator.unwrap_model(model)
+        if isinstance(unwrapped_model, PeftModel):  # backup the pissa adapter for further use
+            unwrapped_model.save_pretrained(
+                pissa_backup_dir,
+                state_dict=state_dict,
+                safe_serialization=training_args.save_safetensors,
+            )
+            unwrapped_model.save_pretrained(
+                output_dir,
+                state_dict=state_dict,
+                safe_serialization=training_args.save_safetensors,
+                convert_pissa_to_lora=pissa_init_dir,
+            )
+            unwrapped_model.load_adapter(pissa_backup_dir, "default", is_trainable=True)
+            unwrapped_model.set_adapter("default")
+
+
 def _get_decay_parameter_names(model: "PreTrainedModel") -> List[str]:
     r"""
     Returns a list of names of parameters with weight decay. (weights in non-layernorm layers)
diff --git a/src/llamafactory/webui/components/train.py b/src/llamafactory/webui/components/train.py
index 673f6bf4..874f3c5e 100644
--- a/src/llamafactory/webui/components/train.py
+++ b/src/llamafactory/webui/components/train.py
@@ -163,10 +163,9 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]:
             create_new_adapter = gr.Checkbox()
 
         with gr.Row():
-            with gr.Column(scale=1):
-                use_rslora = gr.Checkbox()
-                use_dora = gr.Checkbox()
-
+            use_rslora = gr.Checkbox()
+            use_dora = gr.Checkbox()
+            use_pissa = gr.Checkbox()
             lora_target = gr.Textbox(scale=2)
             additional_target = gr.Textbox(scale=2)
 
@@ -179,6 +178,7 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]:
             create_new_adapter,
             use_rslora,
             use_dora,
+            use_pissa,
             lora_target,
             additional_target,
         }
@@ -193,6 +193,7 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]:
            create_new_adapter=create_new_adapter,
            use_rslora=use_rslora,
            use_dora=use_dora,
+           use_pissa=use_pissa,
            lora_target=lora_target,
            additional_target=additional_target,
        )
diff --git a/src/llamafactory/webui/locales.py b/src/llamafactory/webui/locales.py
index 427f01b8..8e8d6fce 100644
--- a/src/llamafactory/webui/locales.py
+++ b/src/llamafactory/webui/locales.py
@@ -732,6 +732,20 @@ LOCALES = {
             "info": "使用权重分解的 LoRA。",
         },
     },
+    "use_pissa": {
+        "en": {
+            "label": "Use PiSSA",
+            "info": "Use PiSSA method.",
+        },
+        "ru": {
+            "label": "используйте PiSSA",
+            "info": "Используйте метод PiSSA.",
+        },
+        "zh": {
+            "label": "使用 PiSSA",
+            "info": "使用 PiSSA 方法。",
+        },
+    },
     "lora_target": {
         "en": {
             "label": "LoRA modules (optional)",
diff --git a/src/llamafactory/webui/runner.py b/src/llamafactory/webui/runner.py
index 76982934..13dbba03 100644
--- a/src/llamafactory/webui/runner.py
+++ b/src/llamafactory/webui/runner.py
@@ -173,6 +173,8 @@ class Runner:
             args["create_new_adapter"] = get("train.create_new_adapter")
             args["use_rslora"] = 
get("train.use_rslora") args["use_dora"] = get("train.use_dora") + args["pissa_init"] = get("train.use_pissa") + args["pissa_convert"] = get("train.use_pissa") args["lora_target"] = get("train.lora_target") or "all" args["additional_target"] = get("train.additional_target") or None diff --git a/tests/model/test_pissa.py b/tests/model/test_pissa.py new file mode 100644 index 00000000..70c424fd --- /dev/null +++ b/tests/model/test_pissa.py @@ -0,0 +1,90 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +import torch +from peft import LoraModel, PeftModel +from transformers import AutoModelForCausalLM + +from llamafactory.extras.misc import get_current_device +from llamafactory.hparams import get_infer_args, get_train_args +from llamafactory.model import load_model, load_tokenizer + + +TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-Llama-3") + +TINY_LLAMA_PISSA = os.environ.get("TINY_LLAMA_ADAPTER", "llamafactory/tiny-random-Llama-3-pissa") + +TRAIN_ARGS = { + "model_name_or_path": TINY_LLAMA, + "stage": "sft", + "do_train": True, + "finetuning_type": "lora", + "pissa_init": True, + "pissa_iter": -1, + "dataset": "llamafactory/tiny-supervised-dataset", + "dataset_dir": "ONLINE", + "template": "llama3", + "cutoff_len": 1024, + "overwrite_cache": True, + "output_dir": "dummy_dir", + "overwrite_output_dir": True, + "fp16": True, +} + +INFER_ARGS = { + "model_name_or_path": TINY_LLAMA_PISSA, + "adapter_name_or_path": TINY_LLAMA_PISSA, + "adapter_folder": "pissa_init", + "finetuning_type": "lora", + "template": "llama3", + "infer_dtype": "float16", +} + + +def compare_model(model_a: "torch.nn.Module", model_b: "torch.nn.Module"): + state_dict_a = model_a.state_dict() + state_dict_b = model_b.state_dict() + assert set(state_dict_a.keys()) == set(state_dict_b.keys()) + for name in state_dict_a.keys(): + assert torch.allclose(state_dict_a[name], state_dict_b[name]) + + +def test_pissa_init(): + model_args, _, _, finetuning_args, _ = get_train_args(TRAIN_ARGS) + tokenizer_module = load_tokenizer(model_args) + model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) + + base_model = AutoModelForCausalLM.from_pretrained( + TINY_LLAMA_PISSA, torch_dtype=torch.float16, device_map=get_current_device() + ) + ref_model = PeftModel.from_pretrained(base_model, TINY_LLAMA_PISSA, subfolder="pissa_init", is_trainable=True) + for param in filter(lambda p: p.requires_grad, ref_model.parameters()): + param.data = param.data.to(torch.float32) + + compare_model(model, ref_model) + + +def test_pissa_inference(): + model_args, _, finetuning_args, _ = get_infer_args(INFER_ARGS) + tokenizer_module = load_tokenizer(model_args) + model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=False) + + base_model = AutoModelForCausalLM.from_pretrained( + TINY_LLAMA_PISSA, torch_dtype=torch.float16, device_map=get_current_device() + ) + ref_model: "LoraModel" = 
PeftModel.from_pretrained(base_model, TINY_LLAMA_PISSA, subfolder="pissa_init")
+    ref_model = ref_model.merge_and_unload()
+    compare_model(model, ref_model)

From bf46a8ca3e47057cb481077da778ca77f9f94a7f Mon Sep 17 00:00:00 2001
From: hiyouga <467089858@qq.com>
Date: Sun, 16 Jun 2024 01:21:06 +0800
Subject: [PATCH 157/162] increase tol

Former-commit-id: c29071445e34aed23123fdf883a4d877744a1b0e
---
 tests/model/test_pissa.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/model/test_pissa.py b/tests/model/test_pissa.py
index 70c424fd..41d02752 100644
--- a/tests/model/test_pissa.py
+++ b/tests/model/test_pissa.py
@@ -59,7 +59,7 @@ def compare_model(model_a: "torch.nn.Module", model_b: "torch.nn.Module"):
     state_dict_b = model_b.state_dict()
     assert set(state_dict_a.keys()) == set(state_dict_b.keys())
     for name in state_dict_a.keys():
-        assert torch.allclose(state_dict_a[name], state_dict_b[name])
+        assert torch.allclose(state_dict_a[name], state_dict_b[name], rtol=1e-3, atol=1e-3)
 
 
 def test_pissa_init():

From 1a66bb1438a5dfe86eac73662002a4ea8fee5207 Mon Sep 17 00:00:00 2001
From: hiyouga <467089858@qq.com>
Date: Sun, 16 Jun 2024 01:22:23 +0800
Subject: [PATCH 158/162] Update tests.yml

Former-commit-id: 82e83615a706293abbf266d11c57caedafdd4c5b
---
 .github/workflows/tests.yml | 2 --
 1 file changed, 2 deletions(-)

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 96092662..98bd9455 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -9,8 +9,6 @@ on:
       - "requirements.txt"
       - ".github/workflows/*.yml"
   pull_request:
-    types:
-      - review_requested
     branches:
       - main
     paths:

From d5a0cc93a26e45ed81178ad614259de03bdeb662 Mon Sep 17 00:00:00 2001
From: hiyouga <467089858@qq.com>
Date: Sun, 16 Jun 2024 01:38:44 +0800
Subject: [PATCH 159/162] fix tol

Former-commit-id: bdb54bcb477126687db789bd89f2df84e424a2a3
---
 src/llamafactory/train/trainer_utils.py | 1 +
 tests/model/test_base.py                | 2 +-
 tests/model/test_lora.py                | 4 ++--
 tests/model/test_pissa.py               | 2 +-
 4 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/llamafactory/train/trainer_utils.py b/src/llamafactory/train/trainer_utils.py
index 2d6bab24..9052c96d 100644
--- a/src/llamafactory/train/trainer_utils.py
+++ b/src/llamafactory/train/trainer_utils.py
@@ -213,6 +213,7 @@ def convert_pissa_adapter(
                 safe_serialization=training_args.save_safetensors,
                 convert_pissa_to_lora=pissa_init_dir,
             )
+            # TODO: PiSSA is unexpectedly applied to the model again here
             unwrapped_model.load_adapter(pissa_backup_dir, "default", is_trainable=True)
             unwrapped_model.set_adapter("default")
 
diff --git a/tests/model/test_base.py b/tests/model/test_base.py
index 954492ef..e1991b20 100644
--- a/tests/model/test_base.py
+++ b/tests/model/test_base.py
@@ -41,7 +41,7 @@ def compare_model(model_a: "torch.nn.Module", model_b: "torch.nn.Module"):
     state_dict_b = model_b.state_dict()
     assert set(state_dict_a.keys()) == set(state_dict_b.keys())
     for name in state_dict_a.keys():
-        assert torch.allclose(state_dict_a[name], state_dict_b[name])
+        assert torch.allclose(state_dict_a[name], state_dict_b[name], rtol=1e-4, atol=1e-5)
 
 
 @pytest.fixture
diff --git a/tests/model/test_lora.py b/tests/model/test_lora.py
index fe032332..64566fe8 100644
--- a/tests/model/test_lora.py
+++ b/tests/model/test_lora.py
@@ -67,9 +67,9 @@ def compare_model(model_a: "torch.nn.Module", model_b: "torch.nn.Module", diff_k
     assert set(state_dict_a.keys()) == set(state_dict_b.keys())
     for name in state_dict_a.keys():
         if any(key in name for key in 
diff_keys): - assert torch.allclose(state_dict_a[name], state_dict_b[name]) is False + assert torch.allclose(state_dict_a[name], state_dict_b[name], rtol=1e-4, atol=1e-5) is False else: - assert torch.allclose(state_dict_a[name], state_dict_b[name]) is True + assert torch.allclose(state_dict_a[name], state_dict_b[name], rtol=1e-4, atol=1e-5) is True @pytest.fixture diff --git a/tests/model/test_pissa.py b/tests/model/test_pissa.py index 41d02752..030310d0 100644 --- a/tests/model/test_pissa.py +++ b/tests/model/test_pissa.py @@ -59,7 +59,7 @@ def compare_model(model_a: "torch.nn.Module", model_b: "torch.nn.Module"): state_dict_b = model_b.state_dict() assert set(state_dict_a.keys()) == set(state_dict_b.keys()) for name in state_dict_a.keys(): - assert torch.allclose(state_dict_a[name], state_dict_b[name], rtol=1e-3, atol=1e-3) + assert torch.allclose(state_dict_a[name], state_dict_b[name], rtol=1e-4, atol=1e-5) def test_pissa_init(): From 18af6f0d3b757c4b15e55cb665baa7c05147d120 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sun, 16 Jun 2024 01:43:43 +0800 Subject: [PATCH 160/162] update pr template Former-commit-id: 0b7c29674fda10c0ac87e0a0c75990feabb5a3de --- .github/PULL_REQUEST_TEMPLATE.md | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index b31e9d19..d23d6be3 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -5,3 +5,4 @@ Fixes # (issue) ## Before submitting - [ ] Did you read the [contributor guideline](https://github.com/hiyouga/LLaMA-Factory/blob/main/.github/CONTRIBUTING.md)? +- [ ] Did you write any new necessary tests? From 6b729cccb9f231aa60944f734bd100d2a06cb83f Mon Sep 17 00:00:00 2001 From: hoshi-hiyouga Date: Sun, 16 Jun 2024 02:57:00 +0800 Subject: [PATCH 161/162] Update parser.py Former-commit-id: d10c97193d08bd368aca1a72f0d1d8a96c76765d --- src/llamafactory/hparams/parser.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/llamafactory/hparams/parser.py b/src/llamafactory/hparams/parser.py index 31a805f6..467fc43d 100644 --- a/src/llamafactory/hparams/parser.py +++ b/src/llamafactory/hparams/parser.py @@ -90,8 +90,8 @@ def _verify_model_args(model_args: "ModelArguments", finetuning_args: "Finetunin if finetuning_args.finetuning_type != "lora": raise ValueError("Quantization is only compatible with the LoRA method.") - if finetuning_args.use_pissa: - raise ValueError("Please use scripts/pissa_init.py for quantized PiSSA.") + if finetuning_args.pissa_init: + raise ValueError("Please use scripts/pissa_init.py to initialize PiSSA for a quantized model.") if model_args.resize_vocab: raise ValueError("Cannot resize embedding layers of a quantized model.") From aea2e4083fe4117a8b394b8ad649d4838dd31e54 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Mon, 17 Jun 2024 17:47:25 +0800 Subject: [PATCH 162/162] tiny fix Former-commit-id: 2289436567a7860d25d9da0afb39e4a3e5e83839 --- examples/README.md | 14 ++++---- examples/README_zh.md | 14 ++++---- .../fsdp_qlora/{single_node.sh => train.sh} | 0 scripts/llama_pro.py | 2 +- scripts/loftq_init.py | 2 +- scripts/pissa_init.py | 5 ++- tests/model/test_lora.py | 34 +++++++------------ 7 files changed, 32 insertions(+), 39 deletions(-) rename examples/extras/fsdp_qlora/{single_node.sh => train.sh} (100%) diff --git a/examples/README.md b/examples/README.md index 902d26b1..007a81ab 100644 --- a/examples/README.md +++ b/examples/README.md @@ -195,6 +195,12 @@ llamafactory-cli 
train examples/extras/badam/llama3_full_sft.yaml llamafactory-cli train examples/extras/loraplus/llama3_lora_sft.yaml ``` +#### PiSSA Fine-Tuning + +```bash +llamafactory-cli train examples/extras/pissa/llama3_lora_sft.yaml +``` + #### Mixture-of-Depths Fine-Tuning ```bash @@ -211,11 +217,5 @@ llamafactory-cli train examples/extras/llama_pro/llama3_freeze_sft.yaml #### FSDP+QLoRA Fine-Tuning ```bash -bash examples/extras/fsdp_qlora/single_node.sh -``` - -#### PiSSA Fine-Tuning - -```bash -llamafactory-cli train examples/extras/pissa/llama3_lora_sft.yaml +bash examples/extras/fsdp_qlora/train.sh ``` diff --git a/examples/README_zh.md b/examples/README_zh.md index 586e498c..b9d90f25 100644 --- a/examples/README_zh.md +++ b/examples/README_zh.md @@ -195,6 +195,12 @@ llamafactory-cli train examples/extras/badam/llama3_full_sft.yaml llamafactory-cli train examples/extras/loraplus/llama3_lora_sft.yaml ``` +#### PiSSA 微调 + +```bash +llamafactory-cli train examples/extras/pissa/llama3_lora_sft.yaml +``` + #### 深度混合微调 ```bash @@ -211,11 +217,5 @@ llamafactory-cli train examples/extras/llama_pro/llama3_freeze_sft.yaml #### FSDP+QLoRA 微调 ```bash -bash examples/extras/fsdp_qlora/single_node.sh -``` - -#### PiSSA 微调 - -```bash -llamafactory-cli train examples/extras/pissa/llama3_lora_sft.yaml +bash examples/extras/fsdp_qlora/train.sh ``` diff --git a/examples/extras/fsdp_qlora/single_node.sh b/examples/extras/fsdp_qlora/train.sh similarity index 100% rename from examples/extras/fsdp_qlora/single_node.sh rename to examples/extras/fsdp_qlora/train.sh diff --git a/scripts/llama_pro.py b/scripts/llama_pro.py index 395375ef..17bf6fc2 100644 --- a/scripts/llama_pro.py +++ b/scripts/llama_pro.py @@ -120,7 +120,7 @@ def block_expansion( json.dump(index, f, indent=2, sort_keys=True) print("Model weights saved in {}".format(output_dir)) - print("Fine-tune this model with:") + print("- Fine-tune this model with:") print("model_name_or_path: {}".format(output_dir)) print("finetuning_type: freeze") print("freeze_trainable_layers: {}".format(num_expand)) diff --git a/scripts/loftq_init.py b/scripts/loftq_init.py index 556f342c..b9506fa3 100644 --- a/scripts/loftq_init.py +++ b/scripts/loftq_init.py @@ -74,7 +74,7 @@ def quantize_loftq( tokenizer.save_pretrained(output_dir) print("Model weights saved in {}".format(output_dir)) - print("Fine-tune this model with:") + print("- Fine-tune this model with:") print("model_name_or_path: {}".format(output_dir)) print("adapter_name_or_path: {}".format(loftq_dir)) print("finetuning_type: lora") diff --git a/scripts/pissa_init.py b/scripts/pissa_init.py index 1b673c45..10b81efc 100644 --- a/scripts/pissa_init.py +++ b/scripts/pissa_init.py @@ -68,11 +68,14 @@ def quantize_pissa( tokenizer.save_pretrained(output_dir) print("Model weights saved in {}".format(output_dir)) - print("Fine-tune this model with:") + print("- Fine-tune this model with:") print("model_name_or_path: {}".format(output_dir)) print("adapter_name_or_path: {}".format(pissa_dir)) print("finetuning_type: lora") + print("pissa_init: false") print("pissa_convert: true") + print("- and optionally with:") + print("quantization_bit: 4") if __name__ == "__main__": diff --git a/tests/model/test_lora.py b/tests/model/test_lora.py index 64566fe8..630e5f75 100644 --- a/tests/model/test_lora.py +++ b/tests/model/test_lora.py @@ -56,9 +56,15 @@ INFER_ARGS = { } -def load_reference_model() -> "torch.nn.Module": - model = AutoModelForCausalLM.from_pretrained(TINY_LLAMA) - return PeftModel.from_pretrained(model, 
TINY_LLAMA_ADAPTER) +def load_reference_model(is_trainable: bool = False) -> "LoraModel": + model = AutoModelForCausalLM.from_pretrained( + TINY_LLAMA, torch_dtype=torch.float16, device_map=get_current_device() + ) + lora_model = PeftModel.from_pretrained(model, TINY_LLAMA_ADAPTER, is_trainable=is_trainable) + for param in filter(lambda p: p.requires_grad, lora_model.parameters()): + param.data = param.data.to(torch.float32) + + return lora_model def compare_model(model_a: "torch.nn.Module", model_b: "torch.nn.Module", diff_keys: Sequence[str] = []): @@ -148,13 +154,7 @@ def test_lora_train_old_adapters(): tokenizer_module = load_tokenizer(model_args) model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) - base_model = AutoModelForCausalLM.from_pretrained( - TINY_LLAMA, torch_dtype=torch.float16, device_map=get_current_device() - ) - ref_model = PeftModel.from_pretrained(base_model, TINY_LLAMA_ADAPTER, is_trainable=True) - for param in filter(lambda p: p.requires_grad, ref_model.parameters()): - param.data = param.data.to(torch.float32) - + ref_model = load_reference_model(is_trainable=True) compare_model(model, ref_model) @@ -165,13 +165,7 @@ def test_lora_train_new_adapters(): tokenizer_module = load_tokenizer(model_args) model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) - base_model = AutoModelForCausalLM.from_pretrained( - TINY_LLAMA, torch_dtype=torch.float16, device_map=get_current_device() - ) - ref_model = PeftModel.from_pretrained(base_model, TINY_LLAMA_ADAPTER, is_trainable=True) - for param in filter(lambda p: p.requires_grad, ref_model.parameters()): - param.data = param.data.to(torch.float32) - + ref_model = load_reference_model(is_trainable=True) compare_model( model, ref_model, diff_keys=["q_proj", "k_proj", "v_proj", "o_proj", "up_proj", "gate_proj", "down_proj"] ) @@ -200,9 +194,5 @@ def test_lora_inference(): tokenizer_module = load_tokenizer(model_args) model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=False) - base_model = AutoModelForCausalLM.from_pretrained( - TINY_LLAMA, torch_dtype=torch.float16, device_map=get_current_device() - ) - ref_model: "LoraModel" = PeftModel.from_pretrained(base_model, TINY_LLAMA_ADAPTER) - ref_model = ref_model.merge_and_unload() + ref_model = load_reference_model().merge_and_unload() compare_model(model, ref_model)
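
Note: the `pissa_convert` path added in these patches boils down to the `convert_pissa_to_lora` argument that `convert_pissa_adapter` passes to PEFT's `save_pretrained`. As a rough standalone illustration only — the paths below are placeholders, and this assumes a PEFT release that accepts `convert_pissa_to_lora` as the patches above do — the same conversion can be sketched as:

```python
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM

# Placeholder paths: a base model, an adapter trained with init_lora_weights="pissa",
# and the directory holding the initial (untrained) PiSSA weights ("pissa_init").
base = AutoModelForCausalLM.from_pretrained("path/to/base_model", torch_dtype=torch.float16)
model = PeftModel.from_pretrained(base, "path/to/trained_pissa_adapter")

# Saving with convert_pissa_to_lora rewrites the residual PiSSA update into an
# equivalent plain LoRA adapter that can be loaded or merged like any other adapter.
model.save_pretrained("path/to/converted_lora_adapter", convert_pissa_to_lora="path/to/pissa_init")
```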