From 5e1cb05b95b8a5801edc1aa8e0894bd6bceb6435 Mon Sep 17 00:00:00 2001 From: zhangzc <2608882093@qq.com> Date: Wed, 27 Mar 2024 14:22:50 +0800 Subject: [PATCH 001/162] Supports custom data set sampling quantity Former-commit-id: fa8325401df27595de4611a89dfcc14644956abd --- data/README.md | 5 +++-- data/README_zh.md | 3 ++- src/llmtuner/data/loader.py | 13 +++++++++++++ src/llmtuner/data/parser.py | 4 +++- 4 files changed, 21 insertions(+), 4 deletions(-) diff --git a/data/README.md b/data/README.md index fa2c9ee0..c4a1b298 100644 --- a/data/README.md +++ b/data/README.md @@ -27,8 +27,9 @@ If you are using a custom dataset, please provide your dataset definition in the "assistant_tag": "the value of the role_tag represents the assistant. (default: gpt)", "observation_tag": "the value of the role_tag represents the tool results. (default: observation)", "function_tag": "the value of the role_tag represents the function call. (default: function_call)", - "system_tag": "the value of the role_tag represents the system prompt. (default: system, can override system column)" - } + "system_tag": "the value of the role_tag represents the system prompt. (default: system, can override system column)", + }, + "sample_num": "the number of samples from this dataset can be greater than the total amount of the dataset. (default: None)" } ``` diff --git a/data/README_zh.md b/data/README_zh.md index e0004f4a..6396688a 100644 --- a/data/README_zh.md +++ b/data/README_zh.md @@ -28,7 +28,8 @@ "observation_tag": "消息中代表工具返回结果的 role_tag(默认:observation)", "function_tag": "消息中代表工具调用的 role_tag(默认:function_call)", "system_tag": "消息中代表系统提示的 role_tag(默认:system,会覆盖 system 列)" - } + }, + "sample_num": "从该数据集采样的数量,可大于该数据集总量(默认:None)" } ``` diff --git a/src/llmtuner/data/loader.py b/src/llmtuner/data/loader.py index 935695ad..bebe5718 100644 --- a/src/llmtuner/data/loader.py +++ b/src/llmtuner/data/loader.py @@ -1,5 +1,7 @@ import inspect import os +import numpy as np +from numpy.random import RandomState from typing import TYPE_CHECKING, Literal, Union from datasets import load_dataset, load_from_disk @@ -108,6 +110,17 @@ def load_single_dataset( num_samples = min(data_args.max_samples, len(dataset)) dataset = dataset.select(range(num_samples)) + if dataset_attr.sample_num: + dataset_sample_num = dataset_attr.sample_num + logger.info(f"从 {dataset_attr.dataset_name} 采样 {dataset_sample_num} 条训练样本") + random_state = RandomState(42) + idx = random_state.permutation(len(dataset))[:dataset_sample_num] + dataset_sample_num -= len(idx) + if dataset_sample_num > 0: + idx2 = random_state.choice(len(dataset), dataset_sample_num) + idx = np.concatenate([idx, idx2], axis=0) + dataset = dataset.select(idx) + return align_dataset(dataset, dataset_attr, data_args) diff --git a/src/llmtuner/data/parser.py b/src/llmtuner/data/parser.py index 861396a0..9746b5b2 100644 --- a/src/llmtuner/data/parser.py +++ b/src/llmtuner/data/parser.py @@ -44,6 +44,7 @@ class DatasetAttr: observation_tag: Optional[str] = "observation" function_tag: Optional[str] = "function_call" system_tag: Optional[str] = "system" + sample_num: Optional[int] = None def __repr__(self) -> str: return self.dataset_name @@ -90,7 +91,8 @@ def get_dataset_list(data_args: "DataArguments") -> List["DatasetAttr"]: dataset_attr.set_attr("folder", dataset_info[name]) dataset_attr.set_attr("ranking", dataset_info[name], default=False) dataset_attr.set_attr("formatting", dataset_info[name], default="alpaca") - + dataset_attr.set_attr("sample_num", dataset_info[name]) + if "columns" in 
dataset_info[name]: column_names = ["system"] if dataset_attr.formatting == "alpaca": From aaadaa18f6d276db026037ce4718b8745420d380 Mon Sep 17 00:00:00 2001 From: BUAADreamer <1428195643@qq.com> Date: Tue, 21 May 2024 08:57:14 +0800 Subject: [PATCH 002/162] support pretraining of llava Former-commit-id: 6a4c8cf0a6a1674c693b9337f018ff8df7477f8f --- data/dataset_info.json | 14 ++++ data/mllm_pt_demo.json | 92 ++++++++++++++++++++++++++ src/llamafactory/hparams/model_args.py | 4 ++ src/llamafactory/model/loader.py | 5 ++ 4 files changed, 115 insertions(+) create mode 100644 data/mllm_pt_demo.json diff --git a/data/dataset_info.json b/data/dataset_info.json index b985582e..5a90e077 100644 --- a/data/dataset_info.json +++ b/data/dataset_info.json @@ -38,6 +38,20 @@ "assistant_tag": "assistant" } }, + "mllm_pt_demo": { + "file_name": "mllm_pt_demo.json", + "formatting": "sharegpt", + "columns": { + "messages": "messages", + "images": "images" + }, + "tags": { + "role_tag": "role", + "content_tag": "content", + "user_tag": "user", + "assistant_tag": "assistant" + } + }, "alpaca_en": { "hf_hub_url": "llamafactory/alpaca_en", "ms_hub_url": "llamafactory/alpaca_en" diff --git a/data/mllm_pt_demo.json b/data/mllm_pt_demo.json new file mode 100644 index 00000000..2ee01ce6 --- /dev/null +++ b/data/mllm_pt_demo.json @@ -0,0 +1,92 @@ +[ + { + "messages": [ + { + "content": "Render a clear and concise summary of the photo.", + "role": "user" + }, + { + "content": "There are two soccer players on the field.", + "role": "assistant" + } + ], + "images": [ + "mllm_demo_data/1.jpg" + ] + }, + { + "messages": [ + { + "content": "Write a terse but informative summary of the picture.", + "role": "user" + }, + { + "content": "A soccer player is sliding on his knees to celebrate", + "role": "assistant" + } + ], + "images": [ + "mllm_demo_data/2.jpg" + ] + }, + { + "messages": [ + { + "content": "What is this?", + "role": "user" + }, + { + "content": "A man is giving a speech.", + "role": "assistant" + } + ], + "images": [ + "mllm_demo_data/3.jpg" + ] + }, + { + "messages": [ + { + "content": "对照片进行简明扼要的概括。", + "role": "user" + }, + { + "content": "两个足球运动员在场上", + "role": "assistant" + } + ], + "images": [ + "mllm_demo_data/1.jpg" + ] + }, + { + "messages": [ + { + "content": "为图片写一个简短但内容丰富的摘要。", + "role": "user" + }, + { + "content": "一个足球运动员在跪地滑行庆祝", + "role": "assistant" + } + ], + "images": [ + "mllm_demo_data/2.jpg" + ] + }, + { + "messages": [ + { + "content": "这是什么?", + "role": "user" + }, + { + "content": "一个男人在演讲", + "role": "assistant" + } + ], + "images": [ + "mllm_demo_data/3.jpg" + ] + } +] \ No newline at end of file diff --git a/src/llamafactory/hparams/model_args.py b/src/llamafactory/hparams/model_args.py index 5885bb09..255051dc 100644 --- a/src/llamafactory/hparams/model_args.py +++ b/src/llamafactory/hparams/model_args.py @@ -85,6 +85,10 @@ class ModelArguments: default=False, metadata={"help": "Whethor or not to use multimodal LLM that accepts visual inputs."}, ) + tune_mm_proj: bool = field( + default=False, + metadata={"help": "Whethor or not only finetune mm_projector for MLLM."}, + ) moe_aux_loss_coef: Optional[float] = field( default=None, metadata={"help": "Coefficient of the auxiliary router loss in mixture-of-experts model."}, diff --git a/src/llamafactory/model/loader.py b/src/llamafactory/model/loader.py index 49b347d5..d9784593 100644 --- a/src/llamafactory/model/loader.py +++ b/src/llamafactory/model/loader.py @@ -163,6 +163,11 @@ def load_model( else: model.train() + if 
model_args.visual_inputs and model_args.tune_mm_proj: + lm_params = [param for name, param in model.named_parameters() if "language_model" in name] + for param in lm_params: + param.requires_grad_(False) + trainable_params, all_param = count_parameters(model) if is_trainable: param_stats = "trainable params: {:d} || all params: {:d} || trainable%: {:.4f}".format( From 765cd370daba83dcbdaec0f99b9a86e7baaa6708 Mon Sep 17 00:00:00 2001 From: BUAADreamer <1428195643@qq.com> Date: Mon, 27 May 2024 18:59:00 +0800 Subject: [PATCH 003/162] add regex of only tune lm and mm_proj Former-commit-id: 38d540b3e69bceabafafab524fcfc78aeb05612d --- sites/paligemma-pt.yaml | 49 ++++++++++++++++++++++++++ sites/paligemma.yaml | 49 ++++++++++++++++++++++++++ sites/paligemma_lora.yaml | 40 +++++++++++++++++++++ src/llamafactory/model/adapter.py | 7 ++++ src/llamafactory/model/loader.py | 5 --- src/llamafactory/model/utils/visual.py | 7 +++- 6 files changed, 151 insertions(+), 6 deletions(-) create mode 100644 sites/paligemma-pt.yaml create mode 100644 sites/paligemma.yaml create mode 100644 sites/paligemma_lora.yaml diff --git a/sites/paligemma-pt.yaml b/sites/paligemma-pt.yaml new file mode 100644 index 00000000..4305cf5f --- /dev/null +++ b/sites/paligemma-pt.yaml @@ -0,0 +1,49 @@ +# model +model_name_or_path: google/paligemma-3b-mix-448 +visual_inputs: true +tune_mm_proj: true +#print_param_status: true + +# method +stage: sft +do_train: true +finetuning_type: full + +# ddp +ddp_timeout: 180000000 +deepspeed: examples/deepspeed/ds_z2_offload_config.json + +# dataset +dataset: mllm_pt_demo +dataset_dir: data +template: gemma +cutoff_len: 2048 +max_samples: 3 +#val_size: 0.0001 +overwrite_cache: true +preprocessing_num_workers: 16 + +# output +output_dir: saves/paligemma/full/sft_llava_pt_test +logging_steps: 1 +save_steps: 50 +plot_loss: true +overwrite_output_dir: true +#save_strategy: epoch +#save_total_limit: 2 + +# train +per_device_train_batch_size: 1 +gradient_accumulation_steps: 16 +learning_rate: 0.00001 +num_train_epochs: 100 +lr_scheduler_type: cosine +warmup_steps: 0.1 +#bf16: true +pure_bf16: true + +# eval +do_eval: false +#per_device_eval_batch_size: 1 +#evaluation_strategy: steps +#eval_steps: 500 diff --git a/sites/paligemma.yaml b/sites/paligemma.yaml new file mode 100644 index 00000000..f3257cfc --- /dev/null +++ b/sites/paligemma.yaml @@ -0,0 +1,49 @@ +# model +model_name_or_path: google/paligemma-3b-mix-448 +visual_inputs: true +#print_param_status: true +use_fast_tokenizer: false + +# method +stage: sft +do_train: true +finetuning_type: full + +# ddp +ddp_timeout: 180000000 +deepspeed: examples/deepspeed/ds_z2_offload_config.json + +# dataset +dataset: mllm_demo +dataset_dir: data +template: gemma +cutoff_len: 2048 +max_samples: 3 +#val_size: 0.0001 +overwrite_cache: true +preprocessing_num_workers: 16 + +# output +output_dir: saves/paligemma/full/sft_llava_1k +logging_steps: 1 +save_steps: 50 +plot_loss: true +overwrite_output_dir: true +#save_strategy: epoch +#save_total_limit: 2 + +# train +per_device_train_batch_size: 1 +gradient_accumulation_steps: 16 +learning_rate: 0.00001 +num_train_epochs: 100 +lr_scheduler_type: cosine +warmup_steps: 0.1 +#bf16: true +pure_bf16: true + +# eval +do_eval: false +#per_device_eval_batch_size: 1 +#evaluation_strategy: steps +#eval_steps: 500 diff --git a/sites/paligemma_lora.yaml b/sites/paligemma_lora.yaml new file mode 100644 index 00000000..0693a6ae --- /dev/null +++ b/sites/paligemma_lora.yaml @@ -0,0 +1,40 @@ +### model +model_name_or_path: 
google/paligemma-3b-mix-448 +visual_inputs: true +use_fast_tokenizer: false + +### method +stage: sft +do_train: true +finetuning_type: lora +lora_target: q_proj,v_proj + +### dataset +dataset: mllm_demo +template: gemma +cutoff_len: 1024 +max_samples: 1000 +overwrite_cache: true +preprocessing_num_workers: 16 + +### output +output_dir: saves/paligemma/lora/sft_mllm +logging_steps: 10 +save_steps: 500 +plot_loss: true +overwrite_output_dir: true + +### train +per_device_train_batch_size: 1 +gradient_accumulation_steps: 8 +learning_rate: 0.0001 +num_train_epochs: 3.0 +lr_scheduler_type: cosine +warmup_steps: 0.1 +fp16: true + +### eval +val_size: 0.1 +per_device_eval_batch_size: 1 +evaluation_strategy: steps +eval_steps: 500 diff --git a/src/llamafactory/model/adapter.py b/src/llamafactory/model/adapter.py index f37f3bbb..015db8a0 100644 --- a/src/llamafactory/model/adapter.py +++ b/src/llamafactory/model/adapter.py @@ -10,6 +10,7 @@ from ..extras.logging import get_logger from .utils.misc import find_all_linear_modules, find_expanded_modules from .utils.quantization import QuantizationMethod from .utils.unsloth import get_unsloth_peft_model, load_unsloth_peft_model +from .utils.visual import filter_vision_tower_linear if TYPE_CHECKING: @@ -58,6 +59,9 @@ def init_adapter( if model_args.visual_inputs and hasattr(model, "vision_tower"): # freeze vision model model.vision_tower.requires_grad_(False) + if model_args.visual_inputs and hasattr(model, "language_model") and model_args.tune_mm_proj: # freeze language model if only tune mm_proj + model.language_model.requires_grad_(False) + if finetuning_args.finetuning_type == "freeze" and is_trainable: logger.info("Fine-tuning method: Freeze") num_layers = ( @@ -180,6 +184,9 @@ def init_adapter( if finetuning_args.use_llama_pro: target_modules = find_expanded_modules(model, target_modules, finetuning_args.num_layer_trainable) + if model_args.visual_inputs: + target_modules = filter_vision_tower_linear(target_modules) + if ( finetuning_args.use_dora and getattr(model, "quantization_method", None) is not None diff --git a/src/llamafactory/model/loader.py b/src/llamafactory/model/loader.py index d9784593..49b347d5 100644 --- a/src/llamafactory/model/loader.py +++ b/src/llamafactory/model/loader.py @@ -163,11 +163,6 @@ def load_model( else: model.train() - if model_args.visual_inputs and model_args.tune_mm_proj: - lm_params = [param for name, param in model.named_parameters() if "language_model" in name] - for param in lm_params: - param.requires_grad_(False) - trainable_params, all_param = count_parameters(model) if is_trainable: param_stats = "trainable params: {:d} || all params: {:d} || trainable%: {:.4f}".format( diff --git a/src/llamafactory/model/utils/visual.py b/src/llamafactory/model/utils/visual.py index c8260b7f..a91777ba 100644 --- a/src/llamafactory/model/utils/visual.py +++ b/src/llamafactory/model/utils/visual.py @@ -1,4 +1,4 @@ -from typing import TYPE_CHECKING, Tuple +from typing import TYPE_CHECKING, Tuple, List import torch import transformers.models @@ -82,3 +82,8 @@ def configure_visual_model(config: "PretrainedConfig") -> None: if getattr(config, "is_yi_vl_derived_model", None): logger.info("Detected Yi-VL model, applying projector patch.") transformers.models.llava.modeling_llava.LlavaMultiModalProjector = LlavaMultiModalProjectorForYiVL + + +def filter_vision_tower_linear(target_modules: List[str]) -> str: + target_modules = f"^(?!.*vision_tower).*(?:{'|'.join(target_modules)}).*" + return target_modules From 
f67e4f14ab22d0c5215e3fd76cad31a2bd3dda74 Mon Sep 17 00:00:00 2001 From: BUAADreamer <1428195643@qq.com> Date: Mon, 27 May 2024 19:00:15 +0800 Subject: [PATCH 004/162] add only tune lm and mm_proj Former-commit-id: ba12ca430ec527fbfe4cd1eace0adb5c7712146a --- sites/paligemma-pt.yaml | 49 --------------------------------------- sites/paligemma.yaml | 49 --------------------------------------- sites/paligemma_lora.yaml | 40 -------------------------------- 3 files changed, 138 deletions(-) delete mode 100644 sites/paligemma-pt.yaml delete mode 100644 sites/paligemma.yaml delete mode 100644 sites/paligemma_lora.yaml diff --git a/sites/paligemma-pt.yaml b/sites/paligemma-pt.yaml deleted file mode 100644 index 4305cf5f..00000000 --- a/sites/paligemma-pt.yaml +++ /dev/null @@ -1,49 +0,0 @@ -# model -model_name_or_path: google/paligemma-3b-mix-448 -visual_inputs: true -tune_mm_proj: true -#print_param_status: true - -# method -stage: sft -do_train: true -finetuning_type: full - -# ddp -ddp_timeout: 180000000 -deepspeed: examples/deepspeed/ds_z2_offload_config.json - -# dataset -dataset: mllm_pt_demo -dataset_dir: data -template: gemma -cutoff_len: 2048 -max_samples: 3 -#val_size: 0.0001 -overwrite_cache: true -preprocessing_num_workers: 16 - -# output -output_dir: saves/paligemma/full/sft_llava_pt_test -logging_steps: 1 -save_steps: 50 -plot_loss: true -overwrite_output_dir: true -#save_strategy: epoch -#save_total_limit: 2 - -# train -per_device_train_batch_size: 1 -gradient_accumulation_steps: 16 -learning_rate: 0.00001 -num_train_epochs: 100 -lr_scheduler_type: cosine -warmup_steps: 0.1 -#bf16: true -pure_bf16: true - -# eval -do_eval: false -#per_device_eval_batch_size: 1 -#evaluation_strategy: steps -#eval_steps: 500 diff --git a/sites/paligemma.yaml b/sites/paligemma.yaml deleted file mode 100644 index f3257cfc..00000000 --- a/sites/paligemma.yaml +++ /dev/null @@ -1,49 +0,0 @@ -# model -model_name_or_path: google/paligemma-3b-mix-448 -visual_inputs: true -#print_param_status: true -use_fast_tokenizer: false - -# method -stage: sft -do_train: true -finetuning_type: full - -# ddp -ddp_timeout: 180000000 -deepspeed: examples/deepspeed/ds_z2_offload_config.json - -# dataset -dataset: mllm_demo -dataset_dir: data -template: gemma -cutoff_len: 2048 -max_samples: 3 -#val_size: 0.0001 -overwrite_cache: true -preprocessing_num_workers: 16 - -# output -output_dir: saves/paligemma/full/sft_llava_1k -logging_steps: 1 -save_steps: 50 -plot_loss: true -overwrite_output_dir: true -#save_strategy: epoch -#save_total_limit: 2 - -# train -per_device_train_batch_size: 1 -gradient_accumulation_steps: 16 -learning_rate: 0.00001 -num_train_epochs: 100 -lr_scheduler_type: cosine -warmup_steps: 0.1 -#bf16: true -pure_bf16: true - -# eval -do_eval: false -#per_device_eval_batch_size: 1 -#evaluation_strategy: steps -#eval_steps: 500 diff --git a/sites/paligemma_lora.yaml b/sites/paligemma_lora.yaml deleted file mode 100644 index 0693a6ae..00000000 --- a/sites/paligemma_lora.yaml +++ /dev/null @@ -1,40 +0,0 @@ -### model -model_name_or_path: google/paligemma-3b-mix-448 -visual_inputs: true -use_fast_tokenizer: false - -### method -stage: sft -do_train: true -finetuning_type: lora -lora_target: q_proj,v_proj - -### dataset -dataset: mllm_demo -template: gemma -cutoff_len: 1024 -max_samples: 1000 -overwrite_cache: true -preprocessing_num_workers: 16 - -### output -output_dir: saves/paligemma/lora/sft_mllm -logging_steps: 10 -save_steps: 500 -plot_loss: true -overwrite_output_dir: true - -### train 
-per_device_train_batch_size: 1 -gradient_accumulation_steps: 8 -learning_rate: 0.0001 -num_train_epochs: 3.0 -lr_scheduler_type: cosine -warmup_steps: 0.1 -fp16: true - -### eval -val_size: 0.1 -per_device_eval_batch_size: 1 -evaluation_strategy: steps -eval_steps: 500 From 136e64081f2a994d61bdbc92fadd90a5ae63a500 Mon Sep 17 00:00:00 2001 From: BUAADreamer <1428195643@qq.com> Date: Mon, 27 May 2024 20:10:31 +0800 Subject: [PATCH 005/162] remove mllm_pt_demo.json Former-commit-id: 5402589f021056f9c9e7b68421282039a508d5b9 --- data/dataset_info.json | 2 +- data/mllm_pt_demo.json | 92 ------------------------------------------ 2 files changed, 1 insertion(+), 93 deletions(-) delete mode 100644 data/mllm_pt_demo.json diff --git a/data/dataset_info.json b/data/dataset_info.json index 5a90e077..1deb3d6d 100644 --- a/data/dataset_info.json +++ b/data/dataset_info.json @@ -39,7 +39,7 @@ } }, "mllm_pt_demo": { - "file_name": "mllm_pt_demo.json", + "hf_hub_url": "BUAADreamer/mllm_pt_demo", "formatting": "sharegpt", "columns": { "messages": "messages", diff --git a/data/mllm_pt_demo.json b/data/mllm_pt_demo.json deleted file mode 100644 index 2ee01ce6..00000000 --- a/data/mllm_pt_demo.json +++ /dev/null @@ -1,92 +0,0 @@ -[ - { - "messages": [ - { - "content": "Render a clear and concise summary of the photo.", - "role": "user" - }, - { - "content": "There are two soccer players on the field.", - "role": "assistant" - } - ], - "images": [ - "mllm_demo_data/1.jpg" - ] - }, - { - "messages": [ - { - "content": "Write a terse but informative summary of the picture.", - "role": "user" - }, - { - "content": "A soccer player is sliding on his knees to celebrate", - "role": "assistant" - } - ], - "images": [ - "mllm_demo_data/2.jpg" - ] - }, - { - "messages": [ - { - "content": "What is this?", - "role": "user" - }, - { - "content": "A man is giving a speech.", - "role": "assistant" - } - ], - "images": [ - "mllm_demo_data/3.jpg" - ] - }, - { - "messages": [ - { - "content": "对照片进行简明扼要的概括。", - "role": "user" - }, - { - "content": "两个足球运动员在场上", - "role": "assistant" - } - ], - "images": [ - "mllm_demo_data/1.jpg" - ] - }, - { - "messages": [ - { - "content": "为图片写一个简短但内容丰富的摘要。", - "role": "user" - }, - { - "content": "一个足球运动员在跪地滑行庆祝", - "role": "assistant" - } - ], - "images": [ - "mllm_demo_data/2.jpg" - ] - }, - { - "messages": [ - { - "content": "这是什么?", - "role": "user" - }, - { - "content": "一个男人在演讲", - "role": "assistant" - } - ], - "images": [ - "mllm_demo_data/3.jpg" - ] - } -] \ No newline at end of file From a72387666319e2e5c7fd62079d1b0c9f96d8d0ad Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Mon, 27 May 2024 20:23:24 +0800 Subject: [PATCH 006/162] support Aya23 Former-commit-id: 071935b90006e2c79e39bb9ee0c5d48c6c910501 --- src/llamafactory/extras/constants.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/llamafactory/extras/constants.py b/src/llamafactory/extras/constants.py index 087612fc..32ca5387 100644 --- a/src/llamafactory/extras/constants.py +++ b/src/llamafactory/extras/constants.py @@ -86,6 +86,19 @@ def register_model_group( VISION_MODELS.add(prefix) +register_model_group( + models={ + "Aya-23-8B-Chat": { + DownloadSource.DEFAULT: "CohereForAI/aya-23-8B", + }, + "Aya-23-35B-Chat": { + DownloadSource.DEFAULT: "CohereForAI/aya-23-35B", + }, + }, + template="cohere", +) + + register_model_group( models={ "Baichuan-7B-Base": { From a3dd6f887c7c268c09265a3c8d9194f0c5ed89b0 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Mon, 27 May 2024 
20:37:57 +0800 Subject: [PATCH 007/162] fix full/freeze tuning for mllm Former-commit-id: df5860ddb593d5b82163a585d12160b41dbce0f3 --- data/dataset_info.json | 28 +++++------ src/llamafactory/hparams/finetuning_args.py | 16 ++++++- src/llamafactory/hparams/model_args.py | 4 -- src/llamafactory/model/adapter.py | 50 +++++++++++++------- src/llamafactory/model/utils/misc.py | 30 +++++------- src/llamafactory/model/utils/quantization.py | 2 + src/llamafactory/model/utils/visual.py | 7 +-- 7 files changed, 76 insertions(+), 61 deletions(-) diff --git a/data/dataset_info.json b/data/dataset_info.json index 0a148431..2d9b0c83 100644 --- a/data/dataset_info.json +++ b/data/dataset_info.json @@ -38,20 +38,6 @@ "assistant_tag": "assistant" } }, - "mllm_pt_demo": { - "hf_hub_url": "BUAADreamer/mllm_pt_demo", - "formatting": "sharegpt", - "columns": { - "messages": "messages", - "images": "images" - }, - "tags": { - "role_tag": "role", - "content_tag": "content", - "user_tag": "user", - "assistant_tag": "assistant" - } - }, "alpaca_en": { "hf_hub_url": "llamafactory/alpaca_en", "ms_hub_url": "llamafactory/alpaca_en" @@ -322,6 +308,20 @@ "assistant_tag": "assistant" } }, + "mllm_pt_demo": { + "hf_hub_url": "BUAADreamer/mllm_pt_demo", + "formatting": "sharegpt", + "columns": { + "messages": "messages", + "images": "images" + }, + "tags": { + "role_tag": "role", + "content_tag": "content", + "user_tag": "user", + "assistant_tag": "assistant" + } + }, "oasst_de": { "hf_hub_url": "mayflowergmbh/oasst_de" }, diff --git a/src/llamafactory/hparams/finetuning_args.py b/src/llamafactory/hparams/finetuning_args.py index 05b246ae..b9322f18 100644 --- a/src/llamafactory/hparams/finetuning_args.py +++ b/src/llamafactory/hparams/finetuning_args.py @@ -311,6 +311,14 @@ class FinetuningArguments(FreezeArguments, LoraArguments, RLHFArguments, GaloreA default=False, metadata={"help": "Whether or not to make only the parameters in the expanded blocks trainable."}, ) + freeze_vision_tower: bool = field( + default=True, + metadata={"help": "Whether ot not to freeze vision tower in MLLM training."}, + ) + train_mm_proj_only: bool = field( + default=False, + metadata={"help": "Whether or not to train the multimodal projector for MLLM only."}, + ) plot_loss: bool = field( default=False, metadata={"help": "Whether or not to save the training loss curves."}, @@ -328,6 +336,7 @@ class FinetuningArguments(FreezeArguments, LoraArguments, RLHFArguments, GaloreA self.lora_target = split_arg(self.lora_target) self.additional_target = split_arg(self.additional_target) self.galore_target = split_arg(self.galore_target) + self.freeze_vision_tower = self.freeze_vision_tower or self.train_mm_proj_only assert self.finetuning_type in ["lora", "freeze", "full"], "Invalid fine-tuning method." assert self.ref_model_quantization_bit in [None, 8, 4], "We only accept 4-bit or 8-bit quantization." 
@@ -345,7 +354,7 @@ class FinetuningArguments(FreezeArguments, LoraArguments, RLHFArguments, GaloreA raise ValueError("`dpo_label_smoothing` is only valid for sigmoid loss function.") if self.use_llama_pro and self.finetuning_type == "full": - raise ValueError("`use_llama_pro` is only valid for the Freeze or LoRA training.") + raise ValueError("`use_llama_pro` is only valid for Freeze or LoRA training.") if self.finetuning_type == "lora" and (self.use_galore or self.use_badam): raise ValueError("Cannot use LoRA with GaLore or BAdam together.") @@ -354,4 +363,7 @@ class FinetuningArguments(FreezeArguments, LoraArguments, RLHFArguments, GaloreA raise ValueError("Cannot use GaLore with BAdam together.") if self.loraplus_lr_ratio is not None and self.finetuning_type != "lora": - raise ValueError("`loraplus_lr_ratio` is only valid for the LoRA training.") + raise ValueError("`loraplus_lr_ratio` is only valid for LoRA training.") + + if self.train_mm_proj_only and self.finetuning_type != "full": + raise ValueError("`train_mm_proj_only` is only valid for full training.") diff --git a/src/llamafactory/hparams/model_args.py b/src/llamafactory/hparams/model_args.py index 781c7e99..650d1c22 100644 --- a/src/llamafactory/hparams/model_args.py +++ b/src/llamafactory/hparams/model_args.py @@ -85,10 +85,6 @@ class ModelArguments: default=False, metadata={"help": "Whethor or not to use multimodal LLM that accepts visual inputs."}, ) - tune_mm_proj: bool = field( - default=False, - metadata={"help": "Whethor or not only finetune mm_projector for MLLM."}, - ) moe_aux_loss_coef: Optional[float] = field( default=None, metadata={"help": "Coefficient of the auxiliary router loss in mixture-of-experts model."}, diff --git a/src/llamafactory/model/adapter.py b/src/llamafactory/model/adapter.py index 015db8a0..a9204ef0 100644 --- a/src/llamafactory/model/adapter.py +++ b/src/llamafactory/model/adapter.py @@ -10,7 +10,6 @@ from ..extras.logging import get_logger from .utils.misc import find_all_linear_modules, find_expanded_modules from .utils.quantization import QuantizationMethod from .utils.unsloth import get_unsloth_peft_model, load_unsloth_peft_model -from .utils.visual import filter_vision_tower_linear if TYPE_CHECKING: @@ -53,21 +52,33 @@ def init_adapter( if finetuning_args.finetuning_type == "full" and is_trainable: logger.info("Fine-tuning method: Full") - if cast_trainable_params_to_fp32: - model = model.float() - if model_args.visual_inputs and hasattr(model, "vision_tower"): # freeze vision model - model.vision_tower.requires_grad_(False) + forbidden_modules = set() + if model_args.visual_inputs and finetuning_args.freeze_vision_tower: + forbidden_modules.add("vision_tower") - if model_args.visual_inputs and hasattr(model, "language_model") and model_args.tune_mm_proj: # freeze language model if only tune mm_proj - model.language_model.requires_grad_(False) + if model_args.visual_inputs and finetuning_args.train_mm_proj_only: + forbidden_modules.add("language_model") + + for name, param in model.named_parameters(): + if not any(forbidden_module in name for forbidden_module in forbidden_modules): + if cast_trainable_params_to_fp32: + param.data = param.data.to(torch.float32) + else: + param.requires_grad_(False) if finetuning_args.finetuning_type == "freeze" and is_trainable: logger.info("Fine-tuning method: Freeze") + + if model_args.visual_inputs: + config = model.config.text_config + else: + config = model.config + num_layers = ( - getattr(model.config, "num_hidden_layers", None) - or 
getattr(model.config, "num_layers", None) - or getattr(model.config, "n_layer", None) + getattr(config, "num_hidden_layers", None) + or getattr(config, "num_layers", None) + or getattr(config, "n_layer", None) ) if not num_layers: raise ValueError("Current model does not support freeze tuning.") @@ -119,16 +130,19 @@ def init_adapter( trainable_layers.append(module_name) + forbidden_modules = set() + if model_args.visual_inputs and finetuning_args.freeze_vision_tower: + forbidden_modules.add("vision_tower") + for name, param in model.named_parameters(): - if any(trainable_layer in name for trainable_layer in trainable_layers): + if any(trainable_layer in name for trainable_layer in trainable_layers) and not any( + forbidden_module in name for forbidden_module in forbidden_modules + ): if cast_trainable_params_to_fp32: param.data = param.data.to(torch.float32) else: param.requires_grad_(False) - if model_args.visual_inputs and hasattr(model, "vision_tower"): # freeze vision model - model.vision_tower.requires_grad_(False) - logger.info("Set trainable layers: {}".format(",".join(map(str, trainable_layer_ids)))) if finetuning_args.finetuning_type == "lora": @@ -177,15 +191,15 @@ def init_adapter( if is_trainable and adapter_to_resume is None: # create new lora weights while training if len(finetuning_args.lora_target) == 1 and finetuning_args.lora_target[0] == "all": - target_modules = find_all_linear_modules(model) + target_modules = find_all_linear_modules(model, finetuning_args.freeze_vision_tower) else: target_modules = finetuning_args.lora_target if finetuning_args.use_llama_pro: - target_modules = find_expanded_modules(model, target_modules, finetuning_args.num_layer_trainable) + target_modules = find_expanded_modules(model, target_modules, finetuning_args.freeze_trainable_layers) - if model_args.visual_inputs: - target_modules = filter_vision_tower_linear(target_modules) + if model_args.visual_inputs and finetuning_args.freeze_vision_tower: + target_modules = "^(?!.*vision_tower).*(?:{}).*".format("|".join(target_modules)) if ( finetuning_args.use_dora diff --git a/src/llamafactory/model/utils/misc.py b/src/llamafactory/model/utils/misc.py index eca68866..4851bd29 100644 --- a/src/llamafactory/model/utils/misc.py +++ b/src/llamafactory/model/utils/misc.py @@ -1,9 +1,6 @@ from typing import TYPE_CHECKING, List -import torch - from ...extras.logging import get_logger -from .quantization import QuantizationMethod if TYPE_CHECKING: @@ -13,29 +10,28 @@ if TYPE_CHECKING: logger = get_logger(__name__) -def find_all_linear_modules(model: "PreTrainedModel") -> List[str]: +def find_all_linear_modules(model: "PreTrainedModel", freeze_vision_tower: bool) -> List[str]: r""" Finds all available modules to apply lora or galore. 
""" - quantization_method = getattr(model, "quantization_method", None) - if quantization_method is None: - linear_cls = torch.nn.Linear - elif quantization_method == QuantizationMethod.BITS_AND_BYTES: - import bitsandbytes as bnb + forbidden_modules = {"lm_head"} - linear_cls = bnb.nn.Linear4bit if getattr(model, "is_loaded_in_4bit", False) else bnb.nn.Linear8bitLt - else: - raise ValueError("Finding linear modules for {} models is not supported.".format(quantization_method)) - - output_layer_names = ["lm_head"] if model.config.model_type == "chatglm": - output_layer_names.append("output_layer") + forbidden_modules.add("output_layer") elif model.config.model_type == "internlm2": - output_layer_names.append("output") + forbidden_modules.add("output") + elif model.config.model_type in ["llava", "paligemma"]: + forbidden_modules.add("multi_modal_projector") + + if freeze_vision_tower: + forbidden_modules.add("vision_tower") module_names = set() for name, module in model.named_modules(): - if isinstance(module, linear_cls) and not any(output_layer in name for output_layer in output_layer_names): + if any(forbidden_module in name for forbidden_module in forbidden_modules): + continue + + if "Linear" in module.__class__.__name__ and "Embedding" not in module.__class__.__name__: module_names.add(name.split(".")[-1]) logger.info("Found linear modules: {}".format(",".join(module_names))) diff --git a/src/llamafactory/model/utils/quantization.py b/src/llamafactory/model/utils/quantization.py index 161ad5aa..02a54f07 100644 --- a/src/llamafactory/model/utils/quantization.py +++ b/src/llamafactory/model/utils/quantization.py @@ -35,6 +35,8 @@ class QuantizationMethod(str, Enum): AWQ = "awq" AQLM = "aqlm" QUANTO = "quanto" + EETQ = "eetq" + HQQ = "hqq" def _get_quantization_dataset(tokenizer: "PreTrainedTokenizer", model_args: "ModelArguments") -> List[str]: diff --git a/src/llamafactory/model/utils/visual.py b/src/llamafactory/model/utils/visual.py index a91777ba..c8260b7f 100644 --- a/src/llamafactory/model/utils/visual.py +++ b/src/llamafactory/model/utils/visual.py @@ -1,4 +1,4 @@ -from typing import TYPE_CHECKING, Tuple, List +from typing import TYPE_CHECKING, Tuple import torch import transformers.models @@ -82,8 +82,3 @@ def configure_visual_model(config: "PretrainedConfig") -> None: if getattr(config, "is_yi_vl_derived_model", None): logger.info("Detected Yi-VL model, applying projector patch.") transformers.models.llava.modeling_llava.LlavaMultiModalProjector = LlavaMultiModalProjectorForYiVL - - -def filter_vision_tower_linear(target_modules: List[str]) -> str: - target_modules = f"^(?!.*vision_tower).*(?:{'|'.join(target_modules)}).*" - return target_modules From db745355bb0aaa5a132ccc52929c26eeddbe86c4 Mon Sep 17 00:00:00 2001 From: Jianbai Ye Date: Mon, 27 May 2024 20:42:08 +0800 Subject: [PATCH 008/162] add openchat-3.6-8B support Former-commit-id: b66f39d50d896d7597a1506e67ec210b31c9b700 --- src/llamafactory/data/template.py | 17 +++++++++++++++++ src/llamafactory/extras/constants.py | 9 +++++++++ 2 files changed, 26 insertions(+) diff --git a/src/llamafactory/data/template.py b/src/llamafactory/data/template.py index 979390ce..eba23271 100644 --- a/src/llamafactory/data/template.py +++ b/src/llamafactory/data/template.py @@ -756,6 +756,23 @@ _register_template( force_system=True, ) +_register_template( + name="openchat-3.6", + format_user=StringFormatter( + slots=[ + ( + "<|start_header_id|>GPT4 Correct User<|end_header_id|>\n\n{{content}}<|eot_id|>" + "<|start_header_id|>GPT4 Correct 
Assistant<|end_header_id|>\n\n" + ) + ] + ), + format_system=StringFormatter( + slots=[{"bos_token"}, "<|start_header_id|>System<|end_header_id|>\n\n{{content}}<|eot_id|>"] + ), + stop_words=["<|eot_id|>"], + replace_eos=True, +) + _register_template( name="orion", diff --git a/src/llamafactory/extras/constants.py b/src/llamafactory/extras/constants.py index 087612fc..503df641 100644 --- a/src/llamafactory/extras/constants.py +++ b/src/llamafactory/extras/constants.py @@ -706,6 +706,15 @@ register_model_group( template="openchat", ) +register_model_group( + models={ + "OpenChat3.6-8B-Chat": { + DownloadSource.DEFAULT: "openchat/openchat-3.6-8b-20240522", + } + }, + template="openchat-3.6", +) + register_model_group( models={ From 234b4a4f2e60788e86aaee5f02568eafb8716543 Mon Sep 17 00:00:00 2001 From: hoshi-hiyouga Date: Mon, 27 May 2024 20:51:26 +0800 Subject: [PATCH 009/162] Update template.py Former-commit-id: af869e4c48eb426c4078415533f6dab89123a9d8 --- src/llamafactory/data/template.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/llamafactory/data/template.py b/src/llamafactory/data/template.py index eba23271..3d719e5b 100644 --- a/src/llamafactory/data/template.py +++ b/src/llamafactory/data/template.py @@ -761,16 +761,15 @@ _register_template( format_user=StringFormatter( slots=[ ( - "<|start_header_id|>GPT4 Correct User<|end_header_id|>\n\n{{content}}<|eot_id|>" + "<|start_header_id|>GPT4 Correct User<|end_header_id|>\n\n{{content}}<|eot_id|>", "<|start_header_id|>GPT4 Correct Assistant<|end_header_id|>\n\n" ) ] ), - format_system=StringFormatter( - slots=[{"bos_token"}, "<|start_header_id|>System<|end_header_id|>\n\n{{content}}<|eot_id|>"] - ), + format_system=StringFormatter(slots=[{"bos_token"}, "{{content}}"]), stop_words=["<|eot_id|>"], replace_eos=True, + force_system=True, ) From f734d04f41e3453580dac5183511b3d4974cc271 Mon Sep 17 00:00:00 2001 From: hoshi-hiyouga Date: Mon, 27 May 2024 20:51:56 +0800 Subject: [PATCH 010/162] Update template.py Former-commit-id: f4dabce0a71c9978e051e70886941b64b928ffe2 --- src/llamafactory/data/template.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llamafactory/data/template.py b/src/llamafactory/data/template.py index 3d719e5b..5a12524d 100644 --- a/src/llamafactory/data/template.py +++ b/src/llamafactory/data/template.py @@ -761,7 +761,7 @@ _register_template( format_user=StringFormatter( slots=[ ( - "<|start_header_id|>GPT4 Correct User<|end_header_id|>\n\n{{content}}<|eot_id|>", + "<|start_header_id|>GPT4 Correct User<|end_header_id|>\n\n{{content}}<|eot_id|>" "<|start_header_id|>GPT4 Correct Assistant<|end_header_id|>\n\n" ) ] From 7e9372bb2f685be2a68253356cc97a1d31009337 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Mon, 27 May 2024 20:54:26 +0800 Subject: [PATCH 011/162] tiny fix Former-commit-id: 4c47b3dcef9e400a1c35fce1ad53619a0a86fe81 --- src/llamafactory/data/template.py | 3 +-- src/llamafactory/extras/constants.py | 1 + 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/llamafactory/data/template.py b/src/llamafactory/data/template.py index 5a12524d..8332b0cb 100644 --- a/src/llamafactory/data/template.py +++ b/src/llamafactory/data/template.py @@ -742,7 +742,6 @@ _register_template( _register_template( name="olmo", format_user=StringFormatter(slots=["<|user|>\n{{content}}<|assistant|>\n"]), - format_assistant=StringFormatter(slots=["{{content}}", {"eos_token"}]), format_system=StringFormatter(slots=[{"eos_token"}, "{{content}}"]), 
force_system=True, ) @@ -751,11 +750,11 @@ _register_template( _register_template( name="openchat", format_user=StringFormatter(slots=["GPT4 Correct User: {{content}}", {"eos_token"}, "GPT4 Correct Assistant:"]), - format_assistant=StringFormatter(slots=["{{content}}", {"eos_token"}]), format_system=StringFormatter(slots=[{"bos_token"}, "{{content}}"]), force_system=True, ) + _register_template( name="openchat-3.6", format_user=StringFormatter( diff --git a/src/llamafactory/extras/constants.py b/src/llamafactory/extras/constants.py index 3cb221bf..d1fcd4f6 100644 --- a/src/llamafactory/extras/constants.py +++ b/src/llamafactory/extras/constants.py @@ -719,6 +719,7 @@ register_model_group( template="openchat", ) + register_model_group( models={ "OpenChat3.6-8B-Chat": { From 7aae43aa0e977ce99cd1f0829fa40a767b40a95e Mon Sep 17 00:00:00 2001 From: Yimi81 <1548222878@qq.com> Date: Mon, 27 May 2024 13:11:25 +0000 Subject: [PATCH 012/162] fix yi template Former-commit-id: b3669c8989c3adda305416245e32e9e5a3b7caac --- src/llamafactory/data/template.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/llamafactory/data/template.py b/src/llamafactory/data/template.py index 8332b0cb..f13b303b 100644 --- a/src/llamafactory/data/template.py +++ b/src/llamafactory/data/template.py @@ -872,6 +872,7 @@ _register_template( _register_template( name="yi", format_user=StringFormatter(slots=["<|im_start|>user\n{{content}}<|im_end|>\n<|im_start|>assistant\n"]), + format_system=StringFormatter(slots=["<|im_start|>system\n{{content}}<|im_end|>\n"]), format_separator=EmptyFormatter(slots=["\n"]), stop_words=["<|im_end|>"], replace_eos=True, From 60f6fc39351867add400d767ca9886f7dc487bcb Mon Sep 17 00:00:00 2001 From: MengqingCao Date: Tue, 28 May 2024 01:33:54 +0000 Subject: [PATCH 013/162] add Ascend npu doc and dependency Former-commit-id: 803d9f142a294f8c1e0b4e2046c214b0857ccfd6 --- README.md | 20 +++++++++++++++++++- README_zh.md | 20 +++++++++++++++++++- setup.py | 1 + 3 files changed, 39 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index fcc96882..ca70110f 100644 --- a/README.md +++ b/README.md @@ -347,7 +347,25 @@ To enable FlashAttention-2 on the Windows platform, you need to install the prec Join [NPU user group](assets/wechat_npu.jpg). -To utilize Ascend NPU devices for (distributed) training and inference, you need to install the **[torch-npu](https://gitee.com/ascend/pytorch)** library and the **[Ascend CANN Kernels](https://www.hiascend.com/developer/download/community/result?module=cann)**. +Use `pip install -e .[torch_npu]` to install LLaMA-Factory with **[torch-npu](https://gitee.com/ascend/pytorch)** library. + +To utilize Ascend NPU devices for (distributed) training and inference, you need to install the **[Ascend CANN Toolkit and Kernels](https://www.hiascend.com/developer/download/community/result?module=cann)**. 
You can follow chapter **[install CANN](https://www.hiascend.com/document/detail/zh/CANNCommunityEdition/80RC2alpha002/quickstart/quickstart/quickstart_18_0004.html)** in the installation tutorial to install CANN Toolkit and the kernels, or use the fast installation as following: + +```bash +# replace the url according to your choice +# install CANN Toolkit +wget https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/Milan-ASL/Milan-ASL%20V100R001C17SPC701/Ascend-cann-toolkit_8.0.RC1.alpha001_linux-"$(uname -i)".run +chmod +x Ascend-cann-toolkit_8.0.RC1.alpha001_linux-"$(uname -i)".run +./Ascend-cann-toolkit_8.0.RC1.alpha001_linux-"$(uname -i)".run --install + +# install CANN Kernels +wget https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/Milan-ASL/Milan-ASL%20V100R001C18B800TP015/Ascend-cann-kernels-910b_8.0.RC1.alpha001_linux.run +chmod +x Ascend-cann-kernels-910b_8.0.RC1.alpha001_linux.run +./Ascend-cann-kernels-910b_8.0.RC1.alpha001_linux.run --install + +# set env variables +source /usr/local/Ascend/ascend-toolkit/set_env.sh +``` | Requirement | Minimum | Recommend | | ------------ | ------- | --------- | diff --git a/README_zh.md b/README_zh.md index 2e0b4f34..4f659588 100644 --- a/README_zh.md +++ b/README_zh.md @@ -347,7 +347,25 @@ pip install https://github.com/jllllll/bitsandbytes-windows-webui/releases/downl 加入 [NPU 用户群](assets/wechat_npu.jpg)。 -如果使用昇腾 NPU 设备进行(分布式)训练或推理,需要安装 **[torch-npu](https://gitee.com/ascend/pytorch)** 库和 **[Ascend CANN Kernels](https://www.hiascend.com/developer/download/community/result?module=cann)**。 +使用 `pip install -e .[torch_npu]` 来安装带有 **[torch-npu](https://gitee.com/ascend/pytorch)** 的 LLaMA-Factory。 + +如果使用昇腾 NPU 设备进行(分布式)训练或推理,需要安装 **[Ascend CANN Toolkit and Kernels](https://www.hiascend.com/developer/download/community/result?module=cann)**。可以参考安装教程中的**[安装 CANN](https://www.hiascend.com/document/detail/zh/CANNCommunityEdition/80RC2alpha002/quickstart/quickstart/quickstart_18_0004.html)**章节来进行安装,或使用以下快速安装指令: + +```bash +# 请替换 URL 为你需要的 CANN 版本对应的 URL +# 安装 CANN Toolkit +wget https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/Milan-ASL/Milan-ASL%20V100R001C17SPC701/Ascend-cann-toolkit_8.0.RC1.alpha001_linux-"$(uname -i)".run +chmod +x Ascend-cann-toolkit_8.0.RC1.alpha001_linux-"$(uname -i)".run +./Ascend-cann-toolkit_8.0.RC1.alpha001_linux-"$(uname -i)".run --install + +# 安装 CANN Kernels +wget https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/Milan-ASL/Milan-ASL%20V100R001C18B800TP015/Ascend-cann-kernels-910b_8.0.RC1.alpha001_linux.run +chmod +x Ascend-cann-kernels-910b_8.0.RC1.alpha001_linux.run +./Ascend-cann-kernels-910b_8.0.RC1.alpha001_linux.run --install + +# 设置环境变量 +source /usr/local/Ascend/ascend-toolkit/set_env.sh +``` | 依赖项 | 至少 | 推荐 | | ------------ | ------- | --------- | diff --git a/setup.py b/setup.py index 4d948450..87620c37 100644 --- a/setup.py +++ b/setup.py @@ -33,6 +33,7 @@ extra_require = { "qwen": ["tiktoken", "transformers_stream_generator"], "modelscope": ["modelscope"], "quality": ["ruff"], + "torch_npu": ["torch==2.2.0", "torch_npu==2.2.0", "decorator"] } From f867958f91fef88ddd91dcf275d83f204497a84f Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Tue, 28 May 2024 13:44:22 +0800 Subject: [PATCH 014/162] fix #3931 Former-commit-id: 47e0072416b545d9718af4fa266a83f747b9a4f7 --- .dockerignore | 2 ++ README.md | 2 +- README_zh.md | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/.dockerignore b/.dockerignore index ce67d58a..2ac0e11d 100644 --- a/.dockerignore +++ b/.dockerignore @@ -4,6 
+4,8 @@ .venv cache data +hf_cache +output examples .dockerignore .gitattributes diff --git a/README.md b/README.md index 78312e07..974b30d0 100644 --- a/README.md +++ b/README.md @@ -45,7 +45,7 @@ Choose your path: ## Features - **Various models**: LLaMA, LLaVA, Mistral, Mixtral-MoE, Qwen, Yi, Gemma, Baichuan, ChatGLM, Phi, etc. -- **Integrated methods**: (Continuous) pre-training, (multimodal) supervised fine-tuning, reward modeling, PPO, DPO, KTO and ORPO. +- **Integrated methods**: (Continuous) pre-training, (multimodal) supervised fine-tuning, reward modeling, PPO, DPO, KTO, ORPO, etc. - **Scalable resources**: 32-bit full-tuning, 16-bit freeze-tuning, 16-bit LoRA and 2/4/8-bit QLoRA via AQLM/AWQ/GPTQ/LLM.int8. - **Advanced algorithms**: GaLore, BAdam, DoRA, LongLoRA, LLaMA Pro, Mixture-of-Depths, LoRA+, LoftQ and Agent tuning. - **Practical tricks**: FlashAttention-2, Unsloth, RoPE scaling, NEFTune and rsLoRA. diff --git a/README_zh.md b/README_zh.md index 5acf3dd1..7106bbab 100644 --- a/README_zh.md +++ b/README_zh.md @@ -45,7 +45,7 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/ec36a9dd-37f4-4f72-81bd ## 项目特色 - **多种模型**:LLaMA、LLaVA、Mistral、Mixtral-MoE、Qwen、Yi、Gemma、Baichuan、ChatGLM、Phi 等等。 -- **集成方法**:(增量)预训练、(多模态)指令监督微调、奖励模型训练、PPO 训练、DPO 训练、KTO 训练和 ORPO 训练。 +- **集成方法**:(增量)预训练、(多模态)指令监督微调、奖励模型训练、PPO 训练、DPO 训练、KTO 训练、ORPO 训练等等。 - **多种精度**:32 比特全参数微调、16 比特冻结微调、16 比特 LoRA 微调和基于 AQLM/AWQ/GPTQ/LLM.int8 的 2/4/8 比特 QLoRA 微调。 - **先进算法**:GaLore、BAdam、DoRA、LongLoRA、LLaMA Pro、Mixture-of-Depths、LoRA+、LoftQ 和 Agent 微调。 - **实用技巧**:FlashAttention-2、Unsloth、RoPE scaling、NEFTune 和 rsLoRA。 From dbd4ba35c4645f53da0f1da7474f889ef29e2b39 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Tue, 28 May 2024 16:19:56 +0800 Subject: [PATCH 015/162] update readme Former-commit-id: 30ef8ee1e86136f38f105b67f70c417d20552f41 --- README.md | 15 ++++++++++++--- README_zh.md | 15 ++++++++++++--- 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 974b30d0..9a4bd934 100644 --- a/README.md +++ b/README.md @@ -174,9 +174,9 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/ | [Yuan](https://huggingface.co/IEITYuan) | 2B/51B/102B | q_proj,v_proj | yuan | > [!NOTE] -> **Default module** is used for the `--lora_target` argument, you can use `--lora_target all` to specify all the available modules for better convergence. +> **Default module** is used for the `lora_target` argument, you can use `lora_target: all` to specify all the available modules for better convergence. > -> For the "base" models, the `--template` argument can be chosen from `default`, `alpaca`, `vicuna` etc. But make sure to use the **corresponding template** for the "instruct/chat" models. +> For the "base" models, the `template` argument can be chosen from `default`, `alpaca`, `vicuna` etc. But make sure to use the **corresponding template** for the "instruct/chat" models. > > Remember to use the **SAME** template in training and inference. @@ -448,7 +448,16 @@ If you have trouble with downloading models and datasets from Hugging Face, you export USE_MODELSCOPE_HUB=1 # `set USE_MODELSCOPE_HUB=1` for Windows ``` -Train the model by specifying a model ID of the ModelScope Hub as the `--model_name_or_path`. You can find a full list of model IDs at [ModelScope Hub](https://modelscope.cn/models), e.g., `LLM-Research/Meta-Llama-3-8B-Instruct`. +Train the model by specifying a model ID of the ModelScope Hub as the `model_name_or_path`. 
You can find a full list of model IDs at [ModelScope Hub](https://modelscope.cn/models), e.g., `LLM-Research/Meta-Llama-3-8B-Instruct`. + +### Use W&B Logging + +To use [Weights & Biases](https://wandb.ai) for logging experimental results, you need to add the following arguments. + +```yaml +report_to: wandb +run_name: test_run # optional +``` ## Projects using LLaMA Factory diff --git a/README_zh.md b/README_zh.md index 7106bbab..73426a7f 100644 --- a/README_zh.md +++ b/README_zh.md @@ -174,9 +174,9 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/ec36a9dd-37f4-4f72-81bd | [Yuan](https://huggingface.co/IEITYuan) | 2B/51B/102B | q_proj,v_proj | yuan | > [!NOTE] -> **默认模块**应作为 `--lora_target` 参数的默认值,可使用 `--lora_target all` 参数指定全部模块以取得更好的效果。 +> **默认模块**应作为 `lora_target` 参数的默认值,可使用 `lora_target: all` 参数指定全部模块以取得更好的效果。 > -> 对于所有“基座”(Base)模型,`--template` 参数可以是 `default`, `alpaca`, `vicuna` 等任意值。但“对话”(Instruct/Chat)模型请务必使用**对应的模板**。 +> 对于所有“基座”(Base)模型,`template` 参数可以是 `default`, `alpaca`, `vicuna` 等任意值。但“对话”(Instruct/Chat)模型请务必使用**对应的模板**。 > > 请务必在训练和推理时使用**完全一致**的模板。 @@ -446,7 +446,16 @@ CUDA_VISIBLE_DEVICES=0,1 API_PORT=8000 llamafactory-cli api examples/inference/l export USE_MODELSCOPE_HUB=1 # Windows 使用 `set USE_MODELSCOPE_HUB=1` ``` -将 `--model_name_or_path` 设置为模型 ID 来加载对应的模型。在[魔搭社区](https://modelscope.cn/models)查看所有可用的模型,例如 `LLM-Research/Meta-Llama-3-8B-Instruct`。 +将 `model_name_or_path` 设置为模型 ID 来加载对应的模型。在[魔搭社区](https://modelscope.cn/models)查看所有可用的模型,例如 `LLM-Research/Meta-Llama-3-8B-Instruct`。 + +### 使用 W&B 面板 + +若要使用 [Weights & Biases](https://wandb.ai) 记录实验数据,请添加下面的参数。 + +```yaml +report_to: wandb +run_name: test_run # 可选 +``` ## 使用了 LLaMA Factory 的项目 From 1d5f6960060f7d8d4433e32e02312e94c15412f7 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Tue, 28 May 2024 16:41:34 +0800 Subject: [PATCH 016/162] update readme Former-commit-id: e2c7de1b5147801b301cfc5da0e2866273da18f5 --- README.md | 3 ++- README_zh.md | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 9a4bd934..b9225a94 100644 --- a/README.md +++ b/README.md @@ -8,9 +8,10 @@ [![GitHub pull request](https://img.shields.io/badge/PRs-welcome-blue)](https://github.com/hiyouga/LLaMA-Factory/pulls) [![Discord](https://dcbadge.vercel.app/api/server/rKfvV9r9FK?compact=true&style=flat)](https://discord.gg/rKfvV9r9FK) [![Twitter](https://img.shields.io/twitter/follow/llamafactory_ai)](https://twitter.com/llamafactory_ai) +[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1eRTPn37ltBbYsISy9Aw2NuI2Aq5CQrD9?usp=sharing) +[![Open in DSW](https://gallery.pai-ml.com/assets/open-in-dsw.svg)](https://gallery.pai-ml.com/#/preview/deepLearning/nlp/llama_factory) [![Spaces](https://img.shields.io/badge/🤗-Open%20in%20Spaces-blue)](https://huggingface.co/spaces/hiyouga/LLaMA-Board) [![Studios](https://img.shields.io/badge/ModelScope-Open%20in%20Studios-blue)](https://modelscope.cn/studios/hiyouga/LLaMA-Board) -[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1eRTPn37ltBbYsISy9Aw2NuI2Aq5CQrD9?usp=sharing) [![GitHub Tread](https://trendshift.io/api/badge/repositories/4535)](https://trendshift.io/repositories/4535) diff --git a/README_zh.md b/README_zh.md index 73426a7f..0f47b88e 100644 --- a/README_zh.md +++ b/README_zh.md @@ -8,9 +8,10 @@ [![GitHub pull 
request](https://img.shields.io/badge/PRs-welcome-blue)](https://github.com/hiyouga/LLaMA-Factory/pulls) [![Discord](https://dcbadge.vercel.app/api/server/rKfvV9r9FK?compact=true&style=flat)](https://discord.gg/rKfvV9r9FK) [![Twitter](https://img.shields.io/twitter/follow/llamafactory_ai)](https://twitter.com/llamafactory_ai) +[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1d5KQtbemerlSDSxZIfAaWXhKr30QypiK?usp=sharing) +[![Open in DSW](https://gallery.pai-ml.com/assets/open-in-dsw.svg)](https://gallery.pai-ml.com/#/preview/deepLearning/nlp/llama_factory) [![Spaces](https://img.shields.io/badge/🤗-Open%20in%20Spaces-blue)](https://huggingface.co/spaces/hiyouga/LLaMA-Board) [![Studios](https://img.shields.io/badge/ModelScope-Open%20in%20Studios-blue)](https://modelscope.cn/studios/hiyouga/LLaMA-Board) -[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1d5KQtbemerlSDSxZIfAaWXhKr30QypiK?usp=sharing) [![GitHub Tread](https://trendshift.io/api/badge/repositories/4535)](https://trendshift.io/repositories/4535) From edbc4bdac43fcdcdee0565cb5bc0ef7257bd4544 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Tue, 28 May 2024 19:24:22 +0800 Subject: [PATCH 017/162] support DDP in webui Former-commit-id: d059262ff8dc857f597d2657546ec625726a664a --- examples/README.md | 16 +++++++----- examples/README_zh.md | 16 +++++++----- examples/accelerate/master_config.yaml | 18 ------------- examples/accelerate/single_config.yaml | 16 ------------ examples/accelerate/slave_config.yaml | 18 ------------- examples/full_multi_gpu/multi_node.sh | 15 ----------- examples/full_multi_gpu/predict.sh | 5 ---- examples/full_multi_gpu/single_node.sh | 15 ----------- examples/lora_multi_gpu/ds_zero3.sh | 15 ----------- examples/lora_multi_gpu/multi_node.sh | 6 ----- examples/lora_multi_gpu/single_node.sh | 5 ---- examples/lora_multi_npu/ds_zero0.sh | 15 ----------- src/llamafactory/cli.py | 36 +++++++++++++++++++++++++- src/llamafactory/extras/misc.py | 17 +++++++++--- src/llamafactory/hparams/parser.py | 5 ---- src/llamafactory/launcher.py | 9 +++++++ src/llamafactory/webui/locales.py | 5 ---- src/llamafactory/webui/runner.py | 9 ++----- src/llamafactory/webui/utils.py | 3 +-- 19 files changed, 78 insertions(+), 166 deletions(-) delete mode 100644 examples/accelerate/master_config.yaml delete mode 100644 examples/accelerate/single_config.yaml delete mode 100644 examples/accelerate/slave_config.yaml delete mode 100644 examples/full_multi_gpu/multi_node.sh delete mode 100644 examples/full_multi_gpu/predict.sh delete mode 100644 examples/full_multi_gpu/single_node.sh delete mode 100644 examples/lora_multi_gpu/ds_zero3.sh delete mode 100644 examples/lora_multi_gpu/multi_node.sh delete mode 100644 examples/lora_multi_gpu/single_node.sh delete mode 100644 examples/lora_multi_npu/ds_zero0.sh create mode 100644 src/llamafactory/launcher.py diff --git a/examples/README.md b/examples/README.md index 9c6d5fb0..727b27c8 100644 --- a/examples/README.md +++ b/examples/README.md @@ -110,19 +110,20 @@ CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/qlora_single_gpu/llama3_l #### Supervised Fine-Tuning with Accelerate on Single Node ```bash -bash examples/lora_multi_gpu/single_node.sh +CUDA_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train examples/lora_multi_gpu/llama3_lora_sft.yaml ``` #### Supervised Fine-Tuning with Accelerate on Multiple Nodes ```bash -bash 
examples/lora_multi_gpu/multi_node.sh +CUDA_VISIBLE_DEVICES=0,1,2,3 NNODES=2 RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/lora_multi_gpu/llama3_lora_sft.yaml +CUDA_VISIBLE_DEVICES=0,1,2,3 NNODES=2 RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/lora_multi_gpu/llama3_lora_sft.yaml ``` #### Supervised Fine-Tuning with DeepSpeed ZeRO-3 (Weight Sharding) ```bash -bash examples/lora_multi_gpu/ds_zero3.sh +CUDA_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train examples/lora_multi_gpu/llama3_lora_sft_ds.yaml ``` ### LoRA Fine-Tuning on Multiple NPUs @@ -130,7 +131,7 @@ bash examples/lora_multi_gpu/ds_zero3.sh #### Supervised Fine-Tuning with DeepSpeed ZeRO-0 ```bash -bash examples/lora_multi_npu/ds_zero0.sh +ASCEND_RT_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train examples/lora_multi_npu/llama3_lora_sft_ds.yaml ``` ### Full-Parameter Fine-Tuning on Multiple GPUs @@ -138,19 +139,20 @@ bash examples/lora_multi_npu/ds_zero0.sh #### Supervised Fine-Tuning with Accelerate on Single Node ```bash -bash examples/full_multi_gpu/single_node.sh +CUDA_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train examples/full_multi_gpu/llama3_full_sft.yaml ``` #### Supervised Fine-Tuning with Accelerate on Multiple Nodes ```bash -bash examples/full_multi_gpu/multi_node.sh +CUDA_VISIBLE_DEVICES=0,1,2,3 NNODES=2 RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/full_multi_gpu/llama3_full_sft.yaml +CUDA_VISIBLE_DEVICES=0,1,2,3 NNODES=2 RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/full_multi_gpu/llama3_full_sft.yaml ``` #### Batch Predicting and Computing BLEU and ROUGE Scores ```bash -bash examples/full_multi_gpu/predict.sh +CUDA_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train examples/full_multi_gpu/llama3_full_predict.yaml ``` ### Merging LoRA Adapters and Quantization diff --git a/examples/README_zh.md b/examples/README_zh.md index 0ff33398..6974faa9 100644 --- a/examples/README_zh.md +++ b/examples/README_zh.md @@ -110,19 +110,20 @@ CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/qlora_single_gpu/llama3_l #### 使用 Accelerate 进行单节点训练 ```bash -bash examples/lora_multi_gpu/single_node.sh +CUDA_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train examples/lora_multi_gpu/llama3_lora_sft.yaml ``` #### 使用 Accelerate 进行多节点训练 ```bash -bash examples/lora_multi_gpu/multi_node.sh +CUDA_VISIBLE_DEVICES=0,1,2,3 NNODES=2 RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/lora_multi_gpu/llama3_lora_sft.yaml +CUDA_VISIBLE_DEVICES=0,1,2,3 NNODES=2 RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/lora_multi_gpu/llama3_lora_sft.yaml ``` #### 使用 DeepSpeed ZeRO-3 平均分配显存 ```bash -bash examples/lora_multi_gpu/ds_zero3.sh +CUDA_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train examples/lora_multi_gpu/llama3_lora_sft_ds.yaml ``` ### 多 NPU LoRA 微调 @@ -130,7 +131,7 @@ bash examples/lora_multi_gpu/ds_zero3.sh #### 使用 DeepSpeed ZeRO-0 训练 ```bash -bash examples/lora_multi_npu/ds_zero0.sh +ASCEND_RT_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train examples/lora_multi_npu/llama3_lora_sft_ds.yaml ``` ### 多 GPU 全参数微调 @@ -138,19 +139,20 @@ bash examples/lora_multi_npu/ds_zero0.sh #### 使用 DeepSpeed 进行单节点训练 ```bash -bash examples/full_multi_gpu/single_node.sh +CUDA_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train examples/full_multi_gpu/llama3_full_sft.yaml ``` #### 使用 DeepSpeed 进行多节点训练 ```bash -bash examples/full_multi_gpu/multi_node.sh +CUDA_VISIBLE_DEVICES=0,1,2,3 NNODES=2 
RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/full_multi_gpu/llama3_full_sft.yaml +CUDA_VISIBLE_DEVICES=0,1,2,3 NNODES=2 RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/full_multi_gpu/llama3_full_sft.yaml ``` #### 批量预测并计算 BLEU 和 ROUGE 分数 ```bash -bash examples/full_multi_gpu/predict.sh +CUDA_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train examples/full_multi_gpu/llama3_full_predict.yaml ``` ### 合并 LoRA 适配器与模型量化 diff --git a/examples/accelerate/master_config.yaml b/examples/accelerate/master_config.yaml deleted file mode 100644 index a1018313..00000000 --- a/examples/accelerate/master_config.yaml +++ /dev/null @@ -1,18 +0,0 @@ -compute_environment: LOCAL_MACHINE -debug: false -distributed_type: MULTI_GPU -downcast_bf16: 'no' -gpu_ids: all -machine_rank: 0 -main_process_ip: 192.168.0.1 -main_process_port: 29555 -main_training_function: main -mixed_precision: fp16 -num_machines: 2 # the number of nodes -num_processes: 8 # the number of GPUs in all nodes -rdzv_backend: static -same_network: true -tpu_env: [] -tpu_use_cluster: false -tpu_use_sudo: false -use_cpu: false diff --git a/examples/accelerate/single_config.yaml b/examples/accelerate/single_config.yaml deleted file mode 100644 index 97f8c633..00000000 --- a/examples/accelerate/single_config.yaml +++ /dev/null @@ -1,16 +0,0 @@ -compute_environment: LOCAL_MACHINE -debug: false -distributed_type: MULTI_GPU -downcast_bf16: 'no' -gpu_ids: all -machine_rank: 0 -main_training_function: main -mixed_precision: fp16 -num_machines: 1 # the number of nodes -num_processes: 4 # the number of GPUs in all nodes -rdzv_backend: static -same_network: true -tpu_env: [] -tpu_use_cluster: false -tpu_use_sudo: false -use_cpu: false diff --git a/examples/accelerate/slave_config.yaml b/examples/accelerate/slave_config.yaml deleted file mode 100644 index e610fd0e..00000000 --- a/examples/accelerate/slave_config.yaml +++ /dev/null @@ -1,18 +0,0 @@ -compute_environment: LOCAL_MACHINE -debug: false -distributed_type: MULTI_GPU -downcast_bf16: 'no' -gpu_ids: all -machine_rank: 1 -main_process_ip: 192.168.0.1 -main_process_port: 29555 -main_training_function: main -mixed_precision: fp16 -num_machines: 2 # the number of nodes -num_processes: 8 # the number of GPUs in all nodes -rdzv_backend: static -same_network: true -tpu_env: [] -tpu_use_cluster: false -tpu_use_sudo: false -use_cpu: false diff --git a/examples/full_multi_gpu/multi_node.sh b/examples/full_multi_gpu/multi_node.sh deleted file mode 100644 index 34c038d4..00000000 --- a/examples/full_multi_gpu/multi_node.sh +++ /dev/null @@ -1,15 +0,0 @@ -#!/bin/bash - -NPROC_PER_NODE=4 -NNODES=2 -RANK=0 -MASTER_ADDR=192.168.0.1 -MASTER_PORT=29500 - -CUDA_VISIBLE_DEVICES=0,1,2,3 torchrun \ - --nproc_per_node $NPROC_PER_NODE \ - --nnodes $NNODES \ - --node_rank $RANK \ - --master_addr $MASTER_ADDR \ - --master_port $MASTER_PORT \ - src/train.py examples/full_multi_gpu/llama3_full_sft.yaml diff --git a/examples/full_multi_gpu/predict.sh b/examples/full_multi_gpu/predict.sh deleted file mode 100644 index 2445f444..00000000 --- a/examples/full_multi_gpu/predict.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/bash - -CUDA_VISIBLE_DEVICES=0,1,2,3 accelerate launch \ - --config_file examples/accelerate/single_config.yaml \ - src/train.py examples/full_multi_gpu/llama3_full_predict.yaml diff --git a/examples/full_multi_gpu/single_node.sh b/examples/full_multi_gpu/single_node.sh deleted file mode 100644 index ac29c097..00000000 --- 
a/examples/full_multi_gpu/single_node.sh +++ /dev/null @@ -1,15 +0,0 @@ -#!/bin/bash - -NPROC_PER_NODE=4 -NNODES=1 -RANK=0 -MASTER_ADDR=127.0.0.1 -MASTER_PORT=29500 - -CUDA_VISIBLE_DEVICES=0,1,2,3 torchrun \ - --nproc_per_node $NPROC_PER_NODE \ - --nnodes $NNODES \ - --node_rank $RANK \ - --master_addr $MASTER_ADDR \ - --master_port $MASTER_PORT \ - src/train.py examples/full_multi_gpu/llama3_full_sft.yaml diff --git a/examples/lora_multi_gpu/ds_zero3.sh b/examples/lora_multi_gpu/ds_zero3.sh deleted file mode 100644 index 90ea00dd..00000000 --- a/examples/lora_multi_gpu/ds_zero3.sh +++ /dev/null @@ -1,15 +0,0 @@ -#!/bin/bash - -NPROC_PER_NODE=4 -NNODES=1 -RANK=0 -MASTER_ADDR=127.0.0.1 -MASTER_PORT=29500 - -CUDA_VISIBLE_DEVICES=0,1,2,3 torchrun \ - --nproc_per_node $NPROC_PER_NODE \ - --nnodes $NNODES \ - --node_rank $RANK \ - --master_addr $MASTER_ADDR \ - --master_port $MASTER_PORT \ - src/train.py examples/lora_multi_gpu/llama3_lora_sft_ds.yaml diff --git a/examples/lora_multi_gpu/multi_node.sh b/examples/lora_multi_gpu/multi_node.sh deleted file mode 100644 index 401fac5f..00000000 --- a/examples/lora_multi_gpu/multi_node.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash -# also launch it on slave machine using slave_config.yaml - -CUDA_VISIBLE_DEVICES=0,1,2,3 accelerate launch \ - --config_file examples/accelerate/master_config.yaml \ - src/train.py examples/lora_multi_gpu/llama3_lora_sft.yaml diff --git a/examples/lora_multi_gpu/single_node.sh b/examples/lora_multi_gpu/single_node.sh deleted file mode 100644 index 885a0e8c..00000000 --- a/examples/lora_multi_gpu/single_node.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/bash - -CUDA_VISIBLE_DEVICES=0,1,2,3 accelerate launch \ - --config_file examples/accelerate/single_config.yaml \ - src/train.py examples/lora_multi_gpu/llama3_lora_sft.yaml diff --git a/examples/lora_multi_npu/ds_zero0.sh b/examples/lora_multi_npu/ds_zero0.sh deleted file mode 100644 index 4ffaa1b0..00000000 --- a/examples/lora_multi_npu/ds_zero0.sh +++ /dev/null @@ -1,15 +0,0 @@ -#!/bin/bash - -NPROC_PER_NODE=4 -NNODES=1 -RANK=0 -MASTER_ADDR=127.0.0.1 -MASTER_PORT=29500 - -ASCEND_RT_VISIBLE_DEVICES=0,1,2,3 torchrun \ - --nproc_per_node $NPROC_PER_NODE \ - --nnodes $NNODES \ - --node_rank $RANK \ - --master_addr $MASTER_ADDR \ - --master_port $MASTER_PORT \ - src/train.py examples/lora_multi_npu/llama3_lora_sft_ds.yaml diff --git a/src/llamafactory/cli.py b/src/llamafactory/cli.py index f9b63ded..26975f3c 100644 --- a/src/llamafactory/cli.py +++ b/src/llamafactory/cli.py @@ -1,9 +1,16 @@ +import os +import random +import subprocess import sys from enum import Enum, unique +from llamafactory import launcher + from .api.app import run_api from .chat.chat_model import run_chat from .eval.evaluator import run_eval +from .extras.logging import get_logger +from .extras.misc import get_device_count from .train.tuner import export_model, run_exp from .webui.interface import run_web_demo, run_web_ui @@ -37,6 +44,8 @@ WELCOME = ( + "-" * 58 ) +logger = get_logger(__name__) + @unique class Command(str, Enum): @@ -62,7 +71,32 @@ def main(): elif command == Command.EXPORT: export_model() elif command == Command.TRAIN: - run_exp() + if get_device_count() > 1: + nnodes = os.environ.get("NNODES", "1") + node_rank = os.environ.get("RANK", "0") + nproc_per_node = os.environ.get("NPROC_PER_NODE", str(get_device_count())) + master_addr = os.environ.get("MASTER_ADDR", "127.0.0.1") + master_port = os.environ.get("MASTER_PORT", str(random.randint(20001, 29999))) + logger.info("Initializing 
distributed tasks at: {}:{}".format(master_addr, master_port)) + subprocess.run( + [ + "torchrun", + "--nnodes", + nnodes, + "--node_rank", + node_rank, + "--nproc_per_node", + nproc_per_node, + "--master_addr", + master_addr, + "--master_port", + master_port, + launcher.__file__, + *sys.argv[1:], + ] + ) + else: + run_exp() elif command == Command.WEBDEMO: run_web_demo() elif command == Command.WEBUI: diff --git a/src/llamafactory/extras/misc.py b/src/llamafactory/extras/misc.py index 0dc07d28..2c7f170c 100644 --- a/src/llamafactory/extras/misc.py +++ b/src/llamafactory/extras/misc.py @@ -165,13 +165,15 @@ def get_current_device() -> torch.device: def get_device_count() -> int: r""" - Gets the number of available GPU devices. + Gets the number of available GPU or NPU devices. """ - if not torch.cuda.is_available(): + if is_torch_npu_available(): + return torch.npu.device_count() + elif is_torch_cuda_available(): + return torch.cuda.device_count() + else: return 0 - return torch.cuda.device_count() - def get_logits_processor() -> "LogitsProcessorList": r""" @@ -194,6 +196,13 @@ def infer_optim_dtype(model_dtype: torch.dtype) -> torch.dtype: return torch.float32 +def is_gpu_or_npu_available() -> bool: + r""" + Checks if the GPU or NPU is available. + """ + return is_torch_npu_available() or is_torch_cuda_available() + + def has_tokenized_data(path: os.PathLike) -> bool: r""" Checks if the path has a tokenized dataset. diff --git a/src/llamafactory/hparams/parser.py b/src/llamafactory/hparams/parser.py index 6311297e..c6869e4c 100644 --- a/src/llamafactory/hparams/parser.py +++ b/src/llamafactory/hparams/parser.py @@ -10,7 +10,6 @@ from transformers.trainer_utils import get_last_checkpoint from transformers.utils import is_torch_bf16_gpu_available from transformers.utils.versions import require_version -from ..extras.constants import TRAINER_CONFIG from ..extras.logging import get_logger from ..extras.misc import check_dependencies, get_current_device from .data_args import DataArguments @@ -252,10 +251,6 @@ def get_train_args(args: Optional[Dict[str, Any]] = None) -> _TRAIN_CLS: and can_resume_from_checkpoint ): last_checkpoint = get_last_checkpoint(training_args.output_dir) - files = os.listdir(training_args.output_dir) - if last_checkpoint is None and len(files) > 0 and (len(files) != 1 or files[0] != TRAINER_CONFIG): - raise ValueError("Output directory already exists and is not empty. 
Please set `overwrite_output_dir`.") - if last_checkpoint is not None: training_args.resume_from_checkpoint = last_checkpoint logger.info( diff --git a/src/llamafactory/launcher.py b/src/llamafactory/launcher.py new file mode 100644 index 00000000..de154db9 --- /dev/null +++ b/src/llamafactory/launcher.py @@ -0,0 +1,9 @@ +from llamafactory.train.tuner import run_exp + + +def launch(): + run_exp() + + +if __name__ == "__main__": + launch() diff --git a/src/llamafactory/webui/locales.py b/src/llamafactory/webui/locales.py index bd4a4205..570a8b42 100644 --- a/src/llamafactory/webui/locales.py +++ b/src/llamafactory/webui/locales.py @@ -1469,11 +1469,6 @@ ALERTS = { "ru": "Обучение недоступно в демонстрационном режиме, сначала скопируйте пространство в частное.", "zh": "展示模式不支持训练,请先复制到私人空间。", }, - "err_device_count": { - "en": "Multiple GPUs are not supported yet.", - "ru": "Пока не поддерживается множественные GPU.", - "zh": "尚不支持多 GPU 训练。", - }, "err_tool_name": { "en": "Tool name not found.", "ru": "Имя инструмента не найдено.", diff --git a/src/llamafactory/webui/runner.py b/src/llamafactory/webui/runner.py index 57595a08..1310b999 100644 --- a/src/llamafactory/webui/runner.py +++ b/src/llamafactory/webui/runner.py @@ -6,10 +6,9 @@ from typing import TYPE_CHECKING, Any, Dict, Generator, Optional import psutil from transformers.trainer import TRAINING_ARGS_NAME -from transformers.utils import is_torch_cuda_available from ..extras.constants import TRAINING_STAGES -from ..extras.misc import get_device_count, torch_gc +from ..extras.misc import is_gpu_or_npu_available, torch_gc from ..extras.packages import is_gradio_available from .common import get_module, get_save_dir, load_args, load_config, save_args from .locales import ALERTS @@ -64,16 +63,13 @@ class Runner: if not from_preview and self.demo_mode: return ALERTS["err_demo"][lang] - if not from_preview and get_device_count() > 1: - return ALERTS["err_device_count"][lang] - if do_train: stage = TRAINING_STAGES[get("train.training_stage")] reward_model = get("train.reward_model") if stage == "ppo" and not reward_model: return ALERTS["err_no_reward_model"][lang] - if not from_preview and not is_torch_cuda_available(): + if not from_preview and not is_gpu_or_npu_available(): gr.Warning(ALERTS["warn_no_cuda"][lang]) return "" @@ -273,7 +269,6 @@ class Runner: self.do_train, self.running_data = do_train, data args = self._parse_train_args(data) if do_train else self._parse_eval_args(data) env = deepcopy(os.environ) - env["CUDA_VISIBLE_DEVICES"] = os.environ.get("CUDA_VISIBLE_DEVICES", "0") env["LLAMABOARD_ENABLED"] = "1" self.trainer = Popen("llamafactory-cli train {}".format(save_cmd(args)), env=env, shell=True) yield from self.monitor() diff --git a/src/llamafactory/webui/utils.py b/src/llamafactory/webui/utils.py index 3d34f0d2..ceeb9352 100644 --- a/src/llamafactory/webui/utils.py +++ b/src/llamafactory/webui/utils.py @@ -42,8 +42,7 @@ def clean_cmd(args: Dict[str, Any]) -> Dict[str, Any]: def gen_cmd(args: Dict[str, Any]) -> str: - current_devices = os.environ.get("CUDA_VISIBLE_DEVICES", "0") - cmd_lines = ["CUDA_VISIBLE_DEVICES={} llamafactory-cli train ".format(current_devices)] + cmd_lines = ["llamafactory-cli train "] for k, v in clean_cmd(args).items(): cmd_lines.append(" --{} {} ".format(k, str(v))) From 89776097bc9d8bd626d450c1a22c3e0c51e4dacb Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Tue, 28 May 2024 19:35:52 +0800 Subject: [PATCH 018/162] update readme Former-commit-id: 
bc861f76706df3f643028f1dfc8ec2044b067a08 --- README.md | 7 +++---- README_zh.md | 5 ++--- src/llamafactory/cli.py | 3 +-- 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index b9225a94..fdc77777 100644 --- a/README.md +++ b/README.md @@ -395,9 +395,6 @@ See [examples/README.md](examples/README.md) for advanced usage (including distr ### Fine-Tuning with LLaMA Board GUI (powered by [Gradio](https://github.com/gradio-app/gradio)) -> [!IMPORTANT] -> LLaMA Board GUI only supports training on a single GPU. - #### Use local environment ```bash @@ -451,7 +448,7 @@ export USE_MODELSCOPE_HUB=1 # `set USE_MODELSCOPE_HUB=1` for Windows Train the model by specifying a model ID of the ModelScope Hub as the `model_name_or_path`. You can find a full list of model IDs at [ModelScope Hub](https://modelscope.cn/models), e.g., `LLM-Research/Meta-Llama-3-8B-Instruct`. -### Use W&B Logging +### Use W&B Logger To use [Weights & Biases](https://wandb.ai) for logging experimental results, you need to add the following arguments. @@ -460,6 +457,8 @@ report_to: wandb run_name: test_run # optional ``` +Set `WANDB_API_KEY` to [your key](https://wandb.ai/authorize) when launching training tasks to log in with your W&B account. + ## Projects using LLaMA Factory If you have a project that should be incorporated, please contact via email or create a pull request. diff --git a/README_zh.md b/README_zh.md index 0f47b88e..65a87705 100644 --- a/README_zh.md +++ b/README_zh.md @@ -395,9 +395,6 @@ CUDA_VISIBLE_DEVICES=0 llamafactory-cli export examples/merge_lora/llama3_lora_s ### LLaMA Board 可视化微调(由 [Gradio](https://github.com/gradio-app/gradio) 驱动) -> [!IMPORTANT] -> LLaMA Board 可视化界面目前仅支持单 GPU 训练。 - #### 使用本地环境 ```bash @@ -458,6 +455,8 @@ report_to: wandb run_name: test_run # 可选 ``` +在启动训练任务时,将 `WANDB_API_KEY` 设置为[密钥](https://wandb.ai/authorize)来登录 W&B 账户。 + ## 使用了 LLaMA Factory 的项目 如果您有项目希望添加至下述列表,请通过邮件联系或者创建一个 PR。 diff --git a/src/llamafactory/cli.py b/src/llamafactory/cli.py index 26975f3c..a74445a6 100644 --- a/src/llamafactory/cli.py +++ b/src/llamafactory/cli.py @@ -4,8 +4,7 @@ import subprocess import sys from enum import Enum, unique -from llamafactory import launcher - +from . 
import launcher from .api.app import run_api from .chat.chat_model import run_chat from .eval.evaluator import run_eval From fdfb5e548583593e505fa698d94a513b3265772d Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Tue, 28 May 2024 21:27:27 +0800 Subject: [PATCH 019/162] bump vllm version to 0.4.1 Former-commit-id: a00fd39a4c2f270620711f2bfbad8d460fb4aa89 --- setup.py | 2 +- src/llamafactory/hparams/parser.py | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 4d948450..b44e83ac 100644 --- a/setup.py +++ b/setup.py @@ -24,7 +24,7 @@ extra_require = { "metrics": ["nltk", "jieba", "rouge-chinese"], "deepspeed": ["deepspeed>=0.10.0,<=0.14.0"], "bitsandbytes": ["bitsandbytes>=0.39.0"], - "vllm": ["vllm>=0.4.0"], + "vllm": ["vllm>=0.4.1"], "galore": ["galore-torch"], "badam": ["badam"], "gptq": ["optimum>=1.16.0", "auto-gptq>=0.5.0"], diff --git a/src/llamafactory/hparams/parser.py b/src/llamafactory/hparams/parser.py index c6869e4c..adb206f8 100644 --- a/src/llamafactory/hparams/parser.py +++ b/src/llamafactory/hparams/parser.py @@ -6,6 +6,7 @@ from typing import Any, Dict, Optional, Tuple import torch import transformers from transformers import HfArgumentParser, Seq2SeqTrainingArguments +from transformers.integrations import is_deepspeed_zero3_enabled from transformers.trainer_utils import get_last_checkpoint from transformers.utils import is_torch_bf16_gpu_available from transformers.utils.versions import require_version @@ -63,6 +64,9 @@ def _verify_model_args(model_args: "ModelArguments", finetuning_args: "Finetunin if model_args.adapter_name_or_path is not None and finetuning_args.finetuning_type != "lora": raise ValueError("Adapter is only valid for the LoRA method.") + if model_args.use_unsloth and is_deepspeed_zero3_enabled(): + raise ValueError("Unsloth is incompatible with DeepSpeed ZeRO-3.") + if model_args.quantization_bit is not None: if finetuning_args.finetuning_type != "lora": raise ValueError("Quantization is only compatible with the LoRA method.") @@ -89,7 +93,7 @@ def _check_extra_dependencies( require_version("mixture-of-depth>=1.1.6", "To fix: pip install mixture-of-depth>=1.1.6") if model_args.infer_backend == "vllm": - require_version("vllm>=0.4.0", "To fix: pip install vllm>=0.4.0") + require_version("vllm>=0.4.1", "To fix: pip install vllm>=0.4.1") if finetuning_args.use_galore: require_version("galore_torch", "To fix: pip install galore_torch") From e15389be7d393b2fadbf49dac9890a01b5b3120c Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Tue, 28 May 2024 21:43:26 +0800 Subject: [PATCH 020/162] clean kto trainer Former-commit-id: 76402bd78cbd3a99a544f0ac019468b569b0e1d1 --- src/llamafactory/train/kto/trainer.py | 95 ++++++++++++++------------- 1 file changed, 50 insertions(+), 45 deletions(-) diff --git a/src/llamafactory/train/kto/trainer.py b/src/llamafactory/train/kto/trainer.py index 096fd935..292e61c7 100644 --- a/src/llamafactory/train/kto/trainer.py +++ b/src/llamafactory/train/kto/trainer.py @@ -1,7 +1,7 @@ from collections import defaultdict from contextlib import nullcontext from types import MethodType -from typing import TYPE_CHECKING, Dict, Optional, Tuple, Union +from typing import TYPE_CHECKING, Dict, Literal, Optional, Tuple, Union import torch from transformers import Trainer @@ -101,42 +101,39 @@ class CustomKTOTrainer(KTOTrainer): return -all_logps def forward( - self, model: "PreTrainedModel", batch: Dict[str, "torch.Tensor"] - ) -> Tuple["torch.Tensor", "torch.Tensor", 
"torch.Tensor", "torch.Tensor", "torch.Tensor"]: - with torch.no_grad(): - kl_model_inputs = {"input_ids": batch["kl_input_ids"], "attention_mask": batch["kl_attention_mask"]} - if "pixel_values" in batch: - kl_model_inputs["pixel_values"] = batch["pixel_values"] - - if "kl_token_type_ids" in batch: - kl_model_inputs["token_type_ids"] = batch["kl_token_type_ids"] - - kl_logits = model(**kl_model_inputs, return_dict=True, use_cache=False).logits.to(torch.float32) - - model_inputs = {"input_ids": batch["input_ids"], "attention_mask": batch["attention_mask"]} + self, model: "PreTrainedModel", batch: Dict[str, "torch.Tensor"], prefix: Literal["", "kl_"] = "" + ) -> Tuple["torch.Tensor", "torch.Tensor"]: + r""" + Runs forward pass and computes the log probabilities. + """ + batch = {k: v.detach().clone() for k, v in batch.items()} # avoid error + model_inputs = { + "input_ids": batch["{}input_ids".format(prefix)], + "attention_mask": batch["{}attention_mask".format(prefix)], + } if "pixel_values" in batch: model_inputs["pixel_values"] = batch["pixel_values"] - if "token_type_ids" in batch: - model_inputs["token_type_ids"] = batch["token_type_ids"] + if "{}token_type_ids".format(prefix) in batch: + model_inputs["token_type_ids"] = batch["{}token_type_ids".format(prefix)] - target_logits = model(**model_inputs, return_dict=True, use_cache=False).logits.to(torch.float32) + logits = model(**model_inputs, return_dict=True, use_cache=False).logits.to(torch.float32) - target_logps = self.get_batch_logps( - logits=target_logits, - labels=batch["labels"], + logps = self.get_batch_logps( + logits=logits, + labels=batch["{}labels".format(prefix)], average_log_prob=False, is_encoder_decoder=self.is_encoder_decoder, label_pad_token_id=self.label_pad_token_id, ) + return logits, logps - kl_logps = self.get_batch_logps( - logits=kl_logits, - labels=batch["kl_labels"], - average_log_prob=False, - is_encoder_decoder=self.is_encoder_decoder, - label_pad_token_id=self.label_pad_token_id, - ) + def concatenated_forward( + self, model: "PreTrainedModel", batch: Dict[str, "torch.Tensor"] + ) -> Tuple["torch.Tensor", "torch.Tensor", "torch.Tensor", "torch.Tensor", "torch.Tensor"]: + target_logits, target_logps = self.forward(model, batch) + with torch.no_grad(): + _, kl_logps = self.forward(model, batch, prefix="kl_") if len(target_logps) != len(batch["kto_tags"]): raise ValueError("Mismatched shape of inputs and labels.") @@ -152,6 +149,30 @@ class CustomKTOTrainer(KTOTrainer): return chosen_logps, rejected_logps, chosen_logits, rejected_logits, kl_logps + def compute_reference_log_probs( + self, batch: Dict[str, "torch.Tensor"] + ) -> Tuple["torch.Tensor", "torch.Tensor", "torch.Tensor"]: + r""" + Computes log probabilities of the reference model. 
+ """ + if self.ref_model is None: + ref_model = self.model + ref_context = self.accelerator.unwrap_model(self.model).disable_adapter() + else: + ref_model = self.ref_model + ref_context = nullcontext() + + with torch.no_grad(), ref_context: + ( + reference_chosen_logps, + reference_rejected_logps, + _, + _, + reference_kl_logps, + ) = self.concatenated_forward(ref_model, batch) + + return reference_chosen_logps, reference_rejected_logps, reference_kl_logps + def get_batch_loss_metrics( self, model: "PreTrainedModel", @@ -167,25 +188,9 @@ class CustomKTOTrainer(KTOTrainer): policy_chosen_logits, _, policy_kl_logps, - ) = self.forward(model, batch) - - with torch.no_grad(): - if self.ref_model is None: - ref_model = self.model - ref_context = self.accelerator.unwrap_model(self.model).disable_adapter() - else: - ref_model = self.ref_model - ref_context = nullcontext() - - with ref_context: - ( - reference_chosen_logps, - reference_rejected_logps, - _, - _, - reference_kl_logps, - ) = self.forward(ref_model, batch) + ) = self.concatenated_forward(model, batch) + reference_chosen_logps, reference_rejected_logps, reference_kl_logps = self.compute_reference_log_probs(batch) losses, chosen_rewards, rejected_rewards, kl = self.kto_loss( policy_chosen_logps, policy_rejected_logps, From 0de2ab5d163368adc728b3465441bf10bdc3901a Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Wed, 29 May 2024 00:14:29 +0800 Subject: [PATCH 021/162] update dpo, kto trainer Former-commit-id: 4a6cc3c7046f8b27d05ea53ef216bab6fa7ebfaf --- src/llamafactory/train/dpo/trainer.py | 10 +++++----- src/llamafactory/train/kto/trainer.py | 12 +++++++----- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/src/llamafactory/train/dpo/trainer.py b/src/llamafactory/train/dpo/trainer.py index f3c2443c..542335a3 100644 --- a/src/llamafactory/train/dpo/trainer.py +++ b/src/llamafactory/train/dpo/trainer.py @@ -7,7 +7,7 @@ import torch import torch.nn.functional as F from transformers import Trainer from trl import DPOTrainer -from trl.trainer.utils import disable_dropout_in_model +from trl.trainer import disable_dropout_in_model from ...extras.constants import IGNORE_INDEX from ..utils import create_custom_optimzer, create_custom_scheduler @@ -179,7 +179,7 @@ class CustomDPOTrainer(DPOTrainer): return chosen_logps, rejected_logps, chosen_logits, rejected_logits def compute_reference_log_probs( - self, batch: Dict[str, "torch.Tensor"] + self, model: "PreTrainedModel", batch: Dict[str, "torch.Tensor"] ) -> Tuple[Optional["torch.Tensor"], Optional["torch.Tensor"]]: r""" Computes log probabilities of the reference model. 
@@ -188,8 +188,8 @@ class CustomDPOTrainer(DPOTrainer): return None, None if self.ref_model is None: - ref_model = self.model - ref_context = self.accelerator.unwrap_model(self.model).disable_adapter() + ref_model = model + ref_context = self.accelerator.unwrap_model(model).disable_adapter() else: ref_model = self.ref_model ref_context = nullcontext() @@ -221,7 +221,7 @@ class CustomDPOTrainer(DPOTrainer): policy_rejected_logits, ) = self.concatenated_forward(model, batch) - reference_chosen_logps, reference_rejected_logps = self.compute_reference_log_probs(batch) + reference_chosen_logps, reference_rejected_logps = self.compute_reference_log_probs(model, batch) losses, chosen_rewards, rejected_rewards = self.compute_preference_loss( policy_chosen_logps, policy_rejected_logps, diff --git a/src/llamafactory/train/kto/trainer.py b/src/llamafactory/train/kto/trainer.py index 292e61c7..82ae722d 100644 --- a/src/llamafactory/train/kto/trainer.py +++ b/src/llamafactory/train/kto/trainer.py @@ -6,7 +6,7 @@ from typing import TYPE_CHECKING, Dict, Literal, Optional, Tuple, Union import torch from transformers import Trainer from trl import KTOTrainer -from trl.trainer.utils import disable_dropout_in_model +from trl.trainer import disable_dropout_in_model from ...extras.constants import IGNORE_INDEX from ..utils import create_custom_optimzer, create_custom_scheduler @@ -150,14 +150,14 @@ class CustomKTOTrainer(KTOTrainer): return chosen_logps, rejected_logps, chosen_logits, rejected_logits, kl_logps def compute_reference_log_probs( - self, batch: Dict[str, "torch.Tensor"] + self, model: "PreTrainedModel", batch: Dict[str, "torch.Tensor"] ) -> Tuple["torch.Tensor", "torch.Tensor", "torch.Tensor"]: r""" Computes log probabilities of the reference model. """ if self.ref_model is None: - ref_model = self.model - ref_context = self.accelerator.unwrap_model(self.model).disable_adapter() + ref_model = model + ref_context = self.accelerator.unwrap_model(model).disable_adapter() else: ref_model = self.ref_model ref_context = nullcontext() @@ -190,7 +190,9 @@ class CustomKTOTrainer(KTOTrainer): policy_kl_logps, ) = self.concatenated_forward(model, batch) - reference_chosen_logps, reference_rejected_logps, reference_kl_logps = self.compute_reference_log_probs(batch) + reference_chosen_logps, reference_rejected_logps, reference_kl_logps = self.compute_reference_log_probs( + model, batch + ) losses, chosen_rewards, rejected_rewards, kl = self.kto_loss( policy_chosen_logps, policy_rejected_logps, From 3e80365646e83d687b72b2b50cbb7e63b3794f15 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Wed, 29 May 2024 00:23:23 +0800 Subject: [PATCH 022/162] 10x generate in ppo w/ zero3 https://github.com/huggingface/trl/pull/1483 Former-commit-id: 5dc43ba8b373d8803bc22d88b3d0d95ef8b9c7f8 --- src/llamafactory/train/ppo/trainer.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/llamafactory/train/ppo/trainer.py b/src/llamafactory/train/ppo/trainer.py index 985664b7..27353c72 100644 --- a/src/llamafactory/train/ppo/trainer.py +++ b/src/llamafactory/train/ppo/trainer.py @@ -13,6 +13,7 @@ from transformers.trainer_utils import PREFIX_CHECKPOINT_DIR from transformers.utils import SAFE_WEIGHTS_NAME, WEIGHTS_NAME from trl import PPOConfig, PPOTrainer from trl.core import PPODecorators, logprobs_from_logits +from trl.models.utils import unwrap_model_for_generation from ...extras.callbacks import FixValueHeadModelCallback, LogCallback from ...extras.logging import get_logger @@ -322,10 
+323,10 @@ class CustomPPOTrainer(PPOTrainer, Trainer): for k, v in batch.items(): batch[k] = v[:, start_index:] - unwrapped_model: "AutoModelForCausalLMWithValueHead" = self.accelerator.unwrap_model(self.model) - generate_output: torch.Tensor = unwrapped_model.generate( - generation_config=self.generation_config, logits_processor=get_logits_processor(), **batch - ) + with unwrap_model_for_generation(self.model, self.accelerator) as unwrapped_model: + generate_output: torch.Tensor = unwrapped_model.generate( + generation_config=self.generation_config, logits_processor=get_logits_processor(), **batch + ) if self.model_args.upcast_layernorm: restore_layernorm(self.model, layernorm_params) From 21dbc483dea84da0868cc83e5294e67c33d76eb0 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Wed, 29 May 2024 01:13:17 +0800 Subject: [PATCH 023/162] add ds config to webui Former-commit-id: 66d72b263d36dc81de9f6152077663b613035977 --- src/llamafactory/webui/components/train.py | 12 ++++- src/llamafactory/webui/engine.py | 4 +- src/llamafactory/webui/locales.py | 42 +++++++++++++++ src/llamafactory/webui/runner.py | 8 ++- src/llamafactory/webui/utils.py | 61 ++++++++++++++++++++++ 5 files changed, 123 insertions(+), 4 deletions(-) diff --git a/src/llamafactory/webui/components/train.py b/src/llamafactory/webui/components/train.py index d399106f..8db5c2ba 100644 --- a/src/llamafactory/webui/components/train.py +++ b/src/llamafactory/webui/components/train.py @@ -3,6 +3,7 @@ from typing import TYPE_CHECKING, Dict from transformers.trainer_utils import SchedulerType from ...extras.constants import TRAINING_STAGES +from ...extras.misc import get_device_count from ...extras.packages import is_gradio_available from ..common import DEFAULT_DATA_DIR, autoset_packing, list_adapters, list_dataset from ..components.data import create_preview_box @@ -258,6 +259,11 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]: output_dir = gr.Textbox() config_path = gr.Textbox() + with gr.Row(): + device_count = gr.Textbox(value=str(get_device_count()), interactive=False) + ds_stage = gr.Dropdown(choices=["none", "2", "3"], value="none") + ds_offload = gr.Checkbox() + with gr.Row(): resume_btn = gr.Checkbox(visible=False, interactive=False) progress_bar = gr.Slider(visible=False, interactive=False) @@ -268,6 +274,7 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]: with gr.Column(scale=1): loss_viewer = gr.Plot() + input_elems.update({output_dir, config_path, device_count, ds_stage, ds_offload}) elem_dict.update( dict( cmd_preview_btn=cmd_preview_btn, @@ -277,14 +284,15 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]: stop_btn=stop_btn, output_dir=output_dir, config_path=config_path, + device_count=device_count, + ds_stage=ds_stage, + ds_offload=ds_offload, resume_btn=resume_btn, progress_bar=progress_bar, output_box=output_box, loss_viewer=loss_viewer, ) ) - - input_elems.update({output_dir, config_path}) output_elems = [output_box, progress_bar, loss_viewer] cmd_preview_btn.click(engine.runner.preview_train, input_elems, output_elems, concurrency_limit=None) diff --git a/src/llamafactory/webui/engine.py b/src/llamafactory/webui/engine.py index 964d65a2..fb568737 100644 --- a/src/llamafactory/webui/engine.py +++ b/src/llamafactory/webui/engine.py @@ -5,7 +5,7 @@ from .common import get_model_path, list_dataset, load_config from .locales import LOCALES from .manager import Manager from .runner import Runner -from .utils import get_time +from .utils import 
get_time, save_ds_config if TYPE_CHECKING: @@ -19,6 +19,8 @@ class Engine: self.manager = Manager() self.runner = Runner(self.manager, demo_mode) self.chatter = WebChatModel(self.manager, demo_mode, lazy_init=(not pure_chat)) + if not demo_mode: + save_ds_config() def _update_component(self, input_dict: Dict[str, Dict[str, Any]]) -> Dict["Component", "Component"]: r""" diff --git a/src/llamafactory/webui/locales.py b/src/llamafactory/webui/locales.py index 570a8b42..4657f9a3 100644 --- a/src/llamafactory/webui/locales.py +++ b/src/llamafactory/webui/locales.py @@ -1103,6 +1103,48 @@ LOCALES = { "info": "保存训练参数的配置文件路径。", }, }, + "device_count": { + "en": { + "label": "Device count", + "info": "Number of devices available.", + }, + "ru": { + "label": "Количество устройств", + "info": "Количество доступных устройств.", + }, + "zh": { + "label": "设备数量", + "info": "当前可用的运算设备数。", + }, + }, + "ds_stage": { + "en": { + "label": "DeepSpeed stage", + "info": "DeepSpeed stage for distributed training.", + }, + "ru": { + "label": "Этап DeepSpeed", + "info": "Этап DeepSpeed для распределенного обучения.", + }, + "zh": { + "label": "DeepSpeed stage", + "info": "多卡训练的 DeepSpeed stage。", + }, + }, + "ds_offload": { + "en": { + "label": "Enable offload", + "info": "Enable DeepSpeed offload (slow down training).", + }, + "ru": { + "label": "Включить выгрузку", + "info": "включить выгрузку DeepSpeed (замедлит обучение).", + }, + "zh": { + "label": "使用 offload", + "info": "使用 DeepSpeed offload(会减慢速度)。", + }, + }, "output_box": { "en": { "value": "Ready.", diff --git a/src/llamafactory/webui/runner.py b/src/llamafactory/webui/runner.py index 1310b999..c2e46e97 100644 --- a/src/llamafactory/webui/runner.py +++ b/src/llamafactory/webui/runner.py @@ -10,7 +10,7 @@ from transformers.trainer import TRAINING_ARGS_NAME from ..extras.constants import TRAINING_STAGES from ..extras.misc import is_gpu_or_npu_available, torch_gc from ..extras.packages import is_gradio_available -from .common import get_module, get_save_dir, load_args, load_config, save_args +from .common import DEFAULT_CACHE_DIR, get_module, get_save_dir, load_args, load_config, save_args from .locales import ALERTS from .utils import gen_cmd, get_eval_results, get_trainer_info, save_cmd @@ -201,6 +201,12 @@ class Runner: args["eval_steps"] = args["save_steps"] args["per_device_eval_batch_size"] = args["per_device_train_batch_size"] + # ds config + if get("train.ds_stage") != "none": + ds_stage = get("train.ds_stage") + ds_offload = "offload_" if get("train.ds_offload") else "" + args["deepspeed"] = os.path.join(DEFAULT_CACHE_DIR, "ds_z{}_{}config.json".format(ds_stage, ds_offload)) + return args def _parse_eval_args(self, data: Dict["Component", Any]) -> Dict[str, Any]: diff --git a/src/llamafactory/webui/utils.py b/src/llamafactory/webui/utils.py index ceeb9352..654d1f8d 100644 --- a/src/llamafactory/webui/utils.py +++ b/src/llamafactory/webui/utils.py @@ -8,6 +8,7 @@ from yaml import safe_dump from ..extras.constants import RUNNING_LOG, TRAINER_CONFIG, TRAINER_LOG from ..extras.packages import is_gradio_available, is_matplotlib_available from ..extras.ploting import gen_loss_plot +from .common import DEFAULT_CACHE_DIR from .locales import ALERTS @@ -103,3 +104,63 @@ def save_cmd(args: Dict[str, Any]) -> str: safe_dump(clean_cmd(args), f) return os.path.join(output_dir, TRAINER_CONFIG) + + +def save_ds_config() -> None: + os.makedirs(DEFAULT_CACHE_DIR, exist_ok=True) + ds_config = { + "train_batch_size": "auto", + "train_micro_batch_size_per_gpu": 
"auto", + "gradient_accumulation_steps": "auto", + "gradient_clipping": "auto", + "zero_allow_untested_optimizer": True, + "fp16": { + "enabled": "auto", + "loss_scale": 0, + "loss_scale_window": 1000, + "initial_scale_power": 16, + "hysteresis": 2, + "min_loss_scale": 1, + }, + "bf16": {"enabled": "auto"}, + } + offload_config = { + "device": "cpu", + "pin_memory": True, + } + ds_config["zero_optimization"] = { + "stage": 2, + "allgather_partitions": True, + "allgather_bucket_size": 5e8, + "overlap_comm": True, + "reduce_scatter": True, + "reduce_bucket_size": 5e8, + "contiguous_gradients": True, + "round_robin_gradients": True, + } + with open(os.path.join(DEFAULT_CACHE_DIR, "ds_z2_config.json"), "w", encoding="utf-8") as f: + json.dump(ds_config, f, indent=2) + + ds_config["zero_optimization"]["offload_optimizer"] = offload_config + with open(os.path.join(DEFAULT_CACHE_DIR, "ds_z2_offload_config.json"), "w", encoding="utf-8") as f: + json.dump(ds_config, f, indent=2) + + ds_config["zero_optimization"] = { + "stage": 3, + "overlap_comm": True, + "contiguous_gradients": True, + "sub_group_size": 1e9, + "reduce_bucket_size": "auto", + "stage3_prefetch_bucket_size": "auto", + "stage3_param_persistence_threshold": "auto", + "stage3_max_live_parameters": 1e9, + "stage3_max_reuse_distance": 1e9, + "stage3_gather_16bit_weights_on_model_save": True, + } + with open(os.path.join(DEFAULT_CACHE_DIR, "ds_z3_config.json"), "w", encoding="utf-8") as f: + json.dump(ds_config, f, indent=2) + + ds_config["zero_optimization"]["offload_optimizer"] = offload_config + ds_config["zero_optimization"]["offload_param"] = offload_config + with open(os.path.join(DEFAULT_CACHE_DIR, "ds_z3_offload_config.json"), "w", encoding="utf-8") as f: + json.dump(ds_config, f, indent=2) From 51139c5fc4a1e9f0148a65416265870c8ee385ce Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Wed, 29 May 2024 01:20:07 +0800 Subject: [PATCH 024/162] fix hf chat engine Former-commit-id: 76ce52911690ab0dd8ffa5587127afb4ec942abe --- src/llamafactory/chat/hf_engine.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/llamafactory/chat/hf_engine.py b/src/llamafactory/chat/hf_engine.py index 2148f8cd..ad0e90fe 100644 --- a/src/llamafactory/chat/hf_engine.py +++ b/src/llamafactory/chat/hf_engine.py @@ -8,6 +8,7 @@ import torch from transformers import GenerationConfig, TextIteratorStreamer from ..data import get_template_and_fix_tokenizer +from ..extras.logging import get_logger from ..extras.misc import get_logits_processor from ..model import load_model, load_tokenizer from .base_engine import BaseEngine, Response @@ -23,6 +24,9 @@ if TYPE_CHECKING: from ..hparams import DataArguments, FinetuningArguments, GeneratingArguments, ModelArguments +logger = get_logger(__name__) + + class HuggingfaceEngine(BaseEngine): def __init__( self, @@ -92,7 +96,7 @@ class HuggingfaceEngine(BaseEngine): stop: Optional[Union[str, List[str]]] = input_kwargs.pop("stop", None) if stop is not None: - raise ValueError("Stop parameter is not supported in Huggingface engine yet.") + logger.warning("Stop parameter is not supported in Huggingface engine yet.") generating_args = generating_args.copy() generating_args.update( From 8bd3c0bae2dec9d13ed8f2d7b3a9f87a2a6a9c82 Mon Sep 17 00:00:00 2001 From: hzhaoy Date: Wed, 29 May 2024 15:00:37 +0800 Subject: [PATCH 025/162] add TeleChat-12B/TeleChat-12B-v2 models Former-commit-id: e0675385c88af03aaef8d51586c8a282829c4051 --- src/llamafactory/data/template.py | 10 ++++++++++ 
src/llamafactory/extras/constants.py | 16 ++++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/src/llamafactory/data/template.py b/src/llamafactory/data/template.py index f13b303b..1a9f08ff 100644 --- a/src/llamafactory/data/template.py +++ b/src/llamafactory/data/template.py @@ -918,3 +918,13 @@ _register_template( format_user=StringFormatter(slots=[":{{content}}\n:"]), format_separator=EmptyFormatter(slots=["\n"]), ) + + +_register_template( + name="telechat", + format_user=StringFormatter(slots=["<_user>{{content}}<_bot>"]), + format_system=StringFormatter(slots=["<_system>{{content}}<_end>"]), + default_system="", + stop_words=["<_end>"], + replace_eos=True, +) diff --git a/src/llamafactory/extras/constants.py b/src/llamafactory/extras/constants.py index d1fcd4f6..bf2388ab 100644 --- a/src/llamafactory/extras/constants.py +++ b/src/llamafactory/extras/constants.py @@ -1355,3 +1355,19 @@ register_model_group( }, template="zephyr", ) + + +register_model_group( + models={ + "TeleChat-12B-Chat": { + DownloadSource.DEFAULT: "Tele-AI/TeleChat-12B", + DownloadSource.MODELSCOPE: "TeleAI/TeleChat-12B", + }, + "TeleChat-12B-v2-Chat": { + DownloadSource.DEFAULT: "Tele-AI/TeleChat-12B-v2", + DownloadSource.MODELSCOPE: "TeleAI/TeleChat-12B-v2", + }, + }, + module='query,key_value', + template="telechat", +) From ecada193581d0be9fe471104daf532a37e50b819 Mon Sep 17 00:00:00 2001 From: MengqingCao Date: Wed, 29 May 2024 09:53:31 +0000 Subject: [PATCH 026/162] update cann kernels url Former-commit-id: 23c65e9d7e8817b5815264e44cbf4a7bcb88d3d7 --- README.md | 2 +- README_zh.md | 2 +- setup.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index ca70110f..14a8f4a2 100644 --- a/README.md +++ b/README.md @@ -359,7 +359,7 @@ chmod +x Ascend-cann-toolkit_8.0.RC1.alpha001_linux-"$(uname -i)".run ./Ascend-cann-toolkit_8.0.RC1.alpha001_linux-"$(uname -i)".run --install # install CANN Kernels -wget https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/Milan-ASL/Milan-ASL%20V100R001C18B800TP015/Ascend-cann-kernels-910b_8.0.RC1.alpha001_linux.run +wget https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/Milan-ASL/Milan-ASL%20V100R001C17SPC701/Ascend-cann-kernels-910b_8.0.RC1.alpha001_linux.run chmod +x Ascend-cann-kernels-910b_8.0.RC1.alpha001_linux.run ./Ascend-cann-kernels-910b_8.0.RC1.alpha001_linux.run --install diff --git a/README_zh.md b/README_zh.md index 4f659588..66a4c031 100644 --- a/README_zh.md +++ b/README_zh.md @@ -359,7 +359,7 @@ chmod +x Ascend-cann-toolkit_8.0.RC1.alpha001_linux-"$(uname -i)".run ./Ascend-cann-toolkit_8.0.RC1.alpha001_linux-"$(uname -i)".run --install # 安装 CANN Kernels -wget https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/Milan-ASL/Milan-ASL%20V100R001C18B800TP015/Ascend-cann-kernels-910b_8.0.RC1.alpha001_linux.run +wget https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/Milan-ASL/Milan-ASL%20V100R001C17SPC701/Ascend-cann-kernels-910b_8.0.RC1.alpha001_linux.run chmod +x Ascend-cann-kernels-910b_8.0.RC1.alpha001_linux.run ./Ascend-cann-kernels-910b_8.0.RC1.alpha001_linux.run --install diff --git a/setup.py b/setup.py index 87620c37..f72eefb6 100644 --- a/setup.py +++ b/setup.py @@ -33,7 +33,7 @@ extra_require = { "qwen": ["tiktoken", "transformers_stream_generator"], "modelscope": ["modelscope"], "quality": ["ruff"], - "torch_npu": ["torch==2.2.0", "torch_npu==2.2.0", "decorator"] + "torch_npu": ["torch==2.1.0", "torch_npu==2.1.0", "decorator"] } From c21590ca5ec5aa302d902e02816d3ebf8702af02 Mon Sep 17 00:00:00 2001 From: 
MengqingCao Date: Wed, 29 May 2024 10:05:11 +0000 Subject: [PATCH 027/162] update torch-npu version Former-commit-id: a70d7fcf2967eb30280a1fb845b39db7878f535c --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index f72eefb6..06e6b750 100644 --- a/setup.py +++ b/setup.py @@ -33,7 +33,7 @@ extra_require = { "qwen": ["tiktoken", "transformers_stream_generator"], "modelscope": ["modelscope"], "quality": ["ruff"], - "torch_npu": ["torch==2.1.0", "torch_npu==2.1.0", "decorator"] + "torch_npu": ["torch==2.1.0", "torch_npu==2.1.0.post3", "decorator"] } From 0c722c879a90599783b7627ad02f660e487c6ff9 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Wed, 29 May 2024 18:39:11 +0800 Subject: [PATCH 028/162] update readme Former-commit-id: 440e9de66986ef7736361ce8ec3e23ce68655a56 --- .readthedocs.yaml | 19 +++++++++++++++ README.md | 26 +++++++++----------- README_zh.md | 28 ++++++++++------------ examples/accelerate/fsdp_config.yaml | 8 +++---- setup.py | 2 +- src/llamafactory/data/template.py | 19 +++++++-------- src/llamafactory/extras/constants.py | 36 +++++++++++++++------------- 7 files changed, 76 insertions(+), 62 deletions(-) create mode 100644 .readthedocs.yaml diff --git a/.readthedocs.yaml b/.readthedocs.yaml new file mode 100644 index 00000000..3a9eaea1 --- /dev/null +++ b/.readthedocs.yaml @@ -0,0 +1,19 @@ +# Read the Docs configuration file +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details + +version: 2 + +build: + os: ubuntu-22.04 + tools: + python: "3.8" + +sphinx: + configuration: docs/source/conf.py + +formats: + - pdf + +python: + install: + - requirements: docs/requirements-docs.txt diff --git a/README.md b/README.md index 3e5cdb91..4ca6d1ec 100644 --- a/README.md +++ b/README.md @@ -330,7 +330,7 @@ cd LLaMA-Factory pip install -e .[torch,metrics] ``` -Extra dependencies available: torch, metrics, deepspeed, bitsandbytes, vllm, galore, badam, gptq, awq, aqlm, qwen, modelscope, quality +Extra dependencies available: torch, torch_npu, metrics, deepspeed, bitsandbytes, vllm, galore, badam, gptq, awq, aqlm, qwen, modelscope, quality > [!TIP] > Use `pip install --no-deps -e .` to resolve package conflicts. @@ -351,32 +351,28 @@ To enable FlashAttention-2 on the Windows platform, you need to install the prec Join [NPU user group](assets/wechat_npu.jpg). -Use `pip install -e .[torch_npu]` to install LLaMA-Factory with **[torch-npu](https://gitee.com/ascend/pytorch)** library. - -To utilize Ascend NPU devices for (distributed) training and inference, you need to install the **[Ascend CANN Toolkit and Kernels](https://www.hiascend.com/developer/download/community/result?module=cann)**. You can follow chapter **[install CANN](https://www.hiascend.com/document/detail/zh/CANNCommunityEdition/80RC2alpha002/quickstart/quickstart/quickstart_18_0004.html)** in the installation tutorial to install CANN Toolkit and the kernels, or use the fast installation as following: +To install LLaMA Factory on Ascend NPU devices, please specify extra dependencies: `pip install -e .[torch_npu,metrics]`. Additionally, you need to install the **[Ascend CANN Toolkit and Kernels](https://www.hiascend.com/developer/download/community/result?module=cann)**. 
Please follow the [installation tutorial](https://www.hiascend.com/document/detail/en/CANNCommunityEdition/600alphaX/softwareinstall/instg/atlasdeploy_03_0031.html) or use the following commands: ```bash -# replace the url according to your choice +# replace the url according to your CANN version and devices # install CANN Toolkit wget https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/Milan-ASL/Milan-ASL%20V100R001C17SPC701/Ascend-cann-toolkit_8.0.RC1.alpha001_linux-"$(uname -i)".run -chmod +x Ascend-cann-toolkit_8.0.RC1.alpha001_linux-"$(uname -i)".run -./Ascend-cann-toolkit_8.0.RC1.alpha001_linux-"$(uname -i)".run --install +bash Ascend-cann-toolkit_8.0.RC1.alpha001_linux-"$(uname -i)".run --install # install CANN Kernels wget https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/Milan-ASL/Milan-ASL%20V100R001C17SPC701/Ascend-cann-kernels-910b_8.0.RC1.alpha001_linux.run -chmod +x Ascend-cann-kernels-910b_8.0.RC1.alpha001_linux.run -./Ascend-cann-kernels-910b_8.0.RC1.alpha001_linux.run --install +bash Ascend-cann-kernels-910b_8.0.RC1.alpha001_linux.run --install # set env variables source /usr/local/Ascend/ascend-toolkit/set_env.sh ``` -| Requirement | Minimum | Recommend | -| ------------ | ------- | --------- | -| CANN | 8.0.RC1 | 8.0.RC1 | -| torch | 2.2.0 | 2.2.0 | -| torch-npu | 2.2.0 | 2.2.0 | -| deepspeed | 0.13.2 | 0.13.2 | +| Requirement | Minimum | Recommend | +| ------------ | ------- | ----------- | +| CANN | 8.0.RC1 | 8.0.RC1 | +| torch | 2.1.0 | 2.1.0 | +| torch-npu | 2.1.0 | 2.1.0.post3 | +| deepspeed | 0.13.2 | 0.13.2 | Docker image: diff --git a/README_zh.md b/README_zh.md index b50d52ef..70938e38 100644 --- a/README_zh.md +++ b/README_zh.md @@ -330,7 +330,7 @@ cd LLaMA-Factory pip install -e .[torch,metrics] ``` -可选的额外依赖项:torch、metrics、deepspeed、bitsandbytes、vllm、galore、badam、gptq、awq、aqlm、qwen、modelscope、quality +可选的额外依赖项:torch、torch_npu、metrics、deepspeed、bitsandbytes、vllm、galore、badam、gptq、awq、aqlm、qwen、modelscope、quality > [!TIP] > 遇到包冲突时,可使用 `pip install --no-deps -e .` 解决。 @@ -351,39 +351,35 @@ pip install https://github.com/jllllll/bitsandbytes-windows-webui/releases/downl 加入 [NPU 用户群](assets/wechat_npu.jpg)。 -使用 `pip install -e .[torch_npu]` 来安装带有 **[torch-npu](https://gitee.com/ascend/pytorch)** 的 LLaMA-Factory。 - -如果使用昇腾 NPU 设备进行(分布式)训练或推理,需要安装 **[Ascend CANN Toolkit and Kernels](https://www.hiascend.com/developer/download/community/result?module=cann)**。可以参考安装教程中的**[安装 CANN](https://www.hiascend.com/document/detail/zh/CANNCommunityEdition/80RC2alpha002/quickstart/quickstart/quickstart_18_0004.html)**章节来进行安装,或使用以下快速安装指令: +在昇腾 NPU 设备上安装 LLaMA Factory 时,需要指定额外依赖项,使用 `pip install -e .[torch_npu,metrics]` 命令安装。此外,还需要安装 **[Ascend CANN Toolkit and Kernels](https://www.hiascend.com/developer/download/community/result?module=cann)**,安装方法请参考[安装教程](https://www.hiascend.com/document/detail/zh/CANNCommunityEdition/80RC2alpha002/quickstart/quickstart/quickstart_18_0004.html)或使用以下命令: ```bash -# 请替换 URL 为你需要的 CANN 版本对应的 URL +# 请替换 URL 为 CANN 版本和设备型号对应的 URL # 安装 CANN Toolkit wget https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/Milan-ASL/Milan-ASL%20V100R001C17SPC701/Ascend-cann-toolkit_8.0.RC1.alpha001_linux-"$(uname -i)".run -chmod +x Ascend-cann-toolkit_8.0.RC1.alpha001_linux-"$(uname -i)".run -./Ascend-cann-toolkit_8.0.RC1.alpha001_linux-"$(uname -i)".run --install +bash Ascend-cann-toolkit_8.0.RC1.alpha001_linux-"$(uname -i)".run --install # 安装 CANN Kernels wget 
https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/Milan-ASL/Milan-ASL%20V100R001C17SPC701/Ascend-cann-kernels-910b_8.0.RC1.alpha001_linux.run -chmod +x Ascend-cann-kernels-910b_8.0.RC1.alpha001_linux.run -./Ascend-cann-kernels-910b_8.0.RC1.alpha001_linux.run --install +bash Ascend-cann-kernels-910b_8.0.RC1.alpha001_linux.run --install # 设置环境变量 source /usr/local/Ascend/ascend-toolkit/set_env.sh ``` -| 依赖项 | 至少 | 推荐 | -| ------------ | ------- | --------- | -| CANN | 8.0.RC1 | 8.0.RC1 | -| torch | 2.2.0 | 2.2.0 | -| torch-npu | 2.2.0 | 2.2.0 | -| deepspeed | 0.13.2 | 0.13.2 | +| 依赖项 | 至少 | 推荐 | +| ------------ | ------- | ----------- | +| CANN | 8.0.RC1 | 8.0.RC1 | +| torch | 2.1.0 | 2.1.0 | +| torch-npu | 2.1.0 | 2.1.0.post3 | +| deepspeed | 0.13.2 | 0.13.2 | Docker 镜像: - 32GB:[下载地址](http://mirrors.cn-central-221.ovaijisuan.com/detail/130.html) - 64GB:[下载地址](http://mirrors.cn-central-221.ovaijisuan.com/detail/131.html) -请记得使用 `ASCEND_RT_VISIBLE_DEVICES` 而非 `CUDA_VISIBLE_DEVICES` 来指定您使用的设备。 +请使用 `ASCEND_RT_VISIBLE_DEVICES` 而非 `CUDA_VISIBLE_DEVICES` 来指定运算设备。 如果遇到无法正常推理的情况,请尝试设置 `do_sample: false`。 diff --git a/examples/accelerate/fsdp_config.yaml b/examples/accelerate/fsdp_config.yaml index 60025597..cd65e074 100644 --- a/examples/accelerate/fsdp_config.yaml +++ b/examples/accelerate/fsdp_config.yaml @@ -5,16 +5,16 @@ downcast_bf16: 'no' fsdp_config: fsdp_auto_wrap_policy: TRANSFORMER_BASED_WRAP fsdp_backward_prefetch: BACKWARD_PRE - fsdp_cpu_ram_efficient_loading: true fsdp_forward_prefetch: false - fsdp_offload_params: true + fsdp_cpu_ram_efficient_loading: true + fsdp_offload_params: true # offload may affect training speed fsdp_sharding_strategy: FULL_SHARD fsdp_state_dict_type: FULL_STATE_DICT fsdp_sync_module_states: true - fsdp_use_orig_params: false + fsdp_use_orig_params: true machine_rank: 0 main_training_function: main -mixed_precision: fp16 +mixed_precision: fp16 # or bf16 num_machines: 1 # the number of nodes num_processes: 2 # the number of GPUs in all nodes rdzv_backend: static diff --git a/setup.py b/setup.py index 45268d6e..45e73343 100644 --- a/setup.py +++ b/setup.py @@ -21,6 +21,7 @@ def get_requires(): extra_require = { "torch": ["torch>=1.13.1"], + "torch_npu": ["torch==2.1.0", "torch-npu==2.1.0.post3", "decorator"], "metrics": ["nltk", "jieba", "rouge-chinese"], "deepspeed": ["deepspeed>=0.10.0,<=0.14.0"], "bitsandbytes": ["bitsandbytes>=0.39.0"], @@ -33,7 +34,6 @@ extra_require = { "qwen": ["tiktoken", "transformers_stream_generator"], "modelscope": ["modelscope"], "quality": ["ruff"], - "torch_npu": ["torch==2.1.0", "torch_npu==2.1.0.post3", "decorator"] } diff --git a/src/llamafactory/data/template.py b/src/llamafactory/data/template.py index 1a9f08ff..35866e31 100644 --- a/src/llamafactory/data/template.py +++ b/src/llamafactory/data/template.py @@ -822,6 +822,15 @@ _register_template( ) +_register_template( + name="telechat", + format_user=StringFormatter(slots=["<_user>{{content}}<_bot>"]), + format_system=StringFormatter(slots=["<_system>{{content}}<_end>"]), + stop_words=["<_end>"], + replace_eos=True, +) + + _register_template( name="vicuna", format_user=StringFormatter(slots=["USER: {{content}} ASSISTANT:"]), @@ -918,13 +927,3 @@ _register_template( format_user=StringFormatter(slots=[":{{content}}\n:"]), format_separator=EmptyFormatter(slots=["\n"]), ) - - -_register_template( - name="telechat", - format_user=StringFormatter(slots=["<_user>{{content}}<_bot>"]), - format_system=StringFormatter(slots=["<_system>{{content}}<_end>"]), - default_system="", - 
stop_words=["<_end>"], - replace_eos=True, -) diff --git a/src/llamafactory/extras/constants.py b/src/llamafactory/extras/constants.py index bf2388ab..5e2ee3ce 100644 --- a/src/llamafactory/extras/constants.py +++ b/src/llamafactory/extras/constants.py @@ -1095,6 +1095,26 @@ register_model_group( ) +register_model_group( + models={ + "TeleChat-7B-Chat": { + DownloadSource.DEFAULT: "Tele-AI/telechat-7B", + DownloadSource.MODELSCOPE: "TeleAI/telechat-7B", + }, + "TeleChat-12B-Chat": { + DownloadSource.DEFAULT: "Tele-AI/TeleChat-12B", + DownloadSource.MODELSCOPE: "TeleAI/TeleChat-12B", + }, + "TeleChat-12B-v2-Chat": { + DownloadSource.DEFAULT: "Tele-AI/TeleChat-12B-v2", + DownloadSource.MODELSCOPE: "TeleAI/TeleChat-12B-v2", + }, + }, + module="query,key_value", + template="telechat", +) + + register_model_group( models={ "Vicuna1.5-7B-Chat": { @@ -1355,19 +1375,3 @@ register_model_group( }, template="zephyr", ) - - -register_model_group( - models={ - "TeleChat-12B-Chat": { - DownloadSource.DEFAULT: "Tele-AI/TeleChat-12B", - DownloadSource.MODELSCOPE: "TeleAI/TeleChat-12B", - }, - "TeleChat-12B-v2-Chat": { - DownloadSource.DEFAULT: "Tele-AI/TeleChat-12B-v2", - DownloadSource.MODELSCOPE: "TeleAI/TeleChat-12B-v2", - }, - }, - module='query,key_value', - template="telechat", -) From a61ee00736c8031c798b7e64677ec6a0c4f945fa Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Wed, 29 May 2024 20:55:51 +0800 Subject: [PATCH 029/162] fix #3965 Former-commit-id: 37d15ac55d0be0ff47d6a88f07e2d823117a4a36 --- src/llamafactory/data/template.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llamafactory/data/template.py b/src/llamafactory/data/template.py index 35866e31..a117b689 100644 --- a/src/llamafactory/data/template.py +++ b/src/llamafactory/data/template.py @@ -544,7 +544,7 @@ _register_template( ) ] ), - format_system=EmptyFormatter(slots=[{"bos_token"}]), + format_system=StringFormatter(slots=[{"bos_token"}, "{{content}}"]), force_system=True, ) From 50940fe9126de429666edcdcc03764f76bca495b Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Wed, 29 May 2024 20:58:23 +0800 Subject: [PATCH 030/162] fix cohere system Former-commit-id: 5d629b29e705c8ff8dd4521719d9c0e67a3fe0a2 --- src/llamafactory/data/template.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/llamafactory/data/template.py b/src/llamafactory/data/template.py index a117b689..00527b44 100644 --- a/src/llamafactory/data/template.py +++ b/src/llamafactory/data/template.py @@ -544,8 +544,13 @@ _register_template( ) ] ), - format_system=StringFormatter(slots=[{"bos_token"}, "{{content}}"]), - force_system=True, + format_system=StringFormatter( + slots=[{"bos_token"}, "<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>{{content}}<|END_OF_TURN_TOKEN|>"] + ), + default_system=( + "You are Command-R, a brilliant, sophisticated, AI-assistant trained to assist human users " + "by providing thorough responses. You are trained by Cohere." 
+ ), ) From 219a16130a12bb523ddb78104db1732191c1f550 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Wed, 29 May 2024 23:55:38 +0800 Subject: [PATCH 031/162] better llamaboard * easily resume from checkpoint * support full and freeze checkpoints * faster ui Former-commit-id: 84cfb2452cc86b037ccddee6e833f8eb7c129fa4 --- src/llamafactory/__init__.py | 2 +- src/llamafactory/extras/constants.py | 21 +++- src/llamafactory/hparams/parser.py | 13 ++- src/llamafactory/webui/chatter.py | 33 +++--- src/llamafactory/webui/common.py | 109 +++++++++++++------- src/llamafactory/webui/components/eval.py | 4 +- src/llamafactory/webui/components/export.py | 29 +++--- src/llamafactory/webui/components/top.py | 23 ++--- src/llamafactory/webui/components/train.py | 33 +++--- src/llamafactory/webui/engine.py | 17 ++- src/llamafactory/webui/locales.py | 24 ++--- src/llamafactory/webui/manager.py | 2 +- src/llamafactory/webui/runner.py | 79 +++++++------- src/llamafactory/webui/utils.py | 107 ++++++++++++++++--- 14 files changed, 303 insertions(+), 193 deletions(-) diff --git a/src/llamafactory/__init__.py b/src/llamafactory/__init__.py index b889e268..78230937 100644 --- a/src/llamafactory/__init__.py +++ b/src/llamafactory/__init__.py @@ -1,4 +1,4 @@ -# Level: api, webui > chat, eval, train > data, model > extras, hparams +# Level: api, webui > chat, eval, train > data, model > hparams > extras from .cli import VERSION diff --git a/src/llamafactory/extras/constants.py b/src/llamafactory/extras/constants.py index 5e2ee3ce..f365016f 100644 --- a/src/llamafactory/extras/constants.py +++ b/src/llamafactory/extras/constants.py @@ -2,6 +2,19 @@ from collections import OrderedDict, defaultdict from enum import Enum from typing import Dict, Optional +from peft.utils import SAFETENSORS_WEIGHTS_NAME as SAFE_ADAPTER_WEIGHTS_NAME +from peft.utils import WEIGHTS_NAME as ADAPTER_WEIGHTS_NAME +from transformers.utils import SAFE_WEIGHTS_INDEX_NAME, SAFE_WEIGHTS_NAME, WEIGHTS_INDEX_NAME, WEIGHTS_NAME + + +CHECKPOINT_NAMES = { + SAFE_ADAPTER_WEIGHTS_NAME, + ADAPTER_WEIGHTS_NAME, + SAFE_WEIGHTS_INDEX_NAME, + SAFE_WEIGHTS_NAME, + WEIGHTS_INDEX_NAME, + WEIGHTS_NAME, +} CHOICES = ["A", "B", "C", "D"] @@ -26,9 +39,9 @@ LAYERNORM_NAMES = {"norm", "ln"} METHODS = ["full", "freeze", "lora"] -MOD_SUPPORTED_MODELS = ["bloom", "falcon", "gemma", "llama", "mistral", "mixtral", "phi", "starcoder2"] +MOD_SUPPORTED_MODELS = {"bloom", "falcon", "gemma", "llama", "mistral", "mixtral", "phi", "starcoder2"} -PEFT_METHODS = ["lora"] +PEFT_METHODS = {"lora"} RUNNING_LOG = "running_log.txt" @@ -49,9 +62,9 @@ TRAINING_STAGES = { "Pre-Training": "pt", } -STAGES_USE_PAIR_DATA = ["rm", "dpo", "orpo"] +STAGES_USE_PAIR_DATA = {"rm", "dpo"} -SUPPORTED_CLASS_FOR_S2ATTN = ["llama"] +SUPPORTED_CLASS_FOR_S2ATTN = {"llama"} V_HEAD_WEIGHTS_NAME = "value_head.bin" diff --git a/src/llamafactory/hparams/parser.py b/src/llamafactory/hparams/parser.py index adb206f8..b3c673be 100644 --- a/src/llamafactory/hparams/parser.py +++ b/src/llamafactory/hparams/parser.py @@ -11,6 +11,7 @@ from transformers.trainer_utils import get_last_checkpoint from transformers.utils import is_torch_bf16_gpu_available from transformers.utils.versions import require_version +from ..extras.constants import CHECKPOINT_NAMES from ..extras.logging import get_logger from ..extras.misc import check_dependencies, get_current_device from .data_args import DataArguments @@ -255,13 +256,15 @@ def get_train_args(args: Optional[Dict[str, Any]] = None) -> _TRAIN_CLS: and 
can_resume_from_checkpoint ): last_checkpoint = get_last_checkpoint(training_args.output_dir) + if last_checkpoint is None and any( + os.path.isfile(os.path.join(training_args.output_dir, name)) for name in CHECKPOINT_NAMES + ): + raise ValueError("Output directory already exists and is not empty. Please set `overwrite_output_dir`.") + if last_checkpoint is not None: training_args.resume_from_checkpoint = last_checkpoint - logger.info( - "Resuming training from {}. Change `output_dir` or use `overwrite_output_dir` to avoid.".format( - training_args.resume_from_checkpoint - ) - ) + logger.info("Resuming training from {}.".format(training_args.resume_from_checkpoint)) + logger.info("Change `output_dir` or use `overwrite_output_dir` to avoid.") if ( finetuning_args.stage in ["rm", "ppo"] diff --git a/src/llamafactory/webui/chatter.py b/src/llamafactory/webui/chatter.py index a92f6ef7..c82710d3 100644 --- a/src/llamafactory/webui/chatter.py +++ b/src/llamafactory/webui/chatter.py @@ -6,6 +6,7 @@ from numpy.typing import NDArray from ..chat import ChatModel from ..data import Role +from ..extras.constants import PEFT_METHODS from ..extras.misc import torch_gc from ..extras.packages import is_gradio_available from .common import get_save_dir @@ -44,13 +45,14 @@ class WebChatModel(ChatModel): def load_model(self, data) -> Generator[str, None, None]: get = lambda elem_id: data[self.manager.get_elem_by_id(elem_id)] - lang = get("top.lang") + lang, model_name, model_path = get("top.lang"), get("top.model_name"), get("top.model_path") + finetuning_type, checkpoint_path = get("top.finetuning_type"), get("top.checkpoint_path") error = "" if self.loaded: error = ALERTS["err_exists"][lang] - elif not get("top.model_name"): + elif not model_name: error = ALERTS["err_no_model"][lang] - elif not get("top.model_path"): + elif not model_path: error = ALERTS["err_no_path"][lang] elif self.demo_mode: error = ALERTS["err_demo"][lang] @@ -60,21 +62,10 @@ class WebChatModel(ChatModel): yield error return - if get("top.adapter_path"): - adapter_name_or_path = ",".join( - [ - get_save_dir(get("top.model_name"), get("top.finetuning_type"), adapter) - for adapter in get("top.adapter_path") - ] - ) - else: - adapter_name_or_path = None - yield ALERTS["info_loading"][lang] args = dict( - model_name_or_path=get("top.model_path"), - adapter_name_or_path=adapter_name_or_path, - finetuning_type=get("top.finetuning_type"), + model_name_or_path=model_path, + finetuning_type=finetuning_type, quantization_bit=int(get("top.quantization_bit")) if get("top.quantization_bit") in ["8", "4"] else None, template=get("top.template"), flash_attn="fa2" if get("top.booster") == "flashattn2" else "auto", @@ -83,8 +74,16 @@ class WebChatModel(ChatModel): rope_scaling=get("top.rope_scaling") if get("top.rope_scaling") in ["linear", "dynamic"] else None, infer_backend=get("infer.infer_backend"), ) - super().__init__(args) + if checkpoint_path: + if finetuning_type in PEFT_METHODS: # list + args["adapter_name_or_path"] = ",".join( + [get_save_dir(model_name, finetuning_type, adapter) for adapter in checkpoint_path] + ) + else: # str + args["model_name_or_path"] = get_save_dir(model_name, finetuning_type, checkpoint_path) + + super().__init__(args) yield ALERTS["info_loaded"][lang] def unload_model(self, data) -> Generator[str, None, None]: diff --git a/src/llamafactory/webui/common.py b/src/llamafactory/webui/common.py index ea82fd88..62004bce 100644 --- a/src/llamafactory/webui/common.py +++ b/src/llamafactory/webui/common.py @@ -1,12 +1,12 
@@ import json import os from collections import defaultdict -from typing import Any, Dict, Optional +from typing import Any, Dict, Optional, Tuple -from peft.utils import SAFETENSORS_WEIGHTS_NAME, WEIGHTS_NAME from yaml import safe_dump, safe_load from ..extras.constants import ( + CHECKPOINT_NAMES, DATA_CONFIG, DEFAULT_MODULE, DEFAULT_TEMPLATE, @@ -29,7 +29,6 @@ if is_gradio_available(): logger = get_logger(__name__) -ADAPTER_NAMES = {WEIGHTS_NAME, SAFETENSORS_WEIGHTS_NAME} DEFAULT_CACHE_DIR = "cache" DEFAULT_CONFIG_DIR = "config" DEFAULT_DATA_DIR = "data" @@ -38,19 +37,31 @@ USER_CONFIG = "user_config.yaml" def get_save_dir(*paths: str) -> os.PathLike: + r""" + Gets the path to saved model checkpoints. + """ paths = (path.replace(os.path.sep, "").replace(" ", "").strip() for path in paths) return os.path.join(DEFAULT_SAVE_DIR, *paths) def get_config_path() -> os.PathLike: + r""" + Gets the path to user config. + """ return os.path.join(DEFAULT_CACHE_DIR, USER_CONFIG) -def get_save_path(config_path: str) -> os.PathLike: +def get_arg_save_path(config_path: str) -> os.PathLike: + r""" + Gets the path to saved arguments. + """ return os.path.join(DEFAULT_CONFIG_DIR, config_path) def load_config() -> Dict[str, Any]: + r""" + Loads user config if exists. + """ try: with open(get_config_path(), "r", encoding="utf-8") as f: return safe_load(f) @@ -59,6 +70,9 @@ def load_config() -> Dict[str, Any]: def save_config(lang: str, model_name: Optional[str] = None, model_path: Optional[str] = None) -> None: + r""" + Saves user config. + """ os.makedirs(DEFAULT_CACHE_DIR, exist_ok=True) user_config = load_config() user_config["lang"] = lang or user_config["lang"] @@ -69,23 +83,10 @@ def save_config(lang: str, model_name: Optional[str] = None, model_path: Optiona safe_dump(user_config, f) -def load_args(config_path: str) -> Optional[Dict[str, Any]]: - try: - with open(get_save_path(config_path), "r", encoding="utf-8") as f: - return safe_load(f) - except Exception: - return None - - -def save_args(config_path: str, config_dict: Dict[str, Any]) -> str: - os.makedirs(DEFAULT_CONFIG_DIR, exist_ok=True) - with open(get_save_path(config_path), "w", encoding="utf-8") as f: - safe_dump(config_dict, f) - - return str(get_save_path(config_path)) - - -def get_model_path(model_name: str) -> str: +def get_model_path(model_name: str) -> Optional[str]: + r""" + Gets the model path according to the model name. + """ user_config = load_config() path_dict: Dict[DownloadSource, str] = SUPPORTED_MODELS.get(model_name, defaultdict(str)) model_path = user_config["path_dict"].get(model_name, None) or path_dict.get(DownloadSource.DEFAULT, None) @@ -99,40 +100,71 @@ def get_model_path(model_name: str) -> str: def get_prefix(model_name: str) -> str: + r""" + Gets the prefix of the model name to obtain the model family. + """ return model_name.split("-")[0] +def get_model_info(model_name: str) -> Tuple[str, str, bool]: + r""" + Gets the necessary information of this model. + + Returns: + model_path (str) + template (str) + visual (bool) + """ + return get_model_path(model_name), get_template(model_name), get_visual(model_name) + + def get_module(model_name: str) -> str: - return DEFAULT_MODULE.get(get_prefix(model_name), "q_proj,v_proj") + r""" + Gets the LoRA modules of this model. + """ + return DEFAULT_MODULE.get(get_prefix(model_name), "all") def get_template(model_name: str) -> str: + r""" + Gets the template name if the model is a chat model. 
+ """ if model_name and model_name.endswith("Chat") and get_prefix(model_name) in DEFAULT_TEMPLATE: return DEFAULT_TEMPLATE[get_prefix(model_name)] return "default" def get_visual(model_name: str) -> bool: + r""" + Judges if the model is a vision language model. + """ return get_prefix(model_name) in VISION_MODELS -def list_adapters(model_name: str, finetuning_type: str) -> "gr.Dropdown": - if finetuning_type not in PEFT_METHODS: - return gr.Dropdown(value=[], choices=[], interactive=False) - - adapters = [] - if model_name and finetuning_type == "lora": +def list_checkpoints(model_name: str, finetuning_type: str) -> "gr.Dropdown": + r""" + Lists all available checkpoints. + """ + checkpoints = [] + if model_name: save_dir = get_save_dir(model_name, finetuning_type) if save_dir and os.path.isdir(save_dir): - for adapter in os.listdir(save_dir): - if os.path.isdir(os.path.join(save_dir, adapter)) and any( - os.path.isfile(os.path.join(save_dir, adapter, name)) for name in ADAPTER_NAMES + for checkpoint in os.listdir(save_dir): + if os.path.isdir(os.path.join(save_dir, checkpoint)) and any( + os.path.isfile(os.path.join(save_dir, checkpoint, name)) for name in CHECKPOINT_NAMES ): - adapters.append(adapter) - return gr.Dropdown(value=[], choices=adapters, interactive=True) + checkpoints.append(checkpoint) + + if finetuning_type in PEFT_METHODS: + return gr.Dropdown(value=[], choices=checkpoints, multiselect=True) + else: + return gr.Dropdown(value=None, choices=checkpoints, multiselect=False) def load_dataset_info(dataset_dir: str) -> Dict[str, Dict[str, Any]]: + r""" + Loads dataset_info.json. + """ if dataset_dir == "ONLINE": logger.info("dataset_dir is ONLINE, using online dataset.") return {} @@ -145,12 +177,11 @@ def load_dataset_info(dataset_dir: str) -> Dict[str, Dict[str, Any]]: return {} -def list_dataset(dataset_dir: str = None, training_stage: str = list(TRAINING_STAGES.keys())[0]) -> "gr.Dropdown": +def list_datasets(dataset_dir: str = None, training_stage: str = list(TRAINING_STAGES.keys())[0]) -> "gr.Dropdown": + r""" + Lists all available datasets in the dataset dir for the training stage. 
+ """ dataset_info = load_dataset_info(dataset_dir if dataset_dir is not None else DEFAULT_DATA_DIR) ranking = TRAINING_STAGES[training_stage] in STAGES_USE_PAIR_DATA datasets = [k for k, v in dataset_info.items() if v.get("ranking", False) == ranking] - return gr.Dropdown(value=[], choices=datasets) - - -def autoset_packing(training_stage: str = list(TRAINING_STAGES.keys())[0]) -> "gr.Button": - return gr.Button(value=(TRAINING_STAGES[training_stage] == "pt")) + return gr.Dropdown(choices=datasets) diff --git a/src/llamafactory/webui/components/eval.py b/src/llamafactory/webui/components/eval.py index 8b70283b..99215fc2 100644 --- a/src/llamafactory/webui/components/eval.py +++ b/src/llamafactory/webui/components/eval.py @@ -1,7 +1,7 @@ from typing import TYPE_CHECKING, Dict from ...extras.packages import is_gradio_available -from ..common import DEFAULT_DATA_DIR, list_dataset +from ..common import DEFAULT_DATA_DIR, list_datasets from .data import create_preview_box @@ -74,6 +74,6 @@ def create_eval_tab(engine: "Engine") -> Dict[str, "Component"]: stop_btn.click(engine.runner.set_abort) resume_btn.change(engine.runner.monitor, outputs=output_elems, concurrency_limit=None) - dataset_dir.change(list_dataset, [dataset_dir], [dataset], queue=False) + dataset.focus(list_datasets, [dataset_dir], [dataset], queue=False) return elem_dict diff --git a/src/llamafactory/webui/components/export.py b/src/llamafactory/webui/components/export.py index 134b77e0..2f354011 100644 --- a/src/llamafactory/webui/components/export.py +++ b/src/llamafactory/webui/components/export.py @@ -1,5 +1,6 @@ -from typing import TYPE_CHECKING, Dict, Generator, List +from typing import TYPE_CHECKING, Dict, Generator, List, Union +from ...extras.constants import PEFT_METHODS from ...extras.misc import torch_gc from ...extras.packages import is_gradio_available from ...train.tuner import export_model @@ -24,8 +25,8 @@ def save_model( lang: str, model_name: str, model_path: str, - adapter_path: List[str], finetuning_type: str, + checkpoint_path: Union[str, List[str]], template: str, visual_inputs: bool, export_size: int, @@ -45,9 +46,9 @@ def save_model( error = ALERTS["err_no_export_dir"][lang] elif export_quantization_bit in GPTQ_BITS and not export_quantization_dataset: error = ALERTS["err_no_dataset"][lang] - elif export_quantization_bit not in GPTQ_BITS and not adapter_path: + elif export_quantization_bit not in GPTQ_BITS and not checkpoint_path: error = ALERTS["err_no_adapter"][lang] - elif export_quantization_bit in GPTQ_BITS and adapter_path: + elif export_quantization_bit in GPTQ_BITS and isinstance(checkpoint_path, list): error = ALERTS["err_gptq_lora"][lang] if error: @@ -55,16 +56,8 @@ def save_model( yield error return - if adapter_path: - adapter_name_or_path = ",".join( - [get_save_dir(model_name, finetuning_type, adapter) for adapter in adapter_path] - ) - else: - adapter_name_or_path = None - args = dict( model_name_or_path=model_path, - adapter_name_or_path=adapter_name_or_path, finetuning_type=finetuning_type, template=template, visual_inputs=visual_inputs, @@ -77,6 +70,14 @@ def save_model( export_legacy_format=export_legacy_format, ) + if checkpoint_path: + if finetuning_type in PEFT_METHODS: # list + args["adapter_name_or_path"] = ",".join( + [get_save_dir(model_name, finetuning_type, adapter) for adapter in checkpoint_path] + ) + else: # str + args["model_name_or_path"] = get_save_dir(model_name, finetuning_type, checkpoint_path) + yield ALERTS["info_exporting"][lang] export_model(args) torch_gc() @@ 
-86,7 +87,7 @@ def save_model( def create_export_tab(engine: "Engine") -> Dict[str, "Component"]: with gr.Row(): export_size = gr.Slider(minimum=1, maximum=100, value=1, step=1) - export_quantization_bit = gr.Dropdown(choices=["none", "8", "4", "3", "2"], value="none") + export_quantization_bit = gr.Dropdown(choices=["none"] + GPTQ_BITS, value="none") export_quantization_dataset = gr.Textbox(value="data/c4_demo.json") export_device = gr.Radio(choices=["cpu", "cuda"], value="cpu") export_legacy_format = gr.Checkbox() @@ -104,8 +105,8 @@ def create_export_tab(engine: "Engine") -> Dict[str, "Component"]: engine.manager.get_elem_by_id("top.lang"), engine.manager.get_elem_by_id("top.model_name"), engine.manager.get_elem_by_id("top.model_path"), - engine.manager.get_elem_by_id("top.adapter_path"), engine.manager.get_elem_by_id("top.finetuning_type"), + engine.manager.get_elem_by_id("top.checkpoint_path"), engine.manager.get_elem_by_id("top.template"), engine.manager.get_elem_by_id("top.visual_inputs"), export_size, diff --git a/src/llamafactory/webui/components/top.py b/src/llamafactory/webui/components/top.py index a75a4d62..ca093584 100644 --- a/src/llamafactory/webui/components/top.py +++ b/src/llamafactory/webui/components/top.py @@ -3,7 +3,7 @@ from typing import TYPE_CHECKING, Dict from ...data import templates from ...extras.constants import METHODS, SUPPORTED_MODELS from ...extras.packages import is_gradio_available -from ..common import get_model_path, get_template, get_visual, list_adapters, save_config +from ..common import get_model_info, list_checkpoints, save_config from ..utils import can_quantize @@ -25,8 +25,7 @@ def create_top() -> Dict[str, "Component"]: with gr.Row(): finetuning_type = gr.Dropdown(choices=METHODS, value="lora", scale=1) - adapter_path = gr.Dropdown(multiselect=True, allow_custom_value=True, scale=5) - refresh_btn = gr.Button(scale=1) + checkpoint_path = gr.Dropdown(multiselect=True, allow_custom_value=True, scale=6) with gr.Accordion(open=False) as advanced_tab: with gr.Row(): @@ -36,27 +35,17 @@ def create_top() -> Dict[str, "Component"]: booster = gr.Radio(choices=["none", "flashattn2", "unsloth"], value="none", scale=3) visual_inputs = gr.Checkbox(scale=1) - model_name.change(list_adapters, [model_name, finetuning_type], [adapter_path], queue=False).then( - get_model_path, [model_name], [model_path], queue=False - ).then(get_template, [model_name], [template], queue=False).then( - get_visual, [model_name], [visual_inputs], queue=False - ) # do not save config since the below line will save - + model_name.change(get_model_info, [model_name], [model_path, template, visual_inputs], queue=False) model_path.change(save_config, inputs=[lang, model_name, model_path], queue=False) - - finetuning_type.change(list_adapters, [model_name, finetuning_type], [adapter_path], queue=False).then( - can_quantize, [finetuning_type], [quantization_bit], queue=False - ) - - refresh_btn.click(list_adapters, [model_name, finetuning_type], [adapter_path], queue=False) + finetuning_type.change(can_quantize, [finetuning_type], [quantization_bit], queue=False) + checkpoint_path.focus(list_checkpoints, [model_name, finetuning_type], [checkpoint_path], queue=False) return dict( lang=lang, model_name=model_name, model_path=model_path, finetuning_type=finetuning_type, - adapter_path=adapter_path, - refresh_btn=refresh_btn, + checkpoint_path=checkpoint_path, advanced_tab=advanced_tab, quantization_bit=quantization_bit, template=template, diff --git 
a/src/llamafactory/webui/components/train.py b/src/llamafactory/webui/components/train.py index 8db5c2ba..6f742bb1 100644 --- a/src/llamafactory/webui/components/train.py +++ b/src/llamafactory/webui/components/train.py @@ -5,8 +5,9 @@ from transformers.trainer_utils import SchedulerType from ...extras.constants import TRAINING_STAGES from ...extras.misc import get_device_count from ...extras.packages import is_gradio_available -from ..common import DEFAULT_DATA_DIR, autoset_packing, list_adapters, list_dataset -from ..components.data import create_preview_box +from ..common import DEFAULT_DATA_DIR, list_checkpoints, list_datasets +from ..utils import change_stage, check_output_dir, list_output_dirs +from .data import create_preview_box if is_gradio_available(): @@ -256,11 +257,12 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]: with gr.Row(): with gr.Column(scale=3): with gr.Row(): - output_dir = gr.Textbox() + initial_dir = gr.Textbox(visible=False, interactive=False) + output_dir = gr.Dropdown(allow_custom_value=True) config_path = gr.Textbox() with gr.Row(): - device_count = gr.Textbox(value=str(get_device_count()), interactive=False) + device_count = gr.Textbox(value=str(get_device_count() or 1), interactive=False) ds_stage = gr.Dropdown(choices=["none", "2", "3"], value="none") ds_offload = gr.Checkbox() @@ -282,6 +284,7 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]: arg_load_btn=arg_load_btn, start_btn=start_btn, stop_btn=stop_btn, + initial_dir=initial_dir, output_dir=output_dir, config_path=config_path, device_count=device_count, @@ -295,24 +298,24 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]: ) output_elems = [output_box, progress_bar, loss_viewer] + lang = engine.manager.get_elem_by_id("top.lang") + model_name = engine.manager.get_elem_by_id("top.model_name") + finetuning_type = engine.manager.get_elem_by_id("top.finetuning_type") + cmd_preview_btn.click(engine.runner.preview_train, input_elems, output_elems, concurrency_limit=None) arg_save_btn.click(engine.runner.save_args, input_elems, output_elems, concurrency_limit=None) arg_load_btn.click( - engine.runner.load_args, - [engine.manager.get_elem_by_id("top.lang"), config_path], - list(input_elems) + [output_box], - concurrency_limit=None, + engine.runner.load_args, [lang, config_path], list(input_elems) + [output_box], concurrency_limit=None ) start_btn.click(engine.runner.run_train, input_elems, output_elems) stop_btn.click(engine.runner.set_abort) resume_btn.change(engine.runner.monitor, outputs=output_elems, concurrency_limit=None) - dataset_dir.change(list_dataset, [dataset_dir, training_stage], [dataset], queue=False) - training_stage.change(list_dataset, [dataset_dir, training_stage], [dataset], queue=False).then( - list_adapters, - [engine.manager.get_elem_by_id("top.model_name"), engine.manager.get_elem_by_id("top.finetuning_type")], - [reward_model], - queue=False, - ).then(autoset_packing, [training_stage], [packing], queue=False) + training_stage.change(change_stage, [training_stage], [dataset, packing], queue=False) + dataset.focus(list_datasets, [dataset_dir, training_stage], [dataset], queue=False) + reward_model.focus(list_checkpoints, [model_name, finetuning_type], [reward_model], queue=False) + output_dir.change( + list_output_dirs, [model_name, finetuning_type, initial_dir], [output_dir], concurrency_limit=None + ).then(check_output_dir, inputs=[lang, model_name, finetuning_type, output_dir], concurrency_limit=None) return elem_dict diff --git 
a/src/llamafactory/webui/engine.py b/src/llamafactory/webui/engine.py index fb568737..00877115 100644 --- a/src/llamafactory/webui/engine.py +++ b/src/llamafactory/webui/engine.py @@ -1,11 +1,11 @@ from typing import TYPE_CHECKING, Any, Dict from .chatter import WebChatModel -from .common import get_model_path, list_dataset, load_config +from .common import load_config from .locales import LOCALES from .manager import Manager from .runner import Runner -from .utils import get_time, save_ds_config +from .utils import create_ds_config, get_time if TYPE_CHECKING: @@ -20,7 +20,7 @@ class Engine: self.runner = Runner(self.manager, demo_mode) self.chatter = WebChatModel(self.manager, demo_mode, lazy_init=(not pure_chat)) if not demo_mode: - save_ds_config() + create_ds_config() def _update_component(self, input_dict: Dict[str, Dict[str, Any]]) -> Dict["Component", "Component"]: r""" @@ -40,16 +40,15 @@ class Engine: init_dict = {"top.lang": {"value": lang}, "infer.chat_box": {"visible": self.chatter.loaded}} if not self.pure_chat: - init_dict["train.dataset"] = {"choices": list_dataset().choices} - init_dict["eval.dataset"] = {"choices": list_dataset().choices} - init_dict["train.output_dir"] = {"value": "train_{}".format(get_time())} - init_dict["train.config_path"] = {"value": "{}.yaml".format(get_time())} - init_dict["eval.output_dir"] = {"value": "eval_{}".format(get_time())} + current_time = get_time() + init_dict["train.initial_dir"] = {"value": "train_{}".format(current_time)} + init_dict["train.output_dir"] = {"value": "train_{}".format(current_time)} + init_dict["train.config_path"] = {"value": "{}.yaml".format(current_time)} + init_dict["eval.output_dir"] = {"value": "eval_{}".format(current_time)} init_dict["infer.image_box"] = {"visible": False} if user_config.get("last_model", None): init_dict["top.model_name"] = {"value": user_config["last_model"]} - init_dict["top.model_path"] = {"value": get_model_path(user_config["last_model"])} yield self._update_component(init_dict) diff --git a/src/llamafactory/webui/locales.py b/src/llamafactory/webui/locales.py index 4657f9a3..5b11c853 100644 --- a/src/llamafactory/webui/locales.py +++ b/src/llamafactory/webui/locales.py @@ -46,26 +46,15 @@ LOCALES = { "label": "微调方法", }, }, - "adapter_path": { + "checkpoint_path": { "en": { - "label": "Adapter path", + "label": "Checkpoint path", }, "ru": { - "label": "Путь к адаптеру", + "label": "Путь контрольной точки", }, "zh": { - "label": "适配器路径", - }, - }, - "refresh_btn": { - "en": { - "value": "Refresh adapters", - }, - "ru": { - "value": "Обновить адаптеры", - }, - "zh": { - "value": "刷新适配器", + "label": "检查点路径", }, }, "advanced_tab": { @@ -1531,6 +1520,11 @@ ALERTS = { "ru": "Среда CUDA не обнаружена.", "zh": "未检测到 CUDA 环境。", }, + "warn_output_dir_exists": { + "en": "Output dir already exists, will resume training from here.", + "ru": "Выходной каталог уже существует, обучение будет продолжено отсюда.", + "zh": "输出目录已存在,将从该断点恢复训练。", + }, "info_aborting": { "en": "Aborted, wait for terminating...", "ru": "Прервано, ожидание завершения...", diff --git a/src/llamafactory/webui/manager.py b/src/llamafactory/webui/manager.py index f65fa804..326fdb8d 100644 --- a/src/llamafactory/webui/manager.py +++ b/src/llamafactory/webui/manager.py @@ -55,7 +55,7 @@ class Manager: self._id_to_elem["top.model_name"], self._id_to_elem["top.model_path"], self._id_to_elem["top.finetuning_type"], - self._id_to_elem["top.adapter_path"], + self._id_to_elem["top.checkpoint_path"], self._id_to_elem["top.quantization_bit"], 
self._id_to_elem["top.template"], self._id_to_elem["top.rope_scaling"], diff --git a/src/llamafactory/webui/runner.py b/src/llamafactory/webui/runner.py index c2e46e97..7a305d62 100644 --- a/src/llamafactory/webui/runner.py +++ b/src/llamafactory/webui/runner.py @@ -7,12 +7,12 @@ from typing import TYPE_CHECKING, Any, Dict, Generator, Optional import psutil from transformers.trainer import TRAINING_ARGS_NAME -from ..extras.constants import TRAINING_STAGES +from ..extras.constants import PEFT_METHODS, TRAINING_STAGES from ..extras.misc import is_gpu_or_npu_available, torch_gc from ..extras.packages import is_gradio_available -from .common import DEFAULT_CACHE_DIR, get_module, get_save_dir, load_args, load_config, save_args +from .common import DEFAULT_CACHE_DIR, get_module, get_save_dir, load_config from .locales import ALERTS -from .utils import gen_cmd, get_eval_results, get_trainer_info, save_cmd +from .utils import gen_cmd, get_eval_results, get_trainer_info, load_args, save_args, save_cmd if is_gradio_available(): @@ -85,26 +85,16 @@ class Runner: def _parse_train_args(self, data: Dict["Component", Any]) -> Dict[str, Any]: get = lambda elem_id: data[self.manager.get_elem_by_id(elem_id)] + model_name, finetuning_type = get("top.model_name"), get("top.finetuning_type") user_config = load_config() - if get("top.adapter_path"): - adapter_name_or_path = ",".join( - [ - get_save_dir(get("top.model_name"), get("top.finetuning_type"), adapter) - for adapter in get("top.adapter_path") - ] - ) - else: - adapter_name_or_path = None - args = dict( stage=TRAINING_STAGES[get("train.training_stage")], do_train=True, model_name_or_path=get("top.model_path"), - adapter_name_or_path=adapter_name_or_path, cache_dir=user_config.get("cache_dir", None), preprocessing_num_workers=16, - finetuning_type=get("top.finetuning_type"), + finetuning_type=finetuning_type, quantization_bit=int(get("top.quantization_bit")) if get("top.quantization_bit") in ["8", "4"] else None, template=get("top.template"), rope_scaling=get("top.rope_scaling") if get("top.rope_scaling") in ["linear", "dynamic"] else None, @@ -134,13 +124,23 @@ class Runner: report_to="all" if get("train.report_to") else "none", use_galore=get("train.use_galore"), use_badam=get("train.use_badam"), - output_dir=get_save_dir(get("top.model_name"), get("top.finetuning_type"), get("train.output_dir")), + output_dir=get_save_dir(model_name, finetuning_type, get("train.output_dir")), fp16=(get("train.compute_type") == "fp16"), bf16=(get("train.compute_type") == "bf16"), pure_bf16=(get("train.compute_type") == "pure_bf16"), plot_loss=True, + ddp_timeout=180000000, ) + # checkpoints + if get("top.checkpoint_path"): + if finetuning_type in PEFT_METHODS: # list + args["adapter_name_or_path"] = ",".join( + [get_save_dir(model_name, finetuning_type, adapter) for adapter in get("top.checkpoint_path")] + ) + else: # str + args["model_name_or_path"] = get_save_dir(model_name, finetuning_type, get("top.checkpoint_path")) + # freeze config if args["finetuning_type"] == "freeze": args["freeze_trainable_layers"] = get("train.freeze_trainable_layers") @@ -156,7 +156,7 @@ class Runner: args["create_new_adapter"] = get("train.create_new_adapter") args["use_rslora"] = get("train.use_rslora") args["use_dora"] = get("train.use_dora") - args["lora_target"] = get("train.lora_target") or get_module(get("top.model_name")) + args["lora_target"] = get("train.lora_target") or get_module(model_name) args["additional_target"] = get("train.additional_target") or None if 
args["use_llama_pro"]: @@ -164,13 +164,14 @@ class Runner: # rlhf config if args["stage"] == "ppo": - args["reward_model"] = ",".join( - [ - get_save_dir(get("top.model_name"), get("top.finetuning_type"), adapter) - for adapter in get("train.reward_model") - ] - ) - args["reward_model_type"] = "lora" if args["finetuning_type"] == "lora" else "full" + if finetuning_type in PEFT_METHODS: + args["reward_model"] = ",".join( + [get_save_dir(model_name, finetuning_type, adapter) for adapter in get("train.reward_model")] + ) + else: + args["reward_model"] = get_save_dir(model_name, finetuning_type, get("train.reward_model")) + + args["reward_model_type"] = "lora" if finetuning_type == "lora" else "full" args["ppo_score_norm"] = get("train.ppo_score_norm") args["ppo_whiten_rewards"] = get("train.ppo_whiten_rewards") args["top_k"] = 0 @@ -211,25 +212,15 @@ class Runner: def _parse_eval_args(self, data: Dict["Component", Any]) -> Dict[str, Any]: get = lambda elem_id: data[self.manager.get_elem_by_id(elem_id)] + model_name, finetuning_type = get("top.model_name"), get("top.finetuning_type") user_config = load_config() - if get("top.adapter_path"): - adapter_name_or_path = ",".join( - [ - get_save_dir(get("top.model_name"), get("top.finetuning_type"), adapter) - for adapter in get("top.adapter_path") - ] - ) - else: - adapter_name_or_path = None - args = dict( stage="sft", model_name_or_path=get("top.model_path"), - adapter_name_or_path=adapter_name_or_path, cache_dir=user_config.get("cache_dir", None), preprocessing_num_workers=16, - finetuning_type=get("top.finetuning_type"), + finetuning_type=finetuning_type, quantization_bit=int(get("top.quantization_bit")) if get("top.quantization_bit") in ["8", "4"] else None, template=get("top.template"), rope_scaling=get("top.rope_scaling") if get("top.rope_scaling") in ["linear", "dynamic"] else None, @@ -245,7 +236,7 @@ class Runner: max_new_tokens=get("eval.max_new_tokens"), top_p=get("eval.top_p"), temperature=get("eval.temperature"), - output_dir=get_save_dir(get("top.model_name"), get("top.finetuning_type"), get("eval.output_dir")), + output_dir=get_save_dir(model_name, finetuning_type, get("eval.output_dir")), ) if get("eval.predict"): @@ -253,6 +244,14 @@ class Runner: else: args["do_eval"] = True + if get("top.checkpoint_path"): + if finetuning_type in PEFT_METHODS: # list + args["adapter_name_or_path"] = ",".join( + [get_save_dir(model_name, finetuning_type, adapter) for adapter in get("top.checkpoint_path")] + ) + else: # str + args["model_name_or_path"] = get_save_dir(model_name, finetuning_type, get("top.checkpoint_path")) + return args def _preview(self, data: Dict["Component", Any], do_train: bool) -> Generator[Dict["Component", str], None, None]: @@ -296,9 +295,7 @@ class Runner: self.running = True get = lambda elem_id: self.running_data[self.manager.get_elem_by_id(elem_id)] - lang = get("top.lang") - model_name = get("top.model_name") - finetuning_type = get("top.finetuning_type") + lang, model_name, finetuning_type = get("top.lang"), get("top.model_name"), get("top.finetuning_type") output_dir = get("{}.output_dir".format("train" if self.do_train else "eval")) output_path = get_save_dir(model_name, finetuning_type, output_dir) @@ -356,7 +353,7 @@ class Runner: config_dict: Dict[str, Any] = {} lang = data[self.manager.get_elem_by_id("top.lang")] config_path = data[self.manager.get_elem_by_id("train.config_path")] - skip_ids = ["top.lang", "top.model_path", "train.output_dir", "train.config_path"] + skip_ids = ["top.lang", "top.model_path", 
"train.output_dir", "train.config_path", "train.device_count"] for elem, value in data.items(): elem_id = self.manager.get_id_by_elem(elem) if elem_id not in skip_ids: diff --git a/src/llamafactory/webui/utils.py b/src/llamafactory/webui/utils.py index 654d1f8d..09cefa0e 100644 --- a/src/llamafactory/webui/utils.py +++ b/src/llamafactory/webui/utils.py @@ -3,12 +3,13 @@ import os from datetime import datetime from typing import Any, Dict, List, Optional, Tuple -from yaml import safe_dump +from transformers.trainer_utils import get_last_checkpoint +from yaml import safe_dump, safe_load -from ..extras.constants import RUNNING_LOG, TRAINER_CONFIG, TRAINER_LOG +from ..extras.constants import PEFT_METHODS, RUNNING_LOG, TRAINER_CONFIG, TRAINER_LOG, TRAINING_STAGES from ..extras.packages import is_gradio_available, is_matplotlib_available from ..extras.ploting import gen_loss_plot -from .common import DEFAULT_CACHE_DIR +from .common import DEFAULT_CACHE_DIR, DEFAULT_CONFIG_DIR, get_arg_save_path, get_save_dir from .locales import ALERTS @@ -17,13 +18,26 @@ if is_gradio_available(): def can_quantize(finetuning_type: str) -> "gr.Dropdown": - if finetuning_type != "lora": + r""" + Judges if the quantization is available in this finetuning type. + """ + if finetuning_type not in PEFT_METHODS: return gr.Dropdown(value="none", interactive=False) else: return gr.Dropdown(interactive=True) +def change_stage(training_stage: str = list(TRAINING_STAGES.keys())[0]) -> Tuple[List[str], bool]: + r""" + Modifys states after changing the training stage. + """ + return [], TRAINING_STAGES[training_stage] == "pt" + + def check_json_schema(text: str, lang: str) -> None: + r""" + Checks if the json schema is valid. + """ try: tools = json.loads(text) if tools: @@ -38,11 +52,17 @@ def check_json_schema(text: str, lang: str) -> None: def clean_cmd(args: Dict[str, Any]) -> Dict[str, Any]: + r""" + Removes args with NoneType or False or empty string value. + """ no_skip_keys = ["packing"] return {k: v for k, v in args.items() if (k in no_skip_keys) or (v is not None and v is not False and v != "")} def gen_cmd(args: Dict[str, Any]) -> str: + r""" + Generates arguments for previewing. + """ cmd_lines = ["llamafactory-cli train "] for k, v in clean_cmd(args).items(): cmd_lines.append(" --{} {} ".format(k, str(v))) @@ -52,17 +72,39 @@ def gen_cmd(args: Dict[str, Any]) -> str: return cmd_text +def save_cmd(args: Dict[str, Any]) -> str: + r""" + Saves arguments to launch training. + """ + output_dir = args["output_dir"] + os.makedirs(output_dir, exist_ok=True) + + with open(os.path.join(output_dir, TRAINER_CONFIG), "w", encoding="utf-8") as f: + safe_dump(clean_cmd(args), f) + + return os.path.join(output_dir, TRAINER_CONFIG) + + def get_eval_results(path: os.PathLike) -> str: + r""" + Gets scores after evaluation. + """ with open(path, "r", encoding="utf-8") as f: result = json.dumps(json.load(f), indent=4) return "```json\n{}\n```\n".format(result) def get_time() -> str: + r""" + Gets current date and time. + """ return datetime.now().strftime(r"%Y-%m-%d-%H-%M-%S") def get_trainer_info(output_path: os.PathLike, do_train: bool) -> Tuple[str, "gr.Slider", Optional["gr.Plot"]]: + r""" + Gets training infomation for monitor. 
+ """ running_log = "" running_progress = gr.Slider(visible=False) running_loss = None @@ -96,17 +138,56 @@ def get_trainer_info(output_path: os.PathLike, do_train: bool) -> Tuple[str, "gr return running_log, running_progress, running_loss -def save_cmd(args: Dict[str, Any]) -> str: - output_dir = args["output_dir"] - os.makedirs(output_dir, exist_ok=True) - - with open(os.path.join(output_dir, TRAINER_CONFIG), "w", encoding="utf-8") as f: - safe_dump(clean_cmd(args), f) - - return os.path.join(output_dir, TRAINER_CONFIG) +def load_args(config_path: str) -> Optional[Dict[str, Any]]: + r""" + Loads saved arguments. + """ + try: + with open(get_arg_save_path(config_path), "r", encoding="utf-8") as f: + return safe_load(f) + except Exception: + return None -def save_ds_config() -> None: +def save_args(config_path: str, config_dict: Dict[str, Any]) -> str: + r""" + Saves arguments. + """ + os.makedirs(DEFAULT_CONFIG_DIR, exist_ok=True) + with open(get_arg_save_path(config_path), "w", encoding="utf-8") as f: + safe_dump(config_dict, f) + + return str(get_arg_save_path(config_path)) + + +def list_output_dirs(model_name: str, finetuning_type: str, initial_dir: str) -> "gr.Dropdown": + r""" + Lists all the directories that can resume from. + """ + output_dirs = [initial_dir] + if model_name: + save_dir = get_save_dir(model_name, finetuning_type) + if save_dir and os.path.isdir(save_dir): + for folder in os.listdir(save_dir): + output_dir = os.path.join(save_dir, folder) + if os.path.isdir(output_dir) and get_last_checkpoint(output_dir) is not None: + output_dirs.append(folder) + + return gr.Dropdown(choices=output_dirs) + + +def check_output_dir(lang: str, model_name: str, finetuning_type: str, output_dir: str) -> None: + r""" + Check if output dir exists. + """ + if os.path.isdir(get_save_dir(model_name, finetuning_type, output_dir)): + gr.Warning(ALERTS["warn_output_dir_exists"][lang]) + + +def create_ds_config() -> None: + r""" + Creates deepspeed config. + """ os.makedirs(DEFAULT_CACHE_DIR, exist_ok=True) ds_config = { "train_batch_size": "auto", From 04dce0079e927f54091b21a75fa21c7a78194579 Mon Sep 17 00:00:00 2001 From: hoshi-hiyouga Date: Thu, 30 May 2024 00:04:26 +0800 Subject: [PATCH 032/162] Update README.md Former-commit-id: 65fb69e388c0a04c15ecd11441e567966f51fae5 --- data/README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/data/README.md b/data/README.md index dd7ca201..5ceae666 100644 --- a/data/README.md +++ b/data/README.md @@ -12,6 +12,7 @@ Currently we support datasets in **alpaca** and **sharegpt** format. "ranking": "whether the dataset is a preference dataset or not. (default: False)", "subset": "the name of the subset. (optional, default: None)", "folder": "the name of the folder of the dataset repository on the Hugging Face hub. (optional, default: None)", + "num_samples": "the number of samples in the dataset used for training. (optional, default: None)", "columns (optional)": { "prompt": "the column name in the dataset containing the prompts. (default: instruction)", "query": "the column name in the dataset containing the queries. (default: input)", @@ -32,9 +33,8 @@ Currently we support datasets in **alpaca** and **sharegpt** format. "assistant_tag": "the value of the role_tag represents the assistant. (default: gpt)", "observation_tag": "the value of the role_tag represents the tool results. (default: observation)", "function_tag": "the value of the role_tag represents the function call. 
(default: function_call)", - "system_tag": "the value of the role_tag represents the system prompt. (default: system, can override system column)", - }, - "sample_num": "the number of samples from this dataset can be greater than the total amount of the dataset. (default: None)" + "system_tag": "the value of the role_tag represents the system prompt. (default: system, can override system column)" + } } ``` From 9256750add96407d2bcb5cbe954c1cb651054e81 Mon Sep 17 00:00:00 2001 From: hoshi-hiyouga Date: Thu, 30 May 2024 00:04:47 +0800 Subject: [PATCH 033/162] Update README_zh.md Former-commit-id: 3007d260ed45169583a74497a53b661337dd5f71 --- data/README_zh.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/data/README_zh.md b/data/README_zh.md index 1427e48d..1795f352 100644 --- a/data/README_zh.md +++ b/data/README_zh.md @@ -12,6 +12,7 @@ "ranking": "是否为偏好数据集(可选,默认:False)", "subset": "数据集子集的名称(可选,默认:None)", "folder": "Hugging Face 仓库的文件夹名称(可选,默认:None)", + "num_samples": "该数据集中用于训练的样本数量。(可选,默认:None)", "columns(可选)": { "prompt": "数据集代表提示词的表头名称(默认:instruction)", "query": "数据集代表请求的表头名称(默认:input)", @@ -32,9 +33,8 @@ "assistant_tag": "消息中代表助手的 role_tag(默认:gpt)", "observation_tag": "消息中代表工具返回结果的 role_tag(默认:observation)", "function_tag": "消息中代表工具调用的 role_tag(默认:function_call)", - "system_tag": "消息中代表系统提示的 role_tag(默认:system,会覆盖 system 列)" - }, - "sample_num": "从该数据集采样的数量,可大于该数据集总量(默认:None)" + "system_tag": "消息中代表系统提示的 role_tag(默认:system,会覆盖 system column)" + } } ``` From b221962ccf38f38aa6056ee273f02d4dff8e379f Mon Sep 17 00:00:00 2001 From: hoshi-hiyouga Date: Thu, 30 May 2024 00:05:20 +0800 Subject: [PATCH 034/162] Update parser.py Former-commit-id: 310cc11e8c83f16fc5bccc349c38fea347ea9a97 --- src/llamafactory/data/parser.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/llamafactory/data/parser.py b/src/llamafactory/data/parser.py index 99b71cf0..ec97bfc1 100644 --- a/src/llamafactory/data/parser.py +++ b/src/llamafactory/data/parser.py @@ -20,11 +20,12 @@ class DatasetAttr: """ basic configs """ load_from: Literal["hf_hub", "ms_hub", "script", "file"] dataset_name: str + formatting: Literal["alpaca", "sharegpt"] = "alpaca" + ranking: bool = False """ extra configs """ subset: Optional[str] = None folder: Optional[str] = None - ranking: bool = False - formatting: Literal["alpaca", "sharegpt"] = "alpaca" + num_samples: Optional[int] = None """ common columns """ system: Optional[str] = None tools: Optional[str] = None @@ -48,7 +49,6 @@ class DatasetAttr: observation_tag: Optional[str] = "observation" function_tag: Optional[str] = "function_call" system_tag: Optional[str] = "system" - sample_num: Optional[int] = None def __repr__(self) -> str: return self.dataset_name @@ -103,12 +103,12 @@ def get_dataset_list(data_args: "DataArguments") -> List["DatasetAttr"]: else: dataset_attr = DatasetAttr("file", dataset_name=dataset_info[name]["file_name"]) + dataset_attr.set_attr("formatting", dataset_info[name], default="alpaca") + dataset_attr.set_attr("ranking", dataset_info[name], default=False) dataset_attr.set_attr("subset", dataset_info[name]) dataset_attr.set_attr("folder", dataset_info[name]) - dataset_attr.set_attr("ranking", dataset_info[name], default=False) - dataset_attr.set_attr("formatting", dataset_info[name], default="alpaca") - dataset_attr.set_attr("sample_num", dataset_info[name]) - + dataset_attr.set_attr("num_samples", dataset_info[name]) + if "columns" in dataset_info[name]: column_names = ["system", "tools", "images", 
"chosen", "rejected", "kto_tag"] if dataset_attr.formatting == "alpaca": From d5ee4854409fb8dec19b5d71b2921eae029c6c07 Mon Sep 17 00:00:00 2001 From: hoshi-hiyouga Date: Thu, 30 May 2024 00:12:12 +0800 Subject: [PATCH 035/162] Update loader.py Former-commit-id: 19d8fd62c18ee3ba0e431fc241f7d315cb716fef --- src/llamafactory/data/loader.py | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/src/llamafactory/data/loader.py b/src/llamafactory/data/loader.py index 5ce4392e..322eefa0 100644 --- a/src/llamafactory/data/loader.py +++ b/src/llamafactory/data/loader.py @@ -1,10 +1,9 @@ import inspect import os -import numpy as np -from numpy.random import RandomState import sys from typing import TYPE_CHECKING, Literal, Optional, Union +import numpy as np from datasets import load_dataset, load_from_disk from ..extras.constants import FILEEXT2TYPE @@ -108,20 +107,14 @@ def load_single_dataset( if data_args.streaming and (dataset_attr.load_from == "file"): # faster than specifying streaming=True dataset = dataset.to_iterable_dataset() # TODO: add num shards parameter - if data_args.max_samples is not None: # truncate dataset - num_samples = min(data_args.max_samples, len(dataset)) - dataset = dataset.select(range(num_samples)) + if dataset_attr.num_samples is not None and not data_args.streaming: + indexes = np.random.permutation(len(dataset))[: dataset_attr.num_samples] + dataset = dataset.select(indexes) + logger.info("Sampled {} examples from dataset {}.".format(dataset_attr.num_samples, dataset_attr)) - if dataset_attr.sample_num: - dataset_sample_num = dataset_attr.sample_num - logger.info(f"从 {dataset_attr.dataset_name} 采样 {dataset_sample_num} 条训练样本") - random_state = RandomState(42) - idx = random_state.permutation(len(dataset))[:dataset_sample_num] - dataset_sample_num -= len(idx) - if dataset_sample_num > 0: - idx2 = random_state.choice(len(dataset), dataset_sample_num) - idx = np.concatenate([idx, idx2], axis=0) - dataset = dataset.select(idx) + if data_args.max_samples is not None: # truncate dataset + indexes = np.random.permutation(len(dataset))[: data_args.max_samples] + dataset = dataset.select(indexes) return align_dataset(dataset, dataset_attr, data_args) From 88f4c583d3a64318111607e1a8f1f6782bccfe7b Mon Sep 17 00:00:00 2001 From: hoshi-hiyouga Date: Thu, 30 May 2024 00:17:21 +0800 Subject: [PATCH 036/162] Update loader.py Former-commit-id: aa7f335e3ad5a78e4ed5f99c120be28e9733ea2e --- src/llamafactory/data/loader.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/llamafactory/data/loader.py b/src/llamafactory/data/loader.py index 322eefa0..fa5b12c5 100644 --- a/src/llamafactory/data/loader.py +++ b/src/llamafactory/data/loader.py @@ -108,7 +108,13 @@ def load_single_dataset( dataset = dataset.to_iterable_dataset() # TODO: add num shards parameter if dataset_attr.num_samples is not None and not data_args.streaming: - indexes = np.random.permutation(len(dataset))[: dataset_attr.num_samples] + target_num = dataset_attr.num_samples + indexes = np.random.permutation(len(dataset))[:target_num] + target_num -= len(indexes) + if target_num > 0: + expand_indexes = np.random.choice(len(dataset), target_num) + indexes = np.concatenate((indexes, expand_indexes), axis=0) + dataset = dataset.select(indexes) logger.info("Sampled {} examples from dataset {}.".format(dataset_attr.num_samples, dataset_attr)) From ae773f935587b9ddc1f9dd0cd9a7a074b8843b98 Mon Sep 17 00:00:00 2001 From: hoshi-hiyouga Date: Thu, 30 May 2024 00:20:20 +0800 
Subject: [PATCH 037/162] Update loader.py

Former-commit-id: 0aa59322906d91c5e385c9c02ebb5dd64ba060f3
---
 src/llamafactory/data/loader.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/llamafactory/data/loader.py b/src/llamafactory/data/loader.py
index fa5b12c5..d4a19e27 100644
--- a/src/llamafactory/data/loader.py
+++ b/src/llamafactory/data/loader.py
@@ -115,6 +115,7 @@ def load_single_dataset(
             expand_indexes = np.random.choice(len(dataset), target_num)
             indexes = np.concatenate((indexes, expand_indexes), axis=0)

+        assert len(indexes) == dataset_attr.num_samples, "Sample num mismatched."
         dataset = dataset.select(indexes)
         logger.info("Sampled {} examples from dataset {}.".format(dataset_attr.num_samples, dataset_attr))

From 78aef175fe3ea9b1e120a6df8ec63471af97cbe9 Mon Sep 17 00:00:00 2001
From: hiyouga <467089858@qq.com>
Date: Thu, 30 May 2024 00:52:26 +0800
Subject: [PATCH 038/162] fix #3837

Former-commit-id: 72965aa3f13a9c085c29781b6790d80d00a545d8
---
 evaluation/mmlu/mmlu.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/evaluation/mmlu/mmlu.py b/evaluation/mmlu/mmlu.py
index f3218c38..a4530250 100644
--- a/evaluation/mmlu/mmlu.py
+++ b/evaluation/mmlu/mmlu.py
@@ -154,7 +154,7 @@ class MMLU(datasets.GeneratorBasedBuilder):
         ]

     def _generate_examples(self, filepath):
-        df = pd.read_csv(filepath)
+        df = pd.read_csv(filepath, header=None)
         df.columns = ["question", "A", "B", "C", "D", "answer"]

         for i, instance in enumerate(df.to_dict(orient="records")):

From 26d48595389197efbdc18d80d0e509189e3a10e5 Mon Sep 17 00:00:00 2001
From: faddddeout <39449491+injet-zhou@users.noreply.github.com>
Date: Thu, 30 May 2024 08:39:21 +0000
Subject: [PATCH 039/162] fix can't interrupt training when using multi GPUs in webui

Former-commit-id: a7fb02d52bc202c958490aa7081252be5d9eff50
---
 src/llamafactory/webui/runner.py | 34 +++++++++++++++++++++++++++++---
 1 file changed, 31 insertions(+), 3 deletions(-)

diff --git a/src/llamafactory/webui/runner.py b/src/llamafactory/webui/runner.py
index 7a305d62..2580ded2 100644
--- a/src/llamafactory/webui/runner.py
+++ b/src/llamafactory/webui/runner.py
@@ -1,5 +1,6 @@
 import os
 import signal
+import random
 from copy import deepcopy
 from subprocess import Popen, TimeoutExpired
 from typing import TYPE_CHECKING, Any, Dict, Generator, Optional
@@ -8,12 +9,12 @@
 import psutil
 from transformers.trainer import TRAINING_ARGS_NAME

 from ..extras.constants import PEFT_METHODS, TRAINING_STAGES
-from ..extras.misc import is_gpu_or_npu_available, torch_gc
+from ..extras.misc import is_gpu_or_npu_available, torch_gc, get_device_count
 from ..extras.packages import is_gradio_available
 from .common import DEFAULT_CACHE_DIR, get_module, get_save_dir, load_config
 from .locales import ALERTS
 from .utils import gen_cmd, get_eval_results, get_trainer_info, load_args, save_args, save_cmd
-
from ..
import launcher if is_gradio_available(): import gradio as gr @@ -41,6 +42,10 @@ class Runner: self.aborted = True if self.trainer is not None: for children in psutil.Process(self.trainer.pid).children(): # abort the child process + grand_children = children.children() + if len(grand_children) > 0: + for grand_child in grand_children: + os.kill(grand_child.pid, signal.SIGABRT) os.kill(children.pid, signal.SIGABRT) def _initialize(self, data: Dict["Component", Any], do_train: bool, from_preview: bool) -> str: @@ -275,7 +280,30 @@ class Runner: args = self._parse_train_args(data) if do_train else self._parse_eval_args(data) env = deepcopy(os.environ) env["LLAMABOARD_ENABLED"] = "1" - self.trainer = Popen("llamafactory-cli train {}".format(save_cmd(args)), env=env, shell=True) + if get_device_count() > 1: + nnodes = os.environ.get("NNODES", "1") + node_rank = os.environ.get("RANK", "0") + nproc_per_node = os.environ.get("NPROC_PER_NODE", str(get_device_count())) + master_addr = os.environ.get("MASTER_ADDR", "127.0.0.1") + master_port = os.environ.get("MASTER_PORT", str(random.randint(20001, 29999))) + + self.trainer = Popen([ + "torchrun", + "--nnodes", + nnodes, + "--node_rank", + node_rank, + "--nproc_per_node", + nproc_per_node, + "--master_addr", + master_addr, + "--master_port", + master_port, + launcher.__file__, + save_cmd(args) + ], env=env, shell=True) + else: + self.trainer = Popen("llamafactory-cli train {}".format(save_cmd(args)), env=env, shell=True) yield from self.monitor() def preview_train(self, data): From 047e388a5e7879fcb680a1d2f52aaa4aaa3328fb Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Thu, 30 May 2024 16:40:17 +0800 Subject: [PATCH 040/162] update readme Former-commit-id: 3b92d8c2ddb288b849f38e573ca168cab23315d2 --- README.md | 1 + README_zh.md | 1 + 2 files changed, 2 insertions(+) diff --git a/README.md b/README.md index 4ca6d1ec..95c0c345 100644 --- a/README.md +++ b/README.md @@ -26,6 +26,7 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/9840a653-7e9c-41c8-ae89 Choose your path: - **Colab**: https://colab.research.google.com/drive/1eRTPn37ltBbYsISy9Aw2NuI2Aq5CQrD9?usp=sharing +- **PAI-DSW**: https://gallery.pai-ml.com/#/preview/deepLearning/nlp/llama_factory - **Local machine**: Please refer to [usage](#getting-started) ## Table of Contents diff --git a/README_zh.md b/README_zh.md index 70938e38..b10db1e5 100644 --- a/README_zh.md +++ b/README_zh.md @@ -26,6 +26,7 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/ec36a9dd-37f4-4f72-81bd 选择你的打开方式: - **Colab**:https://colab.research.google.com/drive/1d5KQtbemerlSDSxZIfAaWXhKr30QypiK?usp=sharing +- **PAI-DSW**: https://gallery.pai-ml.com/#/preview/deepLearning/nlp/llama_factory - **本地机器**:请见[如何使用](#如何使用) ## 目录 From 9e15eca1c1368b139c8a7d22eb4e3a348d54819b Mon Sep 17 00:00:00 2001 From: Uminosachi <49424133+Uminosachi@users.noreply.github.com> Date: Fri, 31 May 2024 13:45:39 +0900 Subject: [PATCH 041/162] Set scheduler_specific_kwargs to get_scheduler Former-commit-id: f04e70dfab44480ef4c015c06470443237f69ba9 --- src/llamafactory/train/utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/llamafactory/train/utils.py b/src/llamafactory/train/utils.py index 23834f2d..230fdc1e 100644 --- a/src/llamafactory/train/utils.py +++ b/src/llamafactory/train/utils.py @@ -379,6 +379,7 @@ def create_custom_scheduler( optimizer=optimizer_dict[param], num_warmup_steps=training_args.get_warmup_steps(num_training_steps), num_training_steps=num_training_steps, + 
scheduler_specific_kwargs=training_args.lr_scheduler_kwargs, ) def scheduler_hook(param: "torch.nn.Parameter"): From e6fc5ab31ef04ff66561dfdd61a212961c658f8d Mon Sep 17 00:00:00 2001 From: statelesshz Date: Fri, 31 May 2024 13:18:18 +0800 Subject: [PATCH 042/162] Update bug-report.yml Former-commit-id: a8561502360c1e247eeacb46b77ffbcf3387c482 --- .github/ISSUE_TEMPLATE/bug-report.yml | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/bug-report.yml b/.github/ISSUE_TEMPLATE/bug-report.yml index ab2851c6..c2182542 100644 --- a/.github/ISSUE_TEMPLATE/bug-report.yml +++ b/.github/ISSUE_TEMPLATE/bug-report.yml @@ -13,6 +13,18 @@ body: - label: I have read the README and searched the existing issues. required: true + - type: textarea + id: system-info + validations: + required: true + attributes: + label: System Info + description: | + Please share your system info with us. You can run the command **transformers-cli env** and copy-paste its output below. + 请提供您的系统信息。您可以在命令行运行 **transformers-cli env** 并将其输出复制到该文本框中。 + + placeholder: transformers version, platform, python version, ... + - type: textarea id: reproduction validations: @@ -38,18 +50,6 @@ body: Please provide a clear and concise description of what you would expect to happen. 请提供您原本的目的,即这段代码的期望行为。 - - type: textarea - id: system-info - validations: - required: false - attributes: - label: System Info - description: | - Please share your system info with us. You can run the command **transformers-cli env** and copy-paste its output below. - 请提供您的系统信息。您可以在命令行运行 **transformers-cli env** 并将其输出复制到该文本框中。 - - placeholder: transformers version, platform, python version, ... - - type: textarea id: others validations: From 0fba220d5d12eb8501208cd316d11789490a69d5 Mon Sep 17 00:00:00 2001 From: Xu Song Date: Fri, 31 May 2024 14:35:48 +0800 Subject: [PATCH 043/162] Update model_args.py Former-commit-id: f1e018587e5722e41962abd60f74043a3e55f692 --- src/llamafactory/hparams/model_args.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llamafactory/hparams/model_args.py b/src/llamafactory/hparams/model_args.py index 650d1c22..995d5f12 100644 --- a/src/llamafactory/hparams/model_args.py +++ b/src/llamafactory/hparams/model_args.py @@ -107,7 +107,7 @@ class ModelArguments: ) vllm_maxlen: int = field( default=2048, - metadata={"help": "Maximum input length of the vLLM engine."}, + metadata={"help": "Maximum sequence length of the vLLM engine (including prompt and output)."}, ) vllm_gpu_util: float = field( default=0.9, From 3f849f2314483aa091010cf9f4d2057014fac125 Mon Sep 17 00:00:00 2001 From: ylfeng Date: Fri, 31 May 2024 15:33:54 +0800 Subject: [PATCH 044/162] supervised packing with greedy knapsack algorithm Former-commit-id: 24d12396c9aabd49da0b08719068f24679111cc6 --- .../data/processors/supervised.py | 102 ++++++++++++++++-- 1 file changed, 92 insertions(+), 10 deletions(-) diff --git a/src/llamafactory/data/processors/supervised.py b/src/llamafactory/data/processors/supervised.py index b119aa22..65aa4b4e 100644 --- a/src/llamafactory/data/processors/supervised.py +++ b/src/llamafactory/data/processors/supervised.py @@ -1,3 +1,5 @@ +import itertools +from collections import defaultdict from typing import TYPE_CHECKING, Any, Dict, List, Optional from ...extras.constants import IGNORE_INDEX @@ -16,6 +18,52 @@ if TYPE_CHECKING: logger = get_logger(__name__) +def binary_search_for_fit(numbers, capacity): + """ + Perform binary search to find the largest number 
that fits into the knapsack with the given capacity. + """ + left, right = 0, len(numbers) - 1 + result = -1 # If no number fits, return -1 + + while left <= right: + mid = (left + right) // 2 + if numbers[mid] <= capacity: + result = mid + left = mid + 1 + else: + right = mid - 1 + + return result + + +def efficient_greedy_knapsack(numbers, capacity): + """ + An efficient greedy algorithm with binary search for the knapsack problem. + """ + numbers.sort() # Sort numbers in ascending order for binary search + knapsacks = [] + + while numbers: + current_knapsack = [] + remaining_capacity = capacity + + while True: + index = binary_search_for_fit(numbers, remaining_capacity) + if index == -1: + break # No more numbers fit in this knapsack + + # Add the found number to the knapsack and update the remaining capacity + current_knapsack.append(numbers[index]) + remaining_capacity -= numbers[index] + + # Remove the number from the list + numbers.pop(index) + + knapsacks.append(current_knapsack) + + return knapsacks + + def preprocess_supervised_dataset( examples: Dict[str, List[Any]], template: "Template", @@ -115,16 +163,50 @@ def preprocess_packed_supervised_dataset( input_ids += [tokenizer.eos_token_id] labels += [tokenizer.eos_token_id] - total_length = len(input_ids) - block_size = data_args.cutoff_len - # we drop the small remainder, and if the total_length < block_size, we exclude this batch - total_length = (total_length // block_size) * block_size - # split by chunks of cutoff_len - for i in range(0, total_length, block_size): - if not all(label == IGNORE_INDEX for label in labels[i : i + block_size]): - model_inputs["input_ids"].append(input_ids[i : i + block_size]) - model_inputs["attention_mask"].append([1] * block_size) - model_inputs["labels"].append(labels[i : i + block_size]) + # prepare for packing + lengths = [] + length2examples_idx = defaultdict(list) + for idx, example in enumerate(input_ids): + length = len(example) + if length > data_args.cutoff_len: + logger.warning("Dropped example with length {} > cutoff_len {}".format(length, data_args.cutoff_len)) + continue + lengths.append(length) + length2examples_idx[length].append(idx) + + knapsacks = efficient_greedy_knapsack(lengths, data_args.cutoff_len) + + for knapsack in knapsacks: + packed_input_ids = [] + packed_labels = [] + + total_length = 0 + for length in knapsack: + total_length += length + idx = length2examples_idx[length].pop() + packed_input_ids.append(input_ids[idx]) + packed_labels.append(labels[idx]) + + # padding to cutoff_len + if total_length < data_args.cutoff_len: + pad_length = data_args.cutoff_len - total_length + packed_input_ids.append([tokenizer.eos_token_id] * pad_length) + packed_labels.append([IGNORE_INDEX] * pad_length) + elif total_length == data_args.cutoff_len: + pad_length = 0 + else: + logger.warning( + "Dropped packed example with total length {} > cutoff_len {}".format( + total_length, data_args.cutoff_len + ) + ) + continue + + # concat all + model_inputs["input_ids"].append(list(itertools.chain(*packed_input_ids))) + + model_inputs["labels"].append(list(itertools.chain(*packed_labels))) + model_inputs["attention_mask"].append([1] * total_length + [0] * pad_length) return model_inputs From cc62c225598e4b825e74d6b61a69a880c9e99596 Mon Sep 17 00:00:00 2001 From: ylfeng Date: Fri, 31 May 2024 21:40:41 +0800 Subject: [PATCH 045/162] fix eos Former-commit-id: 6e236c952958cbfe50b5dcb7b8eff6aea8477922 --- .../data/processors/supervised.py | 24 ++++++++++--------- 1 file changed, 13 insertions(+), 
From cc62c225598e4b825e74d6b61a69a880c9e99596 Mon Sep 17 00:00:00 2001
From: ylfeng
Date: Fri, 31 May 2024 21:40:41 +0800
Subject: [PATCH 045/162] fix eos

Former-commit-id: 6e236c952958cbfe50b5dcb7b8eff6aea8477922
---
 .../data/processors/supervised.py | 24 ++++++++++---------
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/src/llamafactory/data/processors/supervised.py b/src/llamafactory/data/processors/supervised.py
index 65aa4b4e..f94cebba 100644
--- a/src/llamafactory/data/processors/supervised.py
+++ b/src/llamafactory/data/processors/supervised.py
@@ -151,17 +151,11 @@ def preprocess_packed_supervised_dataset(
         ):
             if data_args.train_on_prompt:
                 source_mask = source_ids
-            elif len(input_ids) != 0 and template.efficient_eos:
-                source_mask = [tokenizer.eos_token_id] + [IGNORE_INDEX] * (len(source_ids) - 1)
             else:
                 source_mask = [IGNORE_INDEX] * len(source_ids)
 
-            input_ids += source_ids + target_ids
-            labels += source_mask + target_ids
-
-    if template.efficient_eos:
-        input_ids += [tokenizer.eos_token_id]
-        labels += [tokenizer.eos_token_id]
+            input_ids.append(source_ids + target_ids)
+            labels.append(source_mask + target_ids)
 
     # prepare for packing
     lengths = []
@@ -174,7 +168,8 @@ def preprocess_packed_supervised_dataset(
         lengths.append(length)
         length2examples_idx[length].append(idx)
 
-    knapsacks = efficient_greedy_knapsack(lengths, data_args.cutoff_len)
+    # cutoff_len - 1 for efficient_eos
+    knapsacks = efficient_greedy_knapsack(lengths, data_args.cutoff_len - int(template.efficient_eos))
 
     for knapsack in knapsacks:
         packed_input_ids = []
@@ -190,8 +185,15 @@ def preprocess_packed_supervised_dataset(
         # padding to cutoff_len
         if total_length < data_args.cutoff_len:
             pad_length = data_args.cutoff_len - total_length
-            packed_input_ids.append([tokenizer.eos_token_id] * pad_length)
-            packed_labels.append([IGNORE_INDEX] * pad_length)
+            if template.efficient_eos:
+                # make sure there is an eos token
+                packed_input_ids.append([tokenizer.eos_token_id] * pad_length)
+                packed_labels.append([tokenizer.eos_token_id] + [IGNORE_INDEX] * (pad_length - 1))
+            else:
+                # without an eos token, pad with 0 instead?
+ packed_input_ids.append([0] * pad_length) + packed_labels.append([tokenizer.eos_token_id] + [IGNORE_INDEX] * (pad_length - 1)) + elif total_length == data_args.cutoff_len: pad_length = 0 else: From 7226a1b43160914887e82a15b3074ada14943594 Mon Sep 17 00:00:00 2001 From: ylfeng Date: Fri, 31 May 2024 21:43:08 +0800 Subject: [PATCH 046/162] remove empty line Former-commit-id: 3164710971a6d6545629f5bf133f98de5ff0991a --- src/llamafactory/data/processors/supervised.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/llamafactory/data/processors/supervised.py b/src/llamafactory/data/processors/supervised.py index f94cebba..eaceb5b8 100644 --- a/src/llamafactory/data/processors/supervised.py +++ b/src/llamafactory/data/processors/supervised.py @@ -206,7 +206,6 @@ def preprocess_packed_supervised_dataset( # concat all model_inputs["input_ids"].append(list(itertools.chain(*packed_input_ids))) - model_inputs["labels"].append(list(itertools.chain(*packed_labels))) model_inputs["attention_mask"].append([1] * total_length + [0] * pad_length) From 2e843a4cf68a764ac6b156033f49c28b5ccd32ba Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Mon, 3 Jun 2024 18:28:27 +0800 Subject: [PATCH 047/162] fix data loader hint Former-commit-id: 25b56126a11591b0155e2f72b673dd8f45a6c8c9 --- src/llamafactory/data/loader.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/llamafactory/data/loader.py b/src/llamafactory/data/loader.py index d4a19e27..f5929f15 100644 --- a/src/llamafactory/data/loader.py +++ b/src/llamafactory/data/loader.py @@ -62,9 +62,9 @@ def load_single_dataset( raise ValueError("File {} not found.".format(local_path)) if data_path is None: - raise ValueError("File extension must be txt, csv, json or jsonl.") + raise ValueError("Allowed file types: {}.".format(",".join(FILEEXT2TYPE.keys()))) else: - raise NotImplementedError + raise NotImplementedError("Unknown load type: {}.".format(dataset_attr.load_from)) if dataset_attr.load_from == "ms_hub": try: From ee80c3acf19743a2cde86a0e51c404a76e38ac41 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Mon, 3 Jun 2024 18:29:38 +0800 Subject: [PATCH 048/162] bump versions transformers 4.37.2->4.41.2 datasets 2.14.3->2.16.0 accelerate 0.27.2->0.30.1 peft 0.10.0->0.11.1 trl 0.8.1->0.8.6 Former-commit-id: 5f1e041f7295bf42a41dd4d9e7f0c42fcc37fed2 --- requirements.txt | 10 +++++----- src/llamafactory/extras/misc.py | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/requirements.txt b/requirements.txt index f4a942e6..e17954e3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,8 +1,8 @@ -transformers>=4.37.2 -datasets>=2.14.3 -accelerate>=0.27.2 -peft>=0.10.0 -trl>=0.8.1 +transformers>=4.41.2 +datasets>=2.16.0 +accelerate>=0.30.1 +peft>=0.11.1 +trl>=0.8.6 gradio>=4.0.0 scipy einops diff --git a/src/llamafactory/extras/misc.py b/src/llamafactory/extras/misc.py index 2c7f170c..638c24cf 100644 --- a/src/llamafactory/extras/misc.py +++ b/src/llamafactory/extras/misc.py @@ -61,11 +61,11 @@ def check_dependencies() -> None: if os.environ.get("DISABLE_VERSION_CHECK", "0").lower() in ["true", "1"]: logger.warning("Version checking has been disabled, may lead to unexpected behaviors.") else: - require_version("transformers>=4.37.2", "To fix: pip install transformers>=4.37.2") - require_version("datasets>=2.14.3", "To fix: pip install datasets>=2.14.3") - require_version("accelerate>=0.27.2", "To fix: pip install accelerate>=0.27.2") - require_version("peft>=0.10.0", "To fix: pip 
install peft>=0.10.0") - require_version("trl>=0.8.2", "To fix: pip install trl>=0.8.2") + require_version("transformers>=4.41.2", "To fix: pip install transformers>=4.41.2") + require_version("datasets>=2.16.0", "To fix: pip install datasets>=2.16.0") + require_version("accelerate>=0.30.1", "To fix: pip install accelerate>=0.30.1") + require_version("peft>=0.11.1", "To fix: pip install peft>=0.11.1") + require_version("trl>=0.8.6", "To fix: pip install trl>=0.8.6") def count_parameters(model: torch.nn.Module) -> Tuple[int, int]: From d8d02e65a1c937ffc2adb48ef81427396221a93a Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Mon, 3 Jun 2024 18:38:36 +0800 Subject: [PATCH 049/162] fix #4022 Former-commit-id: 9541f2f1f1b7d7877eb734f051048e52003a3430 --- setup.py | 2 +- src/llamafactory/chat/vllm_engine.py | 4 +--- src/llamafactory/hparams/parser.py | 2 +- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/setup.py b/setup.py index 45e73343..23f532e7 100644 --- a/setup.py +++ b/setup.py @@ -25,7 +25,7 @@ extra_require = { "metrics": ["nltk", "jieba", "rouge-chinese"], "deepspeed": ["deepspeed>=0.10.0,<=0.14.0"], "bitsandbytes": ["bitsandbytes>=0.39.0"], - "vllm": ["vllm>=0.4.1"], + "vllm": ["vllm>=0.4.3"], "galore": ["galore-torch"], "badam": ["badam"], "gptq": ["optimum>=1.16.0", "auto-gptq>=0.5.0"], diff --git a/src/llamafactory/chat/vllm_engine.py b/src/llamafactory/chat/vllm_engine.py index 3310a864..8a067754 100644 --- a/src/llamafactory/chat/vllm_engine.py +++ b/src/llamafactory/chat/vllm_engine.py @@ -158,12 +158,10 @@ class VllmEngine(BaseEngine): ) result_generator = self.model.generate( - prompt=None, + inputs={"prompt_token_ids": prompt_ids, "multi_modal_data": multi_modal_data}, sampling_params=sampling_params, request_id=request_id, - prompt_token_ids=prompt_ids, lora_request=self.lora_request, - multi_modal_data=multi_modal_data, ) return result_generator diff --git a/src/llamafactory/hparams/parser.py b/src/llamafactory/hparams/parser.py index b3c673be..ff1fbf5d 100644 --- a/src/llamafactory/hparams/parser.py +++ b/src/llamafactory/hparams/parser.py @@ -94,7 +94,7 @@ def _check_extra_dependencies( require_version("mixture-of-depth>=1.1.6", "To fix: pip install mixture-of-depth>=1.1.6") if model_args.infer_backend == "vllm": - require_version("vllm>=0.4.1", "To fix: pip install vllm>=0.4.1") + require_version("vllm>=0.4.3", "To fix: pip install vllm>=0.4.3") if finetuning_args.use_galore: require_version("galore_torch", "To fix: pip install galore_torch") From 1ef396362bcb09eb8badbbd760f30ae6e104ab6a Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Mon, 3 Jun 2024 19:12:29 +0800 Subject: [PATCH 050/162] fix #4005 #4013 Former-commit-id: 8608fa268cde5cddf8d0c6c2eb2cb5fa246c1831 --- examples/README.md | 8 ++++---- examples/README_zh.md | 10 +++++----- examples/extras/badam/llama3_lora_sft.yaml | 4 ++-- examples/extras/fsdp_qlora/llama3_lora_sft.yaml | 4 ++-- examples/extras/galore/llama3_full_sft.yaml | 4 ++-- examples/extras/llama_pro/llama3_freeze_sft.yaml | 4 ++-- examples/extras/loraplus/llama3_lora_sft.yaml | 4 ++-- examples/extras/mod/llama3_full_sft.yaml | 4 ++-- examples/full_multi_gpu/llama3_full_sft.yaml | 4 ++-- examples/lora_multi_gpu/llama3_lora_sft.yaml | 4 ++-- examples/lora_multi_gpu/llama3_lora_sft_ds.yaml | 4 ++-- examples/lora_multi_npu/llama3_lora_sft_ds.yaml | 4 ++-- examples/lora_single_gpu/llama3_lora_dpo.yaml | 4 ++-- examples/lora_single_gpu/llama3_lora_kto.yaml | 4 ++-- examples/lora_single_gpu/llama3_lora_ppo.yaml | 
4 ++-- examples/lora_single_gpu/llama3_lora_pretrain.yaml | 4 ++-- examples/lora_single_gpu/llama3_lora_reward.yaml | 4 ++-- examples/lora_single_gpu/llama3_lora_sft.yaml | 4 ++-- examples/lora_single_gpu/llava1_5_lora_sft.yaml | 4 ++-- examples/qlora_single_gpu/llama3_lora_sft_aqlm.yaml | 4 ++-- examples/qlora_single_gpu/llama3_lora_sft_awq.yaml | 4 ++-- .../qlora_single_gpu/llama3_lora_sft_bitsandbytes.yaml | 4 ++-- examples/qlora_single_gpu/llama3_lora_sft_gptq.yaml | 4 ++-- src/llamafactory/hparams/model_args.py | 2 +- 24 files changed, 52 insertions(+), 52 deletions(-) diff --git a/examples/README.md b/examples/README.md index 727b27c8..f985d552 100644 --- a/examples/README.md +++ b/examples/README.md @@ -107,13 +107,13 @@ CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/qlora_single_gpu/llama3_l ### LoRA Fine-Tuning on Multiple GPUs -#### Supervised Fine-Tuning with Accelerate on Single Node +#### Supervised Fine-Tuning on Single Node ```bash CUDA_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train examples/lora_multi_gpu/llama3_lora_sft.yaml ``` -#### Supervised Fine-Tuning with Accelerate on Multiple Nodes +#### Supervised Fine-Tuning on Multiple Nodes ```bash CUDA_VISIBLE_DEVICES=0,1,2,3 NNODES=2 RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/lora_multi_gpu/llama3_lora_sft.yaml @@ -136,13 +136,13 @@ ASCEND_RT_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train examples/lora_multi_npu ### Full-Parameter Fine-Tuning on Multiple GPUs -#### Supervised Fine-Tuning with Accelerate on Single Node +#### Supervised Fine-Tuning on Single Node ```bash CUDA_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train examples/full_multi_gpu/llama3_full_sft.yaml ``` -#### Supervised Fine-Tuning with Accelerate on Multiple Nodes +#### Supervised Fine-Tuning on Multiple Nodes ```bash CUDA_VISIBLE_DEVICES=0,1,2,3 NNODES=2 RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/full_multi_gpu/llama3_full_sft.yaml diff --git a/examples/README_zh.md b/examples/README_zh.md index 6974faa9..cf5bbf49 100644 --- a/examples/README_zh.md +++ b/examples/README_zh.md @@ -107,13 +107,13 @@ CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/qlora_single_gpu/llama3_l ### 多 GPU LoRA 微调 -#### 使用 Accelerate 进行单节点训练 +#### 在单机上进行指令监督微调 ```bash CUDA_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train examples/lora_multi_gpu/llama3_lora_sft.yaml ``` -#### 使用 Accelerate 进行多节点训练 +#### 在多机上进行指令监督微调 ```bash CUDA_VISIBLE_DEVICES=0,1,2,3 NNODES=2 RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/lora_multi_gpu/llama3_lora_sft.yaml @@ -128,7 +128,7 @@ CUDA_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train examples/lora_multi_gpu/llam ### 多 NPU LoRA 微调 -#### 使用 DeepSpeed ZeRO-0 训练 +#### 使用 DeepSpeed ZeRO-0 进行指令监督微调 ```bash ASCEND_RT_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train examples/lora_multi_npu/llama3_lora_sft_ds.yaml @@ -136,13 +136,13 @@ ASCEND_RT_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train examples/lora_multi_npu ### 多 GPU 全参数微调 -#### 使用 DeepSpeed 进行单节点训练 +#### 在单机上进行指令监督微调 ```bash CUDA_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train examples/full_multi_gpu/llama3_full_sft.yaml ``` -#### 使用 DeepSpeed 进行多节点训练 +#### 在多机上进行指令监督微调 ```bash CUDA_VISIBLE_DEVICES=0,1,2,3 NNODES=2 RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/full_multi_gpu/llama3_full_sft.yaml diff --git a/examples/extras/badam/llama3_lora_sft.yaml b/examples/extras/badam/llama3_lora_sft.yaml index 4a482749..242e63ab 100644 --- 
a/examples/extras/badam/llama3_lora_sft.yaml +++ b/examples/extras/badam/llama3_lora_sft.yaml @@ -28,10 +28,10 @@ overwrite_output_dir: true ### train per_device_train_batch_size: 1 gradient_accumulation_steps: 8 -learning_rate: 0.0001 +learning_rate: 1.0e-4 num_train_epochs: 3.0 lr_scheduler_type: cosine -warmup_steps: 0.1 +warmup_ratio: 0.1 pure_bf16: true ### eval diff --git a/examples/extras/fsdp_qlora/llama3_lora_sft.yaml b/examples/extras/fsdp_qlora/llama3_lora_sft.yaml index e9c04fa9..920d8fdb 100644 --- a/examples/extras/fsdp_qlora/llama3_lora_sft.yaml +++ b/examples/extras/fsdp_qlora/llama3_lora_sft.yaml @@ -29,10 +29,10 @@ overwrite_output_dir: true ### train per_device_train_batch_size: 1 gradient_accumulation_steps: 8 -learning_rate: 0.0001 +learning_rate: 1.0e-4 num_train_epochs: 3.0 lr_scheduler_type: cosine -warmup_steps: 0.1 +warmup_ratio: 0.1 fp16: true ### eval diff --git a/examples/extras/galore/llama3_full_sft.yaml b/examples/extras/galore/llama3_full_sft.yaml index 87381fcc..3db31fed 100644 --- a/examples/extras/galore/llama3_full_sft.yaml +++ b/examples/extras/galore/llama3_full_sft.yaml @@ -29,10 +29,10 @@ overwrite_output_dir: true ### train per_device_train_batch_size: 1 gradient_accumulation_steps: 1 -learning_rate: 0.0001 +learning_rate: 1.0e-4 num_train_epochs: 3.0 lr_scheduler_type: cosine -warmup_steps: 0.1 +warmup_ratio: 0.1 pure_bf16: true ### eval diff --git a/examples/extras/llama_pro/llama3_freeze_sft.yaml b/examples/extras/llama_pro/llama3_freeze_sft.yaml index 8ace8db8..214f411a 100644 --- a/examples/extras/llama_pro/llama3_freeze_sft.yaml +++ b/examples/extras/llama_pro/llama3_freeze_sft.yaml @@ -27,10 +27,10 @@ overwrite_output_dir: true ### train per_device_train_batch_size: 1 gradient_accumulation_steps: 8 -learning_rate: 0.0001 +learning_rate: 1.0e-4 num_train_epochs: 3.0 lr_scheduler_type: cosine -warmup_steps: 0.1 +warmup_ratio: 0.1 fp16: true ### eval diff --git a/examples/extras/loraplus/llama3_lora_sft.yaml b/examples/extras/loraplus/llama3_lora_sft.yaml index 26c2b1d2..9936bcd3 100644 --- a/examples/extras/loraplus/llama3_lora_sft.yaml +++ b/examples/extras/loraplus/llama3_lora_sft.yaml @@ -26,10 +26,10 @@ overwrite_output_dir: true ### train per_device_train_batch_size: 1 gradient_accumulation_steps: 8 -learning_rate: 0.0001 +learning_rate: 1.0e-4 num_train_epochs: 3.0 lr_scheduler_type: cosine -warmup_steps: 0.1 +warmup_ratio: 0.1 fp16: true ### eval diff --git a/examples/extras/mod/llama3_full_sft.yaml b/examples/extras/mod/llama3_full_sft.yaml index 6b724ed0..edfec44e 100644 --- a/examples/extras/mod/llama3_full_sft.yaml +++ b/examples/extras/mod/llama3_full_sft.yaml @@ -26,10 +26,10 @@ overwrite_output_dir: true per_device_train_batch_size: 1 gradient_accumulation_steps: 8 optim: paged_adamw_8bit -learning_rate: 0.0001 +learning_rate: 1.0e-4 num_train_epochs: 3.0 lr_scheduler_type: cosine -warmup_steps: 0.1 +warmup_ratio: 0.1 pure_bf16: true ### eval diff --git a/examples/full_multi_gpu/llama3_full_sft.yaml b/examples/full_multi_gpu/llama3_full_sft.yaml index a96f1b8e..b8873e3a 100644 --- a/examples/full_multi_gpu/llama3_full_sft.yaml +++ b/examples/full_multi_gpu/llama3_full_sft.yaml @@ -28,10 +28,10 @@ overwrite_output_dir: true ### train per_device_train_batch_size: 1 gradient_accumulation_steps: 2 -learning_rate: 0.0001 +learning_rate: 1.0e-4 num_train_epochs: 3.0 lr_scheduler_type: cosine -warmup_steps: 0.1 +warmup_ratio: 0.1 fp16: true ### eval diff --git a/examples/lora_multi_gpu/llama3_lora_sft.yaml 
b/examples/lora_multi_gpu/llama3_lora_sft.yaml index 6389f21b..5e5dd9e6 100644 --- a/examples/lora_multi_gpu/llama3_lora_sft.yaml +++ b/examples/lora_multi_gpu/llama3_lora_sft.yaml @@ -28,10 +28,10 @@ overwrite_output_dir: true ### train per_device_train_batch_size: 1 gradient_accumulation_steps: 2 -learning_rate: 0.0001 +learning_rate: 1.0e-4 num_train_epochs: 3.0 lr_scheduler_type: cosine -warmup_steps: 0.1 +warmup_ratio: 0.1 fp16: true ### eval diff --git a/examples/lora_multi_gpu/llama3_lora_sft_ds.yaml b/examples/lora_multi_gpu/llama3_lora_sft_ds.yaml index 6011896a..e8dee216 100644 --- a/examples/lora_multi_gpu/llama3_lora_sft_ds.yaml +++ b/examples/lora_multi_gpu/llama3_lora_sft_ds.yaml @@ -29,10 +29,10 @@ overwrite_output_dir: true ### train per_device_train_batch_size: 1 gradient_accumulation_steps: 2 -learning_rate: 0.0001 +learning_rate: 1.0e-4 num_train_epochs: 3.0 lr_scheduler_type: cosine -warmup_steps: 0.1 +warmup_ratio: 0.1 fp16: true ### eval diff --git a/examples/lora_multi_npu/llama3_lora_sft_ds.yaml b/examples/lora_multi_npu/llama3_lora_sft_ds.yaml index 65ab6347..825b8450 100644 --- a/examples/lora_multi_npu/llama3_lora_sft_ds.yaml +++ b/examples/lora_multi_npu/llama3_lora_sft_ds.yaml @@ -29,10 +29,10 @@ overwrite_output_dir: true ### train per_device_train_batch_size: 1 gradient_accumulation_steps: 2 -learning_rate: 0.0001 +learning_rate: 1.0e-4 num_train_epochs: 3.0 lr_scheduler_type: cosine -warmup_steps: 0.1 +warmup_ratio: 0.1 fp16: true ### eval diff --git a/examples/lora_single_gpu/llama3_lora_dpo.yaml b/examples/lora_single_gpu/llama3_lora_dpo.yaml index f68244b7..62752e57 100644 --- a/examples/lora_single_gpu/llama3_lora_dpo.yaml +++ b/examples/lora_single_gpu/llama3_lora_dpo.yaml @@ -27,10 +27,10 @@ overwrite_output_dir: true ### train per_device_train_batch_size: 1 gradient_accumulation_steps: 8 -learning_rate: 0.000005 +learning_rate: 5.0e-6 num_train_epochs: 3.0 lr_scheduler_type: cosine -warmup_steps: 0.1 +warmup_ratio: 0.1 fp16: true ### eval diff --git a/examples/lora_single_gpu/llama3_lora_kto.yaml b/examples/lora_single_gpu/llama3_lora_kto.yaml index 4405aaec..6f689818 100644 --- a/examples/lora_single_gpu/llama3_lora_kto.yaml +++ b/examples/lora_single_gpu/llama3_lora_kto.yaml @@ -25,10 +25,10 @@ overwrite_output_dir: true ### train per_device_train_batch_size: 1 gradient_accumulation_steps: 8 -learning_rate: 0.000005 +learning_rate: 5.0e-6 num_train_epochs: 3.0 lr_scheduler_type: cosine -warmup_steps: 0.1 +warmup_ratio: 0.1 fp16: true ### eval diff --git a/examples/lora_single_gpu/llama3_lora_ppo.yaml b/examples/lora_single_gpu/llama3_lora_ppo.yaml index 88ce24f3..19e7ccb3 100644 --- a/examples/lora_single_gpu/llama3_lora_ppo.yaml +++ b/examples/lora_single_gpu/llama3_lora_ppo.yaml @@ -26,10 +26,10 @@ overwrite_output_dir: true ### train per_device_train_batch_size: 1 gradient_accumulation_steps: 8 -learning_rate: 0.00001 +learning_rate: 1.0e-5 num_train_epochs: 3.0 lr_scheduler_type: cosine -warmup_steps: 0.1 +warmup_ratio: 0.1 fp16: true ### generate diff --git a/examples/lora_single_gpu/llama3_lora_pretrain.yaml b/examples/lora_single_gpu/llama3_lora_pretrain.yaml index acb18ebf..54c5d89a 100644 --- a/examples/lora_single_gpu/llama3_lora_pretrain.yaml +++ b/examples/lora_single_gpu/llama3_lora_pretrain.yaml @@ -24,10 +24,10 @@ overwrite_output_dir: true ### train per_device_train_batch_size: 1 gradient_accumulation_steps: 8 -learning_rate: 0.0001 +learning_rate: 1.0e-4 num_train_epochs: 3.0 lr_scheduler_type: cosine -warmup_steps: 0.1 
+warmup_ratio: 0.1 fp16: true ### eval diff --git a/examples/lora_single_gpu/llama3_lora_reward.yaml b/examples/lora_single_gpu/llama3_lora_reward.yaml index 6bf2ca02..c82f9414 100644 --- a/examples/lora_single_gpu/llama3_lora_reward.yaml +++ b/examples/lora_single_gpu/llama3_lora_reward.yaml @@ -25,10 +25,10 @@ overwrite_output_dir: true ### train per_device_train_batch_size: 1 gradient_accumulation_steps: 8 -learning_rate: 0.00001 +learning_rate: 1.0e-5 num_train_epochs: 3.0 lr_scheduler_type: cosine -warmup_steps: 0.1 +warmup_ratio: 0.1 fp16: true ### eval diff --git a/examples/lora_single_gpu/llama3_lora_sft.yaml b/examples/lora_single_gpu/llama3_lora_sft.yaml index 5492bc34..429cb6af 100644 --- a/examples/lora_single_gpu/llama3_lora_sft.yaml +++ b/examples/lora_single_gpu/llama3_lora_sft.yaml @@ -25,10 +25,10 @@ overwrite_output_dir: true ### train per_device_train_batch_size: 1 gradient_accumulation_steps: 8 -learning_rate: 0.0001 +learning_rate: 1.0e-4 num_train_epochs: 3.0 lr_scheduler_type: cosine -warmup_steps: 0.1 +warmup_ratio: 0.1 fp16: true ### eval diff --git a/examples/lora_single_gpu/llava1_5_lora_sft.yaml b/examples/lora_single_gpu/llava1_5_lora_sft.yaml index 8e4226da..acab4884 100644 --- a/examples/lora_single_gpu/llava1_5_lora_sft.yaml +++ b/examples/lora_single_gpu/llava1_5_lora_sft.yaml @@ -26,10 +26,10 @@ overwrite_output_dir: true ### train per_device_train_batch_size: 1 gradient_accumulation_steps: 8 -learning_rate: 0.0001 +learning_rate: 1.0e-4 num_train_epochs: 3.0 lr_scheduler_type: cosine -warmup_steps: 0.1 +warmup_ratio: 0.1 fp16: true ### eval diff --git a/examples/qlora_single_gpu/llama3_lora_sft_aqlm.yaml b/examples/qlora_single_gpu/llama3_lora_sft_aqlm.yaml index d2658051..53cc12e2 100644 --- a/examples/qlora_single_gpu/llama3_lora_sft_aqlm.yaml +++ b/examples/qlora_single_gpu/llama3_lora_sft_aqlm.yaml @@ -25,10 +25,10 @@ overwrite_output_dir: true ### train per_device_train_batch_size: 1 gradient_accumulation_steps: 8 -learning_rate: 0.0001 +learning_rate: 1.0e-4 num_train_epochs: 3.0 lr_scheduler_type: cosine -warmup_steps: 0.1 +warmup_ratio: 0.1 fp16: true ### eval diff --git a/examples/qlora_single_gpu/llama3_lora_sft_awq.yaml b/examples/qlora_single_gpu/llama3_lora_sft_awq.yaml index ba6d8ea5..1a92f822 100644 --- a/examples/qlora_single_gpu/llama3_lora_sft_awq.yaml +++ b/examples/qlora_single_gpu/llama3_lora_sft_awq.yaml @@ -25,10 +25,10 @@ overwrite_output_dir: true ### train per_device_train_batch_size: 1 gradient_accumulation_steps: 8 -learning_rate: 0.0001 +learning_rate: 1.0e-4 num_train_epochs: 3.0 lr_scheduler_type: cosine -warmup_steps: 0.1 +warmup_ratio: 0.1 fp16: true ### eval diff --git a/examples/qlora_single_gpu/llama3_lora_sft_bitsandbytes.yaml b/examples/qlora_single_gpu/llama3_lora_sft_bitsandbytes.yaml index a3db35ff..c7f72c66 100644 --- a/examples/qlora_single_gpu/llama3_lora_sft_bitsandbytes.yaml +++ b/examples/qlora_single_gpu/llama3_lora_sft_bitsandbytes.yaml @@ -26,10 +26,10 @@ overwrite_output_dir: true ### train per_device_train_batch_size: 1 gradient_accumulation_steps: 8 -learning_rate: 0.0001 +learning_rate: 1.0e-4 num_train_epochs: 3.0 lr_scheduler_type: cosine -warmup_steps: 0.1 +warmup_ratio: 0.1 fp16: true ### eval diff --git a/examples/qlora_single_gpu/llama3_lora_sft_gptq.yaml b/examples/qlora_single_gpu/llama3_lora_sft_gptq.yaml index cc9a454e..45caf17c 100644 --- a/examples/qlora_single_gpu/llama3_lora_sft_gptq.yaml +++ b/examples/qlora_single_gpu/llama3_lora_sft_gptq.yaml @@ -25,10 +25,10 @@ overwrite_output_dir: 
true ### train per_device_train_batch_size: 1 gradient_accumulation_steps: 8 -learning_rate: 0.0001 +learning_rate: 1.0e-4 num_train_epochs: 3.0 lr_scheduler_type: cosine -warmup_steps: 0.1 +warmup_ratio: 0.1 fp16: true ### eval diff --git a/src/llamafactory/hparams/model_args.py b/src/llamafactory/hparams/model_args.py index 995d5f12..7003cbee 100644 --- a/src/llamafactory/hparams/model_args.py +++ b/src/llamafactory/hparams/model_args.py @@ -107,7 +107,7 @@ class ModelArguments: ) vllm_maxlen: int = field( default=2048, - metadata={"help": "Maximum sequence length of the vLLM engine (including prompt and output)."}, + metadata={"help": "Maximum sequence (prompt + response) length of the vLLM engine."}, ) vllm_gpu_util: float = field( default=0.9, From ba16749942881fd0d202de0c5f8f57381a6958bb Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Mon, 3 Jun 2024 19:24:10 +0800 Subject: [PATCH 051/162] update placeholder in issue template Former-commit-id: 5503a90d7e38273b67129e0b9eb62bd1fd23154f --- .github/ISSUE_TEMPLATE/bug-report.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/ISSUE_TEMPLATE/bug-report.yml b/.github/ISSUE_TEMPLATE/bug-report.yml index c2182542..82620fdb 100644 --- a/.github/ISSUE_TEMPLATE/bug-report.yml +++ b/.github/ISSUE_TEMPLATE/bug-report.yml @@ -38,7 +38,7 @@ body: 请合理使用 Markdown 标签来格式化您的文本。 placeholder: | - python src/train_bash.py ... + llamafactory-cli train ... - type: textarea id: expected-behavior From 59aca304c0acb3375d72ed951df623a7b3541758 Mon Sep 17 00:00:00 2001 From: "enji.zhou" Date: Mon, 3 Jun 2024 21:32:38 +0800 Subject: [PATCH 052/162] fix KTO Trainer Sampler Former-commit-id: 39eb1bfa272011554322e9bb2534f83b68282a70 --- src/llamafactory/train/kto/trainer.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/llamafactory/train/kto/trainer.py b/src/llamafactory/train/kto/trainer.py index 82ae722d..3f1220a9 100644 --- a/src/llamafactory/train/kto/trainer.py +++ b/src/llamafactory/train/kto/trainer.py @@ -4,6 +4,7 @@ from types import MethodType from typing import TYPE_CHECKING, Dict, Literal, Optional, Tuple, Union import torch +from torch.utils.data import RandomSampler from transformers import Trainer from trl import KTOTrainer from trl.trainer import disable_dropout_in_model @@ -173,6 +174,21 @@ class CustomKTOTrainer(KTOTrainer): return reference_chosen_logps, reference_rejected_logps, reference_kl_logps + def has_length(self,dataset): + """ + Checks if the dataset implements __len__() and it doesn't raise an error + """ + try: + return len(dataset) is not None + except TypeError: + # TypeError: len() of unsized object + return False + + def _get_train_sampler(self) -> Optional[torch.utils.data.Sampler]: + if self.train_dataset is None or not self.has_length(self.train_dataset): + return None + return RandomSampler(self.train_dataset) + def get_batch_loss_metrics( self, model: "PreTrainedModel", From ca60eca259f031db8318e12d8576668e6e5f2023 Mon Sep 17 00:00:00 2001 From: hoshi-hiyouga Date: Mon, 3 Jun 2024 22:08:38 +0800 Subject: [PATCH 053/162] Update trainer.py Former-commit-id: 8565d4b43db905374c328ae57c71fc226980d14f --- src/llamafactory/train/kto/trainer.py | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) diff --git a/src/llamafactory/train/kto/trainer.py b/src/llamafactory/train/kto/trainer.py index 3f1220a9..7c0343f5 100644 --- a/src/llamafactory/train/kto/trainer.py +++ b/src/llamafactory/train/kto/trainer.py @@ -4,7 +4,6 @@ from types import 
MethodType from typing import TYPE_CHECKING, Dict, Literal, Optional, Tuple, Union import torch -from torch.utils.data import RandomSampler from transformers import Trainer from trl import KTOTrainer from trl.trainer import disable_dropout_in_model @@ -14,6 +13,7 @@ from ..utils import create_custom_optimzer, create_custom_scheduler if TYPE_CHECKING: + import torch.utils.data from transformers import PreTrainedModel, ProcessorMixin from ...hparams import FinetuningArguments @@ -85,6 +85,12 @@ class CustomKTOTrainer(KTOTrainer): create_custom_scheduler(self.args, num_training_steps, optimizer) return super().create_scheduler(num_training_steps, optimizer) + def _get_train_sampler(self) -> Optional["torch.utils.data.Sampler"]: + r""" + Replaces the sequential sampler of KTO Trainer created by trl with the random sampler. + """ + return Trainer._get_train_sampler(self) + def _save(self, output_dir: Optional[str] = None, state_dict: Optional[Dict[str, "torch.Tensor"]] = None) -> None: super()._save(output_dir, state_dict) if self.processor is not None: @@ -174,21 +180,6 @@ class CustomKTOTrainer(KTOTrainer): return reference_chosen_logps, reference_rejected_logps, reference_kl_logps - def has_length(self,dataset): - """ - Checks if the dataset implements __len__() and it doesn't raise an error - """ - try: - return len(dataset) is not None - except TypeError: - # TypeError: len() of unsized object - return False - - def _get_train_sampler(self) -> Optional[torch.utils.data.Sampler]: - if self.train_dataset is None or not self.has_length(self.train_dataset): - return None - return RandomSampler(self.train_dataset) - def get_batch_loss_metrics( self, model: "PreTrainedModel", From 2dc5743fba86e248c7ffd17f255903765c5590bc Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Mon, 3 Jun 2024 22:53:54 +0800 Subject: [PATCH 054/162] remove gc warnings in DPO&KTO Former-commit-id: b649bdcbafb464a638387429b770fe258b41f8af --- src/llamafactory/train/dpo/trainer.py | 5 +++-- src/llamafactory/train/kto/trainer.py | 5 +++-- src/llamafactory/train/utils.py | 16 ++++++++++++++-- 3 files changed, 20 insertions(+), 6 deletions(-) diff --git a/src/llamafactory/train/dpo/trainer.py b/src/llamafactory/train/dpo/trainer.py index 542335a3..ec1de810 100644 --- a/src/llamafactory/train/dpo/trainer.py +++ b/src/llamafactory/train/dpo/trainer.py @@ -10,7 +10,7 @@ from trl import DPOTrainer from trl.trainer import disable_dropout_in_model from ...extras.constants import IGNORE_INDEX -from ..utils import create_custom_optimzer, create_custom_scheduler +from ..utils import create_custom_optimzer, create_custom_scheduler, get_ref_context if TYPE_CHECKING: @@ -69,6 +69,7 @@ class CustomDPOTrainer(DPOTrainer): self.ref_model = self._prepare_deepspeed(self.ref_model) else: self.ref_model = self.accelerator.prepare_model(self.ref_model, evaluation_mode=True) + self.ref_model.eval() if finetuning_args.use_badam: from badam import clip_grad_norm_for_sparse_tensor @@ -189,7 +190,7 @@ class CustomDPOTrainer(DPOTrainer): if self.ref_model is None: ref_model = model - ref_context = self.accelerator.unwrap_model(model).disable_adapter() + ref_context = get_ref_context(self.accelerator, model) else: ref_model = self.ref_model ref_context = nullcontext() diff --git a/src/llamafactory/train/kto/trainer.py b/src/llamafactory/train/kto/trainer.py index 7c0343f5..f29945f5 100644 --- a/src/llamafactory/train/kto/trainer.py +++ b/src/llamafactory/train/kto/trainer.py @@ -9,7 +9,7 @@ from trl import KTOTrainer from trl.trainer 
import disable_dropout_in_model from ...extras.constants import IGNORE_INDEX -from ..utils import create_custom_optimzer, create_custom_scheduler +from ..utils import create_custom_optimzer, create_custom_scheduler, get_ref_context if TYPE_CHECKING: @@ -68,6 +68,7 @@ class CustomKTOTrainer(KTOTrainer): self.ref_model = self._prepare_deepspeed(self.ref_model) else: self.ref_model = self.accelerator.prepare_model(self.ref_model, evaluation_mode=True) + self.ref_model.eval() if finetuning_args.use_badam: from badam import clip_grad_norm_for_sparse_tensor @@ -164,7 +165,7 @@ class CustomKTOTrainer(KTOTrainer): """ if self.ref_model is None: ref_model = model - ref_context = self.accelerator.unwrap_model(model).disable_adapter() + ref_context = get_ref_context(self.accelerator, model) else: ref_model = self.ref_model ref_context = nullcontext() diff --git a/src/llamafactory/train/utils.py b/src/llamafactory/train/utils.py index 230fdc1e..2b33af1c 100644 --- a/src/llamafactory/train/utils.py +++ b/src/llamafactory/train/utils.py @@ -1,3 +1,4 @@ +from contextlib import contextmanager from typing import TYPE_CHECKING, Callable, Dict, List, Optional, Union import torch @@ -17,8 +18,8 @@ if is_galore_available(): if TYPE_CHECKING: - from transformers import Seq2SeqTrainingArguments - from transformers.modeling_utils import PreTrainedModel + from accelerate import Accelerator + from transformers import PreTrainedModel, Seq2SeqTrainingArguments from trl import AutoModelForCausalLMWithValueHead from ..hparams import DataArguments @@ -156,6 +157,17 @@ def create_reward_model( return reward_model +@contextmanager +def get_ref_context(accelerator: "Accelerator", model: "PreTrainedModel"): + r""" + Gets adapter context for the reference model. + """ + with accelerator.unwrap_model(model).disable_adapter(): + model.eval() + yield + model.train() + + def _get_decay_parameter_names(model: "PreTrainedModel") -> List[str]: r""" Returns a list of names of parameters with weight decay. 
(weights in non-layernorm layers) From f1fe357ee20e29d82bda65a55fac55ad03511d16 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Mon, 3 Jun 2024 23:30:37 +0800 Subject: [PATCH 055/162] fix #4043 Former-commit-id: 67af68f4fc5232760c57b3a0ae780628da09db6a --- src/llamafactory/webui/components/eval.py | 2 +- src/llamafactory/webui/components/train.py | 21 ++++++++++++--------- src/llamafactory/webui/locales.py | 5 +++++ src/llamafactory/webui/runner.py | 9 +++++++-- src/llamafactory/webui/utils.py | 2 +- 5 files changed, 26 insertions(+), 13 deletions(-) diff --git a/src/llamafactory/webui/components/eval.py b/src/llamafactory/webui/components/eval.py index 99215fc2..0a7a0f44 100644 --- a/src/llamafactory/webui/components/eval.py +++ b/src/llamafactory/webui/components/eval.py @@ -57,7 +57,6 @@ def create_eval_tab(engine: "Engine") -> Dict[str, "Component"]: with gr.Row(): output_box = gr.Markdown() - output_elems = [output_box, progress_bar] elem_dict.update( dict( cmd_preview_btn=cmd_preview_btn, @@ -68,6 +67,7 @@ def create_eval_tab(engine: "Engine") -> Dict[str, "Component"]: output_box=output_box, ) ) + output_elems = [output_box, progress_bar] cmd_preview_btn.click(engine.runner.preview_eval, input_elems, output_elems, concurrency_limit=None) start_btn.click(engine.runner.run_eval, input_elems, output_elems) diff --git a/src/llamafactory/webui/components/train.py b/src/llamafactory/webui/components/train.py index 6f742bb1..93e5dfc1 100644 --- a/src/llamafactory/webui/components/train.py +++ b/src/llamafactory/webui/components/train.py @@ -298,22 +298,25 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]: ) output_elems = [output_box, progress_bar, loss_viewer] - lang = engine.manager.get_elem_by_id("top.lang") - model_name = engine.manager.get_elem_by_id("top.model_name") - finetuning_type = engine.manager.get_elem_by_id("top.finetuning_type") - cmd_preview_btn.click(engine.runner.preview_train, input_elems, output_elems, concurrency_limit=None) - arg_save_btn.click(engine.runner.save_args, input_elems, output_elems, concurrency_limit=None) - arg_load_btn.click( - engine.runner.load_args, [lang, config_path], list(input_elems) + [output_box], concurrency_limit=None - ) start_btn.click(engine.runner.run_train, input_elems, output_elems) stop_btn.click(engine.runner.set_abort) resume_btn.change(engine.runner.monitor, outputs=output_elems, concurrency_limit=None) - training_stage.change(change_stage, [training_stage], [dataset, packing], queue=False) + lang = engine.manager.get_elem_by_id("top.lang") + model_name: "gr.Dropdown" = engine.manager.get_elem_by_id("top.model_name") + finetuning_type: "gr.Dropdown" = engine.manager.get_elem_by_id("top.finetuning_type") + + arg_save_btn.click(engine.runner.save_args, input_elems, output_elems, concurrency_limit=None) + arg_load_btn.click( + engine.runner.load_args, [lang, config_path], list(input_elems) + [output_box], concurrency_limit=None + ) + dataset.focus(list_datasets, [dataset_dir, training_stage], [dataset], queue=False) + training_stage.change(change_stage, [training_stage], [dataset, packing], queue=False) reward_model.focus(list_checkpoints, [model_name, finetuning_type], [reward_model], queue=False) + model_name.change(list_output_dirs, [model_name, finetuning_type, initial_dir], [output_dir], queue=False) + finetuning_type.change(list_output_dirs, [model_name, finetuning_type, initial_dir], [output_dir], queue=False) output_dir.change( list_output_dirs, [model_name, finetuning_type, initial_dir], 
[output_dir], concurrency_limit=None ).then(check_output_dir, inputs=[lang, model_name, finetuning_type, output_dir], concurrency_limit=None) diff --git a/src/llamafactory/webui/locales.py b/src/llamafactory/webui/locales.py index 5b11c853..e30feab2 100644 --- a/src/llamafactory/webui/locales.py +++ b/src/llamafactory/webui/locales.py @@ -1475,6 +1475,11 @@ ALERTS = { "ru": "Пожалуйста, выберите адаптер.", "zh": "请选择适配器。", }, + "err_no_output_dir": { + "en": "Please provide output dir.", + "ru": "Пожалуйста, укажите выходную директорию.", + "zh": "请填写输出目录。", + }, "err_no_reward_model": { "en": "Please select a reward model.", "ru": "Пожалуйста, выберите модель вознаграждения.", diff --git a/src/llamafactory/webui/runner.py b/src/llamafactory/webui/runner.py index 7a305d62..fe213513 100644 --- a/src/llamafactory/webui/runner.py +++ b/src/llamafactory/webui/runner.py @@ -64,10 +64,15 @@ class Runner: return ALERTS["err_demo"][lang] if do_train: + if not get("train.output_dir"): + return ALERTS["err_no_output_dir"][lang] + stage = TRAINING_STAGES[get("train.training_stage")] - reward_model = get("train.reward_model") - if stage == "ppo" and not reward_model: + if stage == "ppo" and not get("train.reward_model"): return ALERTS["err_no_reward_model"][lang] + else: + if not get("eval.output_dir"): + return ALERTS["err_no_output_dir"][lang] if not from_preview and not is_gpu_or_npu_available(): gr.Warning(ALERTS["warn_no_cuda"][lang]) diff --git a/src/llamafactory/webui/utils.py b/src/llamafactory/webui/utils.py index 09cefa0e..0446cb47 100644 --- a/src/llamafactory/webui/utils.py +++ b/src/llamafactory/webui/utils.py @@ -180,7 +180,7 @@ def check_output_dir(lang: str, model_name: str, finetuning_type: str, output_di r""" Check if output dir exists. """ - if os.path.isdir(get_save_dir(model_name, finetuning_type, output_dir)): + if model_name and output_dir and os.path.isdir(get_save_dir(model_name, finetuning_type, output_dir)): gr.Warning(ALERTS["warn_output_dir_exists"][lang]) From 2e87a54bf1045244b5ab2c6f432557c9562ed6ca Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Tue, 4 Jun 2024 00:10:24 +0800 Subject: [PATCH 056/162] fix abort in webui DDP mode Former-commit-id: b90ac72d753b13a3eed9cb8b898fac2f2fe5153f --- src/llamafactory/cli.py | 31 ++++++++++-------------- src/llamafactory/webui/runner.py | 41 ++++---------------------------- src/llamafactory/webui/utils.py | 14 +++++++++++ 3 files changed, 32 insertions(+), 54 deletions(-) diff --git a/src/llamafactory/cli.py b/src/llamafactory/cli.py index a74445a6..c14ae6ec 100644 --- a/src/llamafactory/cli.py +++ b/src/llamafactory/cli.py @@ -71,28 +71,23 @@ def main(): export_model() elif command == Command.TRAIN: if get_device_count() > 1: - nnodes = os.environ.get("NNODES", "1") - node_rank = os.environ.get("RANK", "0") - nproc_per_node = os.environ.get("NPROC_PER_NODE", str(get_device_count())) master_addr = os.environ.get("MASTER_ADDR", "127.0.0.1") master_port = os.environ.get("MASTER_PORT", str(random.randint(20001, 29999))) logger.info("Initializing distributed tasks at: {}:{}".format(master_addr, master_port)) subprocess.run( - [ - "torchrun", - "--nnodes", - nnodes, - "--node_rank", - node_rank, - "--nproc_per_node", - nproc_per_node, - "--master_addr", - master_addr, - "--master_port", - master_port, - launcher.__file__, - *sys.argv[1:], - ] + ( + "torchrun --nnodes {nnodes} --node_rank {node_rank} --nproc_per_node {nproc_per_node} " + "--master_addr {master_addr} --master_port {master_port} {file_name} {args}" + 
).format( + nnodes=os.environ.get("NNODES", "1"), + node_rank=os.environ.get("RANK", "0"), + nproc_per_node=os.environ.get("NPROC_PER_NODE", str(get_device_count())), + master_addr=master_addr, + master_port=master_port, + file_name=launcher.__file__, + args=" ".join(sys.argv[1:]), + ), + shell=True, ) else: run_exp() diff --git a/src/llamafactory/webui/runner.py b/src/llamafactory/webui/runner.py index 36f593ae..6e1facef 100644 --- a/src/llamafactory/webui/runner.py +++ b/src/llamafactory/webui/runner.py @@ -1,20 +1,17 @@ import os -import signal -import random from copy import deepcopy from subprocess import Popen, TimeoutExpired from typing import TYPE_CHECKING, Any, Dict, Generator, Optional -import psutil from transformers.trainer import TRAINING_ARGS_NAME from ..extras.constants import PEFT_METHODS, TRAINING_STAGES -from ..extras.misc import is_gpu_or_npu_available, torch_gc, get_device_count +from ..extras.misc import is_gpu_or_npu_available, torch_gc from ..extras.packages import is_gradio_available from .common import DEFAULT_CACHE_DIR, get_module, get_save_dir, load_config from .locales import ALERTS -from .utils import gen_cmd, get_eval_results, get_trainer_info, load_args, save_args, save_cmd -from .. import launcher +from .utils import abort_leaf_process, gen_cmd, get_eval_results, get_trainer_info, load_args, save_args, save_cmd + if is_gradio_available(): import gradio as gr @@ -41,12 +38,7 @@ class Runner: def set_abort(self) -> None: self.aborted = True if self.trainer is not None: - for children in psutil.Process(self.trainer.pid).children(): # abort the child process - grand_children = children.children() - if len(grand_children) > 0: - for grand_child in grand_children: - os.kill(grand_child.pid, signal.SIGABRT) - os.kill(children.pid, signal.SIGABRT) + abort_leaf_process(self.trainer.pid) def _initialize(self, data: Dict["Component", Any], do_train: bool, from_preview: bool) -> str: get = lambda elem_id: data[self.manager.get_elem_by_id(elem_id)] @@ -285,30 +277,7 @@ class Runner: args = self._parse_train_args(data) if do_train else self._parse_eval_args(data) env = deepcopy(os.environ) env["LLAMABOARD_ENABLED"] = "1" - if get_device_count() > 1: - nnodes = os.environ.get("NNODES", "1") - node_rank = os.environ.get("RANK", "0") - nproc_per_node = os.environ.get("NPROC_PER_NODE", str(get_device_count())) - master_addr = os.environ.get("MASTER_ADDR", "127.0.0.1") - master_port = os.environ.get("MASTER_PORT", str(random.randint(20001, 29999))) - - self.trainer = Popen([ - "torchrun", - "--nnodes", - nnodes, - "--node_rank", - node_rank, - "--nproc_per_node", - nproc_per_node, - "--master_addr", - master_addr, - "--master_port", - master_port, - launcher.__file__, - save_cmd(args) - ], env=env, shell=True) - else: - self.trainer = Popen("llamafactory-cli train {}".format(save_cmd(args)), env=env, shell=True) + self.trainer = Popen("llamafactory-cli train {}".format(save_cmd(args)), env=env, shell=True) yield from self.monitor() def preview_train(self, data): diff --git a/src/llamafactory/webui/utils.py b/src/llamafactory/webui/utils.py index 0446cb47..fc258806 100644 --- a/src/llamafactory/webui/utils.py +++ b/src/llamafactory/webui/utils.py @@ -1,8 +1,10 @@ import json import os +import signal from datetime import datetime from typing import Any, Dict, List, Optional, Tuple +import psutil from transformers.trainer_utils import get_last_checkpoint from yaml import safe_dump, safe_load @@ -17,6 +19,18 @@ if is_gradio_available(): import gradio as gr +def 
abort_leaf_process(pid: int) -> None: + r""" + Aborts the leaf processes. + """ + children = psutil.Process(pid).children() + if children: + for child in children: + abort_leaf_process(child.pid) + else: + os.kill(pid, signal.SIGABRT) + + def can_quantize(finetuning_type: str) -> "gr.Dropdown": r""" Judges if the quantization is available in this finetuning type. From 920b091581ddc7deaf7c8fb2b3d3b8deec70fd33 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Tue, 4 Jun 2024 00:17:36 +0800 Subject: [PATCH 057/162] fix #3992 Former-commit-id: a48321fbf5196b88a11106cf74a74fbcea2ea50b --- src/llamafactory/data/loader.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/llamafactory/data/loader.py b/src/llamafactory/data/loader.py index f5929f15..4d0503c3 100644 --- a/src/llamafactory/data/loader.py +++ b/src/llamafactory/data/loader.py @@ -120,8 +120,8 @@ def load_single_dataset( logger.info("Sampled {} examples from dataset {}.".format(dataset_attr.num_samples, dataset_attr)) if data_args.max_samples is not None: # truncate dataset - indexes = np.random.permutation(len(dataset))[: data_args.max_samples] - dataset = dataset.select(indexes) + max_samples = min(data_args.max_samples, len(dataset)) + dataset = dataset.select(range(max_samples)) return align_dataset(dataset, dataset_attr, data_args) From 0e81997792275c49c6e981dd750c41b56bfb8d9a Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Tue, 4 Jun 2024 00:21:50 +0800 Subject: [PATCH 058/162] fix #3873 Former-commit-id: 1ac325b4d682bb493573c18bb0b67ceae8d0d372 --- src/llamafactory/chat/hf_engine.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/llamafactory/chat/hf_engine.py b/src/llamafactory/chat/hf_engine.py index ad0e90fe..28e6a409 100644 --- a/src/llamafactory/chat/hf_engine.py +++ b/src/llamafactory/chat/hf_engine.py @@ -83,6 +83,7 @@ class HuggingfaceEngine(BaseEngine): prompt_length = len(prompt_ids) inputs = torch.tensor([prompt_ids], device=model.device) + attention_mask = torch.ones_like(inputs, dtype=torch.bool) do_sample: Optional[bool] = input_kwargs.pop("do_sample", None) temperature: Optional[float] = input_kwargs.pop("temperature", None) @@ -136,6 +137,7 @@ class HuggingfaceEngine(BaseEngine): gen_kwargs = dict( inputs=inputs, + attention_mask=attention_mask, generation_config=GenerationConfig(**generating_args), logits_processor=get_logits_processor(), ) From 1cc9508fb32c8513c3de029237c7b3986da430ac Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Tue, 4 Jun 2024 00:31:10 +0800 Subject: [PATCH 059/162] tiny fix Former-commit-id: f9d50501aac1f60a3b445ca3fee9aa60995461ee --- examples/extras/fsdp_qlora/single_node.sh | 4 ---- scripts/llama_pro.py | 8 ++++---- src/llamafactory/data/loader.py | 2 +- 3 files changed, 5 insertions(+), 9 deletions(-) diff --git a/examples/extras/fsdp_qlora/single_node.sh b/examples/extras/fsdp_qlora/single_node.sh index 54ec2bd2..fac8cdee 100644 --- a/examples/extras/fsdp_qlora/single_node.sh +++ b/examples/extras/fsdp_qlora/single_node.sh @@ -1,10 +1,6 @@ #!/bin/bash # DO NOT use GPTQ/AWQ model in FSDP+QLoRA -pip install "transformers>=4.39.1" -pip install "accelerate>=0.28.0" -pip install "bitsandbytes>=0.43.0" - CUDA_VISIBLE_DEVICES=0,1 accelerate launch \ --config_file examples/accelerate/fsdp_config.yaml \ src/train.py examples/extras/fsdp_qlora/llama3_lora_sft.yaml diff --git a/scripts/llama_pro.py b/scripts/llama_pro.py index 997b3496..727998ae 100644 --- a/scripts/llama_pro.py +++ b/scripts/llama_pro.py @@ -104,10 
+104,10 @@ def block_expansion( print("Model weights saved in {}".format(output_dir)) print("Fine-tune this model with:") - print(" --model_name_or_path {} \\".format(output_dir)) - print(" --finetuning_type freeze \\") - print(" --freeze_trainable_layers {} \\".format(num_expand)) - print(" --use_llama_pro") + print("model_name_or_path: {}".format(output_dir)) + print("finetuning_type: freeze") + print("freeze_trainable_layers: {}".format(num_expand)) + print("use_llama_pro: true") if __name__ == "__main__": diff --git a/src/llamafactory/data/loader.py b/src/llamafactory/data/loader.py index 4d0503c3..7d013d27 100644 --- a/src/llamafactory/data/loader.py +++ b/src/llamafactory/data/loader.py @@ -179,7 +179,7 @@ def get_dataset( if training_args.should_save: dataset.save_to_disk(data_args.tokenized_path) logger.info("Tokenized dataset saved at {}.".format(data_args.tokenized_path)) - logger.info("Please restart the training with `--tokenized_path {}`.".format(data_args.tokenized_path)) + logger.info("Please restart the training with `tokenized_path: {}`.".format(data_args.tokenized_path)) sys.exit(0) From 1ca6d03bc1afc949739104a8d2a0d6f6497f752a Mon Sep 17 00:00:00 2001 From: hzhaoy Date: Tue, 4 Jun 2024 10:33:43 +0800 Subject: [PATCH 060/162] add: support selecting saved configuration files and loading training parameters Former-commit-id: 5c9b17c1dc9093da0ea813642bce9b5c9ae96274 --- src/llamafactory/webui/components/train.py | 5 +++-- src/llamafactory/webui/utils.py | 12 ++++++++++++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/src/llamafactory/webui/components/train.py b/src/llamafactory/webui/components/train.py index 6f742bb1..fabb91ea 100644 --- a/src/llamafactory/webui/components/train.py +++ b/src/llamafactory/webui/components/train.py @@ -6,7 +6,7 @@ from ...extras.constants import TRAINING_STAGES from ...extras.misc import get_device_count from ...extras.packages import is_gradio_available from ..common import DEFAULT_DATA_DIR, list_checkpoints, list_datasets -from ..utils import change_stage, check_output_dir, list_output_dirs +from ..utils import change_stage, check_output_dir, list_output_dirs, list_config_paths from .data import create_preview_box @@ -259,7 +259,7 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]: with gr.Row(): initial_dir = gr.Textbox(visible=False, interactive=False) output_dir = gr.Dropdown(allow_custom_value=True) - config_path = gr.Textbox() + config_path = gr.Dropdown(allow_custom_value=True) with gr.Row(): device_count = gr.Textbox(value=str(get_device_count() or 1), interactive=False) @@ -317,5 +317,6 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]: output_dir.change( list_output_dirs, [model_name, finetuning_type, initial_dir], [output_dir], concurrency_limit=None ).then(check_output_dir, inputs=[lang, model_name, finetuning_type, output_dir], concurrency_limit=None) + config_path.change(list_config_paths, outputs=[config_path], concurrency_limit=None) return elem_dict diff --git a/src/llamafactory/webui/utils.py b/src/llamafactory/webui/utils.py index 09cefa0e..37df1b52 100644 --- a/src/llamafactory/webui/utils.py +++ b/src/llamafactory/webui/utils.py @@ -176,6 +176,18 @@ def list_output_dirs(model_name: str, finetuning_type: str, initial_dir: str) -> return gr.Dropdown(choices=output_dirs) +def list_config_paths() -> "gr.Dropdown": + """ + Lists all the saved configuration files that can be loaded. 
+ """ + if os.path.exists(DEFAULT_CONFIG_DIR) and os.path.isdir(DEFAULT_CONFIG_DIR): + config_files = [file_name for file_name in os.listdir(DEFAULT_CONFIG_DIR) if file_name.endswith(".yaml")] + else: + config_files = [] + + return gr.Dropdown(choices=config_files) + + def check_output_dir(lang: str, model_name: str, finetuning_type: str, output_dir: str) -> None: r""" Check if output dir exists. From f4cf31a1a0e81f84c036cb71d656c56d1ffe84d7 Mon Sep 17 00:00:00 2001 From: faddddeout <39449491+injet-zhou@users.noreply.github.com> Date: Tue, 4 Jun 2024 11:04:29 +0000 Subject: [PATCH 061/162] add throughput entry to log Former-commit-id: 691f999f64c7bac78761e4354f89816d2f0d46fc --- src/llamafactory/extras/callbacks.py | 6 ++++-- src/llamafactory/webui/runner.py | 1 + 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/llamafactory/extras/callbacks.py b/src/llamafactory/extras/callbacks.py index 637b786d..441ebbfd 100644 --- a/src/llamafactory/extras/callbacks.py +++ b/src/llamafactory/extras/callbacks.py @@ -170,12 +170,14 @@ class LogCallback(TrainerCallback): percentage=round(self.cur_steps / self.max_steps * 100, 2) if self.max_steps != 0 else 100, elapsed_time=self.elapsed_time, remaining_time=self.remaining_time, + throughput="{:.2f}".format(state.num_input_tokens_seen / (time.time() - self.start_time)), + total_tokens=state.num_input_tokens_seen, ) logs = {k: v for k, v in logs.items() if v is not None} if self.webui_mode and all(key in logs for key in ["loss", "learning_rate", "epoch"]): logger.info( - "{{'loss': {:.4f}, 'learning_rate': {:2.4e}, 'epoch': {:.2f}}}".format( - logs["loss"], logs["learning_rate"], logs["epoch"] + "{{'loss': {:.4f}, 'learning_rate': {:2.4e}, 'epoch': {:.2f}, 'throughput': {}}}".format( + logs["loss"], logs["learning_rate"], logs["epoch"], logs["throughput"] ) ) diff --git a/src/llamafactory/webui/runner.py b/src/llamafactory/webui/runner.py index 6e1facef..6378a506 100644 --- a/src/llamafactory/webui/runner.py +++ b/src/llamafactory/webui/runner.py @@ -132,6 +132,7 @@ class Runner: pure_bf16=(get("train.compute_type") == "pure_bf16"), plot_loss=True, ddp_timeout=180000000, + include_num_input_tokens_seen=True, ) # checkpoints From d11ede9dc2991ac5b259db00b74e5f6657d7bf1f Mon Sep 17 00:00:00 2001 From: MengqingCao Date: Wed, 5 Jun 2024 07:06:40 +0000 Subject: [PATCH 062/162] add npu for model export Former-commit-id: ce020b6eb3f35c1db37ee4835e694eddcd0f59b0 --- src/llamafactory/hparams/model_args.py | 4 ++-- src/llamafactory/webui/components/export.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/llamafactory/hparams/model_args.py b/src/llamafactory/hparams/model_args.py index 7003cbee..99c02850 100644 --- a/src/llamafactory/hparams/model_args.py +++ b/src/llamafactory/hparams/model_args.py @@ -145,9 +145,9 @@ class ModelArguments: default=1, metadata={"help": "The file shard size (in GB) of the exported model."}, ) - export_device: Literal["cpu", "cuda"] = field( + export_device: Literal["cpu", "cuda", "npu"] = field( default="cpu", - metadata={"help": "The device used in model export, use cuda to avoid addmm errors."}, + metadata={"help": "The device used in model export, use cuda to avoid addmm errors; use npu/cuda to speed up exporting."}, ) export_quantization_bit: Optional[int] = field( default=None, diff --git a/src/llamafactory/webui/components/export.py b/src/llamafactory/webui/components/export.py index 2f354011..32bedffb 100644 --- a/src/llamafactory/webui/components/export.py +++ 
b/src/llamafactory/webui/components/export.py @@ -89,7 +89,7 @@ def create_export_tab(engine: "Engine") -> Dict[str, "Component"]: export_size = gr.Slider(minimum=1, maximum=100, value=1, step=1) export_quantization_bit = gr.Dropdown(choices=["none"] + GPTQ_BITS, value="none") export_quantization_dataset = gr.Textbox(value="data/c4_demo.json") - export_device = gr.Radio(choices=["cpu", "cuda"], value="cpu") + export_device = gr.Radio(choices=["cpu", "cuda", "npu"], value="cpu") export_legacy_format = gr.Checkbox() with gr.Row(): From 04a7065830b04e4973f3e34fe4db05103e110a78 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Wed, 5 Jun 2024 15:16:38 +0800 Subject: [PATCH 063/162] support glm-4 Former-commit-id: a10f4718fbf3f3c89dc7eb31cb8e1a46ca6adda5 --- README.md | 9 ++++-- README_zh.md | 9 ++++-- src/llamafactory/data/template.py | 13 +++++++++ src/llamafactory/extras/constants.py | 42 ++++++++++++++++++++-------- 4 files changed, 56 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index 95c0c345..7df6cb09 100644 --- a/README.md +++ b/README.md @@ -71,14 +71,16 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/ ## Changelog +[24/06/05] We supported fine-tuning the **GLM-4-9B** and **GLM-4-9B-Chat** models. + [24/05/26] We supported **[SimPO](https://arxiv.org/abs/2405.14734)** algorithm for preference learning. See [examples](examples/README.md) for usage. [24/05/20] We supported fine-tuning the **PaliGemma** series models. Note that the PaliGemma models are pre-trained models, you need to fine-tune them with `gemma` template for chat completion. -[24/05/18] We supported **[KTO](https://arxiv.org/abs/2402.01306)** algorithm for preference learning. See [examples](examples/README.md) for usage. -
Full Changelog +[24/05/18] We supported **[KTO](https://arxiv.org/abs/2402.01306)** algorithm for preference learning. See [examples](examples/README.md) for usage. + [24/05/14] We supported training and inference on the Ascend NPU devices. Check [installation](#installation) section for details. [24/04/26] We supported fine-tuning the **LLaVA-1.5** multimodal LLMs. See [examples](examples/README.md) for usage. @@ -157,6 +159,7 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/ | [DeepSeek (MoE)](https://huggingface.co/deepseek-ai) | 7B/16B/67B/236B | q_proj,v_proj | deepseek | | [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | query_key_value | falcon | | [Gemma/CodeGemma](https://huggingface.co/google) | 2B/7B | q_proj,v_proj | gemma | +| [GLM4](https://huggingface.co/THUDM) | 9B | query_key_value | glm4 | | [InternLM2](https://huggingface.co/internlm) | 7B/20B | wqkv | intern2 | | [LLaMA](https://github.com/facebookresearch/llama) | 7B/13B/33B/65B | q_proj,v_proj | - | | [LLaMA-2](https://huggingface.co/meta-llama) | 7B/13B/70B | q_proj,v_proj | llama2 | @@ -531,7 +534,7 @@ If you have a project that should be incorporated, please contact via email or c This repository is licensed under the [Apache-2.0 License](LICENSE). -Please follow the model licenses to use the corresponding model weights: [Baichuan2](https://huggingface.co/baichuan-inc/Baichuan2-7B-Base/blob/main/Community%20License%20for%20Baichuan%202%20Model.pdf) / [BLOOM](https://huggingface.co/spaces/bigscience/license) / [ChatGLM3](https://github.com/THUDM/ChatGLM3/blob/main/MODEL_LICENSE) / [Command-R](https://cohere.com/c4ai-cc-by-nc-license) / [DeepSeek](https://github.com/deepseek-ai/DeepSeek-LLM/blob/main/LICENSE-MODEL) / [Falcon](https://huggingface.co/tiiuae/falcon-180B/blob/main/LICENSE.txt) / [Gemma](https://ai.google.dev/gemma/terms) / [InternLM2](https://github.com/InternLM/InternLM#license) / [LLaMA](https://github.com/facebookresearch/llama/blob/main/MODEL_CARD.md) / [LLaMA-2 (LLaVA-1.5)](https://ai.meta.com/llama/license/) / [LLaMA-3](https://llama.meta.com/llama3/license/) / [Mistral](LICENSE) / [OLMo](LICENSE) / [Phi-1.5/2](https://huggingface.co/microsoft/phi-1_5/resolve/main/Research%20License.docx) / [Phi-3](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/blob/main/LICENSE) / [Qwen](https://github.com/QwenLM/Qwen/blob/main/Tongyi%20Qianwen%20LICENSE%20AGREEMENT) / [StarCoder2](https://huggingface.co/spaces/bigcode/bigcode-model-license-agreement) / [XVERSE](https://github.com/xverse-ai/XVERSE-13B/blob/main/MODEL_LICENSE.pdf) / [Yi](https://huggingface.co/01-ai/Yi-6B/blob/main/LICENSE) / [Yi-1.5](LICENSE) / [Yuan](https://github.com/IEIT-Yuan/Yuan-2.0/blob/main/LICENSE-Yuan) +Please follow the model licenses to use the corresponding model weights: [Baichuan2](https://huggingface.co/baichuan-inc/Baichuan2-7B-Base/blob/main/Community%20License%20for%20Baichuan%202%20Model.pdf) / [BLOOM](https://huggingface.co/spaces/bigscience/license) / [ChatGLM3](https://github.com/THUDM/ChatGLM3/blob/main/MODEL_LICENSE) / [Command-R](https://cohere.com/c4ai-cc-by-nc-license) / [DeepSeek](https://github.com/deepseek-ai/DeepSeek-LLM/blob/main/LICENSE-MODEL) / [Falcon](https://huggingface.co/tiiuae/falcon-180B/blob/main/LICENSE.txt) / [Gemma](https://ai.google.dev/gemma/terms) / [GLM4](https://huggingface.co/THUDM/glm-4-9b/blob/main/LICENSE) / [InternLM2](https://github.com/InternLM/InternLM#license) / [LLaMA](https://github.com/facebookresearch/llama/blob/main/MODEL_CARD.md) 
/ [LLaMA-2 (LLaVA-1.5)](https://ai.meta.com/llama/license/) / [LLaMA-3](https://llama.meta.com/llama3/license/) / [Mistral](LICENSE) / [OLMo](LICENSE) / [Phi-1.5/2](https://huggingface.co/microsoft/phi-1_5/resolve/main/Research%20License.docx) / [Phi-3](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/blob/main/LICENSE) / [Qwen](https://github.com/QwenLM/Qwen/blob/main/Tongyi%20Qianwen%20LICENSE%20AGREEMENT) / [StarCoder2](https://huggingface.co/spaces/bigcode/bigcode-model-license-agreement) / [XVERSE](https://github.com/xverse-ai/XVERSE-13B/blob/main/MODEL_LICENSE.pdf) / [Yi](https://huggingface.co/01-ai/Yi-6B/blob/main/LICENSE) / [Yi-1.5](LICENSE) / [Yuan](https://github.com/IEIT-Yuan/Yuan-2.0/blob/main/LICENSE-Yuan) ## Citation diff --git a/README_zh.md b/README_zh.md index b10db1e5..bdb61b8b 100644 --- a/README_zh.md +++ b/README_zh.md @@ -71,14 +71,16 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/ec36a9dd-37f4-4f72-81bd ## 更新日志 +[24/06/05] 我们支持了 **GLM-4-9B** 和 **GLM-4-9B-Chat** 模型的微调。 + [24/05/26] 我们支持了 **[SimPO](https://arxiv.org/abs/2405.14734)** 偏好对齐算法。详细用法请参照 [examples](examples/README_zh.md)。 [24/05/20] 我们支持了 **PaliGemma** 系列模型的微调。注意 PaliGemma 是预训练模型,你需要使用 `gemma` 模板进行微调使其获得对话能力。 -[24/05/18] 我们支持了 **[KTO](https://arxiv.org/abs/2402.01306)** 偏好对齐算法。详细用法请参照 [examples](examples/README_zh.md)。 -
展开日志 +[24/05/18] 我们支持了 **[KTO](https://arxiv.org/abs/2402.01306)** 偏好对齐算法。详细用法请参照 [examples](examples/README_zh.md)。 + [24/05/14] 我们支持了昇腾 NPU 设备的训练和推理。详情请查阅[安装](#安装-llama-factory)部分。 [24/04/26] 我们支持了多模态模型 **LLaVA-1.5** 的微调。详细用法请参照 [examples](examples/README_zh.md)。 @@ -157,6 +159,7 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/ec36a9dd-37f4-4f72-81bd | [DeepSeek (MoE)](https://huggingface.co/deepseek-ai) | 7B/16B/67B/236B | q_proj,v_proj | deepseek | | [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | query_key_value | falcon | | [Gemma/CodeGemma](https://huggingface.co/google) | 2B/7B | q_proj,v_proj | gemma | +| [GLM4](https://huggingface.co/THUDM) | 9B | query_key_value | glm4 | | [InternLM2](https://huggingface.co/internlm) | 7B/20B | wqkv | intern2 | | [LLaMA](https://github.com/facebookresearch/llama) | 7B/13B/33B/65B | q_proj,v_proj | - | | [LLaMA-2](https://huggingface.co/meta-llama) | 7B/13B/70B | q_proj,v_proj | llama2 | @@ -529,7 +532,7 @@ run_name: test_run # 可选 本仓库的代码依照 [Apache-2.0](LICENSE) 协议开源。 -使用模型权重时,请遵循对应的模型协议:[Baichuan2](https://huggingface.co/baichuan-inc/Baichuan2-7B-Base/blob/main/Community%20License%20for%20Baichuan%202%20Model.pdf) / [BLOOM](https://huggingface.co/spaces/bigscience/license) / [ChatGLM3](https://github.com/THUDM/ChatGLM3/blob/main/MODEL_LICENSE) / [Command-R](https://cohere.com/c4ai-cc-by-nc-license) / [DeepSeek](https://github.com/deepseek-ai/DeepSeek-LLM/blob/main/LICENSE-MODEL) / [Falcon](https://huggingface.co/tiiuae/falcon-180B/blob/main/LICENSE.txt) / [Gemma](https://ai.google.dev/gemma/terms) / [InternLM2](https://github.com/InternLM/InternLM#license) / [LLaMA](https://github.com/facebookresearch/llama/blob/main/MODEL_CARD.md) / [LLaMA-2 (LLaVA-1.5)](https://ai.meta.com/llama/license/) / [LLaMA-3](https://llama.meta.com/llama3/license/) / [Mistral](LICENSE) / [OLMo](LICENSE) / [Phi-1.5/2](https://huggingface.co/microsoft/phi-1_5/resolve/main/Research%20License.docx) / [Phi-3](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/blob/main/LICENSE) / [Qwen](https://github.com/QwenLM/Qwen/blob/main/Tongyi%20Qianwen%20LICENSE%20AGREEMENT) / [StarCoder2](https://huggingface.co/spaces/bigcode/bigcode-model-license-agreement) / [XVERSE](https://github.com/xverse-ai/XVERSE-13B/blob/main/MODEL_LICENSE.pdf) / [Yi](https://huggingface.co/01-ai/Yi-6B/blob/main/LICENSE) / [Yi-1.5](LICENSE) / [Yuan](https://github.com/IEIT-Yuan/Yuan-2.0/blob/main/LICENSE-Yuan) +使用模型权重时,请遵循对应的模型协议:[Baichuan2](https://huggingface.co/baichuan-inc/Baichuan2-7B-Base/blob/main/Community%20License%20for%20Baichuan%202%20Model.pdf) / [BLOOM](https://huggingface.co/spaces/bigscience/license) / [ChatGLM3](https://github.com/THUDM/ChatGLM3/blob/main/MODEL_LICENSE) / [Command-R](https://cohere.com/c4ai-cc-by-nc-license) / [DeepSeek](https://github.com/deepseek-ai/DeepSeek-LLM/blob/main/LICENSE-MODEL) / [Falcon](https://huggingface.co/tiiuae/falcon-180B/blob/main/LICENSE.txt) / [Gemma](https://ai.google.dev/gemma/terms) / [GLM4](https://huggingface.co/THUDM/glm-4-9b/blob/main/LICENSE) / [InternLM2](https://github.com/InternLM/InternLM#license) / [LLaMA](https://github.com/facebookresearch/llama/blob/main/MODEL_CARD.md) / [LLaMA-2 (LLaVA-1.5)](https://ai.meta.com/llama/license/) / [LLaMA-3](https://llama.meta.com/llama3/license/) / [Mistral](LICENSE) / [OLMo](LICENSE) / [Phi-1.5/2](https://huggingface.co/microsoft/phi-1_5/resolve/main/Research%20License.docx) / [Phi-3](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/blob/main/LICENSE) / 
[Qwen](https://github.com/QwenLM/Qwen/blob/main/Tongyi%20Qianwen%20LICENSE%20AGREEMENT) / [StarCoder2](https://huggingface.co/spaces/bigcode/bigcode-model-license-agreement) / [XVERSE](https://github.com/xverse-ai/XVERSE-13B/blob/main/MODEL_LICENSE.pdf) / [Yi](https://huggingface.co/01-ai/Yi-6B/blob/main/LICENSE) / [Yi-1.5](LICENSE) / [Yuan](https://github.com/IEIT-Yuan/Yuan-2.0/blob/main/LICENSE-Yuan) ## 引用 diff --git a/src/llamafactory/data/template.py b/src/llamafactory/data/template.py index 00527b44..fe0211c6 100644 --- a/src/llamafactory/data/template.py +++ b/src/llamafactory/data/template.py @@ -658,6 +658,19 @@ _register_template( ) +_register_template( + name="glm4", + format_user=StringFormatter(slots=["<|user|>\n{{content}}<|assistant|>"]), + format_assistant=StringFormatter(slots=["\n{{content}}"]), + format_system=StringFormatter(slots=["[gMASK]{{content}}"]), + format_function=FunctionFormatter(slots=["{{name}}\n{{arguments}}"]), + format_observation=StringFormatter(slots=["<|observation|>\n{{content}}<|assistant|>"]), + stop_words=["<|user|>", "<|observation|>"], + efficient_eos=True, + force_system=True, +) + + _register_template( name="intern", format_user=StringFormatter(slots=["<|User|>:{{content}}", {"token": ""}, "\n<|Bot|>:"]), diff --git a/src/llamafactory/extras/constants.py b/src/llamafactory/extras/constants.py index f365016f..4d7685c5 100644 --- a/src/llamafactory/extras/constants.py +++ b/src/llamafactory/extras/constants.py @@ -281,6 +281,26 @@ register_model_group( ) +register_model_group( + models={ + "CodeGemma-7B": { + DownloadSource.DEFAULT: "google/codegemma-7b", + }, + "CodeGemma-7B-Chat": { + DownloadSource.DEFAULT: "google/codegemma-7b-it", + DownloadSource.MODELSCOPE: "AI-ModelScope/codegemma-7b-it", + }, + "CodeGemma-1.1-2B": { + DownloadSource.DEFAULT: "google/codegemma-1.1-2b", + }, + "CodeGemma-1.1-7B-Chat": { + DownloadSource.DEFAULT: "google/codegemma-1.1-7b-it", + }, + }, + template="gemma", +) + + register_model_group( models={ "CommandR-35B-Chat": { @@ -469,21 +489,21 @@ register_model_group( register_model_group( models={ - "CodeGemma-7B": { - DownloadSource.DEFAULT: "google/codegemma-7b", + "GLM-4-9B": { + DownloadSource.DEFAULT: "THUDM/glm-4-9b", + DownloadSource.MODELSCOPE: "ZhipuAI/glm-4-9b", }, - "CodeGemma-7B-Chat": { - DownloadSource.DEFAULT: "google/codegemma-7b-it", - DownloadSource.MODELSCOPE: "AI-ModelScope/codegemma-7b-it", + "GLM-4-9B-Chat": { + DownloadSource.DEFAULT: "THUDM/glm-4-9b-chat", + DownloadSource.MODELSCOPE: "ZhipuAI/glm-4-9b-chat", }, - "CodeGemma-1.1-2B": { - DownloadSource.DEFAULT: "google/codegemma-1.1-2b", - }, - "CodeGemma-1.1-7B-Chat": { - DownloadSource.DEFAULT: "google/codegemma-1.1-7b-it", + "GLM-4-9B-1M-Chat": { + DownloadSource.DEFAULT: "THUDM/glm-4-9b-chat-1m", + DownloadSource.MODELSCOPE: "ZhipuAI/glm-4-9b-chat-1m", }, }, - template="gemma", + module="query_key_value", + template="glm4", ) From 48b2d0d12b4281d27d87a0ba4e4b786d2e63e092 Mon Sep 17 00:00:00 2001 From: MengqingCao Date: Wed, 5 Jun 2024 08:03:30 +0000 Subject: [PATCH 064/162] fix #4077 Former-commit-id: fedbe92f3b56294acc6c49f9a51e369cf2de3ead --- src/llamafactory/cli.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/llamafactory/cli.py b/src/llamafactory/cli.py index c14ae6ec..8a229a38 100644 --- a/src/llamafactory/cli.py +++ b/src/llamafactory/cli.py @@ -70,7 +70,11 @@ def main(): elif command == Command.EXPORT: export_model() elif command == Command.TRAIN: - if get_device_count() > 1: + if get_device_count() > 
0: + # NOTE (MengqingCao): why use torchrun when only one accelerator is available? + # DeepSpeed only warp model with DeepSpeedEngine when launching by distributed launcher, + # e.g., torchrun, causing some feature missing + # sa: https://github.com/huggingface/transformers/issues/24309 master_addr = os.environ.get("MASTER_ADDR", "127.0.0.1") master_port = os.environ.get("MASTER_PORT", str(random.randint(20001, 29999))) logger.info("Initializing distributed tasks at: {}:{}".format(master_addr, master_port)) From 3d2b0813318fa66226eb2703aff814322bf07a8f Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Wed, 5 Jun 2024 16:32:32 +0800 Subject: [PATCH 065/162] update readme Former-commit-id: 02d34db29a7a35c25711d49e98fd3167a2f4dfe7 --- README.md | 2 +- README_zh.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 7df6cb09..5e8bc8eb 100644 --- a/README.md +++ b/README.md @@ -71,7 +71,7 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/ ## Changelog -[24/06/05] We supported fine-tuning the **GLM-4-9B** and **GLM-4-9B-Chat** models. +[24/06/05] We supported fine-tuning the **[GLM-4-9B/GLM-4-9B-Chat](https://github.com/THUDM/GLM-4)** models. [24/05/26] We supported **[SimPO](https://arxiv.org/abs/2405.14734)** algorithm for preference learning. See [examples](examples/README.md) for usage. diff --git a/README_zh.md b/README_zh.md index bdb61b8b..d8e17b29 100644 --- a/README_zh.md +++ b/README_zh.md @@ -71,7 +71,7 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/ec36a9dd-37f4-4f72-81bd ## 更新日志 -[24/06/05] 我们支持了 **GLM-4-9B** 和 **GLM-4-9B-Chat** 模型的微调。 +[24/06/05] 我们支持了 **[GLM-4-9B/GLM-4-9B-Chat](https://github.com/THUDM/GLM-4)** 模型的微调。 [24/05/26] 我们支持了 **[SimPO](https://arxiv.org/abs/2405.14734)** 偏好对齐算法。详细用法请参照 [examples](examples/README_zh.md)。 From 861d93a06a7621cfba4c05224fa40dabb80966c2 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Wed, 5 Jun 2024 16:56:54 +0800 Subject: [PATCH 066/162] fix #4079 Former-commit-id: fda732d7f4616373844c97beff416880260f49db --- requirements.txt | 1 + setup.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index e17954e3..9e00555e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,6 +7,7 @@ gradio>=4.0.0 scipy einops sentencepiece +tiktoken protobuf uvicorn pydantic diff --git a/setup.py b/setup.py index 23f532e7..54f971ca 100644 --- a/setup.py +++ b/setup.py @@ -31,7 +31,7 @@ extra_require = { "gptq": ["optimum>=1.16.0", "auto-gptq>=0.5.0"], "awq": ["autoawq"], "aqlm": ["aqlm[gpu]>=1.1.0"], - "qwen": ["tiktoken", "transformers_stream_generator"], + "qwen": ["transformers_stream_generator"], "modelscope": ["modelscope"], "quality": ["ruff"], } From 80897e3d6fda7ecf27d78c30a9cd457d65969bbd Mon Sep 17 00:00:00 2001 From: MengqingCao Date: Wed, 5 Jun 2024 09:37:36 +0000 Subject: [PATCH 067/162] modify export_device option Former-commit-id: b2fc4a5499e21a5b9622c2285402efef6e27a74d --- src/llamafactory/webui/components/export.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llamafactory/webui/components/export.py b/src/llamafactory/webui/components/export.py index 32bedffb..791a833b 100644 --- a/src/llamafactory/webui/components/export.py +++ b/src/llamafactory/webui/components/export.py @@ -89,7 +89,7 @@ def create_export_tab(engine: "Engine") -> Dict[str, "Component"]: export_size = gr.Slider(minimum=1, maximum=100, value=1, step=1) export_quantization_bit = 
gr.Dropdown(choices=["none"] + GPTQ_BITS, value="none") export_quantization_dataset = gr.Textbox(value="data/c4_demo.json") - export_device = gr.Radio(choices=["cpu", "cuda", "npu"], value="cpu") + export_device = gr.Radio(choices=["cpu", "cuda/npu"], value="cpu") export_legacy_format = gr.Checkbox() with gr.Row(): From fc053cf81f0cc135e5fce7fcb4880fcbc7ec3db2 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Thu, 6 Jun 2024 00:50:32 +0800 Subject: [PATCH 068/162] fix #4090 Former-commit-id: d9f15f30a8f4bc64778a5c96baeb6801700d7a2c --- requirements.txt | 2 +- src/llamafactory/extras/misc.py | 2 +- src/llamafactory/train/dpo/trainer.py | 29 ++++++++++----------------- 3 files changed, 13 insertions(+), 20 deletions(-) diff --git a/requirements.txt b/requirements.txt index 9e00555e..7b6cbee9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,7 +2,7 @@ transformers>=4.41.2 datasets>=2.16.0 accelerate>=0.30.1 peft>=0.11.1 -trl>=0.8.6 +trl>=0.9.3 gradio>=4.0.0 scipy einops diff --git a/src/llamafactory/extras/misc.py b/src/llamafactory/extras/misc.py index 638c24cf..78f71847 100644 --- a/src/llamafactory/extras/misc.py +++ b/src/llamafactory/extras/misc.py @@ -65,7 +65,7 @@ def check_dependencies() -> None: require_version("datasets>=2.16.0", "To fix: pip install datasets>=2.16.0") require_version("accelerate>=0.30.1", "To fix: pip install accelerate>=0.30.1") require_version("peft>=0.11.1", "To fix: pip install peft>=0.11.1") - require_version("trl>=0.8.6", "To fix: pip install trl>=0.8.6") + require_version("trl>=0.9.3", "To fix: pip install trl>=0.9.3") def count_parameters(model: torch.nn.Module) -> Tuple[int, int]: diff --git a/src/llamafactory/train/dpo/trainer.py b/src/llamafactory/train/dpo/trainer.py index ec1de810..2bbe6a06 100644 --- a/src/llamafactory/train/dpo/trainer.py +++ b/src/llamafactory/train/dpo/trainer.py @@ -93,18 +93,6 @@ class CustomDPOTrainer(DPOTrainer): output_dir = output_dir if output_dir is not None else self.args.output_dir getattr(self.processor, "image_processor").save_pretrained(output_dir) - def sft_loss(self, batch: Dict[str, "torch.Tensor"], chosen_logits: "torch.FloatTensor") -> "torch.Tensor": - r""" - Computes supervised cross-entropy loss of given labels under the given logits. - - Returns: - A tensor of shape (batch_size,) containing the cross-entropy loss of each samples. - """ - batch_size = batch["input_ids"].size(0) // 2 - chosen_labels, _ = batch["labels"].split(batch_size, dim=0) - chosen_logps = self.get_batch_logps(chosen_logits, chosen_labels, average_log_prob=True) - return -chosen_logps - def odds_ratio_loss(self, chosen_logps: "torch.Tensor", rejected_logps: "torch.Tensor") -> "torch.Tensor": r""" Computes ORPO's odds ratio (OR) loss for batched log probabilities of the policy model. @@ -156,9 +144,9 @@ class CustomDPOTrainer(DPOTrainer): def concatenated_forward( self, model: "PreTrainedModel", batch: Dict[str, "torch.Tensor"] - ) -> Tuple["torch.Tensor", "torch.Tensor", "torch.Tensor", "torch.Tensor"]: + ) -> Tuple["torch.Tensor", "torch.Tensor", "torch.Tensor", "torch.Tensor", "torch.Tensor"]: r""" - Computes the sum log probabilities of the labels under the given logits if loss_type != IPO. + Computes the sum log probabilities of the labels under given logits if loss_type is not IPO, ORPO or SimPO. Otherwise the average log probabilities. 
""" @@ -167,17 +155,20 @@ class CustomDPOTrainer(DPOTrainer): all_logits: "torch.Tensor" = model(**batch, return_dict=True, use_cache=False).logits.to(torch.float32) - all_logps = self.get_batch_logps( + all_logps, valid_length = self.get_batch_logps( logits=all_logits, labels=batch["labels"], - average_log_prob=(self.loss_type in ["ipo", "orpo", "simpo"]), is_encoder_decoder=self.is_encoder_decoder, label_pad_token_id=self.label_pad_token_id, ) + if self.loss_type in ["ipo", "orpo", "simpo"]: + all_logps = all_logps / valid_length + batch_size = batch["input_ids"].size(0) // 2 chosen_logps, rejected_logps = all_logps.split(batch_size, dim=0) chosen_logits, rejected_logits = all_logits.split(batch_size, dim=0) - return chosen_logps, rejected_logps, chosen_logits, rejected_logits + chosen_length, _ = valid_length.split(batch_size, dim=0) + return chosen_logps, rejected_logps, chosen_logits, rejected_logits, chosen_logps / chosen_length def compute_reference_log_probs( self, model: "PreTrainedModel", batch: Dict[str, "torch.Tensor"] @@ -201,6 +192,7 @@ class CustomDPOTrainer(DPOTrainer): reference_rejected_logps, _, _, + _, ) = self.concatenated_forward(ref_model, batch) return reference_chosen_logps, reference_rejected_logps @@ -220,6 +212,7 @@ class CustomDPOTrainer(DPOTrainer): policy_rejected_logps, policy_chosen_logits, policy_rejected_logits, + policy_chosen_logps_avg, ) = self.concatenated_forward(model, batch) reference_chosen_logps, reference_rejected_logps = self.compute_reference_log_probs(model, batch) @@ -229,7 +222,7 @@ class CustomDPOTrainer(DPOTrainer): reference_chosen_logps, reference_rejected_logps, ) - sft_loss = self.sft_loss(batch, policy_chosen_logits) # compute chosen_logps with masks + sft_loss = -policy_chosen_logps_avg if self.ftx_gamma > 1e-6: losses += self.ftx_gamma * sft_loss From 1935f4a1e09d0ea171ac7aeb93c0edfb82705a29 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Thu, 6 Jun 2024 01:28:14 +0800 Subject: [PATCH 069/162] add llamafactory-cli env Former-commit-id: 1df077184845ff5f394b9324d46f8c382869e590 --- .github/ISSUE_TEMPLATE/bug-report.yml | 6 +-- src/llamafactory/cli.py | 6 ++- src/llamafactory/extras/env.py | 54 +++++++++++++++++++++++++++ src/llamafactory/extras/packages.py | 4 ++ 4 files changed, 65 insertions(+), 5 deletions(-) create mode 100644 src/llamafactory/extras/env.py diff --git a/.github/ISSUE_TEMPLATE/bug-report.yml b/.github/ISSUE_TEMPLATE/bug-report.yml index 82620fdb..1d962200 100644 --- a/.github/ISSUE_TEMPLATE/bug-report.yml +++ b/.github/ISSUE_TEMPLATE/bug-report.yml @@ -20,10 +20,10 @@ body: attributes: label: System Info description: | - Please share your system info with us. You can run the command **transformers-cli env** and copy-paste its output below. - 请提供您的系统信息。您可以在命令行运行 **transformers-cli env** 并将其输出复制到该文本框中。 + Please share your system info with us. You can run the command **llamafactory-cli env** and copy-paste its output below. + 请提供您的系统信息。您可以在命令行运行 **llamafactory-cli env** 并将其输出复制到该文本框中。 - placeholder: transformers version, platform, python version, ... + placeholder: llamafactory version, platform, python version, ... - type: textarea id: reproduction diff --git a/src/llamafactory/cli.py b/src/llamafactory/cli.py index c14ae6ec..fbe18d86 100644 --- a/src/llamafactory/cli.py +++ b/src/llamafactory/cli.py @@ -8,6 +8,7 @@ from . 
import launcher from .api.app import run_api from .chat.chat_model import run_chat from .eval.evaluator import run_eval +from .extras.env import VERSION, print_env from .extras.logging import get_logger from .extras.misc import get_device_count from .train.tuner import export_model, run_exp @@ -29,8 +30,6 @@ USAGE = ( + "-" * 70 ) -VERSION = "0.7.2.dev0" - WELCOME = ( "-" * 58 + "\n" @@ -50,6 +49,7 @@ logger = get_logger(__name__) class Command(str, Enum): API = "api" CHAT = "chat" + ENV = "env" EVAL = "eval" EXPORT = "export" TRAIN = "train" @@ -65,6 +65,8 @@ def main(): run_api() elif command == Command.CHAT: run_chat() + elif command == Command.ENV: + print_env() elif command == Command.EVAL: run_eval() elif command == Command.EXPORT: diff --git a/src/llamafactory/extras/env.py b/src/llamafactory/extras/env.py new file mode 100644 index 00000000..27453a6b --- /dev/null +++ b/src/llamafactory/extras/env.py @@ -0,0 +1,54 @@ +import platform + +import accelerate +import datasets +import peft +import torch +import transformers +import trl +from transformers.utils import is_bitsandbytes_available, is_torch_cuda_available, is_torch_npu_available + +from .packages import is_deepspeed_available, is_vllm_available + + +VERSION = "0.7.2.dev0" + + +def print_env() -> None: + info = { + "`llamafactory` version": VERSION, + "Platform": platform.platform(), + "Python version": platform.python_version(), + "PyTorch version": torch.__version__, + "Transformers version": transformers.__version__, + "Datasets version": datasets.__version__, + "Accelerate version": accelerate.__version__, + "PEFT version": peft.__version__, + "TRL version": trl.__version__, + } + + if is_torch_cuda_available(): + info["PyTorch version"] += " (GPU)" + info["GPU type"] = torch.cuda.get_device_name() + + if is_torch_npu_available(): + info["PyTorch version"] += " (NPU)" + info["NPU type"] = torch.npu.get_device_name() + info["CANN version"] = torch.version.cann + + if is_deepspeed_available(): + import deepspeed # type: ignore + + info["DeepSpeed version"] = deepspeed.__version__ + + if is_bitsandbytes_available(): + import bitsandbytes + + info["Bitsandbytes version"] = bitsandbytes.__version__ + + if is_vllm_available(): + import vllm + + info["vLLM version"] = vllm.__version__ + + print("\n".join(["- {}: {}".format(key, value) for key, value in info.items()]) + "\n") diff --git a/src/llamafactory/extras/packages.py b/src/llamafactory/extras/packages.py index 4c9e6492..fe056e2d 100644 --- a/src/llamafactory/extras/packages.py +++ b/src/llamafactory/extras/packages.py @@ -20,6 +20,10 @@ def _get_package_version(name: str) -> "Version": return version.parse("0.0.0") +def is_deepspeed_available(): + return _is_package_available("deepspeed") + + def is_fastapi_available(): return _is_package_available("fastapi") From eff00a8172c0573b76a0949b21dd75a089679406 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Thu, 6 Jun 2024 01:39:02 +0800 Subject: [PATCH 070/162] fix setup Former-commit-id: b2b80d434fcc0c3838d229098e1c21d26632204c --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 54f971ca..7a5b9304 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ from setuptools import find_packages, setup def get_version(): - with open(os.path.join("src", "llamafactory", "cli.py"), "r", encoding="utf-8") as f: + with open(os.path.join("src", "llamafactory", "extras", "env.py"), "r", encoding="utf-8") as f: file_content = f.read() pattern = 
r"{}\W*=\W*\"([^\"]+)\"".format("VERSION") (version,) = re.findall(pattern, file_content) From 0b671615d0b2fc02a752a7f35592809fd4841054 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Thu, 6 Jun 2024 01:49:20 +0800 Subject: [PATCH 071/162] update train hparams Former-commit-id: 1ca9fce55b55bf209f4b76152b586731932a3f39 --- examples/extras/badam/llama3_lora_sft.yaml | 2 +- examples/extras/fsdp_qlora/llama3_lora_sft.yaml | 2 +- examples/extras/galore/llama3_full_sft.yaml | 2 +- examples/extras/llama_pro/llama3_freeze_sft.yaml | 2 +- examples/extras/loraplus/llama3_lora_sft.yaml | 2 +- examples/extras/mod/llama3_full_sft.yaml | 2 +- examples/full_multi_gpu/llama3_full_sft.yaml | 2 +- examples/lora_multi_gpu/llama3_lora_sft.yaml | 2 +- examples/lora_multi_gpu/llama3_lora_sft_ds.yaml | 2 +- examples/lora_multi_npu/llama3_lora_sft_ds.yaml | 2 +- examples/lora_single_gpu/llama3_lora_dpo.yaml | 2 +- examples/lora_single_gpu/llama3_lora_kto.yaml | 2 +- examples/lora_single_gpu/llama3_lora_pretrain.yaml | 2 +- examples/lora_single_gpu/llama3_lora_reward.yaml | 2 +- examples/lora_single_gpu/llama3_lora_sft.yaml | 2 +- examples/lora_single_gpu/llava1_5_lora_sft.yaml | 2 +- examples/qlora_single_gpu/llama3_lora_sft_aqlm.yaml | 2 +- examples/qlora_single_gpu/llama3_lora_sft_awq.yaml | 2 +- examples/qlora_single_gpu/llama3_lora_sft_bitsandbytes.yaml | 2 +- examples/qlora_single_gpu/llama3_lora_sft_gptq.yaml | 2 +- src/llamafactory/extras/env.py | 2 +- src/llamafactory/webui/runner.py | 2 +- 22 files changed, 22 insertions(+), 22 deletions(-) diff --git a/examples/extras/badam/llama3_lora_sft.yaml b/examples/extras/badam/llama3_lora_sft.yaml index 242e63ab..a78de2fa 100644 --- a/examples/extras/badam/llama3_lora_sft.yaml +++ b/examples/extras/badam/llama3_lora_sft.yaml @@ -37,5 +37,5 @@ pure_bf16: true ### eval val_size: 0.1 per_device_eval_batch_size: 1 -evaluation_strategy: steps +eval_strategy: steps eval_steps: 500 diff --git a/examples/extras/fsdp_qlora/llama3_lora_sft.yaml b/examples/extras/fsdp_qlora/llama3_lora_sft.yaml index 920d8fdb..348459b8 100644 --- a/examples/extras/fsdp_qlora/llama3_lora_sft.yaml +++ b/examples/extras/fsdp_qlora/llama3_lora_sft.yaml @@ -38,5 +38,5 @@ fp16: true ### eval val_size: 0.1 per_device_eval_batch_size: 1 -evaluation_strategy: steps +eval_strategy: steps eval_steps: 500 diff --git a/examples/extras/galore/llama3_full_sft.yaml b/examples/extras/galore/llama3_full_sft.yaml index 3db31fed..605545de 100644 --- a/examples/extras/galore/llama3_full_sft.yaml +++ b/examples/extras/galore/llama3_full_sft.yaml @@ -38,5 +38,5 @@ pure_bf16: true ### eval val_size: 0.1 per_device_eval_batch_size: 1 -evaluation_strategy: steps +eval_strategy: steps eval_steps: 500 diff --git a/examples/extras/llama_pro/llama3_freeze_sft.yaml b/examples/extras/llama_pro/llama3_freeze_sft.yaml index 214f411a..444a1113 100644 --- a/examples/extras/llama_pro/llama3_freeze_sft.yaml +++ b/examples/extras/llama_pro/llama3_freeze_sft.yaml @@ -36,5 +36,5 @@ fp16: true ### eval val_size: 0.1 per_device_eval_batch_size: 1 -evaluation_strategy: steps +eval_strategy: steps eval_steps: 500 diff --git a/examples/extras/loraplus/llama3_lora_sft.yaml b/examples/extras/loraplus/llama3_lora_sft.yaml index 9936bcd3..960f613e 100644 --- a/examples/extras/loraplus/llama3_lora_sft.yaml +++ b/examples/extras/loraplus/llama3_lora_sft.yaml @@ -35,5 +35,5 @@ fp16: true ### eval val_size: 0.1 per_device_eval_batch_size: 1 -evaluation_strategy: steps +eval_strategy: steps eval_steps: 500 diff --git 
a/examples/extras/mod/llama3_full_sft.yaml b/examples/extras/mod/llama3_full_sft.yaml index edfec44e..df03c1e0 100644 --- a/examples/extras/mod/llama3_full_sft.yaml +++ b/examples/extras/mod/llama3_full_sft.yaml @@ -35,5 +35,5 @@ pure_bf16: true ### eval val_size: 0.1 per_device_eval_batch_size: 1 -evaluation_strategy: steps +eval_strategy: steps eval_steps: 500 diff --git a/examples/full_multi_gpu/llama3_full_sft.yaml b/examples/full_multi_gpu/llama3_full_sft.yaml index b8873e3a..40b62f24 100644 --- a/examples/full_multi_gpu/llama3_full_sft.yaml +++ b/examples/full_multi_gpu/llama3_full_sft.yaml @@ -37,5 +37,5 @@ fp16: true ### eval val_size: 0.1 per_device_eval_batch_size: 1 -evaluation_strategy: steps +eval_strategy: steps eval_steps: 500 diff --git a/examples/lora_multi_gpu/llama3_lora_sft.yaml b/examples/lora_multi_gpu/llama3_lora_sft.yaml index 5e5dd9e6..9be3c780 100644 --- a/examples/lora_multi_gpu/llama3_lora_sft.yaml +++ b/examples/lora_multi_gpu/llama3_lora_sft.yaml @@ -37,5 +37,5 @@ fp16: true ### eval val_size: 0.1 per_device_eval_batch_size: 1 -evaluation_strategy: steps +eval_strategy: steps eval_steps: 500 diff --git a/examples/lora_multi_gpu/llama3_lora_sft_ds.yaml b/examples/lora_multi_gpu/llama3_lora_sft_ds.yaml index e8dee216..41152243 100644 --- a/examples/lora_multi_gpu/llama3_lora_sft_ds.yaml +++ b/examples/lora_multi_gpu/llama3_lora_sft_ds.yaml @@ -38,5 +38,5 @@ fp16: true ### eval val_size: 0.1 per_device_eval_batch_size: 1 -evaluation_strategy: steps +eval_strategy: steps eval_steps: 500 diff --git a/examples/lora_multi_npu/llama3_lora_sft_ds.yaml b/examples/lora_multi_npu/llama3_lora_sft_ds.yaml index 825b8450..1ed24d04 100644 --- a/examples/lora_multi_npu/llama3_lora_sft_ds.yaml +++ b/examples/lora_multi_npu/llama3_lora_sft_ds.yaml @@ -38,5 +38,5 @@ fp16: true ### eval val_size: 0.1 per_device_eval_batch_size: 1 -evaluation_strategy: steps +eval_strategy: steps eval_steps: 500 diff --git a/examples/lora_single_gpu/llama3_lora_dpo.yaml b/examples/lora_single_gpu/llama3_lora_dpo.yaml index 62752e57..158c9e04 100644 --- a/examples/lora_single_gpu/llama3_lora_dpo.yaml +++ b/examples/lora_single_gpu/llama3_lora_dpo.yaml @@ -36,5 +36,5 @@ fp16: true ### eval val_size: 0.1 per_device_eval_batch_size: 1 -evaluation_strategy: steps +eval_strategy: steps eval_steps: 500 diff --git a/examples/lora_single_gpu/llama3_lora_kto.yaml b/examples/lora_single_gpu/llama3_lora_kto.yaml index 6f689818..ead221e9 100644 --- a/examples/lora_single_gpu/llama3_lora_kto.yaml +++ b/examples/lora_single_gpu/llama3_lora_kto.yaml @@ -34,5 +34,5 @@ fp16: true ### eval val_size: 0.1 per_device_eval_batch_size: 1 -evaluation_strategy: steps +eval_strategy: steps eval_steps: 500 diff --git a/examples/lora_single_gpu/llama3_lora_pretrain.yaml b/examples/lora_single_gpu/llama3_lora_pretrain.yaml index 54c5d89a..9167a893 100644 --- a/examples/lora_single_gpu/llama3_lora_pretrain.yaml +++ b/examples/lora_single_gpu/llama3_lora_pretrain.yaml @@ -33,5 +33,5 @@ fp16: true ### eval val_size: 0.1 per_device_eval_batch_size: 1 -evaluation_strategy: steps +eval_strategy: steps eval_steps: 500 diff --git a/examples/lora_single_gpu/llama3_lora_reward.yaml b/examples/lora_single_gpu/llama3_lora_reward.yaml index c82f9414..91663057 100644 --- a/examples/lora_single_gpu/llama3_lora_reward.yaml +++ b/examples/lora_single_gpu/llama3_lora_reward.yaml @@ -34,5 +34,5 @@ fp16: true ### eval val_size: 0.1 per_device_eval_batch_size: 1 -evaluation_strategy: steps +eval_strategy: steps eval_steps: 500 diff --git 
a/examples/lora_single_gpu/llama3_lora_sft.yaml b/examples/lora_single_gpu/llama3_lora_sft.yaml index 429cb6af..cc93d05a 100644 --- a/examples/lora_single_gpu/llama3_lora_sft.yaml +++ b/examples/lora_single_gpu/llama3_lora_sft.yaml @@ -34,5 +34,5 @@ fp16: true ### eval val_size: 0.1 per_device_eval_batch_size: 1 -evaluation_strategy: steps +eval_strategy: steps eval_steps: 500 diff --git a/examples/lora_single_gpu/llava1_5_lora_sft.yaml b/examples/lora_single_gpu/llava1_5_lora_sft.yaml index acab4884..95c1d40d 100644 --- a/examples/lora_single_gpu/llava1_5_lora_sft.yaml +++ b/examples/lora_single_gpu/llava1_5_lora_sft.yaml @@ -35,5 +35,5 @@ fp16: true ### eval val_size: 0.1 per_device_eval_batch_size: 1 -evaluation_strategy: steps +eval_strategy: steps eval_steps: 500 diff --git a/examples/qlora_single_gpu/llama3_lora_sft_aqlm.yaml b/examples/qlora_single_gpu/llama3_lora_sft_aqlm.yaml index 53cc12e2..23301de5 100644 --- a/examples/qlora_single_gpu/llama3_lora_sft_aqlm.yaml +++ b/examples/qlora_single_gpu/llama3_lora_sft_aqlm.yaml @@ -34,5 +34,5 @@ fp16: true ### eval val_size: 0.1 per_device_eval_batch_size: 1 -evaluation_strategy: steps +eval_strategy: steps eval_steps: 500 diff --git a/examples/qlora_single_gpu/llama3_lora_sft_awq.yaml b/examples/qlora_single_gpu/llama3_lora_sft_awq.yaml index 1a92f822..40a290a3 100644 --- a/examples/qlora_single_gpu/llama3_lora_sft_awq.yaml +++ b/examples/qlora_single_gpu/llama3_lora_sft_awq.yaml @@ -34,5 +34,5 @@ fp16: true ### eval val_size: 0.1 per_device_eval_batch_size: 1 -evaluation_strategy: steps +eval_strategy: steps eval_steps: 500 diff --git a/examples/qlora_single_gpu/llama3_lora_sft_bitsandbytes.yaml b/examples/qlora_single_gpu/llama3_lora_sft_bitsandbytes.yaml index c7f72c66..6652d8cf 100644 --- a/examples/qlora_single_gpu/llama3_lora_sft_bitsandbytes.yaml +++ b/examples/qlora_single_gpu/llama3_lora_sft_bitsandbytes.yaml @@ -35,5 +35,5 @@ fp16: true ### eval val_size: 0.1 per_device_eval_batch_size: 1 -evaluation_strategy: steps +eval_strategy: steps eval_steps: 500 diff --git a/examples/qlora_single_gpu/llama3_lora_sft_gptq.yaml b/examples/qlora_single_gpu/llama3_lora_sft_gptq.yaml index 45caf17c..323ea7c6 100644 --- a/examples/qlora_single_gpu/llama3_lora_sft_gptq.yaml +++ b/examples/qlora_single_gpu/llama3_lora_sft_gptq.yaml @@ -34,5 +34,5 @@ fp16: true ### eval val_size: 0.1 per_device_eval_batch_size: 1 -evaluation_strategy: steps +eval_strategy: steps eval_steps: 500 diff --git a/src/llamafactory/extras/env.py b/src/llamafactory/extras/env.py index 27453a6b..059730f1 100644 --- a/src/llamafactory/extras/env.py +++ b/src/llamafactory/extras/env.py @@ -51,4 +51,4 @@ def print_env() -> None: info["vLLM version"] = vllm.__version__ - print("\n".join(["- {}: {}".format(key, value) for key, value in info.items()]) + "\n") + print("\n" + "\n".join(["- {}: {}".format(key, value) for key, value in info.items()]) + "\n") diff --git a/src/llamafactory/webui/runner.py b/src/llamafactory/webui/runner.py index 6e1facef..d35fd903 100644 --- a/src/llamafactory/webui/runner.py +++ b/src/llamafactory/webui/runner.py @@ -200,7 +200,7 @@ class Runner: # eval config if get("train.val_size") > 1e-6 and args["stage"] != "ppo": args["val_size"] = get("train.val_size") - args["evaluation_strategy"] = "steps" + args["eval_strategy"] = "steps" args["eval_steps"] = args["save_steps"] args["per_device_eval_batch_size"] = args["per_device_train_batch_size"] From 7226fe780d4511f9f34cb5ccf084132f4fc3e2d9 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> 
Date: Thu, 6 Jun 2024 02:29:55 +0800 Subject: [PATCH 072/162] support image input in api #3971 #4061 Former-commit-id: c70aaf763ef22fb83ce3635e8ffd5ec4c89c1cb0 --- README.md | 3 +++ README_zh.md | 3 +++ src/llamafactory/api/chat.py | 39 ++++++++++++++++++++++++++------ src/llamafactory/api/protocol.py | 12 +++++++++- 4 files changed, 49 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 5e8bc8eb..3eebf355 100644 --- a/README.md +++ b/README.md @@ -456,6 +456,9 @@ docker compose -f ./docker-compose.yml up -d CUDA_VISIBLE_DEVICES=0,1 API_PORT=8000 llamafactory-cli api examples/inference/llama3_vllm.yaml ``` +> [!TIP] +> Visit https://platform.openai.com/docs/api-reference/chat/create for API document. + ### Download from ModelScope Hub If you have trouble with downloading models and datasets from Hugging Face, you can use ModelScope. diff --git a/README_zh.md b/README_zh.md index d8e17b29..09a7f330 100644 --- a/README_zh.md +++ b/README_zh.md @@ -454,6 +454,9 @@ docker compose -f ./docker-compose.yml up -d CUDA_VISIBLE_DEVICES=0,1 API_PORT=8000 llamafactory-cli api examples/inference/llama3_vllm.yaml ``` +> [!TIP] +> API 文档请查阅 https://platform.openai.com/docs/api-reference/chat/create。 + ### 从魔搭社区下载 如果您在 Hugging Face 模型和数据集的下载中遇到了问题,可以通过下述方法使用魔搭社区。 diff --git a/src/llamafactory/api/chat.py b/src/llamafactory/api/chat.py index b7a08f0b..712b6940 100644 --- a/src/llamafactory/api/chat.py +++ b/src/llamafactory/api/chat.py @@ -1,10 +1,11 @@ import json +import os import uuid from typing import TYPE_CHECKING, AsyncGenerator, Dict, List, Optional, Tuple from ..data import Role as DataRole from ..extras.logging import get_logger -from ..extras.packages import is_fastapi_available +from ..extras.packages import is_fastapi_available, is_pillow_available from .common import dictify, jsonify from .protocol import ( ChatCompletionMessage, @@ -25,7 +26,14 @@ if is_fastapi_available(): from fastapi import HTTPException, status +if is_pillow_available(): + import requests + from PIL import Image + + if TYPE_CHECKING: + from numpy.typing import NDArray + from ..chat import ChatModel from .protocol import ChatCompletionRequest, ScoreEvaluationRequest @@ -40,7 +48,9 @@ ROLE_MAPPING = { } -def _process_request(request: "ChatCompletionRequest") -> Tuple[List[Dict[str, str]], str, str]: +def _process_request( + request: "ChatCompletionRequest", +) -> Tuple[List[Dict[str, str]], Optional[str], Optional[str], Optional["NDArray"]]: logger.info("==== request ====\n{}".format(json.dumps(dictify(request), indent=2, ensure_ascii=False))) if len(request.messages) == 0: @@ -49,12 +59,13 @@ def _process_request(request: "ChatCompletionRequest") -> Tuple[List[Dict[str, s if request.messages[0].role == Role.SYSTEM: system = request.messages.pop(0).content else: - system = "" + system = None if len(request.messages) % 2 == 0: raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Only supports u/a/u/a/u...") input_messages = [] + image = None for i, message in enumerate(request.messages): if i % 2 == 0 and message.role not in [Role.USER, Role.TOOL]: raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid role") @@ -66,6 +77,18 @@ def _process_request(request: "ChatCompletionRequest") -> Tuple[List[Dict[str, s arguments = message.tool_calls[0].function.arguments content = json.dumps({"name": name, "argument": arguments}, ensure_ascii=False) input_messages.append({"role": ROLE_MAPPING[Role.FUNCTION], "content": content}) + elif isinstance(message.content, list): + for 
input_item in message.content: + if input_item.type == "text": + input_messages.append({"role": ROLE_MAPPING[message.role], "content": input_item.text}) + else: + image_url = input_item.image_url.url + if os.path.isfile(image_url): + image_path = open(image_url, "rb") + else: + image_path = requests.get(image_url, stream=True).raw + + image = Image.open(image_path).convert("RGB") else: input_messages.append({"role": ROLE_MAPPING[message.role], "content": message.content}) @@ -76,9 +99,9 @@ def _process_request(request: "ChatCompletionRequest") -> Tuple[List[Dict[str, s except Exception: raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid tools") else: - tools = "" + tools = None - return input_messages, system, tools + return input_messages, system, tools, image def _create_stream_chat_completion_chunk( @@ -97,11 +120,12 @@ async def create_chat_completion_response( request: "ChatCompletionRequest", chat_model: "ChatModel" ) -> "ChatCompletionResponse": completion_id = "chatcmpl-{}".format(uuid.uuid4().hex) - input_messages, system, tools = _process_request(request) + input_messages, system, tools, image = _process_request(request) responses = await chat_model.achat( input_messages, system, tools, + image, do_sample=request.do_sample, temperature=request.temperature, top_p=request.top_p, @@ -145,7 +169,7 @@ async def create_stream_chat_completion_response( request: "ChatCompletionRequest", chat_model: "ChatModel" ) -> AsyncGenerator[str, None]: completion_id = "chatcmpl-{}".format(uuid.uuid4().hex) - input_messages, system, tools = _process_request(request) + input_messages, system, tools, image = _process_request(request) if tools: raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Cannot stream function calls.") @@ -159,6 +183,7 @@ async def create_stream_chat_completion_response( input_messages, system, tools, + image, do_sample=request.do_sample, temperature=request.temperature, top_p=request.top_p, diff --git a/src/llamafactory/api/protocol.py b/src/llamafactory/api/protocol.py index 525fa6a7..055fa781 100644 --- a/src/llamafactory/api/protocol.py +++ b/src/llamafactory/api/protocol.py @@ -56,9 +56,19 @@ class FunctionCall(BaseModel): function: Function +class ImageURL(BaseModel): + url: str + + +class MultimodalInputItem(BaseModel): + type: Literal["text", "image_url"] + text: Optional[str] = None + image_url: Optional[ImageURL] = None + + class ChatMessage(BaseModel): role: Role - content: Optional[str] = None + content: Optional[Union[str, List[MultimodalInputItem]]] = None tool_calls: Optional[List[FunctionCall]] = None From c91655e952f5dec7cc5bb898478a4b0f40533045 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Thu, 6 Jun 2024 02:43:19 +0800 Subject: [PATCH 073/162] support train from scratch #4033 #4075 Former-commit-id: 1290b9d01077e62f8de7a23637daa2586cc82bfa --- src/llamafactory/hparams/model_args.py | 4 ++++ src/llamafactory/model/loader.py | 2 ++ 2 files changed, 6 insertions(+) diff --git a/src/llamafactory/hparams/model_args.py b/src/llamafactory/hparams/model_args.py index 7003cbee..a3b5b2a6 100644 --- a/src/llamafactory/hparams/model_args.py +++ b/src/llamafactory/hparams/model_args.py @@ -101,6 +101,10 @@ class ModelArguments: default=False, metadata={"help": "Whether or not to upcast the output of lm_head in fp32."}, ) + train_from_scratch: bool = field( + default=False, + metadata={"help": "Whether or not to randomly initialize the model weights."}, + ) infer_backend: Literal["huggingface", "vllm"] = field( 
default="huggingface", metadata={"help": "Backend engine used at inference."}, diff --git a/src/llamafactory/model/loader.py b/src/llamafactory/model/loader.py index 49b347d5..8f3309b3 100644 --- a/src/llamafactory/model/loader.py +++ b/src/llamafactory/model/loader.py @@ -131,6 +131,8 @@ def load_model( model = load_mod_pretrained_model(**init_kwargs) elif model_args.visual_inputs: model = AutoModelForVision2Seq.from_pretrained(**init_kwargs) + elif model_args.train_from_scratch: + model = AutoModelForCausalLM.from_config(config) else: model = AutoModelForCausalLM.from_pretrained(**init_kwargs) From c955a332d7b58dd029ff20e5ec97b58927c2d265 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Thu, 6 Jun 2024 02:53:27 +0800 Subject: [PATCH 074/162] add vllm_dtype arg #3387 #3717 Former-commit-id: a0dd3a6351bb78541d40fec1d2fc457d803c86a4 --- src/llamafactory/chat/vllm_engine.py | 7 ++----- src/llamafactory/hparams/model_args.py | 4 ++++ 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/llamafactory/chat/vllm_engine.py b/src/llamafactory/chat/vllm_engine.py index 8a067754..e193704a 100644 --- a/src/llamafactory/chat/vllm_engine.py +++ b/src/llamafactory/chat/vllm_engine.py @@ -3,7 +3,7 @@ from typing import TYPE_CHECKING, AsyncGenerator, AsyncIterator, Dict, List, Opt from ..data import get_template_and_fix_tokenizer from ..extras.logging import get_logger -from ..extras.misc import get_device_count, infer_optim_dtype +from ..extras.misc import get_device_count from ..extras.packages import is_vllm_available from ..model import load_config, load_tokenizer from ..model.utils.visual import LlavaMultiModalProjectorForYiVLForVLLM @@ -35,8 +35,6 @@ class VllmEngine(BaseEngine): generating_args: "GeneratingArguments", ) -> None: config = load_config(model_args) # may download model from ms hub - infer_dtype = infer_optim_dtype(model_dtype=getattr(config, "torch_dtype", None)) - infer_dtype = str(infer_dtype).split(".")[-1] self.can_generate = finetuning_args.stage == "sft" tokenizer_module = load_tokenizer(model_args) @@ -50,7 +48,7 @@ class VllmEngine(BaseEngine): "model": model_args.model_name_or_path, "trust_remote_code": True, "download_dir": model_args.cache_dir, - "dtype": infer_dtype, + "dtype": model_args.vllm_dtype, "max_model_len": model_args.vllm_maxlen, "tensor_parallel_size": get_device_count() or 1, "gpu_memory_utilization": model_args.vllm_gpu_util, @@ -70,7 +68,6 @@ class VllmEngine(BaseEngine): engine_args["image_input_shape"] = "1,3,{},{}".format(image_size, image_size) engine_args["image_feature_size"] = self.image_feature_size if getattr(config, "is_yi_vl_derived_model", None): - # bug in vllm 0.4.2, see: https://github.com/vllm-project/vllm/pull/4828 import vllm.model_executor.models.llava logger.info("Detected Yi-VL model, applying projector patch.") diff --git a/src/llamafactory/hparams/model_args.py b/src/llamafactory/hparams/model_args.py index a3b5b2a6..0434f426 100644 --- a/src/llamafactory/hparams/model_args.py +++ b/src/llamafactory/hparams/model_args.py @@ -125,6 +125,10 @@ class ModelArguments: default=8, metadata={"help": "Maximum rank of all LoRAs in the vLLM engine."}, ) + vllm_dtype: Literal["auto", "float16", "bfloat16", "float32"] = field( + default="auto", + metadata={"help": "Data type for model weights and activations in the vLLM engine."}, + ) offload_folder: str = field( default="offload", metadata={"help": "Path to offload model weights."}, From 79a39ca7031163a928dbc107bcf11fc41697ad6a Mon Sep 17 00:00:00 2001 From: hoshi-hiyouga 
Date: Thu, 6 Jun 2024 03:14:23 +0800 Subject: [PATCH 075/162] Update model_args.py Former-commit-id: 09c0afd94a8a5f5b45a61b32c983d50e1b9e2941 --- src/llamafactory/hparams/model_args.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/llamafactory/hparams/model_args.py b/src/llamafactory/hparams/model_args.py index 99c02850..024bc2f8 100644 --- a/src/llamafactory/hparams/model_args.py +++ b/src/llamafactory/hparams/model_args.py @@ -145,9 +145,9 @@ class ModelArguments: default=1, metadata={"help": "The file shard size (in GB) of the exported model."}, ) - export_device: Literal["cpu", "cuda", "npu"] = field( + export_device: Literal["cpu", "auto"] = field( default="cpu", - metadata={"help": "The device used in model export, use cuda to avoid addmm errors; use npu/cuda to speed up exporting."}, + metadata={"help": "The device used in model export, use `auto` to accelerate exporting."}, ) export_quantization_bit: Optional[int] = field( default=None, From f002409e6adf7046f67047ad24a41f00ea196075 Mon Sep 17 00:00:00 2001 From: hoshi-hiyouga Date: Thu, 6 Jun 2024 03:14:46 +0800 Subject: [PATCH 076/162] Update export.py Former-commit-id: 694833c1104d13929d4f181f014a121f25955dc5 --- src/llamafactory/webui/components/export.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llamafactory/webui/components/export.py b/src/llamafactory/webui/components/export.py index 791a833b..7e1493c8 100644 --- a/src/llamafactory/webui/components/export.py +++ b/src/llamafactory/webui/components/export.py @@ -89,7 +89,7 @@ def create_export_tab(engine: "Engine") -> Dict[str, "Component"]: export_size = gr.Slider(minimum=1, maximum=100, value=1, step=1) export_quantization_bit = gr.Dropdown(choices=["none"] + GPTQ_BITS, value="none") export_quantization_dataset = gr.Textbox(value="data/c4_demo.json") - export_device = gr.Radio(choices=["cpu", "cuda/npu"], value="cpu") + export_device = gr.Radio(choices=["cpu", "auto"], value="cpu") export_legacy_format = gr.Checkbox() with gr.Row(): From e9f9b1f250ef3a8a2f784d7a0ba696d3cde24891 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Thu, 6 Jun 2024 03:33:44 +0800 Subject: [PATCH 077/162] lint Former-commit-id: 9030501eaef97ea249347198272adf0d709503ec --- src/llamafactory/extras/env.py | 3 ++- src/llamafactory/extras/packages.py | 4 --- src/llamafactory/webui/components/train.py | 14 +++++------ src/llamafactory/webui/engine.py | 2 +- src/llamafactory/webui/utils.py | 29 +++++++++++----------- 5 files changed, 25 insertions(+), 27 deletions(-) diff --git a/src/llamafactory/extras/env.py b/src/llamafactory/extras/env.py index 059730f1..fdccf86b 100644 --- a/src/llamafactory/extras/env.py +++ b/src/llamafactory/extras/env.py @@ -6,9 +6,10 @@ import peft import torch import transformers import trl +from transformers.integrations import is_deepspeed_available from transformers.utils import is_bitsandbytes_available, is_torch_cuda_available, is_torch_npu_available -from .packages import is_deepspeed_available, is_vllm_available +from .packages import is_vllm_available VERSION = "0.7.2.dev0" diff --git a/src/llamafactory/extras/packages.py b/src/llamafactory/extras/packages.py index fe056e2d..4c9e6492 100644 --- a/src/llamafactory/extras/packages.py +++ b/src/llamafactory/extras/packages.py @@ -20,10 +20,6 @@ def _get_package_version(name: str) -> "Version": return version.parse("0.0.0") -def is_deepspeed_available(): - return _is_package_available("deepspeed") - - def is_fastapi_available(): return 
_is_package_available("fastapi") diff --git a/src/llamafactory/webui/components/train.py b/src/llamafactory/webui/components/train.py index eca8f9b3..74f8ef2a 100644 --- a/src/llamafactory/webui/components/train.py +++ b/src/llamafactory/webui/components/train.py @@ -6,7 +6,7 @@ from ...extras.constants import TRAINING_STAGES from ...extras.misc import get_device_count from ...extras.packages import is_gradio_available from ..common import DEFAULT_DATA_DIR, list_checkpoints, list_datasets -from ..utils import change_stage, check_output_dir, list_output_dirs, list_config_paths +from ..utils import change_stage, check_output_dir, list_config_paths, list_output_dirs from .data import create_preview_box @@ -257,7 +257,7 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]: with gr.Row(): with gr.Column(scale=3): with gr.Row(): - initial_dir = gr.Textbox(visible=False, interactive=False) + current_time = gr.Textbox(visible=False, interactive=False) output_dir = gr.Dropdown(allow_custom_value=True) config_path = gr.Dropdown(allow_custom_value=True) @@ -284,7 +284,7 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]: arg_load_btn=arg_load_btn, start_btn=start_btn, stop_btn=stop_btn, - initial_dir=initial_dir, + current_time=current_time, output_dir=output_dir, config_path=config_path, device_count=device_count, @@ -315,11 +315,11 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]: dataset.focus(list_datasets, [dataset_dir, training_stage], [dataset], queue=False) training_stage.change(change_stage, [training_stage], [dataset, packing], queue=False) reward_model.focus(list_checkpoints, [model_name, finetuning_type], [reward_model], queue=False) - model_name.change(list_output_dirs, [model_name, finetuning_type, initial_dir], [output_dir], queue=False) - finetuning_type.change(list_output_dirs, [model_name, finetuning_type, initial_dir], [output_dir], queue=False) + model_name.change(list_output_dirs, [model_name, finetuning_type, current_time], [output_dir], queue=False) + finetuning_type.change(list_output_dirs, [model_name, finetuning_type, current_time], [output_dir], queue=False) output_dir.change( - list_output_dirs, [model_name, finetuning_type, initial_dir], [output_dir], concurrency_limit=None + list_output_dirs, [model_name, finetuning_type, current_time], [output_dir], concurrency_limit=None ).then(check_output_dir, inputs=[lang, model_name, finetuning_type, output_dir], concurrency_limit=None) - config_path.change(list_config_paths, outputs=[config_path], concurrency_limit=None) + config_path.change(list_config_paths, [current_time], [config_path], queue=False) return elem_dict diff --git a/src/llamafactory/webui/engine.py b/src/llamafactory/webui/engine.py index 00877115..eb6142d3 100644 --- a/src/llamafactory/webui/engine.py +++ b/src/llamafactory/webui/engine.py @@ -41,7 +41,7 @@ class Engine: if not self.pure_chat: current_time = get_time() - init_dict["train.initial_dir"] = {"value": "train_{}".format(current_time)} + init_dict["train.current_time"] = {"value": current_time} init_dict["train.output_dir"] = {"value": "train_{}".format(current_time)} init_dict["train.config_path"] = {"value": "{}.yaml".format(current_time)} init_dict["eval.output_dir"] = {"value": "eval_{}".format(current_time)} diff --git a/src/llamafactory/webui/utils.py b/src/llamafactory/webui/utils.py index 0303aa31..23e62dca 100644 --- a/src/llamafactory/webui/utils.py +++ b/src/llamafactory/webui/utils.py @@ -174,11 +174,24 @@ def save_args(config_path: str, 
config_dict: Dict[str, Any]) -> str: return str(get_arg_save_path(config_path)) -def list_output_dirs(model_name: str, finetuning_type: str, initial_dir: str) -> "gr.Dropdown": +def list_config_paths(current_time: str) -> "gr.Dropdown": + r""" + Lists all the saved configuration files. + """ + config_files = ["{}.yaml".format(current_time)] + if os.path.isdir(DEFAULT_CONFIG_DIR): + for file_name in os.listdir(DEFAULT_CONFIG_DIR): + if file_name.endswith(".yaml"): + config_files.append(file_name) + + return gr.Dropdown(choices=config_files) + + +def list_output_dirs(model_name: str, finetuning_type: str, current_time: str) -> "gr.Dropdown": r""" Lists all the directories that can resume from. """ - output_dirs = [initial_dir] + output_dirs = ["train_{}".format(current_time)] if model_name: save_dir = get_save_dir(model_name, finetuning_type) if save_dir and os.path.isdir(save_dir): @@ -190,18 +203,6 @@ def list_output_dirs(model_name: str, finetuning_type: str, initial_dir: str) -> return gr.Dropdown(choices=output_dirs) -def list_config_paths() -> "gr.Dropdown": - """ - Lists all the saved configuration files that can be loaded. - """ - if os.path.exists(DEFAULT_CONFIG_DIR) and os.path.isdir(DEFAULT_CONFIG_DIR): - config_files = [file_name for file_name in os.listdir(DEFAULT_CONFIG_DIR) if file_name.endswith(".yaml")] - else: - config_files = [] - - return gr.Dropdown(choices=config_files) - - def check_output_dir(lang: str, model_name: str, finetuning_type: str, output_dir: str) -> None: r""" Check if output dir exists. From 8d9f3022d2b8832024d0755a636a6330a6153dc9 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Thu, 6 Jun 2024 03:42:50 +0800 Subject: [PATCH 078/162] add codestral 22B Former-commit-id: b011c7f527a57cb1d21c4e2c9631c2fb62bb835e --- src/llamafactory/extras/constants.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/llamafactory/extras/constants.py b/src/llamafactory/extras/constants.py index 4d7685c5..687e16cc 100644 --- a/src/llamafactory/extras/constants.py +++ b/src/llamafactory/extras/constants.py @@ -301,6 +301,16 @@ register_model_group( ) +register_model_group( + models={ + "Codestral-22B-v0.1-Chat": { + DownloadSource.DEFAULT: "mistralai/Codestral-22B-v0.1", + }, + }, + template="mistral", +) + + register_model_group( models={ "CommandR-35B-Chat": { From 990dd6d44c6c20e256e9e298b06fbbbcad9b0464 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Thu, 6 Jun 2024 03:53:28 +0800 Subject: [PATCH 079/162] lora modules: all by default Former-commit-id: 52c4ae87c7f4312704c31ef26b079b2c5b95ea5f --- README.md | 56 ++++++++--------- README_zh.md | 60 +++++++++---------- .../extras/fsdp_qlora/llama3_lora_sft.yaml | 2 +- examples/extras/loraplus/llama3_lora_sft.yaml | 2 +- examples/lora_multi_gpu/llama3_lora_sft.yaml | 2 +- .../lora_multi_gpu/llama3_lora_sft_ds.yaml | 2 +- .../lora_multi_npu/llama3_lora_sft_ds.yaml | 2 +- examples/lora_single_gpu/llama3_lora_dpo.yaml | 2 +- examples/lora_single_gpu/llama3_lora_kto.yaml | 2 +- examples/lora_single_gpu/llama3_lora_ppo.yaml | 2 +- .../lora_single_gpu/llama3_lora_pretrain.yaml | 2 +- .../lora_single_gpu/llama3_lora_reward.yaml | 2 +- examples/lora_single_gpu/llama3_lora_sft.yaml | 2 +- .../lora_single_gpu/llama3_preprocess.yaml | 2 +- .../lora_single_gpu/llava1_5_lora_sft.yaml | 2 +- .../llama3_lora_sft_aqlm.yaml | 2 +- .../qlora_single_gpu/llama3_lora_sft_awq.yaml | 2 +- .../llama3_lora_sft_bitsandbytes.yaml | 2 +- .../llama3_lora_sft_gptq.yaml | 2 +- 
src/llamafactory/extras/constants.py | 19 ------ src/llamafactory/hparams/finetuning_args.py | 15 +---- src/llamafactory/webui/common.py | 8 --- src/llamafactory/webui/runner.py | 4 +- 23 files changed, 78 insertions(+), 118 deletions(-) diff --git a/README.md b/README.md index 3eebf355..f3ced20e 100644 --- a/README.md +++ b/README.md @@ -149,34 +149,34 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/ ## Supported Models -| Model | Model size | Default module | Template | -| -------------------------------------------------------- | -------------------------------- | ----------------- | --------- | -| [Baichuan2](https://huggingface.co/baichuan-inc) | 7B/13B | W_pack | baichuan2 | -| [BLOOM](https://huggingface.co/bigscience) | 560M/1.1B/1.7B/3B/7.1B/176B | query_key_value | - | -| [BLOOMZ](https://huggingface.co/bigscience) | 560M/1.1B/1.7B/3B/7.1B/176B | query_key_value | - | -| [ChatGLM3](https://huggingface.co/THUDM) | 6B | query_key_value | chatglm3 | -| [Command-R](https://huggingface.co/CohereForAI) | 35B/104B | q_proj,v_proj | cohere | -| [DeepSeek (MoE)](https://huggingface.co/deepseek-ai) | 7B/16B/67B/236B | q_proj,v_proj | deepseek | -| [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | query_key_value | falcon | -| [Gemma/CodeGemma](https://huggingface.co/google) | 2B/7B | q_proj,v_proj | gemma | -| [GLM4](https://huggingface.co/THUDM) | 9B | query_key_value | glm4 | -| [InternLM2](https://huggingface.co/internlm) | 7B/20B | wqkv | intern2 | -| [LLaMA](https://github.com/facebookresearch/llama) | 7B/13B/33B/65B | q_proj,v_proj | - | -| [LLaMA-2](https://huggingface.co/meta-llama) | 7B/13B/70B | q_proj,v_proj | llama2 | -| [LLaMA-3](https://huggingface.co/meta-llama) | 8B/70B | q_proj,v_proj | llama3 | -| [LLaVA-1.5](https://huggingface.co/llava-hf) | 7B/13B | q_proj,v_proj | vicuna | -| [Mistral/Mixtral](https://huggingface.co/mistralai) | 7B/8x7B/8x22B | q_proj,v_proj | mistral | -| [OLMo](https://huggingface.co/allenai) | 1B/7B | q_proj,v_proj | - | -| [PaliGemma](https://huggingface.co/google) | 3B | q_proj,v_proj | gemma | -| [Phi-1.5/2](https://huggingface.co/microsoft) | 1.3B/2.7B | q_proj,v_proj | - | -| [Phi-3](https://huggingface.co/microsoft) | 4B/7B/14B | qkv_proj | phi | -| [Qwen](https://huggingface.co/Qwen) | 1.8B/7B/14B/72B | c_attn | qwen | -| [Qwen1.5 (Code/MoE)](https://huggingface.co/Qwen) | 0.5B/1.8B/4B/7B/14B/32B/72B/110B | q_proj,v_proj | qwen | -| [StarCoder2](https://huggingface.co/bigcode) | 3B/7B/15B | q_proj,v_proj | - | -| [XVERSE](https://huggingface.co/xverse) | 7B/13B/65B | q_proj,v_proj | xverse | -| [Yi (1/1.5)](https://huggingface.co/01-ai) | 6B/9B/34B | q_proj,v_proj | yi | -| [Yi-VL](https://huggingface.co/01-ai) | 6B/34B | q_proj,v_proj | yi_vl | -| [Yuan](https://huggingface.co/IEITYuan) | 2B/51B/102B | q_proj,v_proj | yuan | +| Model | Model size | Template | +| -------------------------------------------------------- | -------------------------------- | --------- | +| [Baichuan2](https://huggingface.co/baichuan-inc) | 7B/13B | baichuan2 | +| [BLOOM](https://huggingface.co/bigscience) | 560M/1.1B/1.7B/3B/7.1B/176B | - | +| [BLOOMZ](https://huggingface.co/bigscience) | 560M/1.1B/1.7B/3B/7.1B/176B | - | +| [ChatGLM3](https://huggingface.co/THUDM) | 6B | chatglm3 | +| [Command-R](https://huggingface.co/CohereForAI) | 35B/104B | cohere | +| [DeepSeek (MoE)](https://huggingface.co/deepseek-ai) | 7B/16B/67B/236B | deepseek | +| [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | falcon | +| 
[Gemma/CodeGemma](https://huggingface.co/google) | 2B/7B | gemma | +| [GLM4](https://huggingface.co/THUDM) | 9B | glm4 | +| [InternLM2](https://huggingface.co/internlm) | 7B/20B | intern2 | +| [LLaMA](https://github.com/facebookresearch/llama) | 7B/13B/33B/65B | - | +| [LLaMA-2](https://huggingface.co/meta-llama) | 7B/13B/70B | llama2 | +| [LLaMA-3](https://huggingface.co/meta-llama) | 8B/70B | llama3 | +| [LLaVA-1.5](https://huggingface.co/llava-hf) | 7B/13B | vicuna | +| [Mistral/Mixtral](https://huggingface.co/mistralai) | 7B/8x7B/8x22B | mistral | +| [OLMo](https://huggingface.co/allenai) | 1B/7B | - | +| [PaliGemma](https://huggingface.co/google) | 3B | gemma | +| [Phi-1.5/2](https://huggingface.co/microsoft) | 1.3B/2.7B | - | +| [Phi-3](https://huggingface.co/microsoft) | 4B/7B/14B | phi | +| [Qwen](https://huggingface.co/Qwen) | 1.8B/7B/14B/72B | qwen | +| [Qwen1.5 (Code/MoE)](https://huggingface.co/Qwen) | 0.5B/1.8B/4B/7B/14B/32B/72B/110B | qwen | +| [StarCoder2](https://huggingface.co/bigcode) | 3B/7B/15B | - | +| [XVERSE](https://huggingface.co/xverse) | 7B/13B/65B | xverse | +| [Yi (1/1.5)](https://huggingface.co/01-ai) | 6B/9B/34B | yi | +| [Yi-VL](https://huggingface.co/01-ai) | 6B/34B | yi_vl | +| [Yuan](https://huggingface.co/IEITYuan) | 2B/51B/102B | yuan | > [!NOTE] > **Default module** is used for the `lora_target` argument, you can use `lora_target: all` to specify all the available modules for better convergence. diff --git a/README_zh.md b/README_zh.md index 09a7f330..982c0123 100644 --- a/README_zh.md +++ b/README_zh.md @@ -149,41 +149,39 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/ec36a9dd-37f4-4f72-81bd ## 模型 -| 模型名 | 模型大小 | 默认模块 | Template | -| -------------------------------------------------------- | -------------------------------- | ----------------- | --------- | -| [Baichuan2](https://huggingface.co/baichuan-inc) | 7B/13B | W_pack | baichuan2 | -| [BLOOM](https://huggingface.co/bigscience) | 560M/1.1B/1.7B/3B/7.1B/176B | query_key_value | - | -| [BLOOMZ](https://huggingface.co/bigscience) | 560M/1.1B/1.7B/3B/7.1B/176B | query_key_value | - | -| [ChatGLM3](https://huggingface.co/THUDM) | 6B | query_key_value | chatglm3 | -| [Command-R](https://huggingface.co/CohereForAI) | 35B/104B | q_proj,v_proj | cohere | -| [DeepSeek (MoE)](https://huggingface.co/deepseek-ai) | 7B/16B/67B/236B | q_proj,v_proj | deepseek | -| [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | query_key_value | falcon | -| [Gemma/CodeGemma](https://huggingface.co/google) | 2B/7B | q_proj,v_proj | gemma | -| [GLM4](https://huggingface.co/THUDM) | 9B | query_key_value | glm4 | -| [InternLM2](https://huggingface.co/internlm) | 7B/20B | wqkv | intern2 | -| [LLaMA](https://github.com/facebookresearch/llama) | 7B/13B/33B/65B | q_proj,v_proj | - | -| [LLaMA-2](https://huggingface.co/meta-llama) | 7B/13B/70B | q_proj,v_proj | llama2 | -| [LLaMA-3](https://huggingface.co/meta-llama) | 8B/70B | q_proj,v_proj | llama3 | -| [LLaVA-1.5](https://huggingface.co/llava-hf) | 7B/13B | q_proj,v_proj | vicuna | -| [Mistral/Mixtral](https://huggingface.co/mistralai) | 7B/8x7B/8x22B | q_proj,v_proj | mistral | -| [OLMo](https://huggingface.co/allenai) | 1B/7B | q_proj,v_proj | - | -| [PaliGemma](https://huggingface.co/google) | 3B | q_proj,v_proj | gemma | -| [Phi-1.5/2](https://huggingface.co/microsoft) | 1.3B/2.7B | q_proj,v_proj | - | -| [Phi-3](https://huggingface.co/microsoft) | 4B/7B/14B | qkv_proj | phi | -| [Qwen](https://huggingface.co/Qwen) | 1.8B/7B/14B/72B | c_attn | qwen | -| 
[Qwen1.5 (Code/MoE)](https://huggingface.co/Qwen) | 0.5B/1.8B/4B/7B/14B/32B/72B/110B | q_proj,v_proj | qwen | -| [StarCoder2](https://huggingface.co/bigcode) | 3B/7B/15B | q_proj,v_proj | - | -| [XVERSE](https://huggingface.co/xverse) | 7B/13B/65B | q_proj,v_proj | xverse | -| [Yi (1/1.5)](https://huggingface.co/01-ai) | 6B/9B/34B | q_proj,v_proj | yi | -| [Yi-VL](https://huggingface.co/01-ai) | 6B/34B | q_proj,v_proj | yi_vl | -| [Yuan](https://huggingface.co/IEITYuan) | 2B/51B/102B | q_proj,v_proj | yuan | +| 模型名 | 模型大小 | Template | +| -------------------------------------------------------- | -------------------------------- | --------- | +| [Baichuan2](https://huggingface.co/baichuan-inc) | 7B/13B | baichuan2 | +| [BLOOM](https://huggingface.co/bigscience) | 560M/1.1B/1.7B/3B/7.1B/176B | - | +| [BLOOMZ](https://huggingface.co/bigscience) | 560M/1.1B/1.7B/3B/7.1B/176B | - | +| [ChatGLM3](https://huggingface.co/THUDM) | 6B | chatglm3 | +| [Command-R](https://huggingface.co/CohereForAI) | 35B/104B | cohere | +| [DeepSeek (MoE)](https://huggingface.co/deepseek-ai) | 7B/16B/67B/236B | deepseek | +| [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | falcon | +| [Gemma/CodeGemma](https://huggingface.co/google) | 2B/7B | gemma | +| [GLM4](https://huggingface.co/THUDM) | 9B | glm4 | +| [InternLM2](https://huggingface.co/internlm) | 7B/20B | intern2 | +| [LLaMA](https://github.com/facebookresearch/llama) | 7B/13B/33B/65B | - | +| [LLaMA-2](https://huggingface.co/meta-llama) | 7B/13B/70B | llama2 | +| [LLaMA-3](https://huggingface.co/meta-llama) | 8B/70B | llama3 | +| [LLaVA-1.5](https://huggingface.co/llava-hf) | 7B/13B | vicuna | +| [Mistral/Mixtral](https://huggingface.co/mistralai) | 7B/8x7B/8x22B | mistral | +| [OLMo](https://huggingface.co/allenai) | 1B/7B | - | +| [PaliGemma](https://huggingface.co/google) | 3B | gemma | +| [Phi-1.5/2](https://huggingface.co/microsoft) | 1.3B/2.7B | - | +| [Phi-3](https://huggingface.co/microsoft) | 4B/7B/14B | phi | +| [Qwen](https://huggingface.co/Qwen) | 1.8B/7B/14B/72B | qwen | +| [Qwen1.5 (Code/MoE)](https://huggingface.co/Qwen) | 0.5B/1.8B/4B/7B/14B/32B/72B/110B | qwen | +| [StarCoder2](https://huggingface.co/bigcode) | 3B/7B/15B | - | +| [XVERSE](https://huggingface.co/xverse) | 7B/13B/65B | xverse | +| [Yi (1/1.5)](https://huggingface.co/01-ai) | 6B/9B/34B | yi | +| [Yi-VL](https://huggingface.co/01-ai) | 6B/34B | yi_vl | +| [Yuan](https://huggingface.co/IEITYuan) | 2B/51B/102B | yuan | > [!NOTE] -> **默认模块**应作为 `lora_target` 参数的默认值,可使用 `lora_target: all` 参数指定全部模块以取得更好的效果。 -> > 对于所有“基座”(Base)模型,`template` 参数可以是 `default`, `alpaca`, `vicuna` 等任意值。但“对话”(Instruct/Chat)模型请务必使用**对应的模板**。 > -> 请务必在训练和推理时使用**完全一致**的模板。 +> 请务必在训练和推理时采用**完全一致**的模板。 项目所支持模型的完整列表请参阅 [constants.py](src/llamafactory/extras/constants.py)。 diff --git a/examples/extras/fsdp_qlora/llama3_lora_sft.yaml b/examples/extras/fsdp_qlora/llama3_lora_sft.yaml index 348459b8..084269ef 100644 --- a/examples/extras/fsdp_qlora/llama3_lora_sft.yaml +++ b/examples/extras/fsdp_qlora/llama3_lora_sft.yaml @@ -6,7 +6,7 @@ quantization_bit: 4 stage: sft do_train: true finetuning_type: lora -lora_target: q_proj,v_proj +lora_target: all ### ddp ddp_timeout: 180000000 diff --git a/examples/extras/loraplus/llama3_lora_sft.yaml b/examples/extras/loraplus/llama3_lora_sft.yaml index 960f613e..1ba654ec 100644 --- a/examples/extras/loraplus/llama3_lora_sft.yaml +++ b/examples/extras/loraplus/llama3_lora_sft.yaml @@ -5,7 +5,7 @@ model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct stage: sft 
do_train: true finetuning_type: lora -lora_target: q_proj,v_proj +lora_target: all loraplus_lr_ratio: 16.0 ### dataset diff --git a/examples/lora_multi_gpu/llama3_lora_sft.yaml b/examples/lora_multi_gpu/llama3_lora_sft.yaml index 9be3c780..348e53b9 100644 --- a/examples/lora_multi_gpu/llama3_lora_sft.yaml +++ b/examples/lora_multi_gpu/llama3_lora_sft.yaml @@ -5,7 +5,7 @@ model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct stage: sft do_train: true finetuning_type: lora -lora_target: q_proj,v_proj +lora_target: all ### ddp ddp_timeout: 180000000 diff --git a/examples/lora_multi_gpu/llama3_lora_sft_ds.yaml b/examples/lora_multi_gpu/llama3_lora_sft_ds.yaml index 41152243..1c432fa7 100644 --- a/examples/lora_multi_gpu/llama3_lora_sft_ds.yaml +++ b/examples/lora_multi_gpu/llama3_lora_sft_ds.yaml @@ -5,7 +5,7 @@ model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct stage: sft do_train: true finetuning_type: lora -lora_target: q_proj,v_proj +lora_target: all ### ddp ddp_timeout: 180000000 diff --git a/examples/lora_multi_npu/llama3_lora_sft_ds.yaml b/examples/lora_multi_npu/llama3_lora_sft_ds.yaml index 1ed24d04..a0ec8aa1 100644 --- a/examples/lora_multi_npu/llama3_lora_sft_ds.yaml +++ b/examples/lora_multi_npu/llama3_lora_sft_ds.yaml @@ -5,7 +5,7 @@ model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct stage: sft do_train: true finetuning_type: lora -lora_target: q_proj,v_proj +lora_target: all ### ddp ddp_timeout: 180000000 diff --git a/examples/lora_single_gpu/llama3_lora_dpo.yaml b/examples/lora_single_gpu/llama3_lora_dpo.yaml index 158c9e04..78344330 100644 --- a/examples/lora_single_gpu/llama3_lora_dpo.yaml +++ b/examples/lora_single_gpu/llama3_lora_dpo.yaml @@ -5,7 +5,7 @@ model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct stage: dpo do_train: true finetuning_type: lora -lora_target: q_proj,v_proj +lora_target: all pref_beta: 0.1 pref_loss: sigmoid # [sigmoid (dpo), orpo, simpo] diff --git a/examples/lora_single_gpu/llama3_lora_kto.yaml b/examples/lora_single_gpu/llama3_lora_kto.yaml index ead221e9..d5234c0a 100644 --- a/examples/lora_single_gpu/llama3_lora_kto.yaml +++ b/examples/lora_single_gpu/llama3_lora_kto.yaml @@ -5,7 +5,7 @@ model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct stage: kto do_train: true finetuning_type: lora -lora_target: q_proj,v_proj +lora_target: all ### dataset dataset: kto_en_demo diff --git a/examples/lora_single_gpu/llama3_lora_ppo.yaml b/examples/lora_single_gpu/llama3_lora_ppo.yaml index 19e7ccb3..98c842f9 100644 --- a/examples/lora_single_gpu/llama3_lora_ppo.yaml +++ b/examples/lora_single_gpu/llama3_lora_ppo.yaml @@ -6,7 +6,7 @@ reward_model: saves/llama3-8b/lora/reward stage: ppo do_train: true finetuning_type: lora -lora_target: q_proj,v_proj +lora_target: all ### dataset dataset: identity,alpaca_en_demo diff --git a/examples/lora_single_gpu/llama3_lora_pretrain.yaml b/examples/lora_single_gpu/llama3_lora_pretrain.yaml index 9167a893..db435ca9 100644 --- a/examples/lora_single_gpu/llama3_lora_pretrain.yaml +++ b/examples/lora_single_gpu/llama3_lora_pretrain.yaml @@ -5,7 +5,7 @@ model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct stage: pt do_train: true finetuning_type: lora -lora_target: q_proj,v_proj +lora_target: all ### dataset dataset: c4_demo diff --git a/examples/lora_single_gpu/llama3_lora_reward.yaml b/examples/lora_single_gpu/llama3_lora_reward.yaml index 91663057..1ce42ea4 100644 --- a/examples/lora_single_gpu/llama3_lora_reward.yaml +++ b/examples/lora_single_gpu/llama3_lora_reward.yaml @@ -5,7 +5,7 @@ model_name_or_path: 
meta-llama/Meta-Llama-3-8B-Instruct stage: rm do_train: true finetuning_type: lora -lora_target: q_proj,v_proj +lora_target: all ### dataset dataset: dpo_en_demo diff --git a/examples/lora_single_gpu/llama3_lora_sft.yaml b/examples/lora_single_gpu/llama3_lora_sft.yaml index cc93d05a..651b636f 100644 --- a/examples/lora_single_gpu/llama3_lora_sft.yaml +++ b/examples/lora_single_gpu/llama3_lora_sft.yaml @@ -5,7 +5,7 @@ model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct stage: sft do_train: true finetuning_type: lora -lora_target: q_proj,v_proj +lora_target: all ### dataset dataset: identity,alpaca_en_demo diff --git a/examples/lora_single_gpu/llama3_preprocess.yaml b/examples/lora_single_gpu/llama3_preprocess.yaml index 86dad37b..34bb9efc 100644 --- a/examples/lora_single_gpu/llama3_preprocess.yaml +++ b/examples/lora_single_gpu/llama3_preprocess.yaml @@ -5,7 +5,7 @@ model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct stage: sft do_train: true finetuning_type: lora -lora_target: q_proj,v_proj +lora_target: all ### dataset dataset: identity,alpaca_en_demo diff --git a/examples/lora_single_gpu/llava1_5_lora_sft.yaml b/examples/lora_single_gpu/llava1_5_lora_sft.yaml index 95c1d40d..df510a93 100644 --- a/examples/lora_single_gpu/llava1_5_lora_sft.yaml +++ b/examples/lora_single_gpu/llava1_5_lora_sft.yaml @@ -6,7 +6,7 @@ visual_inputs: true stage: sft do_train: true finetuning_type: lora -lora_target: q_proj,v_proj +lora_target: all ### dataset dataset: mllm_demo diff --git a/examples/qlora_single_gpu/llama3_lora_sft_aqlm.yaml b/examples/qlora_single_gpu/llama3_lora_sft_aqlm.yaml index 23301de5..d54d6af6 100644 --- a/examples/qlora_single_gpu/llama3_lora_sft_aqlm.yaml +++ b/examples/qlora_single_gpu/llama3_lora_sft_aqlm.yaml @@ -5,7 +5,7 @@ model_name_or_path: ISTA-DASLab/Meta-Llama-3-8B-Instruct-AQLM-2Bit-1x16 stage: sft do_train: true finetuning_type: lora -lora_target: q_proj,v_proj +lora_target: all ### dataset dataset: identity,alpaca_en_demo diff --git a/examples/qlora_single_gpu/llama3_lora_sft_awq.yaml b/examples/qlora_single_gpu/llama3_lora_sft_awq.yaml index 40a290a3..5cef178a 100644 --- a/examples/qlora_single_gpu/llama3_lora_sft_awq.yaml +++ b/examples/qlora_single_gpu/llama3_lora_sft_awq.yaml @@ -5,7 +5,7 @@ model_name_or_path: TechxGenus/Meta-Llama-3-8B-Instruct-AWQ stage: sft do_train: true finetuning_type: lora -lora_target: q_proj,v_proj +lora_target: all ### dataset dataset: identity,alpaca_en_demo diff --git a/examples/qlora_single_gpu/llama3_lora_sft_bitsandbytes.yaml b/examples/qlora_single_gpu/llama3_lora_sft_bitsandbytes.yaml index 6652d8cf..b308dcab 100644 --- a/examples/qlora_single_gpu/llama3_lora_sft_bitsandbytes.yaml +++ b/examples/qlora_single_gpu/llama3_lora_sft_bitsandbytes.yaml @@ -6,7 +6,7 @@ quantization_bit: 4 stage: sft do_train: true finetuning_type: lora -lora_target: q_proj,v_proj +lora_target: all ### dataset dataset: identity,alpaca_en_demo diff --git a/examples/qlora_single_gpu/llama3_lora_sft_gptq.yaml b/examples/qlora_single_gpu/llama3_lora_sft_gptq.yaml index 323ea7c6..b950042e 100644 --- a/examples/qlora_single_gpu/llama3_lora_sft_gptq.yaml +++ b/examples/qlora_single_gpu/llama3_lora_sft_gptq.yaml @@ -5,7 +5,7 @@ model_name_or_path: TechxGenus/Meta-Llama-3-8B-Instruct-GPTQ stage: sft do_train: true finetuning_type: lora -lora_target: q_proj,v_proj +lora_target: all ### dataset dataset: identity,alpaca_en_demo diff --git a/src/llamafactory/extras/constants.py b/src/llamafactory/extras/constants.py index 687e16cc..4d9cb26d 100644 --- 
a/src/llamafactory/extras/constants.py +++ b/src/llamafactory/extras/constants.py @@ -20,8 +20,6 @@ CHOICES = ["A", "B", "C", "D"] DATA_CONFIG = "dataset_info.json" -DEFAULT_MODULE = defaultdict(str) - DEFAULT_TEMPLATE = defaultdict(str) FILEEXT2TYPE = { @@ -80,7 +78,6 @@ class DownloadSource(str, Enum): def register_model_group( models: Dict[str, Dict[DownloadSource, str]], - module: Optional[str] = None, template: Optional[str] = None, vision: bool = False, ) -> None: @@ -91,8 +88,6 @@ def register_model_group( else: assert prefix == name.split("-")[0], "prefix should be identical." SUPPORTED_MODELS[name] = path - if module is not None: - DEFAULT_MODULE[prefix] = module if template is not None: DEFAULT_TEMPLATE[prefix] = template if vision: @@ -127,7 +122,6 @@ register_model_group( DownloadSource.MODELSCOPE: "baichuan-inc/Baichuan-13B-Chat", }, }, - module="W_pack", template="baichuan", ) @@ -151,7 +145,6 @@ register_model_group( DownloadSource.MODELSCOPE: "baichuan-inc/Baichuan2-13B-Chat", }, }, - module="W_pack", template="baichuan2", ) @@ -171,7 +164,6 @@ register_model_group( DownloadSource.MODELSCOPE: "AI-ModelScope/bloom-7b1", }, }, - module="query_key_value", ) @@ -190,7 +182,6 @@ register_model_group( DownloadSource.MODELSCOPE: "AI-ModelScope/bloomz-7b1-mt", }, }, - module="query_key_value", ) @@ -229,7 +220,6 @@ register_model_group( DownloadSource.MODELSCOPE: "ZhipuAI/chatglm2-6b", } }, - module="query_key_value", template="chatglm2", ) @@ -245,7 +235,6 @@ register_model_group( DownloadSource.MODELSCOPE: "ZhipuAI/chatglm3-6b", }, }, - module="query_key_value", template="chatglm3", ) @@ -344,7 +333,6 @@ register_model_group( DownloadSource.MODELSCOPE: "AI-ModelScope/dbrx-instruct", }, }, - module="Wqkv", template="dbrx", ) @@ -463,7 +451,6 @@ register_model_group( DownloadSource.MODELSCOPE: "modelscope/falcon-180B-chat", }, }, - module="query_key_value", template="falcon", ) @@ -512,7 +499,6 @@ register_model_group( DownloadSource.MODELSCOPE: "ZhipuAI/glm-4-9b-chat-1m", }, }, - module="query_key_value", template="glm4", ) @@ -559,7 +545,6 @@ register_model_group( DownloadSource.MODELSCOPE: "Shanghai_AI_Laboratory/internlm2-chat-20b", }, }, - module="wqkv", template="intern2", ) @@ -581,7 +566,6 @@ register_model_group( DownloadSource.MODELSCOPE: "DeepLang/LingoWhale-8B", } }, - module="qkv_proj", ) @@ -868,7 +852,6 @@ register_model_group( DownloadSource.MODELSCOPE: "LLM-Research/Phi-3-medium-128k-instruct", }, }, - module="qkv_proj", template="phi", ) @@ -940,7 +923,6 @@ register_model_group( DownloadSource.MODELSCOPE: "qwen/Qwen-72B-Chat-Int4", }, }, - module="c_attn", template="qwen", ) @@ -1153,7 +1135,6 @@ register_model_group( DownloadSource.MODELSCOPE: "TeleAI/TeleChat-12B-v2", }, }, - module="query,key_value", template="telechat", ) diff --git a/src/llamafactory/hparams/finetuning_args.py b/src/llamafactory/hparams/finetuning_args.py index b9322f18..08af31e4 100644 --- a/src/llamafactory/hparams/finetuning_args.py +++ b/src/llamafactory/hparams/finetuning_args.py @@ -24,12 +24,7 @@ class FreezeArguments: "help": ( "Name(s) of trainable modules for freeze (partial-parameter) fine-tuning. " "Use commas to separate multiple modules. " - "Use `all` to specify all the available modules. " - "LLaMA choices: [`mlp`, `self_attn`], " - "BLOOM & Falcon & ChatGLM choices: [`mlp`, `self_attention`], " - "Qwen choices: [`mlp`, `attn`], " - "InternLM2 choices: [`feed_forward`, `attention`], " - "Others choices: the same as LLaMA." + "Use `all` to specify all the available modules." 
) }, ) @@ -79,13 +74,7 @@ class LoraArguments: "help": ( "Name(s) of target modules to apply LoRA. " "Use commas to separate multiple modules. " - "Use `all` to specify all the linear modules. " - "LLaMA choices: [`q_proj`, `k_proj`, `v_proj`, `o_proj`, `gate_proj`, `up_proj`, `down_proj`], " - "BLOOM & Falcon & ChatGLM choices: [`query_key_value`, `dense`, `dense_h_to_4h`, `dense_4h_to_h`], " - "Baichuan choices: [`W_pack`, `o_proj`, `gate_proj`, `up_proj`, `down_proj`], " - "Qwen choices: [`c_attn`, `attn.c_proj`, `w1`, `w2`, `mlp.c_proj`], " - "InternLM2 choices: [`wqkv`, `wo`, `w1`, `w2`, `w3`], " - "Others choices: the same as LLaMA." + "Use `all` to specify all the linear modules." ) }, ) diff --git a/src/llamafactory/webui/common.py b/src/llamafactory/webui/common.py index 62004bce..304b56a5 100644 --- a/src/llamafactory/webui/common.py +++ b/src/llamafactory/webui/common.py @@ -8,7 +8,6 @@ from yaml import safe_dump, safe_load from ..extras.constants import ( CHECKPOINT_NAMES, DATA_CONFIG, - DEFAULT_MODULE, DEFAULT_TEMPLATE, PEFT_METHODS, STAGES_USE_PAIR_DATA, @@ -118,13 +117,6 @@ def get_model_info(model_name: str) -> Tuple[str, str, bool]: return get_model_path(model_name), get_template(model_name), get_visual(model_name) -def get_module(model_name: str) -> str: - r""" - Gets the LoRA modules of this model. - """ - return DEFAULT_MODULE.get(get_prefix(model_name), "all") - - def get_template(model_name: str) -> str: r""" Gets the template name if the model is a chat model. diff --git a/src/llamafactory/webui/runner.py b/src/llamafactory/webui/runner.py index 4ec1531a..e8fdd129 100644 --- a/src/llamafactory/webui/runner.py +++ b/src/llamafactory/webui/runner.py @@ -8,7 +8,7 @@ from transformers.trainer import TRAINING_ARGS_NAME from ..extras.constants import PEFT_METHODS, TRAINING_STAGES from ..extras.misc import is_gpu_or_npu_available, torch_gc from ..extras.packages import is_gradio_available -from .common import DEFAULT_CACHE_DIR, get_module, get_save_dir, load_config +from .common import DEFAULT_CACHE_DIR, get_save_dir, load_config from .locales import ALERTS from .utils import abort_leaf_process, gen_cmd, get_eval_results, get_trainer_info, load_args, save_args, save_cmd @@ -159,7 +159,7 @@ class Runner: args["create_new_adapter"] = get("train.create_new_adapter") args["use_rslora"] = get("train.use_rslora") args["use_dora"] = get("train.use_dora") - args["lora_target"] = get("train.lora_target") or get_module(model_name) + args["lora_target"] = get("train.lora_target") or "all" args["additional_target"] = get("train.additional_target") or None if args["use_llama_pro"]: From ed8dc92e3983f3ddd1e1b71ed3848e76bc102400 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Thu, 6 Jun 2024 16:25:42 +0800 Subject: [PATCH 080/162] update readme Former-commit-id: fb1f709af5199976e63d7188e088e33c75d19bfe --- README.md | 20 +++++++++----------- README_zh.md | 18 +++++++++--------- 2 files changed, 18 insertions(+), 20 deletions(-) diff --git a/README.md b/README.md index f3ced20e..1f9ff5a2 100644 --- a/README.md +++ b/README.md @@ -179,8 +179,6 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/ | [Yuan](https://huggingface.co/IEITYuan) | 2B/51B/102B | yuan | > [!NOTE] -> **Default module** is used for the `lora_target` argument, you can use `lora_target: all` to specify all the available modules for better convergence. -> > For the "base" models, the `template` argument can be chosen from `default`, `alpaca`, `vicuna` etc. 
But make sure to use the **corresponding template** for the "instruct/chat" models. > > Remember to use the **SAME** template in training and inference. @@ -291,21 +289,21 @@ huggingface-cli login | Mandatory | Minimum | Recommend | | ------------ | ------- | --------- | -| python | 3.8 | 3.10 | -| torch | 1.13.1 | 2.2.0 | -| transformers | 4.37.2 | 4.41.0 | -| datasets | 2.14.3 | 2.19.1 | -| accelerate | 0.27.2 | 0.30.1 | -| peft | 0.9.0 | 0.11.1 | -| trl | 0.8.2 | 0.8.6 | +| python | 3.8 | 3.11 | +| torch | 1.13.1 | 2.3.0 | +| transformers | 4.41.2 | 4.41.2 | +| datasets | 2.16.0 | 2.19.2 | +| accelerate | 0.30.1 | 0.30.1 | +| peft | 0.11.1 | 0.11.1 | +| trl | 0.9.3 | 0.9.3 | | Optional | Minimum | Recommend | | ------------ | ------- | --------- | | CUDA | 11.6 | 12.2 | | deepspeed | 0.10.0 | 0.14.0 | | bitsandbytes | 0.39.0 | 0.43.1 | -| vllm | 0.4.0 | 0.4.2 | -| flash-attn | 2.3.0 | 2.5.8 | +| vllm | 0.4.3 | 0.4.3 | +| flash-attn | 2.3.0 | 2.5.9 | ### Hardware Requirement diff --git a/README_zh.md b/README_zh.md index 982c0123..8422e667 100644 --- a/README_zh.md +++ b/README_zh.md @@ -289,21 +289,21 @@ huggingface-cli login | 必需项 | 至少 | 推荐 | | ------------ | ------- | --------- | -| python | 3.8 | 3.10 | -| torch | 1.13.1 | 2.2.0 | -| transformers | 4.37.2 | 4.41.0 | -| datasets | 2.14.3 | 2.19.1 | -| accelerate | 0.27.2 | 0.30.1 | -| peft | 0.9.0 | 0.11.1 | -| trl | 0.8.2 | 0.8.6 | +| python | 3.8 | 3.11 | +| torch | 1.13.1 | 2.3.0 | +| transformers | 4.41.2 | 4.41.2 | +| datasets | 2.16.0 | 2.19.2 | +| accelerate | 0.30.1 | 0.30.1 | +| peft | 0.11.1 | 0.11.1 | +| trl | 0.9.3 | 0.9.3 | | 可选项 | 至少 | 推荐 | | ------------ | ------- | --------- | | CUDA | 11.6 | 12.2 | | deepspeed | 0.10.0 | 0.14.0 | | bitsandbytes | 0.39.0 | 0.43.1 | -| vllm | 0.4.0 | 0.4.2 | -| flash-attn | 2.3.0 | 2.5.8 | +| vllm | 0.4.3 | 0.4.3 | +| flash-attn | 2.3.0 | 2.5.9 | ### 硬件依赖 From a4e1fcc881127f717440a2d7165c7339813e3c82 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Thu, 6 Jun 2024 16:59:18 +0800 Subject: [PATCH 081/162] update readme Former-commit-id: cc331fa2d28afe081937c50ea83d63add21d4e3a --- README.md | 4 ++-- README_zh.md | 4 ++-- setup.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 1f9ff5a2..77684757 100644 --- a/README.md +++ b/README.md @@ -329,7 +329,7 @@ huggingface-cli login ```bash git clone --depth 1 https://github.com/hiyouga/LLaMA-Factory.git cd LLaMA-Factory -pip install -e .[torch,metrics] +pip install -e '.[torch,metrics]' ``` Extra dependencies available: torch, torch_npu, metrics, deepspeed, bitsandbytes, vllm, galore, badam, gptq, awq, aqlm, qwen, modelscope, quality @@ -353,7 +353,7 @@ To enable FlashAttention-2 on the Windows platform, you need to install the prec Join [NPU user group](assets/wechat_npu.jpg). -To install LLaMA Factory on Ascend NPU devices, please specify extra dependencies: `pip install -e .[torch_npu,metrics]`. Additionally, you need to install the **[Ascend CANN Toolkit and Kernels](https://www.hiascend.com/developer/download/community/result?module=cann)**. Please follow the [installation tutorial](https://www.hiascend.com/document/detail/en/CANNCommunityEdition/600alphaX/softwareinstall/instg/atlasdeploy_03_0031.html) or use the following commands: +To install LLaMA Factory on Ascend NPU devices, please specify extra dependencies: `pip install -e '.[torch-npu,metrics]'`. 
Additionally, you need to install the **[Ascend CANN Toolkit and Kernels](https://www.hiascend.com/developer/download/community/result?module=cann)**. Please follow the [installation tutorial](https://www.hiascend.com/document/detail/en/CANNCommunityEdition/600alphaX/softwareinstall/instg/atlasdeploy_03_0031.html) or use the following commands: ```bash # replace the url according to your CANN version and devices diff --git a/README_zh.md b/README_zh.md index 8422e667..da5ff079 100644 --- a/README_zh.md +++ b/README_zh.md @@ -329,7 +329,7 @@ huggingface-cli login ```bash git clone --depth 1 https://github.com/hiyouga/LLaMA-Factory.git cd LLaMA-Factory -pip install -e .[torch,metrics] +pip install -e '.[torch,metrics]' ``` 可选的额外依赖项:torch、torch_npu、metrics、deepspeed、bitsandbytes、vllm、galore、badam、gptq、awq、aqlm、qwen、modelscope、quality @@ -353,7 +353,7 @@ pip install https://github.com/jllllll/bitsandbytes-windows-webui/releases/downl 加入 [NPU 用户群](assets/wechat_npu.jpg)。 -在昇腾 NPU 设备上安装 LLaMA Factory 时,需要指定额外依赖项,使用 `pip install -e .[torch_npu,metrics]` 命令安装。此外,还需要安装 **[Ascend CANN Toolkit and Kernels](https://www.hiascend.com/developer/download/community/result?module=cann)**,安装方法请参考[安装教程](https://www.hiascend.com/document/detail/zh/CANNCommunityEdition/80RC2alpha002/quickstart/quickstart/quickstart_18_0004.html)或使用以下命令: +在昇腾 NPU 设备上安装 LLaMA Factory 时,需要指定额外依赖项,使用 `pip install -e '.[torch-npu,metrics]'` 命令安装。此外,还需要安装 **[Ascend CANN Toolkit and Kernels](https://www.hiascend.com/developer/download/community/result?module=cann)**,安装方法请参考[安装教程](https://www.hiascend.com/document/detail/zh/CANNCommunityEdition/80RC2alpha002/quickstart/quickstart/quickstart_18_0004.html)或使用以下命令: ```bash # 请替换 URL 为 CANN 版本和设备型号对应的 URL diff --git a/setup.py b/setup.py index 7a5b9304..c32be8af 100644 --- a/setup.py +++ b/setup.py @@ -21,7 +21,7 @@ def get_requires(): extra_require = { "torch": ["torch>=1.13.1"], - "torch_npu": ["torch==2.1.0", "torch-npu==2.1.0.post3", "decorator"], + "torch-npu": ["torch==2.1.0", "torch-npu==2.1.0.post3", "decorator"], "metrics": ["nltk", "jieba", "rouge-chinese"], "deepspeed": ["deepspeed>=0.10.0,<=0.14.0"], "bitsandbytes": ["bitsandbytes>=0.39.0"], From e963a470c1a4c67ab59a3b118770f1f6b01f2a65 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Thu, 6 Jun 2024 17:29:19 +0800 Subject: [PATCH 082/162] fix base64 image read #4061 Former-commit-id: 66ccb2a27a04296b4600f2c85f428071bf14eeb0 --- src/llamafactory/api/chat.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/llamafactory/api/chat.py b/src/llamafactory/api/chat.py index 712b6940..50892a54 100644 --- a/src/llamafactory/api/chat.py +++ b/src/llamafactory/api/chat.py @@ -1,3 +1,5 @@ +import base64 +import io import json import os import uuid @@ -83,9 +85,12 @@ def _process_request( input_messages.append({"role": ROLE_MAPPING[message.role], "content": input_item.text}) else: image_url = input_item.image_url.url - if os.path.isfile(image_url): + if image_url.startswith("data:image"): # base64 image + image_data = base64.b64decode(image_url.split(",", maxsplit=1)[1]) + image_path = io.BytesIO(image_data) + elif os.path.isfile(image_url): # local file image_path = open(image_url, "rb") - else: + else: # web uri image_path = requests.get(image_url, stream=True).raw image = Image.open(image_path).convert("RGB") From 67246f52f2f9848003e7154b286455d05bed6cdc Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Thu, 6 Jun 2024 18:45:49 +0800 Subject: [PATCH 083/162] update trainers 
Former-commit-id: b7f6c4a171293cf4f3e88f15a811f847342f84ee --- src/llamafactory/api/chat.py | 7 +++++-- src/llamafactory/train/dpo/trainer.py | 8 +------- src/llamafactory/train/kto/trainer.py | 11 ++--------- src/llamafactory/train/ppo/utils.py | 7 ++++--- 4 files changed, 12 insertions(+), 21 deletions(-) diff --git a/src/llamafactory/api/chat.py b/src/llamafactory/api/chat.py index 50892a54..98957bc1 100644 --- a/src/llamafactory/api/chat.py +++ b/src/llamafactory/api/chat.py @@ -7,7 +7,7 @@ from typing import TYPE_CHECKING, AsyncGenerator, Dict, List, Optional, Tuple from ..data import Role as DataRole from ..extras.logging import get_logger -from ..extras.packages import is_fastapi_available, is_pillow_available +from ..extras.packages import is_fastapi_available, is_pillow_available, is_requests_available from .common import dictify, jsonify from .protocol import ( ChatCompletionMessage, @@ -29,10 +29,13 @@ if is_fastapi_available(): if is_pillow_available(): - import requests from PIL import Image +if is_requests_available(): + import requests + + if TYPE_CHECKING: from numpy.typing import NDArray diff --git a/src/llamafactory/train/dpo/trainer.py b/src/llamafactory/train/dpo/trainer.py index 2bbe6a06..6f1da34e 100644 --- a/src/llamafactory/train/dpo/trainer.py +++ b/src/llamafactory/train/dpo/trainer.py @@ -187,13 +187,7 @@ class CustomDPOTrainer(DPOTrainer): ref_context = nullcontext() with torch.no_grad(), ref_context: - ( - reference_chosen_logps, - reference_rejected_logps, - _, - _, - _, - ) = self.concatenated_forward(ref_model, batch) + reference_chosen_logps, reference_rejected_logps, *_ = self.concatenated_forward(ref_model, batch) return reference_chosen_logps, reference_rejected_logps diff --git a/src/llamafactory/train/kto/trainer.py b/src/llamafactory/train/kto/trainer.py index f29945f5..03cad5a7 100644 --- a/src/llamafactory/train/kto/trainer.py +++ b/src/llamafactory/train/kto/trainer.py @@ -146,15 +146,8 @@ class CustomKTOTrainer(KTOTrainer): if len(target_logps) != len(batch["kto_tags"]): raise ValueError("Mismatched shape of inputs and labels.") - chosen_idx = [i for i in range(len(target_logps)) if batch["kto_tags"][i]] - rejected_idx = [i for i in range(len(target_logps)) if not batch["kto_tags"][i]] - - chosen_logps = target_logps[chosen_idx, ...] - rejected_logps = target_logps[rejected_idx, ...] - - chosen_logits = target_logits[chosen_idx, ...] - rejected_logits = target_logits[rejected_idx, ...] 
- + chosen_logps, rejected_logps = target_logps[batch["kto_tags"]], target_logps[~batch["kto_tags"]] + chosen_logits, rejected_logits = target_logits[batch["kto_tags"]], target_logits[~batch["kto_tags"]] return chosen_logps, rejected_logps, chosen_logits, rejected_logits, kl_logps def compute_reference_log_probs( diff --git a/src/llamafactory/train/ppo/utils.py b/src/llamafactory/train/ppo/utils.py index e6bdb89c..e5025581 100644 --- a/src/llamafactory/train/ppo/utils.py +++ b/src/llamafactory/train/ppo/utils.py @@ -8,13 +8,14 @@ from transformers.integrations import is_deepspeed_zero3_enabled from ...extras.packages import is_requests_available +if is_requests_available(): + import requests + + if TYPE_CHECKING: from transformers import PreTrainedModel from trl import AutoModelForCausalLMWithValueHead -if is_requests_available(): - import requests - def get_rewards_from_server(server_url: str, messages: List[str]) -> List[torch.Tensor]: headers = {"Content-Type": "application/json"} From 56a6db6d8446aadc143dccbd42c73d234e11bfb2 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Thu, 6 Jun 2024 19:03:20 +0800 Subject: [PATCH 084/162] fix ppo dataset bug #4012 Former-commit-id: 7fc51b2e93698ae5e012566af8481f4d861c873d --- src/llamafactory/data/loader.py | 2 +- src/llamafactory/data/preprocess.py | 2 +- src/llamafactory/data/processors/pretrain.py | 2 +- src/llamafactory/train/ppo/workflow.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/llamafactory/data/loader.py b/src/llamafactory/data/loader.py index 7d013d27..859f9a93 100644 --- a/src/llamafactory/data/loader.py +++ b/src/llamafactory/data/loader.py @@ -130,7 +130,7 @@ def get_dataset( model_args: "ModelArguments", data_args: "DataArguments", training_args: "Seq2SeqTrainingArguments", - stage: Literal["pt", "sft", "rm", "kto"], + stage: Literal["pt", "sft", "rm", "ppo", "kto"], tokenizer: "PreTrainedTokenizer", processor: Optional["ProcessorMixin"] = None, ) -> Union["Dataset", "IterableDataset"]: diff --git a/src/llamafactory/data/preprocess.py b/src/llamafactory/data/preprocess.py index 336257ca..97789c39 100644 --- a/src/llamafactory/data/preprocess.py +++ b/src/llamafactory/data/preprocess.py @@ -23,7 +23,7 @@ if TYPE_CHECKING: def get_preprocess_and_print_func( data_args: "DataArguments", training_args: "Seq2SeqTrainingArguments", - stage: Literal["pt", "sft", "rm", "kto"], + stage: Literal["pt", "sft", "rm", "ppo", "kto"], template: "Template", tokenizer: "PreTrainedTokenizer", processor: Optional["ProcessorMixin"], diff --git a/src/llamafactory/data/processors/pretrain.py b/src/llamafactory/data/processors/pretrain.py index 3de0d1ac..87727b55 100644 --- a/src/llamafactory/data/processors/pretrain.py +++ b/src/llamafactory/data/processors/pretrain.py @@ -18,7 +18,7 @@ def preprocess_pretrain_dataset( if data_args.template == "gemma": text_examples = [tokenizer.bos_token + example for example in text_examples] - result = tokenizer(text_examples, add_special_tokens=False, max_length=data_args.cutoff_len) + result = tokenizer(text_examples, add_special_tokens=False, max_length=data_args.cutoff_len, truncation=True) else: tokenized_examples = tokenizer(text_examples, add_special_tokens=False) concatenated_examples = {k: list(chain(*tokenized_examples[k])) for k in tokenized_examples.keys()} diff --git a/src/llamafactory/train/ppo/workflow.py b/src/llamafactory/train/ppo/workflow.py index c4e05e57..4383bcdc 100644 --- a/src/llamafactory/train/ppo/workflow.py +++ 
b/src/llamafactory/train/ppo/workflow.py @@ -29,7 +29,7 @@ def run_ppo( ): tokenizer_module = load_tokenizer(model_args) tokenizer = tokenizer_module["tokenizer"] - dataset = get_dataset(model_args, data_args, training_args, stage="pt", **tokenizer_module) + dataset = get_dataset(model_args, data_args, training_args, stage="ppo", **tokenizer_module) model = load_model(tokenizer, model_args, finetuning_args, training_args.do_train, add_valuehead=True) tokenizer.padding_side = "left" # use left-padding in generation while using right-padding in training From d3a378ffea59d9f94e70a4e80d065f0aba1e0305 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Thu, 6 Jun 2024 20:30:25 +0800 Subject: [PATCH 085/162] fix torch gc Former-commit-id: e173799d057598e5692a407601c30d8ce1513461 --- src/llamafactory/extras/misc.py | 11 ++++++++--- src/llamafactory/model/utils/embedding.py | 2 +- src/llamafactory/model/utils/valuehead.py | 9 +++++---- 3 files changed, 14 insertions(+), 8 deletions(-) diff --git a/src/llamafactory/extras/misc.py b/src/llamafactory/extras/misc.py index 78f71847..48476f9c 100644 --- a/src/llamafactory/extras/misc.py +++ b/src/llamafactory/extras/misc.py @@ -212,12 +212,17 @@ def has_tokenized_data(path: os.PathLike) -> bool: def torch_gc() -> None: r""" - Collects GPU memory. + Collects GPU or NPU memory. """ gc.collect() - if torch.cuda.is_available(): + if is_torch_xpu_available(): + torch.xpu.empty_cache() + elif is_torch_npu_available(): + torch.npu.empty_cache() + elif is_torch_mps_available(): + torch.mps.empty_cache() + elif is_torch_cuda_available(): torch.cuda.empty_cache() - torch.cuda.ipc_collect() def try_download_model_from_ms(model_args: "ModelArguments") -> str: diff --git a/src/llamafactory/model/utils/embedding.py b/src/llamafactory/model/utils/embedding.py index 357c9cc0..3d9278e3 100644 --- a/src/llamafactory/model/utils/embedding.py +++ b/src/llamafactory/model/utils/embedding.py @@ -15,7 +15,7 @@ if TYPE_CHECKING: logger = get_logger(__name__) -def _noisy_mean_initialization(embed_weight: torch.Tensor, num_new_tokens: int) -> None: +def _noisy_mean_initialization(embed_weight: "torch.Tensor", num_new_tokens: int) -> None: embedding_dim = embed_weight.size(1) avg_weight = embed_weight[:-num_new_tokens].mean(dim=0, keepdim=True) noise_weight = torch.empty_like(embed_weight[-num_new_tokens:]) diff --git a/src/llamafactory/model/utils/valuehead.py b/src/llamafactory/model/utils/valuehead.py index d813729e..64333688 100644 --- a/src/llamafactory/model/utils/valuehead.py +++ b/src/llamafactory/model/utils/valuehead.py @@ -23,6 +23,7 @@ def load_valuehead_params(path_or_repo_id: str, model_args: "ModelArguments") -> Returns: dict with keys `v_head.summary.weight` and `v_head.summary.bias`. 
""" kwargs = {"path_or_repo_id": path_or_repo_id, "cache_dir": model_args.cache_dir, "token": model_args.hf_hub_token} + err_text = "" try: from safetensors import safe_open @@ -31,16 +32,16 @@ def load_valuehead_params(path_or_repo_id: str, model_args: "ModelArguments") -> with safe_open(vhead_file, framework="pt", device="cpu") as f: return {key: f.get_tensor(key) for key in f.keys()} except Exception as err: - logger.info("Failed to load {}: {}".format(V_HEAD_SAFE_WEIGHTS_NAME, str(err))) + err_text = str(err) try: vhead_file = cached_file(filename=V_HEAD_WEIGHTS_NAME, **kwargs) return torch.load(vhead_file, map_location="cpu") except Exception as err: - logger.info("Failed to load {}: {}".format(V_HEAD_WEIGHTS_NAME, str(err))) + err_text = str(err) - logger.info("Provided path ({}) does not contain value head weights.".format(path_or_repo_id)) - logger.info("Ignore these messages if you are not resuming the training of a value head model.") + logger.info("Provided path ({}) does not contain value head weights: {}.".format(path_or_repo_id, err_text)) + logger.info("Ignore the above message if you are not resuming the training of a value head model.") return None From ee2c3601677f0360c934fc263406810ffaf9037e Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Thu, 6 Jun 2024 23:30:07 +0800 Subject: [PATCH 086/162] fix ppo+zero3 #3108 Former-commit-id: 33a93cc29e3e57bf001515000c0a70c112573dea --- src/llamafactory/train/ppo/trainer.py | 91 ++++++++++++++------------- src/llamafactory/train/ppo/utils.py | 36 ++++++----- 2 files changed, 66 insertions(+), 61 deletions(-) diff --git a/src/llamafactory/train/ppo/trainer.py b/src/llamafactory/train/ppo/trainer.py index 27353c72..b0c7e25d 100644 --- a/src/llamafactory/train/ppo/trainer.py +++ b/src/llamafactory/train/ppo/trainer.py @@ -2,9 +2,10 @@ import math import os import sys from types import MethodType -from typing import TYPE_CHECKING, Dict, List, Optional, Tuple +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple import torch +from accelerate.utils import DistributedDataParallelKwargs from tqdm import tqdm from transformers import GenerationConfig, Trainer, TrainerControl, TrainerState from transformers.optimization import get_scheduler @@ -79,6 +80,13 @@ class CustomPPOTrainer(PPOTrainer, Trainer): project_kwargs={"logging_dir": training_args.logging_dir}, ) + # Add deepspeed config + ppo_config.accelerator_kwargs["kwargs_handlers"] = [ + DistributedDataParallelKwargs(find_unused_parameters=training_args.ddp_find_unused_parameters) + ] + if training_args.deepspeed_plugin is not None: + ppo_config.accelerator_kwargs["deepspeed_plugin"] = training_args.deepspeed_plugin + # Create optimizer and scheduler if training_args.max_steps > 0: num_training_steps = training_args.max_steps @@ -124,6 +132,12 @@ class CustomPPOTrainer(PPOTrainer, Trainer): if self.args.max_steps > 0: logger.info("max_steps is given, it will override any value given in num_train_epochs") + unwrapped_model: "AutoModelForCausalLMWithValueHead" = self.accelerator.unwrap_model(self.model) + self.is_chatglm_model = getattr(unwrapped_model.config, "model_type", None) == "chatglm" + + device_type = unwrapped_model.pretrained_model.device.type + self.amp_context = torch.autocast(device_type, dtype=model_args.compute_dtype) + if finetuning_args.reward_model_type == "full": if self.is_deepspeed_enabled: if not ( @@ -184,7 +198,6 @@ class CustomPPOTrainer(PPOTrainer, Trainer): logger.info(" Total training steps = {}".format(max_steps)) logger.info(" 
Number of trainable parameters = {}".format(count_parameters(self.model)[0])) - unwrapped_model: "AutoModelForCausalLMWithValueHead" = self.accelerator.unwrap_model(self.model) dataiter = iter(self.dataloader) loss_meter = AverageMeter() reward_meter = AverageMeter() @@ -197,29 +210,21 @@ class CustomPPOTrainer(PPOTrainer, Trainer): dataiter = iter(self.dataloader) batch = next(dataiter) - # Cast to inference mode - unwrapped_model.gradient_checkpointing_disable() - unwrapped_model.config.use_cache = True - self.model.eval() - # Get inputs + self.model.eval() self.tokenizer.padding_side = "right" # change padding side queries, responses, rewards = [], [], [] for idx in range(0, self.config.batch_size, self.config.mini_batch_size): mini_batch_queries, mini_batch_responses = self.get_inputs( batch[idx : idx + self.config.mini_batch_size] ) - mini_batch_rewards = self.get_rewards(mini_batch_queries, mini_batch_responses, unwrapped_model) + mini_batch_rewards = self.get_rewards(mini_batch_queries, mini_batch_responses) queries.extend(mini_batch_queries) responses.extend(mini_batch_responses) rewards.extend(mini_batch_rewards) - # Cast to training mode - unwrapped_model.gradient_checkpointing_enable() - unwrapped_model.config.use_cache = False - self.model.train() - # Run PPO step + self.model.train() stats = self.step(queries, responses, rewards) self.tokenizer.padding_side = "left" # restore padding side loss_meter.update(float(stats["ppo/loss/total"]), n=len(rewards)) @@ -311,25 +316,24 @@ class CustomPPOTrainer(PPOTrainer, Trainer): getattr(self.processor, "image_processor").save_pretrained(output_dir) @torch.no_grad() - def get_inputs(self, batch: Dict[str, torch.Tensor]) -> Tuple[List[torch.Tensor], List[torch.Tensor]]: + def get_inputs(self, batch: Dict[str, "torch.Tensor"]) -> Tuple[List["torch.Tensor"], List["torch.Tensor"]]: r""" Generates model's responses given queries. """ - if self.model_args.upcast_layernorm: - layernorm_params = dump_layernorm(self.model) - if batch["input_ids"].size(0) == 1: # handle llama2 ppo with gradient accumulation > 1 start_index = (batch["input_ids"][0] != self.tokenizer.pad_token_id).nonzero()[0].item() for k, v in batch.items(): batch[k] = v[:, start_index:] with unwrap_model_for_generation(self.model, self.accelerator) as unwrapped_model: + if self.model_args.upcast_layernorm: + layernorm_params = dump_layernorm(unwrapped_model) + generate_output: torch.Tensor = unwrapped_model.generate( generation_config=self.generation_config, logits_processor=get_logits_processor(), **batch ) - - if self.model_args.upcast_layernorm: - restore_layernorm(self.model, layernorm_params) + if self.model_args.upcast_layernorm: + restore_layernorm(unwrapped_model, layernorm_params) query = batch["input_ids"].detach().cpu() response = generate_output[:, batch["input_ids"].size(-1) :].detach().cpu() @@ -351,10 +355,9 @@ class CustomPPOTrainer(PPOTrainer, Trainer): @torch.no_grad() def get_rewards( self, - queries: List[torch.Tensor], - responses: List[torch.Tensor], - unwrapped_model: "AutoModelForCausalLMWithValueHead", - ) -> List[torch.Tensor]: + queries: List["torch.Tensor"], + responses: List["torch.Tensor"], + ) -> List["torch.Tensor"]: r""" Computes scores using given reward model. 
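# Illustrative sketch (hypothetical helper, not part of this patch): the reward-scoring
# rule that the surrounding get_rewards() hunks preserve is "read the value-head output
# at each sequence's last non-pad token". Names below (last_token_rewards, pad_token_id)
# are assumptions for illustration only and mirror the loop shown in the next hunk.
import torch


def last_token_rewards(values: torch.Tensor, input_ids: torch.Tensor, pad_token_id: int) -> list:
    """values and input_ids have shape (batch, seq_len); returns one scalar reward per sequence."""
    rewards = []
    for i in range(input_ids.size(0)):
        non_pad = (input_ids[i] != pad_token_id).nonzero()
        end_index = non_pad[-1].item() if len(non_pad) else 0  # last non-pad position
        rewards.append(values[i, end_index].float().detach().cpu())  # use fp32 on CPU
    return rewards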
@@ -365,18 +368,22 @@ class CustomPPOTrainer(PPOTrainer, Trainer): messages = self.tokenizer.batch_decode(token_ids, skip_special_tokens=True) return get_rewards_from_server(self.reward_model, messages) - if self.finetuning_args.reward_model_type == "lora": - replace_model(unwrapped_model, target="reward") - reward_model = self.model - else: - reward_model = self.reward_model - batch = self.prepare_model_inputs(queries, responses) - with torch.cuda.amp.autocast(dtype=self.model_args.compute_dtype): # support bf16 - _, _, values = reward_model(**batch, output_hidden_states=True, return_dict=True, use_cache=False) + with unwrap_model_for_generation(self.model, self.accelerator) as unwrapped_model: + if self.finetuning_args.reward_model_type == "lora": + replace_model(unwrapped_model, target="reward") + reward_model = self.model + else: + reward_model = self.reward_model - if getattr(unwrapped_model.config, "model_type", None) == "chatglm": # assume same architecture + with self.amp_context: # support bf16 + _, _, values = reward_model(**batch, output_hidden_states=True, return_dict=True, use_cache=False) + + if self.finetuning_args.reward_model_type == "lora": + replace_model(unwrapped_model, target="default") + + if self.is_chatglm_model: # assume same architecture values = torch.transpose(values, 0, 1) rewards = [] @@ -385,21 +392,18 @@ class CustomPPOTrainer(PPOTrainer, Trainer): end_index = end_indexes[-1].item() if len(end_indexes) else 0 rewards.append(values[i, end_index].float().detach().cpu()) # use fp32 type - if self.finetuning_args.reward_model_type == "lora": - replace_model(unwrapped_model, target="default") - return rewards @PPODecorators.empty_device_cache() def batched_forward_pass( self, model: "AutoModelForCausalLMWithValueHead", - queries: torch.Tensor, - responses: torch.Tensor, - model_inputs: dict, + queries: "torch.Tensor", + responses: "torch.Tensor", + model_inputs: Dict[str, Any], return_logits: bool = False, - response_masks: Optional[torch.Tensor] = None, - ): + response_masks: Optional["torch.Tensor"] = None, + ) -> Tuple["torch.Tensor", Optional["torch.Tensor"], "torch.Tensor", "torch.Tensor"]: r""" Calculates model outputs in multiple batches. @@ -421,11 +425,10 @@ class CustomPPOTrainer(PPOTrainer, Trainer): input_ids = input_kwargs["input_ids"] attention_mask = input_kwargs["attention_mask"] - with torch.cuda.amp.autocast(dtype=self.model_args.compute_dtype): # support bf16 + with self.amp_context: # support bf16 logits, _, values = model(**input_kwargs) - unwrapped_model: "AutoModelForCausalLMWithValueHead" = self.accelerator.unwrap_model(self.model) - if getattr(unwrapped_model.config, "model_type", None) == "chatglm": + if self.is_chatglm_model: values = torch.transpose(values, 0, 1) logprobs = logprobs_from_logits(logits[:, :-1, :], input_ids[:, 1:]) diff --git a/src/llamafactory/train/ppo/utils.py b/src/llamafactory/train/ppo/utils.py index e5025581..570409f2 100644 --- a/src/llamafactory/train/ppo/utils.py +++ b/src/llamafactory/train/ppo/utils.py @@ -1,9 +1,7 @@ import json -from contextlib import nullcontext from typing import TYPE_CHECKING, Dict, List, Literal, Optional import torch -from transformers.integrations import is_deepspeed_zero3_enabled from ...extras.packages import is_requests_available @@ -18,6 +16,9 @@ if TYPE_CHECKING: def get_rewards_from_server(server_url: str, messages: List[str]) -> List[torch.Tensor]: + r""" + Gets reward scores from the API server. 
+ """ headers = {"Content-Type": "application/json"} payload = {"model": "model", "messages": messages} response = requests.post(server_url, json=payload, headers=headers) @@ -26,25 +27,23 @@ def get_rewards_from_server(server_url: str, messages: List[str]) -> List[torch. def replace_model(model: "AutoModelForCausalLMWithValueHead", target: Literal["default", "reward"]) -> None: - if is_deepspeed_zero3_enabled(): - import deepspeed # type: ignore + r""" + Replaces the default/reward modules in the model. The model is already unwrapped (and gathered). + """ + if target == "reward": # save default head temporarily + setattr(model, "default_head_weight", model.v_head.summary.weight.data.detach().clone()) + setattr(model, "default_head_bias", model.v_head.summary.bias.data.detach().clone()) - params = [model.v_head.summary.weight, model.v_head.summary.bias] - context_maybe_zero3 = deepspeed.zero.GatheredParameters(params, modifier_rank=0) - else: - context_maybe_zero3 = nullcontext() - - with context_maybe_zero3: - if target == "reward": # save default head temporarily - setattr(model, "default_head_weight", model.v_head.summary.weight.data.detach().clone()) - setattr(model, "default_head_bias", model.v_head.summary.bias.data.detach().clone()) - - model.pretrained_model.set_adapter(target) # set the LoRA adapter to be active - model.v_head.summary.weight.data = model.get_buffer("{}_head_weight".format(target)).detach().clone() - model.v_head.summary.bias.data = model.get_buffer("{}_head_bias".format(target)).detach().clone() + model.pretrained_model.set_adapter(target) # set the LoRA adapter to be active + device = model.v_head.summary.weight.device + model.v_head.summary.weight.data = model.get_buffer("{}_head_weight".format(target)).detach().clone().to(device) + model.v_head.summary.bias.data = model.get_buffer("{}_head_bias".format(target)).detach().clone().to(device) def dump_layernorm(model: "PreTrainedModel") -> Dict[str, torch.Tensor]: + r""" + Dumps the layernorm parameters in the model. The model is already unwrapped (and gathered). + """ layer_norm_params = {} for name, param in model.named_parameters(): if param.data.dtype == torch.float32: @@ -55,6 +54,9 @@ def dump_layernorm(model: "PreTrainedModel") -> Dict[str, torch.Tensor]: def restore_layernorm(model: "PreTrainedModel", layernorm_params: Optional[Dict[str, torch.Tensor]] = None) -> None: + r""" + Restores the layernorm parameters in the model. The model is already unwrapped (and gathered). + """ for name, param in model.named_parameters(): if name in layernorm_params: param.data = layernorm_params[name] From 969d0f7cddd4447152ebe4a17791707886bf4102 Mon Sep 17 00:00:00 2001 From: hoshi-hiyouga Date: Thu, 6 Jun 2024 23:38:09 +0800 Subject: [PATCH 087/162] Update cli.py Former-commit-id: 32190507534adf5f505858b3af2b592ca6568ac7 --- src/llamafactory/cli.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/llamafactory/cli.py b/src/llamafactory/cli.py index 8a229a38..19a104bd 100644 --- a/src/llamafactory/cli.py +++ b/src/llamafactory/cli.py @@ -71,10 +71,6 @@ def main(): export_model() elif command == Command.TRAIN: if get_device_count() > 0: - # NOTE (MengqingCao): why use torchrun when only one accelerator is available? 
- # DeepSpeed only warp model with DeepSpeedEngine when launching by distributed launcher, - # e.g., torchrun, causing some feature missing - # sa: https://github.com/huggingface/transformers/issues/24309 master_addr = os.environ.get("MASTER_ADDR", "127.0.0.1") master_port = os.environ.get("MASTER_PORT", str(random.randint(20001, 29999))) logger.info("Initializing distributed tasks at: {}:{}".format(master_addr, master_port)) From 3e274dfb19bfe773c94ad42a68b9ede43af22226 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Thu, 6 Jun 2024 23:44:58 +0800 Subject: [PATCH 088/162] add DISABLE_TORCHRUN option Former-commit-id: bcc574b479c2101438723aadead42743d4378776 --- src/llamafactory/cli.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/llamafactory/cli.py b/src/llamafactory/cli.py index 8ae3d6a8..092f4cf7 100644 --- a/src/llamafactory/cli.py +++ b/src/llamafactory/cli.py @@ -72,7 +72,12 @@ def main(): elif command == Command.EXPORT: export_model() elif command == Command.TRAIN: - if get_device_count() > 0: + disable_torchrun = os.environ.get("DISABLE_TORCHRUN", "0").lower() in ["true", "1"] + if disable_torchrun and get_device_count() > 1: + logger.warning("`torchrun` cannot be disabled when device count > 1.") + disable_torchrun = False + + if (not disable_torchrun) and (get_device_count() > 0): master_addr = os.environ.get("MASTER_ADDR", "127.0.0.1") master_port = os.environ.get("MASTER_PORT", str(random.randint(20001, 29999))) logger.info("Initializing distributed tasks at: {}:{}".format(master_addr, master_port)) From 0b1f4a34f866874707c4689a1569db24125a3bf8 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Fri, 7 Jun 2024 00:09:06 +0800 Subject: [PATCH 089/162] rename files Former-commit-id: e1a8431770fc36c0c9ee7fed4abbc3d7fdcc5efd --- src/llamafactory/chat/vllm_engine.py | 2 +- src/llamafactory/data/__init__.py | 12 ++++++------ src/llamafactory/data/aligner.py | 2 +- .../data/{utils.py => data_utils.py} | 0 src/llamafactory/data/loader.py | 2 +- src/llamafactory/data/processors/feedback.py | 2 +- src/llamafactory/data/processors/pairwise.py | 2 +- .../{mm_utils.py => processor_utils.py} | 0 src/llamafactory/data/processors/supervised.py | 2 +- .../data/processors/unsupervised.py | 4 ++-- src/llamafactory/data/template.py | 10 +++++----- src/llamafactory/model/__init__.py | 6 +++--- src/llamafactory/model/adapter.py | 6 +++--- src/llamafactory/model/loader.py | 8 ++++---- .../model/{utils => model_utils}/__init__.py | 0 .../model/{utils => model_utils}/attention.py | 0 .../{utils => model_utils}/checkpointing.py | 0 .../model/{utils => model_utils}/embedding.py | 0 .../model/{utils => model_utils}/longlora.py | 0 .../model/{utils => model_utils}/misc.py | 0 .../model/{utils => model_utils}/mod.py | 0 .../model/{utils => model_utils}/moe.py | 0 .../{utils => model_utils}/quantization.py | 0 .../model/{utils => model_utils}/rope.py | 0 .../model/{utils => model_utils}/unsloth.py | 0 .../model/{utils => model_utils}/valuehead.py | 0 .../model/{utils => model_utils}/visual.py | 0 src/llamafactory/model/patcher.py | 18 +++++++++--------- src/llamafactory/train/dpo/trainer.py | 2 +- src/llamafactory/train/dpo/workflow.py | 2 +- src/llamafactory/train/kto/trainer.py | 2 +- src/llamafactory/train/kto/workflow.py | 2 +- .../train/ppo/{utils.py => ppo_utils.py} | 0 src/llamafactory/train/ppo/trainer.py | 4 ++-- src/llamafactory/train/ppo/workflow.py | 2 +- src/llamafactory/train/pt/trainer.py | 2 +- src/llamafactory/train/pt/workflow.py | 
2 +- src/llamafactory/train/rm/trainer.py | 2 +- src/llamafactory/train/rm/workflow.py | 2 +- src/llamafactory/train/sft/trainer.py | 2 +- src/llamafactory/train/sft/workflow.py | 2 +- .../train/{utils.py => trainer_utils.py} | 0 src/llamafactory/webui/components/top.py | 4 ++-- 43 files changed, 53 insertions(+), 53 deletions(-) rename src/llamafactory/data/{utils.py => data_utils.py} (100%) rename src/llamafactory/data/processors/{mm_utils.py => processor_utils.py} (100%) rename src/llamafactory/model/{utils => model_utils}/__init__.py (100%) rename src/llamafactory/model/{utils => model_utils}/attention.py (100%) rename src/llamafactory/model/{utils => model_utils}/checkpointing.py (100%) rename src/llamafactory/model/{utils => model_utils}/embedding.py (100%) rename src/llamafactory/model/{utils => model_utils}/longlora.py (100%) rename src/llamafactory/model/{utils => model_utils}/misc.py (100%) rename src/llamafactory/model/{utils => model_utils}/mod.py (100%) rename src/llamafactory/model/{utils => model_utils}/moe.py (100%) rename src/llamafactory/model/{utils => model_utils}/quantization.py (100%) rename src/llamafactory/model/{utils => model_utils}/rope.py (100%) rename src/llamafactory/model/{utils => model_utils}/unsloth.py (100%) rename src/llamafactory/model/{utils => model_utils}/valuehead.py (100%) rename src/llamafactory/model/{utils => model_utils}/visual.py (100%) rename src/llamafactory/train/ppo/{utils.py => ppo_utils.py} (100%) rename src/llamafactory/train/{utils.py => trainer_utils.py} (100%) diff --git a/src/llamafactory/chat/vllm_engine.py b/src/llamafactory/chat/vllm_engine.py index e193704a..87ce8684 100644 --- a/src/llamafactory/chat/vllm_engine.py +++ b/src/llamafactory/chat/vllm_engine.py @@ -6,7 +6,7 @@ from ..extras.logging import get_logger from ..extras.misc import get_device_count from ..extras.packages import is_vllm_available from ..model import load_config, load_tokenizer -from ..model.utils.visual import LlavaMultiModalProjectorForYiVLForVLLM +from ..model.model_utils.visual import LlavaMultiModalProjectorForYiVLForVLLM from .base_engine import BaseEngine, Response diff --git a/src/llamafactory/data/__init__.py b/src/llamafactory/data/__init__.py index 44887d24..b08691d3 100644 --- a/src/llamafactory/data/__init__.py +++ b/src/llamafactory/data/__init__.py @@ -1,16 +1,16 @@ from .collator import KTODataCollatorWithPadding, PairwiseDataCollatorWithPadding +from .data_utils import Role, split_dataset from .loader import get_dataset -from .template import Template, get_template_and_fix_tokenizer, templates -from .utils import Role, split_dataset +from .template import TEMPLATES, Template, get_template_and_fix_tokenizer __all__ = [ "KTODataCollatorWithPadding", "PairwiseDataCollatorWithPadding", - "get_dataset", - "Template", - "get_template_and_fix_tokenizer", - "templates", "Role", "split_dataset", + "get_dataset", + "TEMPLATES", + "Template", + "get_template_and_fix_tokenizer", ] diff --git a/src/llamafactory/data/aligner.py b/src/llamafactory/data/aligner.py index 2a382c60..434956af 100644 --- a/src/llamafactory/data/aligner.py +++ b/src/llamafactory/data/aligner.py @@ -5,7 +5,7 @@ from typing import TYPE_CHECKING, Any, Dict, List, Union from datasets import Features from ..extras.logging import get_logger -from .utils import Role +from .data_utils import Role if TYPE_CHECKING: diff --git a/src/llamafactory/data/utils.py b/src/llamafactory/data/data_utils.py similarity index 100% rename from src/llamafactory/data/utils.py rename to 
src/llamafactory/data/data_utils.py diff --git a/src/llamafactory/data/loader.py b/src/llamafactory/data/loader.py index 859f9a93..2c236c76 100644 --- a/src/llamafactory/data/loader.py +++ b/src/llamafactory/data/loader.py @@ -10,10 +10,10 @@ from ..extras.constants import FILEEXT2TYPE from ..extras.logging import get_logger from ..extras.misc import has_tokenized_data from .aligner import align_dataset +from .data_utils import merge_dataset from .parser import get_dataset_list from .preprocess import get_preprocess_and_print_func from .template import get_template_and_fix_tokenizer -from .utils import merge_dataset if TYPE_CHECKING: diff --git a/src/llamafactory/data/processors/feedback.py b/src/llamafactory/data/processors/feedback.py index 1aaff0ab..dc7d817c 100644 --- a/src/llamafactory/data/processors/feedback.py +++ b/src/llamafactory/data/processors/feedback.py @@ -2,7 +2,7 @@ from typing import TYPE_CHECKING, Any, Dict, List, Optional from ...extras.constants import IGNORE_INDEX from ...extras.logging import get_logger -from .mm_utils import get_paligemma_token_type_ids, get_pixel_values +from .processor_utils import get_paligemma_token_type_ids, get_pixel_values if TYPE_CHECKING: diff --git a/src/llamafactory/data/processors/pairwise.py b/src/llamafactory/data/processors/pairwise.py index 69dab34a..8ad3979f 100644 --- a/src/llamafactory/data/processors/pairwise.py +++ b/src/llamafactory/data/processors/pairwise.py @@ -2,7 +2,7 @@ from typing import TYPE_CHECKING, Any, Dict, List, Optional from ...extras.constants import IGNORE_INDEX from ...extras.logging import get_logger -from .mm_utils import get_paligemma_token_type_ids, get_pixel_values +from .processor_utils import get_paligemma_token_type_ids, get_pixel_values if TYPE_CHECKING: diff --git a/src/llamafactory/data/processors/mm_utils.py b/src/llamafactory/data/processors/processor_utils.py similarity index 100% rename from src/llamafactory/data/processors/mm_utils.py rename to src/llamafactory/data/processors/processor_utils.py diff --git a/src/llamafactory/data/processors/supervised.py b/src/llamafactory/data/processors/supervised.py index b119aa22..d90a32ac 100644 --- a/src/llamafactory/data/processors/supervised.py +++ b/src/llamafactory/data/processors/supervised.py @@ -2,7 +2,7 @@ from typing import TYPE_CHECKING, Any, Dict, List, Optional from ...extras.constants import IGNORE_INDEX from ...extras.logging import get_logger -from .mm_utils import get_paligemma_token_type_ids, get_pixel_values +from .processor_utils import get_paligemma_token_type_ids, get_pixel_values if TYPE_CHECKING: diff --git a/src/llamafactory/data/processors/unsupervised.py b/src/llamafactory/data/processors/unsupervised.py index 6a9f9460..e00bde55 100644 --- a/src/llamafactory/data/processors/unsupervised.py +++ b/src/llamafactory/data/processors/unsupervised.py @@ -1,8 +1,8 @@ from typing import TYPE_CHECKING, Any, Dict, List, Optional from ...extras.logging import get_logger -from ..utils import Role -from .mm_utils import get_paligemma_token_type_ids, get_pixel_values +from ..data_utils import Role +from .processor_utils import get_paligemma_token_type_ids, get_pixel_values if TYPE_CHECKING: diff --git a/src/llamafactory/data/template.py b/src/llamafactory/data/template.py index fe0211c6..3dce5ec6 100644 --- a/src/llamafactory/data/template.py +++ b/src/llamafactory/data/template.py @@ -2,8 +2,8 @@ from dataclasses import dataclass from typing import TYPE_CHECKING, Dict, List, Optional, Sequence, Tuple, Union from ..extras.logging import 
get_logger +from .data_utils import Role, infer_max_len from .formatter import EmptyFormatter, FunctionFormatter, StringFormatter, ToolFormatter -from .utils import Role, infer_max_len if TYPE_CHECKING: @@ -196,7 +196,7 @@ class Llama2Template(Template): return self._make_pairs(encoded_messages, cutoff_len, reserved_label_len) -templates: Dict[str, Template] = {} +TEMPLATES: Dict[str, Template] = {} def _register_template( @@ -248,7 +248,7 @@ def _register_template( default_function_formatter = FunctionFormatter(slots=["Action: {{name}}\nAction Input: {{arguments}}"] + eos_slots) default_tool_formatter = ToolFormatter(tool_format="default") default_separator_formatter = EmptyFormatter() - templates[name] = template_class( + TEMPLATES[name] = template_class( format_user=format_user or default_user_formatter, format_assistant=format_assistant or default_assistant_formatter, format_system=format_system or default_user_formatter, @@ -348,9 +348,9 @@ def get_template_and_fix_tokenizer( name: Optional[str] = None, ) -> Template: if name is None: - template = templates["empty"] # placeholder + template = TEMPLATES["empty"] # placeholder else: - template = templates.get(name, None) + template = TEMPLATES.get(name, None) if template is None: raise ValueError("Template {} does not exist.".format(name)) diff --git a/src/llamafactory/model/__init__.py b/src/llamafactory/model/__init__.py index 88f666c8..9d23d59f 100644 --- a/src/llamafactory/model/__init__.py +++ b/src/llamafactory/model/__init__.py @@ -1,12 +1,12 @@ from .loader import load_config, load_model, load_tokenizer -from .utils.misc import find_all_linear_modules -from .utils.valuehead import load_valuehead_params +from .model_utils.misc import find_all_linear_modules +from .model_utils.valuehead import load_valuehead_params __all__ = [ "load_config", "load_model", "load_tokenizer", - "load_valuehead_params", "find_all_linear_modules", + "load_valuehead_params", ] diff --git a/src/llamafactory/model/adapter.py b/src/llamafactory/model/adapter.py index a9204ef0..1a77d613 100644 --- a/src/llamafactory/model/adapter.py +++ b/src/llamafactory/model/adapter.py @@ -7,9 +7,9 @@ from transformers.integrations import is_deepspeed_zero3_enabled from transformers.modeling_utils import is_fsdp_enabled from ..extras.logging import get_logger -from .utils.misc import find_all_linear_modules, find_expanded_modules -from .utils.quantization import QuantizationMethod -from .utils.unsloth import get_unsloth_peft_model, load_unsloth_peft_model +from .model_utils.misc import find_all_linear_modules, find_expanded_modules +from .model_utils.quantization import QuantizationMethod +from .model_utils.unsloth import get_unsloth_peft_model, load_unsloth_peft_model if TYPE_CHECKING: diff --git a/src/llamafactory/model/loader.py b/src/llamafactory/model/loader.py index 8f3309b3..697a04e7 100644 --- a/src/llamafactory/model/loader.py +++ b/src/llamafactory/model/loader.py @@ -6,11 +6,11 @@ from trl import AutoModelForCausalLMWithValueHead from ..extras.logging import get_logger from ..extras.misc import count_parameters, try_download_model_from_ms from .adapter import init_adapter +from .model_utils.misc import register_autoclass +from .model_utils.mod import convert_pretrained_model_to_mod, load_mod_pretrained_model +from .model_utils.unsloth import load_unsloth_pretrained_model +from .model_utils.valuehead import load_valuehead_params from .patcher import patch_config, patch_model, patch_tokenizer, patch_valuehead_model -from .utils.misc import register_autoclass 
-from .utils.mod import convert_pretrained_model_to_mod, load_mod_pretrained_model -from .utils.unsloth import load_unsloth_pretrained_model -from .utils.valuehead import load_valuehead_params if TYPE_CHECKING: diff --git a/src/llamafactory/model/utils/__init__.py b/src/llamafactory/model/model_utils/__init__.py similarity index 100% rename from src/llamafactory/model/utils/__init__.py rename to src/llamafactory/model/model_utils/__init__.py diff --git a/src/llamafactory/model/utils/attention.py b/src/llamafactory/model/model_utils/attention.py similarity index 100% rename from src/llamafactory/model/utils/attention.py rename to src/llamafactory/model/model_utils/attention.py diff --git a/src/llamafactory/model/utils/checkpointing.py b/src/llamafactory/model/model_utils/checkpointing.py similarity index 100% rename from src/llamafactory/model/utils/checkpointing.py rename to src/llamafactory/model/model_utils/checkpointing.py diff --git a/src/llamafactory/model/utils/embedding.py b/src/llamafactory/model/model_utils/embedding.py similarity index 100% rename from src/llamafactory/model/utils/embedding.py rename to src/llamafactory/model/model_utils/embedding.py diff --git a/src/llamafactory/model/utils/longlora.py b/src/llamafactory/model/model_utils/longlora.py similarity index 100% rename from src/llamafactory/model/utils/longlora.py rename to src/llamafactory/model/model_utils/longlora.py diff --git a/src/llamafactory/model/utils/misc.py b/src/llamafactory/model/model_utils/misc.py similarity index 100% rename from src/llamafactory/model/utils/misc.py rename to src/llamafactory/model/model_utils/misc.py diff --git a/src/llamafactory/model/utils/mod.py b/src/llamafactory/model/model_utils/mod.py similarity index 100% rename from src/llamafactory/model/utils/mod.py rename to src/llamafactory/model/model_utils/mod.py diff --git a/src/llamafactory/model/utils/moe.py b/src/llamafactory/model/model_utils/moe.py similarity index 100% rename from src/llamafactory/model/utils/moe.py rename to src/llamafactory/model/model_utils/moe.py diff --git a/src/llamafactory/model/utils/quantization.py b/src/llamafactory/model/model_utils/quantization.py similarity index 100% rename from src/llamafactory/model/utils/quantization.py rename to src/llamafactory/model/model_utils/quantization.py diff --git a/src/llamafactory/model/utils/rope.py b/src/llamafactory/model/model_utils/rope.py similarity index 100% rename from src/llamafactory/model/utils/rope.py rename to src/llamafactory/model/model_utils/rope.py diff --git a/src/llamafactory/model/utils/unsloth.py b/src/llamafactory/model/model_utils/unsloth.py similarity index 100% rename from src/llamafactory/model/utils/unsloth.py rename to src/llamafactory/model/model_utils/unsloth.py diff --git a/src/llamafactory/model/utils/valuehead.py b/src/llamafactory/model/model_utils/valuehead.py similarity index 100% rename from src/llamafactory/model/utils/valuehead.py rename to src/llamafactory/model/model_utils/valuehead.py diff --git a/src/llamafactory/model/utils/visual.py b/src/llamafactory/model/model_utils/visual.py similarity index 100% rename from src/llamafactory/model/utils/visual.py rename to src/llamafactory/model/model_utils/visual.py diff --git a/src/llamafactory/model/patcher.py b/src/llamafactory/model/patcher.py index 1a8ce607..87c92315 100644 --- a/src/llamafactory/model/patcher.py +++ b/src/llamafactory/model/patcher.py @@ -10,15 +10,15 @@ from transformers.modeling_utils import is_fsdp_enabled from ..extras.logging import get_logger from 
..extras.misc import infer_optim_dtype -from .utils.attention import configure_attn_implementation, print_attn_implementation -from .utils.checkpointing import prepare_model_for_training -from .utils.embedding import resize_embedding_layer -from .utils.longlora import configure_longlora -from .utils.moe import add_z3_leaf_module, configure_moe -from .utils.quantization import configure_quantization -from .utils.rope import configure_rope -from .utils.valuehead import prepare_valuehead_model -from .utils.visual import autocast_projector_dtype, configure_visual_model +from .model_utils.attention import configure_attn_implementation, print_attn_implementation +from .model_utils.checkpointing import prepare_model_for_training +from .model_utils.embedding import resize_embedding_layer +from .model_utils.longlora import configure_longlora +from .model_utils.moe import add_z3_leaf_module, configure_moe +from .model_utils.quantization import configure_quantization +from .model_utils.rope import configure_rope +from .model_utils.valuehead import prepare_valuehead_model +from .model_utils.visual import autocast_projector_dtype, configure_visual_model if TYPE_CHECKING: diff --git a/src/llamafactory/train/dpo/trainer.py b/src/llamafactory/train/dpo/trainer.py index 6f1da34e..f64c287f 100644 --- a/src/llamafactory/train/dpo/trainer.py +++ b/src/llamafactory/train/dpo/trainer.py @@ -10,7 +10,7 @@ from trl import DPOTrainer from trl.trainer import disable_dropout_in_model from ...extras.constants import IGNORE_INDEX -from ..utils import create_custom_optimzer, create_custom_scheduler, get_ref_context +from ..trainer_utils import create_custom_optimzer, create_custom_scheduler, get_ref_context if TYPE_CHECKING: diff --git a/src/llamafactory/train/dpo/workflow.py b/src/llamafactory/train/dpo/workflow.py index 61a3e2f0..992985b0 100644 --- a/src/llamafactory/train/dpo/workflow.py +++ b/src/llamafactory/train/dpo/workflow.py @@ -7,7 +7,7 @@ from ...extras.constants import IGNORE_INDEX from ...extras.ploting import plot_loss from ...hparams import ModelArguments from ...model import load_model, load_tokenizer -from ..utils import create_modelcard_and_push, create_ref_model +from ..trainer_utils import create_modelcard_and_push, create_ref_model from .trainer import CustomDPOTrainer diff --git a/src/llamafactory/train/kto/trainer.py b/src/llamafactory/train/kto/trainer.py index 03cad5a7..1610ccfa 100644 --- a/src/llamafactory/train/kto/trainer.py +++ b/src/llamafactory/train/kto/trainer.py @@ -9,7 +9,7 @@ from trl import KTOTrainer from trl.trainer import disable_dropout_in_model from ...extras.constants import IGNORE_INDEX -from ..utils import create_custom_optimzer, create_custom_scheduler, get_ref_context +from ..trainer_utils import create_custom_optimzer, create_custom_scheduler, get_ref_context if TYPE_CHECKING: diff --git a/src/llamafactory/train/kto/workflow.py b/src/llamafactory/train/kto/workflow.py index 26dc770c..c79b160b 100644 --- a/src/llamafactory/train/kto/workflow.py +++ b/src/llamafactory/train/kto/workflow.py @@ -5,7 +5,7 @@ from ...extras.constants import IGNORE_INDEX from ...extras.ploting import plot_loss from ...hparams import ModelArguments from ...model import load_model, load_tokenizer -from ..utils import create_modelcard_and_push, create_ref_model +from ..trainer_utils import create_modelcard_and_push, create_ref_model from .trainer import CustomKTOTrainer diff --git a/src/llamafactory/train/ppo/utils.py b/src/llamafactory/train/ppo/ppo_utils.py similarity index 100% rename from 
src/llamafactory/train/ppo/utils.py rename to src/llamafactory/train/ppo/ppo_utils.py diff --git a/src/llamafactory/train/ppo/trainer.py b/src/llamafactory/train/ppo/trainer.py index b0c7e25d..7addfc3c 100644 --- a/src/llamafactory/train/ppo/trainer.py +++ b/src/llamafactory/train/ppo/trainer.py @@ -19,8 +19,8 @@ from trl.models.utils import unwrap_model_for_generation from ...extras.callbacks import FixValueHeadModelCallback, LogCallback from ...extras.logging import get_logger from ...extras.misc import AverageMeter, count_parameters, get_current_device, get_logits_processor -from ..utils import create_custom_optimzer, create_custom_scheduler -from .utils import dump_layernorm, get_rewards_from_server, replace_model, restore_layernorm +from ..trainer_utils import create_custom_optimzer, create_custom_scheduler +from .ppo_utils import dump_layernorm, get_rewards_from_server, replace_model, restore_layernorm if TYPE_CHECKING: diff --git a/src/llamafactory/train/ppo/workflow.py b/src/llamafactory/train/ppo/workflow.py index 4383bcdc..111704c6 100644 --- a/src/llamafactory/train/ppo/workflow.py +++ b/src/llamafactory/train/ppo/workflow.py @@ -9,7 +9,7 @@ from ...extras.callbacks import FixValueHeadModelCallback from ...extras.misc import fix_valuehead_checkpoint from ...extras.ploting import plot_loss from ...model import load_model, load_tokenizer -from ..utils import create_ref_model, create_reward_model +from ..trainer_utils import create_ref_model, create_reward_model from .trainer import CustomPPOTrainer diff --git a/src/llamafactory/train/pt/trainer.py b/src/llamafactory/train/pt/trainer.py index b7b80f88..1d96e82f 100644 --- a/src/llamafactory/train/pt/trainer.py +++ b/src/llamafactory/train/pt/trainer.py @@ -4,7 +4,7 @@ from typing import TYPE_CHECKING, Dict, Optional from transformers import Trainer from ...extras.logging import get_logger -from ..utils import create_custom_optimzer, create_custom_scheduler +from ..trainer_utils import create_custom_optimzer, create_custom_scheduler if TYPE_CHECKING: diff --git a/src/llamafactory/train/pt/workflow.py b/src/llamafactory/train/pt/workflow.py index 9f945901..8a635567 100644 --- a/src/llamafactory/train/pt/workflow.py +++ b/src/llamafactory/train/pt/workflow.py @@ -8,7 +8,7 @@ from transformers import DataCollatorForLanguageModeling from ...data import get_dataset, split_dataset from ...extras.ploting import plot_loss from ...model import load_model, load_tokenizer -from ..utils import create_modelcard_and_push +from ..trainer_utils import create_modelcard_and_push from .trainer import CustomTrainer diff --git a/src/llamafactory/train/rm/trainer.py b/src/llamafactory/train/rm/trainer.py index d49dd67b..bfb344dc 100644 --- a/src/llamafactory/train/rm/trainer.py +++ b/src/llamafactory/train/rm/trainer.py @@ -7,7 +7,7 @@ import torch from transformers import Trainer from ...extras.logging import get_logger -from ..utils import create_custom_optimzer, create_custom_scheduler +from ..trainer_utils import create_custom_optimzer, create_custom_scheduler if TYPE_CHECKING: diff --git a/src/llamafactory/train/rm/workflow.py b/src/llamafactory/train/rm/workflow.py index 621d03b7..2e9e194b 100644 --- a/src/llamafactory/train/rm/workflow.py +++ b/src/llamafactory/train/rm/workflow.py @@ -7,7 +7,7 @@ from ...extras.callbacks import FixValueHeadModelCallback from ...extras.misc import fix_valuehead_checkpoint from ...extras.ploting import plot_loss from ...model import load_model, load_tokenizer -from ..utils import create_modelcard_and_push +from 
..trainer_utils import create_modelcard_and_push from .metric import compute_accuracy from .trainer import PairwiseTrainer diff --git a/src/llamafactory/train/sft/trainer.py b/src/llamafactory/train/sft/trainer.py index 35671e1b..c063b214 100644 --- a/src/llamafactory/train/sft/trainer.py +++ b/src/llamafactory/train/sft/trainer.py @@ -9,7 +9,7 @@ from transformers import Seq2SeqTrainer from ...extras.constants import IGNORE_INDEX from ...extras.logging import get_logger -from ..utils import create_custom_optimzer, create_custom_scheduler +from ..trainer_utils import create_custom_optimzer, create_custom_scheduler if TYPE_CHECKING: diff --git a/src/llamafactory/train/sft/workflow.py b/src/llamafactory/train/sft/workflow.py index d9d7c8e9..f09b5173 100644 --- a/src/llamafactory/train/sft/workflow.py +++ b/src/llamafactory/train/sft/workflow.py @@ -9,7 +9,7 @@ from ...extras.constants import IGNORE_INDEX from ...extras.misc import get_logits_processor from ...extras.ploting import plot_loss from ...model import load_model, load_tokenizer -from ..utils import create_modelcard_and_push +from ..trainer_utils import create_modelcard_and_push from .metric import ComputeMetrics from .trainer import CustomSeq2SeqTrainer diff --git a/src/llamafactory/train/utils.py b/src/llamafactory/train/trainer_utils.py similarity index 100% rename from src/llamafactory/train/utils.py rename to src/llamafactory/train/trainer_utils.py diff --git a/src/llamafactory/webui/components/top.py b/src/llamafactory/webui/components/top.py index ca093584..c794d0aa 100644 --- a/src/llamafactory/webui/components/top.py +++ b/src/llamafactory/webui/components/top.py @@ -1,6 +1,6 @@ from typing import TYPE_CHECKING, Dict -from ...data import templates +from ...data import TEMPLATES from ...extras.constants import METHODS, SUPPORTED_MODELS from ...extras.packages import is_gradio_available from ..common import get_model_info, list_checkpoints, save_config @@ -30,7 +30,7 @@ def create_top() -> Dict[str, "Component"]: with gr.Accordion(open=False) as advanced_tab: with gr.Row(): quantization_bit = gr.Dropdown(choices=["none", "8", "4"], value="none", scale=2) - template = gr.Dropdown(choices=list(templates.keys()), value="default", scale=2) + template = gr.Dropdown(choices=list(TEMPLATES.keys()), value="default", scale=2) rope_scaling = gr.Radio(choices=["none", "linear", "dynamic"], value="none", scale=3) booster = gr.Radio(choices=["none", "flashattn2", "unsloth"], value="none", scale=3) visual_inputs = gr.Checkbox(scale=1) From 093abed7ccdb653219778dc8b68fee1e2044e003 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Fri, 7 Jun 2024 00:22:57 +0800 Subject: [PATCH 090/162] add qwen2 models Former-commit-id: 49cb694d02c876e3740a003a8b332349f4310ad3 --- README.md | 7 ++- README_zh.md | 7 ++- src/llamafactory/extras/constants.py | 83 ++++++++++++++++++++++++++++ 3 files changed, 93 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 77684757..1358b6e8 100644 --- a/README.md +++ b/README.md @@ -71,14 +71,16 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/ ## Changelog +[24/06/07] We supported fine-tuning the **[Qwen-2](https://qwenlm.github.io/blog/qwen2/)** series models. + [24/06/05] We supported fine-tuning the **[GLM-4-9B/GLM-4-9B-Chat](https://github.com/THUDM/GLM-4)** models. [24/05/26] We supported **[SimPO](https://arxiv.org/abs/2405.14734)** algorithm for preference learning. See [examples](examples/README.md) for usage. 
-[24/05/20] We supported fine-tuning the **PaliGemma** series models. Note that the PaliGemma models are pre-trained models, you need to fine-tune them with `gemma` template for chat completion. -
Full Changelog +[24/05/20] We supported fine-tuning the **PaliGemma** series models. Note that the PaliGemma models are pre-trained models, you need to fine-tune them with `gemma` template for chat completion. + [24/05/18] We supported **[KTO](https://arxiv.org/abs/2402.01306)** algorithm for preference learning. See [examples](examples/README.md) for usage. [24/05/14] We supported training and inference on the Ascend NPU devices. Check [installation](#installation) section for details. @@ -172,6 +174,7 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/ | [Phi-3](https://huggingface.co/microsoft) | 4B/7B/14B | phi | | [Qwen](https://huggingface.co/Qwen) | 1.8B/7B/14B/72B | qwen | | [Qwen1.5 (Code/MoE)](https://huggingface.co/Qwen) | 0.5B/1.8B/4B/7B/14B/32B/72B/110B | qwen | +| [Qwen2 (MoE)](https://huggingface.co/Qwen) | 0.5B/1.5B/7B/57B/72B | qwen | | [StarCoder2](https://huggingface.co/bigcode) | 3B/7B/15B | - | | [XVERSE](https://huggingface.co/xverse) | 7B/13B/65B | xverse | | [Yi (1/1.5)](https://huggingface.co/01-ai) | 6B/9B/34B | yi | diff --git a/README_zh.md b/README_zh.md index da5ff079..becec988 100644 --- a/README_zh.md +++ b/README_zh.md @@ -71,14 +71,16 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/ec36a9dd-37f4-4f72-81bd ## 更新日志 +[24/06/07] 我们支持了 **[Qwen-2](https://qwenlm.github.io/blog/qwen2/)** 系列模型的微调。 + [24/06/05] 我们支持了 **[GLM-4-9B/GLM-4-9B-Chat](https://github.com/THUDM/GLM-4)** 模型的微调。 [24/05/26] 我们支持了 **[SimPO](https://arxiv.org/abs/2405.14734)** 偏好对齐算法。详细用法请参照 [examples](examples/README_zh.md)。 -[24/05/20] 我们支持了 **PaliGemma** 系列模型的微调。注意 PaliGemma 是预训练模型,你需要使用 `gemma` 模板进行微调使其获得对话能力。 -
展开日志 +[24/05/20] 我们支持了 **PaliGemma** 系列模型的微调。注意 PaliGemma 是预训练模型,你需要使用 `gemma` 模板进行微调使其获得对话能力。 + [24/05/18] 我们支持了 **[KTO](https://arxiv.org/abs/2402.01306)** 偏好对齐算法。详细用法请参照 [examples](examples/README_zh.md)。 [24/05/14] 我们支持了昇腾 NPU 设备的训练和推理。详情请查阅[安装](#安装-llama-factory)部分。 @@ -172,6 +174,7 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/ec36a9dd-37f4-4f72-81bd | [Phi-3](https://huggingface.co/microsoft) | 4B/7B/14B | phi | | [Qwen](https://huggingface.co/Qwen) | 1.8B/7B/14B/72B | qwen | | [Qwen1.5 (Code/MoE)](https://huggingface.co/Qwen) | 0.5B/1.8B/4B/7B/14B/32B/72B/110B | qwen | +| [Qwen2 (MoE)](https://huggingface.co/Qwen) | 0.5B/1.5B/7B/57B/72B | qwen | | [StarCoder2](https://huggingface.co/bigcode) | 3B/7B/15B | - | | [XVERSE](https://huggingface.co/xverse) | 7B/13B/65B | xverse | | [Yi (1/1.5)](https://huggingface.co/01-ai) | 6B/9B/34B | yi | diff --git a/src/llamafactory/extras/constants.py b/src/llamafactory/extras/constants.py index 4d9cb26d..4099fe56 100644 --- a/src/llamafactory/extras/constants.py +++ b/src/llamafactory/extras/constants.py @@ -1078,6 +1078,89 @@ register_model_group( ) +register_model_group( + models={ + "Qwen2-0.5B": { + DownloadSource.DEFAULT: "Qwen/Qwen2-0.5B", + DownloadSource.MODELSCOPE: "qwen/Qwen2-0.5B", + }, + "Qwen2-1.5B": { + DownloadSource.DEFAULT: "Qwen/Qwen2-1.5B", + DownloadSource.MODELSCOPE: "qwen/Qwen2-1.5B", + }, + "Qwen2-7B": { + DownloadSource.DEFAULT: "Qwen/Qwen2-7B", + DownloadSource.MODELSCOPE: "qwen/Qwen2-7B", + }, + "Qwen2-72B": { + DownloadSource.DEFAULT: "Qwen/Qwen2-72B", + DownloadSource.MODELSCOPE: "qwen/Qwen2-72B", + }, + "Qwen2-MoE-57B": { + DownloadSource.DEFAULT: "Qwen/Qwen2-57B-A14B", + DownloadSource.MODELSCOPE: "qwen/Qwen2-57B-A14B", + }, + "Qwen2-0.5B-Chat": { + DownloadSource.DEFAULT: "Qwen/Qwen2-0.5B-Instruct", + DownloadSource.MODELSCOPE: "qwen/Qwen2-0.5B-Instruct", + }, + "Qwen2-1.5B-Chat": { + DownloadSource.DEFAULT: "Qwen/Qwen2-1.5B-Instruct", + DownloadSource.MODELSCOPE: "qwen/Qwen2-1.5B-Instruct", + }, + "Qwen2-7B-Chat": { + DownloadSource.DEFAULT: "Qwen/Qwen2-7B-Instruct", + DownloadSource.MODELSCOPE: "qwen/Qwen2-7B-Instruct", + }, + "Qwen2-72B-Chat": { + DownloadSource.DEFAULT: "Qwen/Qwen2-72B-Instruct", + DownloadSource.MODELSCOPE: "qwen/Qwen2-72B-Instruct", + }, + "Qwen2-MoE-57B-Chat": { + DownloadSource.DEFAULT: "Qwen/Qwen2-57B-A14B-Instruct", + DownloadSource.MODELSCOPE: "qwen/Qwen2-57B-A14B-Instruct", + }, + "Qwen2-0.5B-int8-Chat": { + DownloadSource.DEFAULT: "Qwen/Qwen2-0.5B-Instruct-GPTQ-Int8", + DownloadSource.MODELSCOPE: "qwen/Qwen2-0.5B-Instruct-GPTQ-Int8", + }, + "Qwen2-0.5B-int4-Chat": { + DownloadSource.DEFAULT: "Qwen/Qwen2-0.5B-Instruct-AWQ", + DownloadSource.MODELSCOPE: "qwen/Qwen2-0.5B-Instruct-AWQ", + }, + "Qwen2-1.5B-int8-Chat": { + DownloadSource.DEFAULT: "Qwen/Qwen2-1.5B-Instruct-GPTQ-Int8", + DownloadSource.MODELSCOPE: "qwen/Qwen2-1.5B-Instruct-GPTQ-Int8", + }, + "Qwen2-1.5B-int4-Chat": { + DownloadSource.DEFAULT: "Qwen/Qwen2-1.5B-Instruct-AWQ", + DownloadSource.MODELSCOPE: "qwen/Qwen2-1.5B-Instruct-AWQ", + }, + "Qwen2-7B-int8-Chat": { + DownloadSource.DEFAULT: "Qwen/Qwen2-7B-Instruct-GPTQ-Int8", + DownloadSource.MODELSCOPE: "qwen/Qwen2-7B-Instruct-GPTQ-Int8", + }, + "Qwen2-7B-int4-Chat": { + DownloadSource.DEFAULT: "Qwen/Qwen2-7B-Instruct-AWQ", + DownloadSource.MODELSCOPE: "qwen/Qwen2-7B-Instruct-AWQ", + }, + "Qwen2-72B-int8-Chat": { + DownloadSource.DEFAULT: "Qwen/Qwen2-72B-Instruct-GPTQ-Int8", + DownloadSource.MODELSCOPE: "qwen/Qwen2-72B-Instruct-GPTQ-Int8", + }, + 
"Qwen2-72B-int4-Chat": { + DownloadSource.DEFAULT: "Qwen/Qwen2-72B-Instruct-AWQ", + DownloadSource.MODELSCOPE: "qwen/Qwen2-72B-Instruct-AWQ", + }, + "Qwen2-MoE-57B-int4-Chat": { + DownloadSource.DEFAULT: "Qwen/Qwen2-57B-A14B-Instruct-GPTQ-Int4", + DownloadSource.MODELSCOPE: "qwen/Qwen2-57B-A14B-Instruct-GPTQ-Int4", + }, + }, + template="qwen", +) + + register_model_group( models={ "SOLAR-10.7B": { From b5421ac8ebf5ade217fd4fd82ebe8b7e80f33194 Mon Sep 17 00:00:00 2001 From: hoshi-hiyouga Date: Fri, 7 Jun 2024 03:38:04 +0800 Subject: [PATCH 091/162] Update supervised.py Former-commit-id: 67625b5278a839c12a3e4245f9e90af67d8b11b4 --- .../data/processors/supervised.py | 233 ++++++++---------- 1 file changed, 107 insertions(+), 126 deletions(-) diff --git a/src/llamafactory/data/processors/supervised.py b/src/llamafactory/data/processors/supervised.py index eaceb5b8..cd49fd0c 100644 --- a/src/llamafactory/data/processors/supervised.py +++ b/src/llamafactory/data/processors/supervised.py @@ -1,10 +1,10 @@ -import itertools +import bisect from collections import defaultdict -from typing import TYPE_CHECKING, Any, Dict, List, Optional +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Sequence, Tuple from ...extras.constants import IGNORE_INDEX from ...extras.logging import get_logger -from .mm_utils import get_paligemma_token_type_ids, get_pixel_values +from .mm_utils import get_paligemma_token_type_ids, get_pixel_values, greedy_knapsack if TYPE_CHECKING: @@ -18,29 +18,19 @@ if TYPE_CHECKING: logger = get_logger(__name__) -def binary_search_for_fit(numbers, capacity): +def search_for_fit(numbers: Sequence[int], capacity: int) -> int: + r""" + Finds the index of largest number that fits into the knapsack with the given capacity. """ - Perform binary search to find the largest number that fits into the knapsack with the given capacity. - """ - left, right = 0, len(numbers) - 1 - result = -1 # If no number fits, return -1 - - while left <= right: - mid = (left + right) // 2 - if numbers[mid] <= capacity: - result = mid - left = mid + 1 - else: - right = mid - 1 - - return result + index = bisect.bisect(numbers, capacity) + return -1 if index == 0 else (index - 1) -def efficient_greedy_knapsack(numbers, capacity): - """ +def greedy_knapsack(numbers: List[int], capacity: int) -> List[List[int]]: + r""" An efficient greedy algorithm with binary search for the knapsack problem. 
""" - numbers.sort() # Sort numbers in ascending order for binary search + numbers.sort() # sort numbers in ascending order for binary search knapsacks = [] while numbers: @@ -48,22 +38,60 @@ def efficient_greedy_knapsack(numbers, capacity): remaining_capacity = capacity while True: - index = binary_search_for_fit(numbers, remaining_capacity) + index = search_for_fit(numbers, remaining_capacity) if index == -1: - break # No more numbers fit in this knapsack + break # no more numbers fit in this knapsack - # Add the found number to the knapsack and update the remaining capacity - current_knapsack.append(numbers[index]) - remaining_capacity -= numbers[index] - - # Remove the number from the list - numbers.pop(index) + remaining_capacity -= numbers[index] # update the remaining capacity + current_knapsack.append(numbers.pop(index)) # add the number to knapsack knapsacks.append(current_knapsack) return knapsacks +def _encode_supervised_example( + prompt: Sequence[Dict[str, str]], + response: Sequence[Dict[str, str]], + system: Optional[str], + tools: Optional[str], + template: "Template", + tokenizer: "PreTrainedTokenizer", + processor: Optional["ProcessorMixin"], + data_args: "DataArguments", +) -> Tuple[List[int], List[int]]: + if processor is not None and not hasattr(processor, "image_seq_length"): # llava-like models + prompt[0]["content"] = template.image_token + prompt[0]["content"] + + messages = prompt + response + input_ids, labels = [], [] + + if processor is not None and hasattr(processor, "image_seq_length"): # paligemma models + image_token_id = tokenizer.convert_tokens_to_ids(template.image_token) + input_ids += [image_token_id] * getattr(processor, "image_seq_length") + labels += [IGNORE_INDEX] * getattr(processor, "image_seq_length") + + encoded_pairs = template.encode_multiturn( + tokenizer, messages, system, tools, data_args.cutoff_len, data_args.reserved_label_len + ) + for turn_idx, (source_ids, target_ids) in enumerate(encoded_pairs): + if data_args.train_on_prompt: + source_mask = source_ids + elif turn_idx != 0 and template.efficient_eos: + source_mask = [tokenizer.eos_token_id] + [IGNORE_INDEX] * (len(source_ids) - 1) + else: + source_mask = [IGNORE_INDEX] * len(source_ids) + + input_ids += source_ids + target_ids + labels += source_mask + target_ids + + if template.efficient_eos: + input_ids += [tokenizer.eos_token_id] + labels += [tokenizer.eos_token_id] + + return input_ids, labels + + def preprocess_supervised_dataset( examples: Dict[str, List[Any]], template: "Template", @@ -84,41 +112,16 @@ def preprocess_supervised_dataset( logger.warning("Dropped invalid example: {}".format(examples["prompt"][i] + examples["response"][i])) continue - if processor is not None and not hasattr(processor, "image_seq_length"): # llava-like models - examples["prompt"][i][0]["content"] = template.image_token + examples["prompt"][i][0]["content"] - - messages = examples["prompt"][i] + examples["response"][i] - input_ids, labels = [], [] - - if processor is not None and hasattr(processor, "image_seq_length"): # paligemma models - image_token_id = tokenizer.convert_tokens_to_ids(template.image_token) - input_ids += [image_token_id] * getattr(processor, "image_seq_length") - labels += [IGNORE_INDEX] * getattr(processor, "image_seq_length") - - for turn_idx, (source_ids, target_ids) in enumerate( - template.encode_multiturn( - tokenizer, - messages, - examples["system"][i], - examples["tools"][i], - data_args.cutoff_len, - data_args.reserved_label_len, - ) - ): - if 
data_args.train_on_prompt: - source_mask = source_ids - elif turn_idx != 0 and template.efficient_eos: - source_mask = [tokenizer.eos_token_id] + [IGNORE_INDEX] * (len(source_ids) - 1) - else: - source_mask = [IGNORE_INDEX] * len(source_ids) - - input_ids += source_ids + target_ids - labels += source_mask + target_ids - - if template.efficient_eos: - input_ids += [tokenizer.eos_token_id] - labels += [tokenizer.eos_token_id] - + input_ids, labels = _encode_supervised_example( + prompt=examples["prompt"][i], + response=examples["response"][i], + system=examples["system"][i], + tools=examples["tools"][i], + template=template, + tokenizer=tokenizer, + processor=processor, + data_args=data_args, + ) model_inputs["input_ids"].append(input_ids) model_inputs["attention_mask"].append([1] * len(input_ids)) model_inputs["labels"].append(labels) @@ -138,76 +141,54 @@ def preprocess_packed_supervised_dataset( ) -> Dict[str, List[List[int]]]: # build inputs with format ` X1 Y1 X2 Y2 ` # and labels with format ` ... Y1 ... Y2 ` - model_inputs = {"input_ids": [], "attention_mask": [], "labels": []} - input_ids, labels = [], [] + valid_num = 0 + batch_input_ids, batch_labels = [], [] + lengths = [] + length2indexes = defaultdict(list) for i in range(len(examples["prompt"])): if len(examples["prompt"][i]) % 2 != 1 or len(examples["response"][i]) != 1: logger.warning("Dropped invalid example: {}".format(examples["prompt"][i] + examples["response"][i])) continue - messages = examples["prompt"][i] + examples["response"][i] - for source_ids, target_ids in template.encode_multiturn( - tokenizer, messages, examples["system"][i], examples["tools"][i] - ): - if data_args.train_on_prompt: - source_mask = source_ids - else: - source_mask = [IGNORE_INDEX] * len(source_ids) - - input_ids.append(source_ids + target_ids) - labels.append(source_mask + target_ids) - - # prepare for packing - lengths = [] - length2examples_idx = defaultdict(list) - for idx, example in enumerate(input_ids): - length = len(example) + input_ids, labels = _encode_supervised_example( + prompt=examples["prompt"][i], + response=examples["response"][i], + system=examples["system"][i], + tools=examples["tools"][i], + template=template, + tokenizer=tokenizer, + processor=None, + data_args=data_args, + ) + length = len(input_ids) if length > data_args.cutoff_len: - logger.warning("Dropped example with length {} > cutoff_len {}".format(length, data_args.cutoff_len)) - continue - lengths.append(length) - length2examples_idx[length].append(idx) - - # cutoff_len - 1 for efficient_eos - knapsacks = efficient_greedy_knapsack(lengths, data_args.cutoff_len - int(template.efficient_eos)) - - for knapsack in knapsacks: - packed_input_ids = [] - packed_labels = [] - - total_length = 0 - for length in knapsack: - total_length += length - idx = length2examples_idx[length].pop() - packed_input_ids.append(input_ids[idx]) - packed_labels.append(labels[idx]) - - # padding to cutoff_len - if total_length < data_args.cutoff_len: - pad_length = data_args.cutoff_len - total_length - if template.efficient_eos: - # 确保有 eos - packed_input_ids.append([tokenizer.eos_token_id] * pad_length) - packed_labels.append([tokenizer.eos_token_id] + [IGNORE_INDEX] * (pad_length - 1)) - else: - # 无 eos 的情况下,使用 0 填充? 
- packed_input_ids.append([0] * pad_length) - packed_labels.append([tokenizer.eos_token_id] + [IGNORE_INDEX] * (pad_length - 1)) - - elif total_length == data_args.cutoff_len: - pad_length = 0 + logger.warning("Dropped lengthy example with length {} > {}.".format(length, data_args.cutoff_len)) else: - logger.warning( - "Dropped packed example with total length {} > cutoff_len {}".format( - total_length, data_args.cutoff_len - ) - ) - continue + lengths.append(length) + length2indexes[length].append(valid_num) + batch_input_ids.append(input_ids) + batch_labels.append(labels) + valid_num += 1 - # concat all - model_inputs["input_ids"].append(list(itertools.chain(*packed_input_ids))) - model_inputs["labels"].append(list(itertools.chain(*packed_labels))) - model_inputs["attention_mask"].append([1] * total_length + [0] * pad_length) + model_inputs = {"input_ids": [], "attention_mask": [], "labels": []} + knapsacks = greedy_knapsack(lengths, data_args.cutoff_len) + for knapsack in knapsacks: + packed_input_ids, packed_labels = [], [] + for length in knapsack: + index = length2indexes[length].pop() + packed_input_ids += batch_input_ids[index] + packed_labels += batch_labels[index] + + if len(packed_input_ids) <= data_args.cutoff_len: + pad_length = data_args.cutoff_len - len(packed_input_ids) + packed_input_ids += [tokenizer.pad_token_id] * pad_length + packed_labels += [IGNORE_INDEX] * pad_length + else: + raise ValueError("The length of packed example exceeds the cutoff length.") + + model_inputs["input_ids"].append(packed_input_ids) + model_inputs["attention_mask"].append([1] * len(packed_input_ids)) + model_inputs["labels"].append(packed_labels) return model_inputs From 9de42c4b3a0fa3087287328553d14dfcdd1bbe96 Mon Sep 17 00:00:00 2001 From: hoshi-hiyouga Date: Fri, 7 Jun 2024 03:38:23 +0800 Subject: [PATCH 092/162] Update supervised.py Former-commit-id: 49993c4f4e1f871a22ff0196afe60026b668a4dc --- src/llamafactory/data/processors/supervised.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llamafactory/data/processors/supervised.py b/src/llamafactory/data/processors/supervised.py index cd49fd0c..502b591c 100644 --- a/src/llamafactory/data/processors/supervised.py +++ b/src/llamafactory/data/processors/supervised.py @@ -4,7 +4,7 @@ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Sequence, Tuple from ...extras.constants import IGNORE_INDEX from ...extras.logging import get_logger -from .mm_utils import get_paligemma_token_type_ids, get_pixel_values, greedy_knapsack +from .mm_utils import get_paligemma_token_type_ids, get_pixel_values if TYPE_CHECKING: From 4a4b8d0b399089b46d939aa329c7304d4e1fa442 Mon Sep 17 00:00:00 2001 From: hoshi-hiyouga Date: Fri, 7 Jun 2024 03:42:08 +0800 Subject: [PATCH 093/162] Update supervised.py Former-commit-id: 04b6c2a754e602e0b698cfe6c255c2f2486d8865 --- src/llamafactory/data/processors/supervised.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/llamafactory/data/processors/supervised.py b/src/llamafactory/data/processors/supervised.py index 502b591c..a340a1ab 100644 --- a/src/llamafactory/data/processors/supervised.py +++ b/src/llamafactory/data/processors/supervised.py @@ -179,15 +179,16 @@ def preprocess_packed_supervised_dataset( packed_input_ids += batch_input_ids[index] packed_labels += batch_labels[index] - if len(packed_input_ids) <= data_args.cutoff_len: + if len(packed_input_ids) < data_args.cutoff_len: pad_length = data_args.cutoff_len - len(packed_input_ids) packed_input_ids += 
[tokenizer.pad_token_id] * pad_length packed_labels += [IGNORE_INDEX] * pad_length - else: - raise ValueError("The length of packed example exceeds the cutoff length.") + + if len(packed_input_ids) != data_args.cutoff_len: + raise ValueError("The length of packed example should be identical to the cutoff length.") model_inputs["input_ids"].append(packed_input_ids) - model_inputs["attention_mask"].append([1] * len(packed_input_ids)) + model_inputs["attention_mask"].append([1] * data_args.cutoff_len) model_inputs["labels"].append(packed_labels) return model_inputs From 72c91be9435197edb8ef616ac7d3381ab9e0e803 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Fri, 7 Jun 2024 04:15:40 +0800 Subject: [PATCH 094/162] update data processors Former-commit-id: 04b138cbcb8b9a72e4bbda6c65843bb459e525e7 --- src/llamafactory/cli.py | 2 +- src/llamafactory/data/processors/feedback.py | 104 ++++++++++-------- src/llamafactory/data/processors/pairwise.py | 82 ++++++++------ .../data/processors/processor_utils.py | 41 ++++++- .../data/processors/supervised.py | 35 +----- .../data/processors/unsupervised.py | 65 +++++++---- 6 files changed, 190 insertions(+), 139 deletions(-) diff --git a/src/llamafactory/cli.py b/src/llamafactory/cli.py index 092f4cf7..b9e734e4 100644 --- a/src/llamafactory/cli.py +++ b/src/llamafactory/cli.py @@ -72,7 +72,7 @@ def main(): elif command == Command.EXPORT: export_model() elif command == Command.TRAIN: - disable_torchrun = os.environ.get("DISABLE_TORCHRUN", "0").lower() in ["true", "1"] + disable_torchrun = os.environ.get("TORCHRUN_DISABLED", "0").lower() in ["true", "1"] if disable_torchrun and get_device_count() > 1: logger.warning("`torchrun` cannot be disabled when device count > 1.") disable_torchrun = False diff --git a/src/llamafactory/data/processors/feedback.py b/src/llamafactory/data/processors/feedback.py index dc7d817c..98d83658 100644 --- a/src/llamafactory/data/processors/feedback.py +++ b/src/llamafactory/data/processors/feedback.py @@ -1,4 +1,4 @@ -from typing import TYPE_CHECKING, Any, Dict, List, Optional +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Sequence, Tuple from ...extras.constants import IGNORE_INDEX from ...extras.logging import get_logger @@ -16,6 +16,55 @@ if TYPE_CHECKING: logger = get_logger(__name__) +def _encode_feedback_example( + prompt: Sequence[Dict[str, str]], + response: Sequence[Dict[str, str]], + kl_response: Sequence[Dict[str, str]], + system: Optional[str], + tools: Optional[str], + template: "Template", + tokenizer: "PreTrainedTokenizer", + processor: Optional["ProcessorMixin"], + data_args: "DataArguments", +) -> Tuple[List[int], List[int], List[int], List[int], bool]: + if processor is not None and not hasattr(processor, "image_seq_length"): # llava-like models + prompt[0]["content"] = template.image_token + prompt[0]["content"] + + if response[0]["content"]: # desired example + kto_tag = True + messages = prompt + [response[0]] + else: # undesired example + kto_tag = False + messages = prompt + [response[1]] + + if kl_response[0]["content"]: + kl_messages = prompt + [kl_response[0]] + else: + kl_messages = prompt + [kl_response[1]] + + prompt_ids, response_ids = template.encode_oneturn( + tokenizer, messages, system, tools, data_args.cutoff_len, data_args.reserved_label_len + ) + _, kl_response_ids = template.encode_oneturn( + tokenizer, kl_messages, system, tools, data_args.cutoff_len, data_args.reserved_label_len + ) + + if template.efficient_eos: + response_ids += [tokenizer.eos_token_id] + 
kl_response_ids += [tokenizer.eos_token_id] + + if processor is not None and hasattr(processor, "image_seq_length"): # paligemma models + image_token_id = tokenizer.convert_tokens_to_ids(template.image_token) + prompt_ids = [image_token_id] * getattr(processor, "image_seq_length") + prompt_ids + + input_ids = prompt_ids + response_ids + labels = [IGNORE_INDEX] * len(prompt_ids) + response_ids + kl_input_ids = prompt_ids + kl_response_ids + kl_labels = [IGNORE_INDEX] * len(prompt_ids) + kl_response_ids + + return input_ids, labels, kl_input_ids, kl_labels, kto_tag + + def preprocess_feedback_dataset( examples: Dict[str, List[Any]], template: "Template", @@ -45,50 +94,17 @@ def preprocess_feedback_dataset( logger.warning("Dropped invalid example: {}".format(examples["prompt"][i] + examples["response"][i])) continue - if processor is not None and not hasattr(processor, "image_seq_length"): # llava-like models - examples["prompt"][i][0]["content"] = template.image_token + examples["prompt"][i][0]["content"] - - if examples["response"][i][0]["content"]: # desired example - kto_tag = True - messages = examples["prompt"][i] + [examples["response"][i][0]] - else: # undesired example - kto_tag = False - messages = examples["prompt"][i] + [examples["response"][i][1]] - - if kl_response[i][0]["content"]: - kl_messages = examples["prompt"][i] + [kl_response[i][0]] - else: - kl_messages = examples["prompt"][i] + [kl_response[i][1]] - - prompt_ids, response_ids = template.encode_oneturn( - tokenizer, - messages, - examples["system"][i], - examples["tools"][i], - data_args.cutoff_len, - data_args.reserved_label_len, + input_ids, labels, kl_input_ids, kl_labels, kto_tag = _encode_feedback_example( + prompt=examples["prompt"][i], + response=examples["response"][i], + kl_response=kl_response[i], + system=examples["system"][i], + tools=examples["tools"][i], + template=template, + tokenizer=tokenizer, + processor=processor, + data_args=data_args, ) - _, kl_response_ids = template.encode_oneturn( - tokenizer, - kl_messages, - examples["system"][i], - examples["tools"][i], - data_args.cutoff_len, - data_args.reserved_label_len, - ) - - if template.efficient_eos: - response_ids += [tokenizer.eos_token_id] - kl_response_ids += [tokenizer.eos_token_id] - - if processor is not None and hasattr(processor, "image_seq_length"): # paligemma models - image_token_id = tokenizer.convert_tokens_to_ids(template.image_token) - prompt_ids = [image_token_id] * getattr(processor, "image_seq_length") + prompt_ids - - input_ids = prompt_ids + response_ids - labels = [IGNORE_INDEX] * len(prompt_ids) + response_ids - kl_input_ids = prompt_ids + kl_response_ids - kl_labels = [IGNORE_INDEX] * len(prompt_ids) + kl_response_ids model_inputs["input_ids"].append(input_ids) model_inputs["attention_mask"].append([1] * len(input_ids)) model_inputs["labels"].append(labels) diff --git a/src/llamafactory/data/processors/pairwise.py b/src/llamafactory/data/processors/pairwise.py index 8ad3979f..fe984efa 100644 --- a/src/llamafactory/data/processors/pairwise.py +++ b/src/llamafactory/data/processors/pairwise.py @@ -1,4 +1,4 @@ -from typing import TYPE_CHECKING, Any, Dict, List, Optional +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Sequence, Tuple from ...extras.constants import IGNORE_INDEX from ...extras.logging import get_logger @@ -16,6 +16,44 @@ if TYPE_CHECKING: logger = get_logger(__name__) +def _encode_pairwise_example( + prompt: Sequence[Dict[str, str]], + response: Sequence[Dict[str, str]], + system: Optional[str], + 
tools: Optional[str], + template: "Template", + tokenizer: "PreTrainedTokenizer", + processor: Optional["ProcessorMixin"], + data_args: "DataArguments", +) -> Tuple[List[int], List[int], List[int], List[int]]: + if processor is not None and not hasattr(processor, "image_seq_length"): # llava-like models + prompt[0]["content"] = template.image_token + prompt[0]["content"] + + chosen_messages = prompt + [response[0]] + rejected_messages = prompt + [response[1]] + prompt_ids, chosen_ids = template.encode_oneturn( + tokenizer, chosen_messages, system, tools, data_args.cutoff_len, data_args.reserved_label_len + ) + _, rejected_ids = template.encode_oneturn( + tokenizer, rejected_messages, system, tools, data_args.cutoff_len, data_args.reserved_label_len + ) + + if template.efficient_eos: + chosen_ids += [tokenizer.eos_token_id] + rejected_ids += [tokenizer.eos_token_id] + + if processor is not None and hasattr(processor, "image_seq_length"): # paligemma models + image_token_id = tokenizer.convert_tokens_to_ids(template.image_token) + prompt_ids = [image_token_id] * getattr(processor, "image_seq_length") + prompt_ids + + chosen_input_ids = prompt_ids + chosen_ids + chosen_labels = [IGNORE_INDEX] * len(prompt_ids) + chosen_ids + rejected_input_ids = prompt_ids + rejected_ids + rejected_labels = [IGNORE_INDEX] * len(prompt_ids) + rejected_ids + + return chosen_input_ids, chosen_labels, rejected_input_ids, rejected_labels + + def preprocess_pairwise_dataset( examples: Dict[str, List[Any]], template: "Template", @@ -43,40 +81,16 @@ def preprocess_pairwise_dataset( logger.warning("Dropped invalid example: {}".format(examples["prompt"][i] + examples["response"][i])) continue - if processor is not None and not hasattr(processor, "image_seq_length"): # llava-like models - examples["prompt"][i][0]["content"] = template.image_token + examples["prompt"][i][0]["content"] - - chosen_messages = examples["prompt"][i] + [examples["response"][i][0]] - rejected_messages = examples["prompt"][i] + [examples["response"][i][1]] - prompt_ids, chosen_ids = template.encode_oneturn( - tokenizer, - chosen_messages, - examples["system"][i], - examples["tools"][i], - data_args.cutoff_len, - data_args.reserved_label_len, + chosen_input_ids, chosen_labels, rejected_input_ids, rejected_labels = _encode_pairwise_example( + prompt=examples["prompt"][i], + response=examples["response"][i], + system=examples["system"][i], + tools=examples["tools"][i], + template=template, + tokenizer=tokenizer, + processor=processor, + data_args=data_args, ) - _, rejected_ids = template.encode_oneturn( - tokenizer, - rejected_messages, - examples["system"][i], - examples["tools"][i], - data_args.cutoff_len, - data_args.reserved_label_len, - ) - - if template.efficient_eos: - chosen_ids += [tokenizer.eos_token_id] - rejected_ids += [tokenizer.eos_token_id] - - if processor is not None and hasattr(processor, "image_seq_length"): # paligemma models - image_token_id = tokenizer.convert_tokens_to_ids(template.image_token) - prompt_ids = [image_token_id] * getattr(processor, "image_seq_length") + prompt_ids - - chosen_input_ids = prompt_ids + chosen_ids - chosen_labels = [IGNORE_INDEX] * len(prompt_ids) + chosen_ids - rejected_input_ids = prompt_ids + rejected_ids - rejected_labels = [IGNORE_INDEX] * len(prompt_ids) + rejected_ids model_inputs["chosen_input_ids"].append(chosen_input_ids) model_inputs["chosen_attention_mask"].append([1] * len(chosen_input_ids)) model_inputs["chosen_labels"].append(chosen_labels) diff --git 
a/src/llamafactory/data/processors/processor_utils.py b/src/llamafactory/data/processors/processor_utils.py index abc7c4b2..9903a053 100644 --- a/src/llamafactory/data/processors/processor_utils.py +++ b/src/llamafactory/data/processors/processor_utils.py @@ -1,3 +1,4 @@ +import bisect from typing import TYPE_CHECKING, List, Sequence from ...extras.packages import is_pillow_available @@ -14,14 +15,50 @@ if TYPE_CHECKING: from transformers.image_processing_utils import BaseImageProcessor +def search_for_fit(numbers: Sequence[int], capacity: int) -> int: + r""" + Finds the index of largest number that fits into the knapsack with the given capacity. + """ + index = bisect.bisect(numbers, capacity) + return -1 if index == 0 else (index - 1) + + +def greedy_knapsack(numbers: List[int], capacity: int) -> List[List[int]]: + r""" + An efficient greedy algorithm with binary search for the knapsack problem. + """ + numbers.sort() # sort numbers in ascending order for binary search + knapsacks = [] + + while numbers: + current_knapsack = [] + remaining_capacity = capacity + + while True: + index = search_for_fit(numbers, remaining_capacity) + if index == -1: + break # no more numbers fit in this knapsack + + remaining_capacity -= numbers[index] # update the remaining capacity + current_knapsack.append(numbers.pop(index)) # add the number to knapsack + + knapsacks.append(current_knapsack) + + return knapsacks + + def get_pixel_values(images: Sequence["ImageObject"], processor: "ProcessorMixin") -> "NDArray": - # process visual inputs (currently only supports a single image) + r""" + Processes visual inputs. (currently only supports a single image) + """ image_processor: "BaseImageProcessor" = getattr(processor, "image_processor") image = images[0] if len(images) != 0 else Image.new("RGB", (100, 100), (255, 255, 255)) return image_processor(image, return_tensors="pt")["pixel_values"][0] # shape (C, H, W) def get_paligemma_token_type_ids(input_len: int, processor: "ProcessorMixin") -> List[int]: - # get paligemma token type ids for computing loss + r""" + Gets paligemma token type ids for computing loss. + """ image_seq_length = getattr(processor, "image_seq_length") return [0] * image_seq_length + [1] * (input_len - image_seq_length) diff --git a/src/llamafactory/data/processors/supervised.py b/src/llamafactory/data/processors/supervised.py index 188c9f80..19d60280 100644 --- a/src/llamafactory/data/processors/supervised.py +++ b/src/llamafactory/data/processors/supervised.py @@ -1,10 +1,9 @@ -import bisect from collections import defaultdict from typing import TYPE_CHECKING, Any, Dict, List, Optional, Sequence, Tuple from ...extras.constants import IGNORE_INDEX from ...extras.logging import get_logger -from .processor_utils import get_paligemma_token_type_ids, get_pixel_values +from .processor_utils import get_paligemma_token_type_ids, get_pixel_values, greedy_knapsack if TYPE_CHECKING: @@ -18,38 +17,6 @@ if TYPE_CHECKING: logger = get_logger(__name__) -def search_for_fit(numbers: Sequence[int], capacity: int) -> int: - r""" - Finds the index of largest number that fits into the knapsack with the given capacity. - """ - index = bisect.bisect(numbers, capacity) - return -1 if index == 0 else (index - 1) - - -def greedy_knapsack(numbers: List[int], capacity: int) -> List[List[int]]: - r""" - An efficient greedy algorithm with binary search for the knapsack problem. 
- """ - numbers.sort() # sort numbers in ascending order for binary search - knapsacks = [] - - while numbers: - current_knapsack = [] - remaining_capacity = capacity - - while True: - index = search_for_fit(numbers, remaining_capacity) - if index == -1: - break # no more numbers fit in this knapsack - - remaining_capacity -= numbers[index] # update the remaining capacity - current_knapsack.append(numbers.pop(index)) # add the number to knapsack - - knapsacks.append(current_knapsack) - - return knapsacks - - def _encode_supervised_example( prompt: Sequence[Dict[str, str]], response: Sequence[Dict[str, str]], diff --git a/src/llamafactory/data/processors/unsupervised.py b/src/llamafactory/data/processors/unsupervised.py index e00bde55..f711eeac 100644 --- a/src/llamafactory/data/processors/unsupervised.py +++ b/src/llamafactory/data/processors/unsupervised.py @@ -1,4 +1,4 @@ -from typing import TYPE_CHECKING, Any, Dict, List, Optional +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Sequence, Tuple from ...extras.logging import get_logger from ..data_utils import Role @@ -16,6 +16,37 @@ if TYPE_CHECKING: logger = get_logger(__name__) +def _encode_unsupervised_example( + prompt: Sequence[Dict[str, str]], + response: Sequence[Dict[str, str]], + system: Optional[str], + tools: Optional[str], + template: "Template", + tokenizer: "PreTrainedTokenizer", + processor: Optional["ProcessorMixin"], + data_args: "DataArguments", +) -> Tuple[List[int], List[int]]: + if processor is not None and not hasattr(processor, "image_seq_length"): # llava-like models + prompt[0]["content"] = template.image_token + prompt[0]["content"] + + if len(response) == 1: + messages = prompt + response + else: + messages = prompt + [{"role": Role.ASSISTANT.value, "content": ""}] + + input_ids, labels = template.encode_oneturn( + tokenizer, messages, system, tools, data_args.cutoff_len, data_args.reserved_label_len + ) + if template.efficient_eos: + labels += [tokenizer.eos_token_id] + + if processor is not None and hasattr(processor, "image_seq_length"): # paligemma models + image_token_id = tokenizer.convert_tokens_to_ids(template.image_token) + input_ids = [image_token_id] * getattr(processor, "image_seq_length") + input_ids + + return input_ids, labels + + def preprocess_unsupervised_dataset( examples: Dict[str, List[Any]], template: "Template", @@ -35,30 +66,16 @@ def preprocess_unsupervised_dataset( logger.warning("Dropped invalid example: {}".format(examples["prompt"][i] + examples["response"][i])) continue - if processor is not None and not hasattr(processor, "image_seq_length"): # llava-like models - examples["prompt"][i][0]["content"] = template.image_token + examples["prompt"][i][0]["content"] - - if len(examples["response"][i]) == 1: - messages = examples["prompt"][i] + examples["response"][i] - else: - messages = examples["prompt"][i] + [{"role": Role.ASSISTANT.value, "content": ""}] - - input_ids, labels = template.encode_oneturn( - tokenizer, - messages, - examples["system"][i], - examples["tools"][i], - data_args.cutoff_len, - data_args.reserved_label_len, + input_ids, labels = _encode_unsupervised_example( + prompt=examples["prompt"][i], + response=examples["response"][i], + system=examples["system"][i], + tools=examples["tools"][i], + template=template, + tokenizer=tokenizer, + processor=processor, + data_args=data_args, ) - - if template.efficient_eos: - labels += [tokenizer.eos_token_id] - - if processor is not None and hasattr(processor, "image_seq_length"): # paligemma models - 
image_token_id = tokenizer.convert_tokens_to_ids(template.image_token) - input_ids = [image_token_id] * getattr(processor, "image_seq_length") + input_ids - model_inputs["input_ids"].append(input_ids) model_inputs["attention_mask"].append([1] * len(input_ids)) model_inputs["labels"].append(labels) From 8cc3bbdc62fdd1a6aca2d34410f71c0779b69337 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Fri, 7 Jun 2024 04:18:05 +0800 Subject: [PATCH 095/162] fix #4120 Former-commit-id: 2a44da678a5e360a9c0f9056397ac9e801329321 --- README.md | 2 +- README_zh.md | 2 +- requirements.txt | 2 +- src/llamafactory/extras/misc.py | 2 +- src/llamafactory/train/dpo/trainer.py | 9 +--- src/llamafactory/train/kto/trainer.py | 60 ++++++++----------------- src/llamafactory/train/trainer_utils.py | 24 +++++++++- 7 files changed, 47 insertions(+), 54 deletions(-) diff --git a/README.md b/README.md index 1358b6e8..036ee648 100644 --- a/README.md +++ b/README.md @@ -298,7 +298,7 @@ huggingface-cli login | datasets | 2.16.0 | 2.19.2 | | accelerate | 0.30.1 | 0.30.1 | | peft | 0.11.1 | 0.11.1 | -| trl | 0.9.3 | 0.9.3 | +| trl | 0.8.6 | 0.9.3 | | Optional | Minimum | Recommend | | ------------ | ------- | --------- | diff --git a/README_zh.md b/README_zh.md index becec988..fb0d790e 100644 --- a/README_zh.md +++ b/README_zh.md @@ -298,7 +298,7 @@ huggingface-cli login | datasets | 2.16.0 | 2.19.2 | | accelerate | 0.30.1 | 0.30.1 | | peft | 0.11.1 | 0.11.1 | -| trl | 0.9.3 | 0.9.3 | +| trl | 0.8.6 | 0.9.3 | | 可选项 | 至少 | 推荐 | | ------------ | ------- | --------- | diff --git a/requirements.txt b/requirements.txt index 7b6cbee9..9e00555e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,7 +2,7 @@ transformers>=4.41.2 datasets>=2.16.0 accelerate>=0.30.1 peft>=0.11.1 -trl>=0.9.3 +trl>=0.8.6 gradio>=4.0.0 scipy einops diff --git a/src/llamafactory/extras/misc.py b/src/llamafactory/extras/misc.py index 48476f9c..fc33f77e 100644 --- a/src/llamafactory/extras/misc.py +++ b/src/llamafactory/extras/misc.py @@ -65,7 +65,7 @@ def check_dependencies() -> None: require_version("datasets>=2.16.0", "To fix: pip install datasets>=2.16.0") require_version("accelerate>=0.30.1", "To fix: pip install accelerate>=0.30.1") require_version("peft>=0.11.1", "To fix: pip install peft>=0.11.1") - require_version("trl>=0.9.3", "To fix: pip install trl>=0.9.3") + require_version("trl>=0.8.6", "To fix: pip install trl>=0.8.6") def count_parameters(model: torch.nn.Module) -> Tuple[int, int]: diff --git a/src/llamafactory/train/dpo/trainer.py b/src/llamafactory/train/dpo/trainer.py index f64c287f..d860b29a 100644 --- a/src/llamafactory/train/dpo/trainer.py +++ b/src/llamafactory/train/dpo/trainer.py @@ -10,7 +10,7 @@ from trl import DPOTrainer from trl.trainer import disable_dropout_in_model from ...extras.constants import IGNORE_INDEX -from ..trainer_utils import create_custom_optimzer, create_custom_scheduler, get_ref_context +from ..trainer_utils import create_custom_optimzer, create_custom_scheduler, get_batch_logps, get_ref_context if TYPE_CHECKING: @@ -155,12 +155,7 @@ class CustomDPOTrainer(DPOTrainer): all_logits: "torch.Tensor" = model(**batch, return_dict=True, use_cache=False).logits.to(torch.float32) - all_logps, valid_length = self.get_batch_logps( - logits=all_logits, - labels=batch["labels"], - is_encoder_decoder=self.is_encoder_decoder, - label_pad_token_id=self.label_pad_token_id, - ) + all_logps, valid_length = get_batch_logps(logits=all_logits, labels=batch["labels"]) if self.loss_type in ["ipo", "orpo", "simpo"]: 
all_logps = all_logps / valid_length diff --git a/src/llamafactory/train/kto/trainer.py b/src/llamafactory/train/kto/trainer.py index 1610ccfa..22a84e4a 100644 --- a/src/llamafactory/train/kto/trainer.py +++ b/src/llamafactory/train/kto/trainer.py @@ -9,7 +9,7 @@ from trl import KTOTrainer from trl.trainer import disable_dropout_in_model from ...extras.constants import IGNORE_INDEX -from ..trainer_utils import create_custom_optimzer, create_custom_scheduler, get_ref_context +from ..trainer_utils import create_custom_optimzer, create_custom_scheduler, get_batch_logps, get_ref_context if TYPE_CHECKING: @@ -98,16 +98,6 @@ class CustomKTOTrainer(KTOTrainer): output_dir = output_dir if output_dir is not None else self.args.output_dir getattr(self.processor, "image_processor").save_pretrained(output_dir) - def sft_loss(self, chosen_logits: "torch.FloatTensor", chosen_labels: "torch.LongTensor") -> "torch.Tensor": - r""" - Computes supervised cross-entropy loss of given labels under the given logits. - - Returns: - A tensor of shape (batch_size,) containing the cross-entropy loss of each samples. - """ - all_logps = self.get_batch_logps(chosen_logits, chosen_labels, average_log_prob=True) - return -all_logps - def forward( self, model: "PreTrainedModel", batch: Dict[str, "torch.Tensor"], prefix: Literal["", "kl_"] = "" ) -> Tuple["torch.Tensor", "torch.Tensor"]: @@ -127,28 +117,23 @@ class CustomKTOTrainer(KTOTrainer): logits = model(**model_inputs, return_dict=True, use_cache=False).logits.to(torch.float32) - logps = self.get_batch_logps( - logits=logits, - labels=batch["{}labels".format(prefix)], - average_log_prob=False, - is_encoder_decoder=self.is_encoder_decoder, - label_pad_token_id=self.label_pad_token_id, - ) - return logits, logps + logps, valid_length = get_batch_logps(logits=logits, labels=batch["{}labels".format(prefix)]) + return logps, logps / valid_length def concatenated_forward( self, model: "PreTrainedModel", batch: Dict[str, "torch.Tensor"] - ) -> Tuple["torch.Tensor", "torch.Tensor", "torch.Tensor", "torch.Tensor", "torch.Tensor"]: - target_logits, target_logps = self.forward(model, batch) + ) -> Tuple["torch.Tensor", "torch.Tensor", "torch.Tensor", "torch.Tensor"]: + target_logps, target_logps_avg = self.forward(model, batch) with torch.no_grad(): - _, kl_logps = self.forward(model, batch, prefix="kl_") + kl_logps, _ = self.forward(model, batch, prefix="kl_") if len(target_logps) != len(batch["kto_tags"]): raise ValueError("Mismatched shape of inputs and labels.") - chosen_logps, rejected_logps = target_logps[batch["kto_tags"]], target_logps[~batch["kto_tags"]] - chosen_logits, rejected_logits = target_logits[batch["kto_tags"]], target_logits[~batch["kto_tags"]] - return chosen_logps, rejected_logps, chosen_logits, rejected_logits, kl_logps + chosen_logps = target_logps[batch["kto_tags"]] + rejected_logps = target_logps[~batch["kto_tags"]] + chosen_logps_avg = target_logps_avg[batch["kto_tags"]] + return chosen_logps, rejected_logps, kl_logps, chosen_logps_avg def compute_reference_log_probs( self, model: "PreTrainedModel", batch: Dict[str, "torch.Tensor"] @@ -164,13 +149,9 @@ class CustomKTOTrainer(KTOTrainer): ref_context = nullcontext() with torch.no_grad(), ref_context: - ( - reference_chosen_logps, - reference_rejected_logps, - _, - _, - reference_kl_logps, - ) = self.concatenated_forward(ref_model, batch) + reference_chosen_logps, reference_rejected_logps, reference_kl_logps, _ = self.concatenated_forward( + ref_model, batch + ) return reference_chosen_logps, 
reference_rejected_logps, reference_kl_logps @@ -183,14 +164,9 @@ class CustomKTOTrainer(KTOTrainer): Computes the DPO loss and other metrics for the given batch of inputs for train or test. """ metrics = {} - ( - policy_chosen_logps, - policy_rejected_logps, - policy_chosen_logits, - _, - policy_kl_logps, - ) = self.concatenated_forward(model, batch) - + policy_chosen_logps, policy_rejected_logps, policy_kl_logps, policy_chosen_logps_avg = ( + self.concatenated_forward(model, batch) + ) reference_chosen_logps, reference_rejected_logps, reference_kl_logps = self.compute_reference_log_probs( model, batch ) @@ -205,8 +181,8 @@ class CustomKTOTrainer(KTOTrainer): losses = losses.nanmean() if self.ftx_gamma > 1e-6 and len(policy_chosen_logps) > 0: # remember to rescale - sft_loss = self.sft_loss(policy_chosen_logits, batch["labels"][batch["kto_tags"]]) - losses += self.ftx_gamma * sft_loss.nanmean() / len(policy_chosen_logits) * len(batch["labels"]) + sft_loss = -policy_chosen_logps_avg + losses += self.ftx_gamma * sft_loss.nanmean() / len(policy_chosen_logps) * len(batch["labels"]) num_chosen = torch.Tensor([len(chosen_rewards)]).to(self.accelerator.device) num_rejected = torch.Tensor([len(rejected_rewards)]).to(self.accelerator.device) diff --git a/src/llamafactory/train/trainer_utils.py b/src/llamafactory/train/trainer_utils.py index 2b33af1c..0ddcdb11 100644 --- a/src/llamafactory/train/trainer_utils.py +++ b/src/llamafactory/train/trainer_utils.py @@ -1,5 +1,5 @@ from contextlib import contextmanager -from typing import TYPE_CHECKING, Callable, Dict, List, Optional, Union +from typing import TYPE_CHECKING, Callable, Dict, List, Optional, Tuple, Union import torch from transformers import Trainer @@ -7,6 +7,7 @@ from transformers.optimization import get_scheduler from transformers.pytorch_utils import ALL_LAYERNORM_LAYERS from transformers.trainer_pt_utils import get_parameter_names +from ..extras.constants import IGNORE_INDEX from ..extras.logging import get_logger from ..extras.packages import is_galore_available from ..hparams import FinetuningArguments, ModelArguments @@ -399,3 +400,24 @@ def create_custom_scheduler( for param in optimizer_dict.keys(): param.register_post_accumulate_grad_hook(scheduler_hook) + + +def get_batch_logps( + logits: "torch.Tensor", labels: "torch.Tensor", label_pad_token_id: int = IGNORE_INDEX +) -> Tuple["torch.Tensor", "torch.Tensor"]: + r""" + Computes the log probabilities of the given labels under the given logits. + + Returns: + logps: A tensor of shape (batch_size,) containing the sum of log probabilities. + valid_length: A tensor of shape (batch_size,) containing the number of non-masked tokens. 
+ """ + if logits.shape[:-1] != labels.shape: + raise ValueError("Logits (batchsize x seqlen) and labels must have the same shape.") + + labels = labels[:, 1:].clone() + logits = logits[:, :-1, :] + loss_mask = labels != label_pad_token_id + labels[labels == label_pad_token_id] = 0 # dummy token + per_token_logps = torch.gather(logits.log_softmax(-1), dim=2, index=labels.unsqueeze(2)).squeeze(2) + return (per_token_logps * loss_mask).sum(-1), loss_mask.sum(-1) From 7b7dc2f42390893f524f62f1e528b3390e905982 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Fri, 7 Jun 2024 04:48:29 +0800 Subject: [PATCH 096/162] fix ppo in trl 0.8.6 Former-commit-id: 5e0d66a0d80b4bd4a8506e2317209d8fb9d25ff6 --- README.md | 2 +- README_zh.md | 2 +- src/llamafactory/train/ppo/ppo_utils.py | 27 +++++++++++++++------ src/llamafactory/train/ppo/trainer.py | 31 ++++++++++++------------- 4 files changed, 37 insertions(+), 25 deletions(-) diff --git a/README.md b/README.md index 036ee648..77d9c709 100644 --- a/README.md +++ b/README.md @@ -298,7 +298,7 @@ huggingface-cli login | datasets | 2.16.0 | 2.19.2 | | accelerate | 0.30.1 | 0.30.1 | | peft | 0.11.1 | 0.11.1 | -| trl | 0.8.6 | 0.9.3 | +| trl | 0.8.6 | 0.9.4 | | Optional | Minimum | Recommend | | ------------ | ------- | --------- | diff --git a/README_zh.md b/README_zh.md index fb0d790e..9a52a963 100644 --- a/README_zh.md +++ b/README_zh.md @@ -298,7 +298,7 @@ huggingface-cli login | datasets | 2.16.0 | 2.19.2 | | accelerate | 0.30.1 | 0.30.1 | | peft | 0.11.1 | 0.11.1 | -| trl | 0.8.6 | 0.9.3 | +| trl | 0.8.6 | 0.9.4 | | 可选项 | 至少 | 推荐 | | ------------ | ------- | --------- | diff --git a/src/llamafactory/train/ppo/ppo_utils.py b/src/llamafactory/train/ppo/ppo_utils.py index 570409f2..cfda0b3d 100644 --- a/src/llamafactory/train/ppo/ppo_utils.py +++ b/src/llamafactory/train/ppo/ppo_utils.py @@ -1,7 +1,9 @@ import json +from contextlib import nullcontext from typing import TYPE_CHECKING, Dict, List, Literal, Optional import torch +from transformers.integrations import is_deepspeed_zero3_enabled from ...extras.packages import is_requests_available @@ -28,16 +30,27 @@ def get_rewards_from_server(server_url: str, messages: List[str]) -> List[torch. def replace_model(model: "AutoModelForCausalLMWithValueHead", target: Literal["default", "reward"]) -> None: r""" - Replaces the default/reward modules in the model. The model is already unwrapped (and gathered). + Replaces the default/reward modules in the model. The model is already unwrapped. 
""" - if target == "reward": # save default head temporarily - setattr(model, "default_head_weight", model.v_head.summary.weight.data.detach().clone()) - setattr(model, "default_head_bias", model.v_head.summary.bias.data.detach().clone()) + if is_deepspeed_zero3_enabled(): + import deepspeed # type: ignore + + params = [model.v_head.summary.weight, model.v_head.summary.bias] + context_maybe_zero3 = deepspeed.zero.GatheredParameters(params, modifier_rank=0) + else: + context_maybe_zero3 = nullcontext() model.pretrained_model.set_adapter(target) # set the LoRA adapter to be active - device = model.v_head.summary.weight.device - model.v_head.summary.weight.data = model.get_buffer("{}_head_weight".format(target)).detach().clone().to(device) - model.v_head.summary.bias.data = model.get_buffer("{}_head_bias".format(target)).detach().clone().to(device) + with context_maybe_zero3: + if target == "reward": # save default head temporarily + setattr(model, "default_head_weight", model.v_head.summary.weight.data.detach().clone()) + setattr(model, "default_head_bias", model.v_head.summary.bias.data.detach().clone()) + + device = model.v_head.summary.weight.device + model.v_head.summary.weight.data = ( + model.get_buffer("{}_head_weight".format(target)).detach().clone().to(device) + ) + model.v_head.summary.bias.data = model.get_buffer("{}_head_bias".format(target)).detach().clone().to(device) def dump_layernorm(model: "PreTrainedModel") -> Dict[str, torch.Tensor]: diff --git a/src/llamafactory/train/ppo/trainer.py b/src/llamafactory/train/ppo/trainer.py index 7addfc3c..0ad1b8e8 100644 --- a/src/llamafactory/train/ppo/trainer.py +++ b/src/llamafactory/train/ppo/trainer.py @@ -309,12 +309,6 @@ class CustomPPOTrainer(PPOTrainer, Trainer): ) return lr_scheduler - def _save(self, output_dir: Optional[str] = None, state_dict: Optional[Dict[str, "torch.Tensor"]] = None) -> None: - super()._save(output_dir, state_dict) - if self.processor is not None: - output_dir = output_dir if output_dir is not None else self.args.output_dir - getattr(self.processor, "image_processor").save_pretrained(output_dir) - @torch.no_grad() def get_inputs(self, batch: Dict[str, "torch.Tensor"]) -> Tuple[List["torch.Tensor"], List["torch.Tensor"]]: r""" @@ -326,6 +320,7 @@ class CustomPPOTrainer(PPOTrainer, Trainer): batch[k] = v[:, start_index:] with unwrap_model_for_generation(self.model, self.accelerator) as unwrapped_model: + unwrapped_model = self.accelerator.unwrap_model(self.model) # issue in trl v0.8.6 if self.model_args.upcast_layernorm: layernorm_params = dump_layernorm(unwrapped_model) @@ -369,19 +364,19 @@ class CustomPPOTrainer(PPOTrainer, Trainer): return get_rewards_from_server(self.reward_model, messages) batch = self.prepare_model_inputs(queries, responses) + unwrapped_model: "AutoModelForCausalLMWithValueHead" = self.accelerator.unwrap_model(self.model) - with unwrap_model_for_generation(self.model, self.accelerator) as unwrapped_model: - if self.finetuning_args.reward_model_type == "lora": - replace_model(unwrapped_model, target="reward") - reward_model = self.model - else: - reward_model = self.reward_model + if self.finetuning_args.reward_model_type == "lora": + replace_model(unwrapped_model, target="reward") + reward_model = self.model + else: + reward_model = self.reward_model - with self.amp_context: # support bf16 - _, _, values = reward_model(**batch, output_hidden_states=True, return_dict=True, use_cache=False) + with unwrap_model_for_generation(reward_model, self.accelerator), self.amp_context: # support 
bf16 + _, _, values = reward_model(**batch, output_hidden_states=True, return_dict=True, use_cache=False) - if self.finetuning_args.reward_model_type == "lora": - replace_model(unwrapped_model, target="default") + if self.finetuning_args.reward_model_type == "lora": + replace_model(unwrapped_model, target="default") if self.is_chatglm_model: # assume same architecture values = torch.transpose(values, 0, 1) @@ -482,3 +477,7 @@ class CustomPPOTrainer(PPOTrainer, Trainer): self._save(output_dir, state_dict={}) remove_dummy_checkpoint(True, output_dir, [WEIGHTS_NAME, SAFE_WEIGHTS_NAME]) self.model.save_checkpoint(output_dir) + + if self.processor is not None: + output_dir = output_dir if output_dir is not None else self.args.output_dir + getattr(self.processor, "image_processor").save_pretrained(output_dir) From 90f706c05bc4dfeb64229bcdb1cf4834c498e354 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Fri, 7 Jun 2024 05:14:19 +0800 Subject: [PATCH 097/162] fix ppo trainer save zero3 model accelerator.get_state_dict(ds_model) should be called at all ranks Former-commit-id: 3a0f60f0aa072531e4ae5819ec00c8fa42aa0913 --- src/llamafactory/train/ppo/trainer.py | 29 ++++++++++++++++++--------- src/llamafactory/train/sft/metric.py | 3 +++ 2 files changed, 22 insertions(+), 10 deletions(-) diff --git a/src/llamafactory/train/ppo/trainer.py b/src/llamafactory/train/ppo/trainer.py index 0ad1b8e8..2e1288e4 100644 --- a/src/llamafactory/train/ppo/trainer.py +++ b/src/llamafactory/train/ppo/trainer.py @@ -123,9 +123,8 @@ class CustomPPOTrainer(PPOTrainer, Trainer): self.state = TrainerState() self.control = TrainerControl() - self.is_deepspeed_enabled = self.accelerator.distributed_type == "DEEPSPEED" and hasattr( - self.accelerator.state, "deepspeed_plugin" - ) + self.is_deepspeed_enabled = getattr(self.accelerator.state, "deepspeed_plugin", None) is not None + self.is_fsdp_enabled = getattr(self.accelerator.state, "fsdp_plugin", None) is not None self.log_callback, self.save_callback = callbacks[0], callbacks[1] assert isinstance(self.log_callback, LogCallback) and isinstance(self.save_callback, FixValueHeadModelCallback) @@ -466,18 +465,28 @@ class CustomPPOTrainer(PPOTrainer, Trainer): Subclass and override to inject custom behavior. """ - if self.args.should_save: + if output_dir is None: + output_dir = self.args.output_dir + + if self.is_fsdp_enabled or self.is_deepspeed_enabled: try: - self._save(output_dir, state_dict=self.accelerator.get_state_dict(self.model)) + state_dict = self.accelerator.get_state_dict(self.model) # must be called at all ranks + if self.args.should_save: + self._save(output_dir, state_dict=state_dict) except ValueError: logger.warning( " stage3_gather_16bit_weights_on_model_save=false. 
Saving the full checkpoint instead," " use zero_to_fp32.py to recover weights" ) - self._save(output_dir, state_dict={}) - remove_dummy_checkpoint(True, output_dir, [WEIGHTS_NAME, SAFE_WEIGHTS_NAME]) + if self.args.should_save: + self._save(output_dir, state_dict={}) + # remove the dummy state_dict + remove_dummy_checkpoint(self.args.should_save, output_dir, [WEIGHTS_NAME, SAFE_WEIGHTS_NAME]) self.model.save_checkpoint(output_dir) - if self.processor is not None: - output_dir = output_dir if output_dir is not None else self.args.output_dir - getattr(self.processor, "image_processor").save_pretrained(output_dir) + elif self.args.should_save: + self._save(output_dir) + + if self.processor is not None and self.args.should_save: + output_dir = output_dir if output_dir is not None else self.args.output_dir + getattr(self.processor, "image_processor").save_pretrained(output_dir) diff --git a/src/llamafactory/train/sft/metric.py b/src/llamafactory/train/sft/metric.py index d1af4c17..b135fcfb 100644 --- a/src/llamafactory/train/sft/metric.py +++ b/src/llamafactory/train/sft/metric.py @@ -10,12 +10,15 @@ from ...extras.packages import is_jieba_available, is_nltk_available, is_rouge_a if TYPE_CHECKING: from transformers.tokenization_utils import PreTrainedTokenizer + if is_jieba_available(): import jieba # type: ignore + if is_nltk_available(): from nltk.translate.bleu_score import SmoothingFunction, sentence_bleu + if is_rouge_available(): from rouge_chinese import Rouge From 4785c723f84e992d396581d7bb40568fb14604f5 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Fri, 7 Jun 2024 05:19:21 +0800 Subject: [PATCH 098/162] tiny fix Former-commit-id: 0621bcad1dfbe8ce2464f741d4256c5df2a8d1b6 --- src/llamafactory/train/ppo/ppo_utils.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/src/llamafactory/train/ppo/ppo_utils.py b/src/llamafactory/train/ppo/ppo_utils.py index cfda0b3d..fec3fc1e 100644 --- a/src/llamafactory/train/ppo/ppo_utils.py +++ b/src/llamafactory/train/ppo/ppo_utils.py @@ -32,10 +32,11 @@ def replace_model(model: "AutoModelForCausalLMWithValueHead", target: Literal["d r""" Replaces the default/reward modules in the model. The model is already unwrapped. 
""" + v_head_layer = model.v_head.summary if is_deepspeed_zero3_enabled(): import deepspeed # type: ignore - params = [model.v_head.summary.weight, model.v_head.summary.bias] + params = [v_head_layer.weight, v_head_layer.bias] context_maybe_zero3 = deepspeed.zero.GatheredParameters(params, modifier_rank=0) else: context_maybe_zero3 = nullcontext() @@ -43,14 +44,12 @@ def replace_model(model: "AutoModelForCausalLMWithValueHead", target: Literal["d model.pretrained_model.set_adapter(target) # set the LoRA adapter to be active with context_maybe_zero3: if target == "reward": # save default head temporarily - setattr(model, "default_head_weight", model.v_head.summary.weight.data.detach().clone()) - setattr(model, "default_head_bias", model.v_head.summary.bias.data.detach().clone()) + setattr(model, "default_head_weight", v_head_layer.weight.data.detach().clone()) + setattr(model, "default_head_bias", v_head_layer.bias.data.detach().clone()) - device = model.v_head.summary.weight.device - model.v_head.summary.weight.data = ( - model.get_buffer("{}_head_weight".format(target)).detach().clone().to(device) - ) - model.v_head.summary.bias.data = model.get_buffer("{}_head_bias".format(target)).detach().clone().to(device) + device = v_head_layer.weight.device + v_head_layer.weight.data = model.get_buffer("{}_head_weight".format(target)).detach().clone().to(device) + v_head_layer.bias.data = model.get_buffer("{}_head_bias".format(target)).detach().clone().to(device) def dump_layernorm(model: "PreTrainedModel") -> Dict[str, torch.Tensor]: From 4377ad4391754fbdbf6d8fc95bd677a184a53ad1 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Fri, 7 Jun 2024 19:16:06 +0800 Subject: [PATCH 099/162] fix #4137 Former-commit-id: cdc0d6f5a2e5040e145c82c4801f37bd76529047 --- src/llamafactory/cli.py | 8 ++------ src/llamafactory/webui/runner.py | 3 +++ 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/src/llamafactory/cli.py b/src/llamafactory/cli.py index b9e734e4..5042e53c 100644 --- a/src/llamafactory/cli.py +++ b/src/llamafactory/cli.py @@ -72,12 +72,8 @@ def main(): elif command == Command.EXPORT: export_model() elif command == Command.TRAIN: - disable_torchrun = os.environ.get("TORCHRUN_DISABLED", "0").lower() in ["true", "1"] - if disable_torchrun and get_device_count() > 1: - logger.warning("`torchrun` cannot be disabled when device count > 1.") - disable_torchrun = False - - if (not disable_torchrun) and (get_device_count() > 0): + force_torchrun = os.environ.get("FORCE_TORCHRUN", "0").lower() in ["true", "1"] + if force_torchrun or get_device_count() > 1: master_addr = os.environ.get("MASTER_ADDR", "127.0.0.1") master_port = os.environ.get("MASTER_PORT", str(random.randint(20001, 29999))) logger.info("Initializing distributed tasks at: {}:{}".format(master_addr, master_port)) diff --git a/src/llamafactory/webui/runner.py b/src/llamafactory/webui/runner.py index e8fdd129..c046152c 100644 --- a/src/llamafactory/webui/runner.py +++ b/src/llamafactory/webui/runner.py @@ -278,6 +278,9 @@ class Runner: args = self._parse_train_args(data) if do_train else self._parse_eval_args(data) env = deepcopy(os.environ) env["LLAMABOARD_ENABLED"] = "1" + if args.get("deepspeed", None) is not None: + env["FORCE_TORCHRUN"] = "1" + self.trainer = Popen("llamafactory-cli train {}".format(save_cmd(args)), env=env, shell=True) yield from self.monitor() From 5606780ab69d4a0d9c58cab501a7599ef7e4ad27 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sat, 8 Jun 2024 00:22:16 +0800 Subject: [PATCH 
100/162] add resume args in webui Former-commit-id: 1d86ad768b1f36e54b4c2a9f18f6ea5a7df04c90 --- src/llamafactory/extras/constants.py | 6 ++- src/llamafactory/model/adapter.py | 4 +- src/llamafactory/webui/common.py | 17 ++++---- src/llamafactory/webui/components/top.py | 3 +- src/llamafactory/webui/components/train.py | 10 ++++- src/llamafactory/webui/runner.py | 48 ++++++++++++++++------ src/llamafactory/webui/utils.py | 29 ++++--------- 7 files changed, 68 insertions(+), 49 deletions(-) diff --git a/src/llamafactory/extras/constants.py b/src/llamafactory/extras/constants.py index 4099fe56..7d96fb5f 100644 --- a/src/llamafactory/extras/constants.py +++ b/src/llamafactory/extras/constants.py @@ -35,6 +35,8 @@ IGNORE_INDEX = -100 LAYERNORM_NAMES = {"norm", "ln"} +LLAMABOARD_CONFIG = "llamaboard_config.yaml" + METHODS = ["full", "freeze", "lora"] MOD_SUPPORTED_MODELS = {"bloom", "falcon", "gemma", "llama", "mistral", "mixtral", "phi", "starcoder2"} @@ -47,10 +49,10 @@ SUBJECTS = ["Average", "STEM", "Social Sciences", "Humanities", "Other"] SUPPORTED_MODELS = OrderedDict() -TRAINER_CONFIG = "trainer_config.yaml" - TRAINER_LOG = "trainer_log.jsonl" +TRAINING_ARGS = "training_args.yaml" + TRAINING_STAGES = { "Supervised Fine-Tuning": "sft", "Reward Modeling": "rm", diff --git a/src/llamafactory/model/adapter.py b/src/llamafactory/model/adapter.py index 1a77d613..d17873f7 100644 --- a/src/llamafactory/model/adapter.py +++ b/src/llamafactory/model/adapter.py @@ -50,7 +50,7 @@ def init_adapter( logger.info("Upcasting trainable params to float32.") cast_trainable_params_to_fp32 = True - if finetuning_args.finetuning_type == "full" and is_trainable: + if is_trainable and finetuning_args.finetuning_type == "full": logger.info("Fine-tuning method: Full") forbidden_modules = set() @@ -67,7 +67,7 @@ def init_adapter( else: param.requires_grad_(False) - if finetuning_args.finetuning_type == "freeze" and is_trainable: + if is_trainable and finetuning_args.finetuning_type == "freeze": logger.info("Fine-tuning method: Freeze") if model_args.visual_inputs: diff --git a/src/llamafactory/webui/common.py b/src/llamafactory/webui/common.py index 304b56a5..37b38df0 100644 --- a/src/llamafactory/webui/common.py +++ b/src/llamafactory/webui/common.py @@ -50,13 +50,6 @@ def get_config_path() -> os.PathLike: return os.path.join(DEFAULT_CACHE_DIR, USER_CONFIG) -def get_arg_save_path(config_path: str) -> os.PathLike: - r""" - Gets the path to saved arguments. - """ - return os.path.join(DEFAULT_CONFIG_DIR, config_path) - - def load_config() -> Dict[str, Any]: r""" Loads user config if exists. @@ -77,24 +70,28 @@ def save_config(lang: str, model_name: Optional[str] = None, model_path: Optiona user_config["lang"] = lang or user_config["lang"] if model_name: user_config["last_model"] = model_name + + if model_name and model_path: user_config["path_dict"][model_name] = model_path + with open(get_config_path(), "w", encoding="utf-8") as f: safe_dump(user_config, f) -def get_model_path(model_name: str) -> Optional[str]: +def get_model_path(model_name: str) -> str: r""" Gets the model path according to the model name. 
""" user_config = load_config() - path_dict: Dict[DownloadSource, str] = SUPPORTED_MODELS.get(model_name, defaultdict(str)) - model_path = user_config["path_dict"].get(model_name, None) or path_dict.get(DownloadSource.DEFAULT, None) + path_dict: Dict["DownloadSource", str] = SUPPORTED_MODELS.get(model_name, defaultdict(str)) + model_path = user_config["path_dict"].get(model_name, "") or path_dict.get(DownloadSource.DEFAULT, "") if ( use_modelscope() and path_dict.get(DownloadSource.MODELSCOPE) and model_path == path_dict.get(DownloadSource.DEFAULT) ): # replace path model_path = path_dict.get(DownloadSource.MODELSCOPE) + return model_path diff --git a/src/llamafactory/webui/components/top.py b/src/llamafactory/webui/components/top.py index c794d0aa..fd0ead3d 100644 --- a/src/llamafactory/webui/components/top.py +++ b/src/llamafactory/webui/components/top.py @@ -36,7 +36,8 @@ def create_top() -> Dict[str, "Component"]: visual_inputs = gr.Checkbox(scale=1) model_name.change(get_model_info, [model_name], [model_path, template, visual_inputs], queue=False) - model_path.change(save_config, inputs=[lang, model_name, model_path], queue=False) + model_name.input(save_config, inputs=[lang, model_name], queue=False) + model_path.input(save_config, inputs=[lang, model_name, model_path], queue=False) finetuning_type.change(can_quantize, [finetuning_type], [quantization_bit], queue=False) checkpoint_path.focus(list_checkpoints, [model_name, finetuning_type], [checkpoint_path], queue=False) diff --git a/src/llamafactory/webui/components/train.py b/src/llamafactory/webui/components/train.py index 74f8ef2a..72dfc858 100644 --- a/src/llamafactory/webui/components/train.py +++ b/src/llamafactory/webui/components/train.py @@ -6,7 +6,7 @@ from ...extras.constants import TRAINING_STAGES from ...extras.misc import get_device_count from ...extras.packages import is_gradio_available from ..common import DEFAULT_DATA_DIR, list_checkpoints, list_datasets -from ..utils import change_stage, check_output_dir, list_config_paths, list_output_dirs +from ..utils import change_stage, list_config_paths, list_output_dirs from .data import create_preview_box @@ -319,7 +319,13 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]: finetuning_type.change(list_output_dirs, [model_name, finetuning_type, current_time], [output_dir], queue=False) output_dir.change( list_output_dirs, [model_name, finetuning_type, current_time], [output_dir], concurrency_limit=None - ).then(check_output_dir, inputs=[lang, model_name, finetuning_type, output_dir], concurrency_limit=None) + ) + output_dir.input( + engine.runner.check_output_dir, + [lang, model_name, finetuning_type, output_dir], + list(input_elems) + [output_box], + concurrency_limit=None, + ) config_path.change(list_config_paths, [current_time], [config_path], queue=False) return elem_dict diff --git a/src/llamafactory/webui/runner.py b/src/llamafactory/webui/runner.py index c046152c..35014628 100644 --- a/src/llamafactory/webui/runner.py +++ b/src/llamafactory/webui/runner.py @@ -5,11 +5,11 @@ from typing import TYPE_CHECKING, Any, Dict, Generator, Optional from transformers.trainer import TRAINING_ARGS_NAME -from ..extras.constants import PEFT_METHODS, TRAINING_STAGES +from ..extras.constants import LLAMABOARD_CONFIG, PEFT_METHODS, TRAINING_STAGES from ..extras.misc import is_gpu_or_npu_available, torch_gc from ..extras.packages import is_gradio_available -from .common import DEFAULT_CACHE_DIR, get_save_dir, load_config -from .locales import ALERTS +from .common import 
DEFAULT_CACHE_DIR, DEFAULT_CONFIG_DIR, get_save_dir, load_config +from .locales import ALERTS, LOCALES from .utils import abort_leaf_process, gen_cmd, get_eval_results, get_trainer_info, load_args, save_args, save_cmd @@ -276,6 +276,10 @@ class Runner: else: self.do_train, self.running_data = do_train, data args = self._parse_train_args(data) if do_train else self._parse_eval_args(data) + + os.makedirs(args["output_dir"], exist_ok=True) + save_args(os.path.join(args["output_dir"], LLAMABOARD_CONFIG), self._form_config_dict(data)) + env = deepcopy(os.environ) env["LLAMABOARD_ENABLED"] = "1" if args.get("deepspeed", None) is not None: @@ -284,6 +288,16 @@ class Runner: self.trainer = Popen("llamafactory-cli train {}".format(save_cmd(args)), env=env, shell=True) yield from self.monitor() + def _form_config_dict(self, data: Dict["Component", Any]) -> Dict[str, Any]: + config_dict = {} + skip_ids = ["top.lang", "top.model_path", "train.output_dir", "train.config_path", "train.device_count"] + for elem, value in data.items(): + elem_id = self.manager.get_id_by_elem(elem) + if elem_id not in skip_ids: + config_dict[elem_id] = value + + return config_dict + def preview_train(self, data): yield from self._preview(data, do_train=True) @@ -349,28 +363,24 @@ class Runner: } yield return_dict - def save_args(self, data: dict): + def save_args(self, data): output_box = self.manager.get_elem_by_id("train.output_box") error = self._initialize(data, do_train=True, from_preview=True) if error: gr.Warning(error) return {output_box: error} - config_dict: Dict[str, Any] = {} lang = data[self.manager.get_elem_by_id("top.lang")] config_path = data[self.manager.get_elem_by_id("train.config_path")] - skip_ids = ["top.lang", "top.model_path", "train.output_dir", "train.config_path", "train.device_count"] - for elem, value in data.items(): - elem_id = self.manager.get_id_by_elem(elem) - if elem_id not in skip_ids: - config_dict[elem_id] = value + os.makedirs(DEFAULT_CONFIG_DIR, exist_ok=True) + save_path = os.path.join(DEFAULT_CONFIG_DIR, config_path) - save_path = save_args(config_path, config_dict) + save_args(save_path, self._form_config_dict(data)) return {output_box: ALERTS["info_config_saved"][lang] + save_path} def load_args(self, lang: str, config_path: str): output_box = self.manager.get_elem_by_id("train.output_box") - config_dict = load_args(config_path) + config_dict = load_args(os.path.join(DEFAULT_CONFIG_DIR, config_path)) if config_dict is None: gr.Warning(ALERTS["err_config_not_found"][lang]) return {output_box: ALERTS["err_config_not_found"][lang]} @@ -380,3 +390,17 @@ class Runner: output_dict[self.manager.get_elem_by_id(elem_id)] = value return output_dict + + def check_output_dir(self, lang: str, model_name: str, finetuning_type: str, output_dir: str): + output_box = self.manager.get_elem_by_id("train.output_box") + output_dict: Dict["Component", Any] = {output_box: LOCALES["output_box"][lang]["value"]} + if model_name and output_dir and os.path.isdir(get_save_dir(model_name, finetuning_type, output_dir)): + gr.Warning(ALERTS["warn_output_dir_exists"][lang]) + output_dict[output_box] = ALERTS["warn_output_dir_exists"][lang] + + output_dir = get_save_dir(model_name, finetuning_type, output_dir) + config_dict = load_args(os.path.join(output_dir, LLAMABOARD_CONFIG)) # load llamaboard config + for elem_id, value in config_dict.items(): + output_dict[self.manager.get_elem_by_id(elem_id)] = value + + return output_dict diff --git a/src/llamafactory/webui/utils.py b/src/llamafactory/webui/utils.py index 
23e62dca..e39f2aa4 100644 --- a/src/llamafactory/webui/utils.py +++ b/src/llamafactory/webui/utils.py @@ -8,10 +8,10 @@ import psutil from transformers.trainer_utils import get_last_checkpoint from yaml import safe_dump, safe_load -from ..extras.constants import PEFT_METHODS, RUNNING_LOG, TRAINER_CONFIG, TRAINER_LOG, TRAINING_STAGES +from ..extras.constants import PEFT_METHODS, RUNNING_LOG, TRAINER_LOG, TRAINING_ARGS, TRAINING_STAGES from ..extras.packages import is_gradio_available, is_matplotlib_available from ..extras.ploting import gen_loss_plot -from .common import DEFAULT_CACHE_DIR, DEFAULT_CONFIG_DIR, get_arg_save_path, get_save_dir +from .common import DEFAULT_CACHE_DIR, DEFAULT_CONFIG_DIR, get_save_dir from .locales import ALERTS @@ -93,10 +93,10 @@ def save_cmd(args: Dict[str, Any]) -> str: output_dir = args["output_dir"] os.makedirs(output_dir, exist_ok=True) - with open(os.path.join(output_dir, TRAINER_CONFIG), "w", encoding="utf-8") as f: + with open(os.path.join(output_dir, TRAINING_ARGS), "w", encoding="utf-8") as f: safe_dump(clean_cmd(args), f) - return os.path.join(output_dir, TRAINER_CONFIG) + return os.path.join(output_dir, TRAINING_ARGS) def get_eval_results(path: os.PathLike) -> str: @@ -157,22 +157,19 @@ def load_args(config_path: str) -> Optional[Dict[str, Any]]: Loads saved arguments. """ try: - with open(get_arg_save_path(config_path), "r", encoding="utf-8") as f: + with open(config_path, "r", encoding="utf-8") as f: return safe_load(f) except Exception: return None -def save_args(config_path: str, config_dict: Dict[str, Any]) -> str: +def save_args(config_path: str, config_dict: Dict[str, Any]): r""" Saves arguments. """ - os.makedirs(DEFAULT_CONFIG_DIR, exist_ok=True) - with open(get_arg_save_path(config_path), "w", encoding="utf-8") as f: + with open(config_path, "w", encoding="utf-8") as f: safe_dump(config_dict, f) - return str(get_arg_save_path(config_path)) - def list_config_paths(current_time: str) -> "gr.Dropdown": r""" @@ -181,13 +178,13 @@ def list_config_paths(current_time: str) -> "gr.Dropdown": config_files = ["{}.yaml".format(current_time)] if os.path.isdir(DEFAULT_CONFIG_DIR): for file_name in os.listdir(DEFAULT_CONFIG_DIR): - if file_name.endswith(".yaml"): + if file_name.endswith(".yaml") and file_name not in config_files: config_files.append(file_name) return gr.Dropdown(choices=config_files) -def list_output_dirs(model_name: str, finetuning_type: str, current_time: str) -> "gr.Dropdown": +def list_output_dirs(model_name: Optional[str], finetuning_type: str, current_time: str) -> "gr.Dropdown": r""" Lists all the directories that can resume from. """ @@ -203,14 +200,6 @@ def list_output_dirs(model_name: str, finetuning_type: str, current_time: str) - return gr.Dropdown(choices=output_dirs) -def check_output_dir(lang: str, model_name: str, finetuning_type: str, output_dir: str) -> None: - r""" - Check if output dir exists. - """ - if model_name and output_dir and os.path.isdir(get_save_dir(model_name, finetuning_type, output_dir)): - gr.Warning(ALERTS["warn_output_dir_exists"][lang]) - - def create_ds_config() -> None: r""" Creates deepspeed config. 
From 17c66e9502d5e4e63c4859be395134075bb24bde Mon Sep 17 00:00:00 2001 From: hoshi-hiyouga Date: Sat, 8 Jun 2024 00:45:02 +0800 Subject: [PATCH 101/162] fix #4139 Former-commit-id: c025a4d74f293c14c2705e68af20a82a84608520 --- src/llamafactory/model/adapter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llamafactory/model/adapter.py b/src/llamafactory/model/adapter.py index d17873f7..bd14a52f 100644 --- a/src/llamafactory/model/adapter.py +++ b/src/llamafactory/model/adapter.py @@ -239,7 +239,7 @@ def init_adapter( ) model = get_peft_model(model, lora_config) - if cast_trainable_params_to_fp32: + if is_trainable and cast_trainable_params_to_fp32: for param in filter(lambda p: p.requires_grad, model.parameters()): param.data = param.data.to(torch.float32) From 35a36d96e546e72a8ea99f76436dd3366321744c Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sat, 8 Jun 2024 00:47:23 +0800 Subject: [PATCH 102/162] reorganize adapter code Former-commit-id: b26c2df9d97f4efffccbf7d28de13619b43f10dd --- src/llamafactory/hparams/model_args.py | 9 +- src/llamafactory/model/adapter.py | 408 +++++++++++++------------ 2 files changed, 224 insertions(+), 193 deletions(-) diff --git a/src/llamafactory/hparams/model_args.py b/src/llamafactory/hparams/model_args.py index 20271173..6352a420 100644 --- a/src/llamafactory/hparams/model_args.py +++ b/src/llamafactory/hparams/model_args.py @@ -15,7 +15,12 @@ class ModelArguments: ) adapter_name_or_path: Optional[str] = field( default=None, - metadata={"help": "Path to the adapter weight or identifier from huggingface.co/models."}, + metadata={ + "help": ( + "Path to the adapter weight or identifier from huggingface.co/models. " + "Use commas to separate multiple adapters." + ) + }, ) cache_dir: Optional[str] = field( default=None, @@ -35,7 +40,7 @@ class ModelArguments: ) new_special_tokens: Optional[str] = field( default=None, - metadata={"help": "Special tokens to be added into the tokenizer."}, + metadata={"help": "Special tokens to be added into the tokenizer. 
Use commas to separate multiple tokens."}, ) model_revision: str = field( default="main", diff --git a/src/llamafactory/model/adapter.py b/src/llamafactory/model/adapter.py index bd14a52f..f4e501a7 100644 --- a/src/llamafactory/model/adapter.py +++ b/src/llamafactory/model/adapter.py @@ -21,6 +21,218 @@ if TYPE_CHECKING: logger = get_logger(__name__) +def _setup_full_tuning( + model: "PreTrainedModel", + model_args: "ModelArguments", + finetuning_args: "FinetuningArguments", + cast_trainable_params_to_fp32: bool, +) -> None: + logger.info("Fine-tuning method: Full") + forbidden_modules = set() + if model_args.visual_inputs and finetuning_args.freeze_vision_tower: + forbidden_modules.add("vision_tower") + + if model_args.visual_inputs and finetuning_args.train_mm_proj_only: + forbidden_modules.add("language_model") + + for name, param in model.named_parameters(): + if not any(forbidden_module in name for forbidden_module in forbidden_modules): + if cast_trainable_params_to_fp32: + param.data = param.data.to(torch.float32) + else: + param.requires_grad_(False) + + +def _setup_freeze_tuning( + model: "PreTrainedModel", + model_args: "ModelArguments", + finetuning_args: "FinetuningArguments", + cast_trainable_params_to_fp32: bool, +) -> None: + logger.info("Fine-tuning method: Freeze") + if model_args.visual_inputs: + config = model.config.text_config + else: + config = model.config + + num_layers = ( + getattr(config, "num_hidden_layers", None) + or getattr(config, "num_layers", None) + or getattr(config, "n_layer", None) + ) + if not num_layers: + raise ValueError("Current model does not support freeze tuning.") + + if finetuning_args.use_llama_pro: + if num_layers % finetuning_args.freeze_trainable_layers != 0: + raise ValueError( + "`num_layers` {} should be divisible by `num_layer_trainable` {}.".format( + num_layers, finetuning_args.freeze_trainable_layers + ) + ) + + stride = num_layers // finetuning_args.freeze_trainable_layers + trainable_layer_ids = range(stride - 1, num_layers + stride - 1, stride) + elif finetuning_args.freeze_trainable_layers > 0: # fine-tuning the last n layers if num_layer_trainable > 0 + trainable_layer_ids = range(max(0, num_layers - finetuning_args.freeze_trainable_layers), num_layers) + else: # fine-tuning the first n layers if num_layer_trainable < 0 + trainable_layer_ids = range(min(-finetuning_args.freeze_trainable_layers, num_layers)) + + hidden_modules = set() + non_hidden_modules = set() + for name, _ in model.named_parameters(): + if ".0." in name: + hidden_modules.add(name.split(".0.")[-1].split(".")[0]) + elif ".1." 
in name: # MoD starts from layer 1 + hidden_modules.add(name.split(".1.")[-1].split(".")[0]) + + if re.search(r"\.\d+\.", name) is None: + non_hidden_modules.add(name.split(".")[-2]) + + trainable_layers = [] + for module_name in finetuning_args.freeze_trainable_modules: + if module_name != "all" and module_name not in hidden_modules: + raise ValueError( + "Module {} is not found, please choose from {}".format(module_name, ", ".join(hidden_modules)) + ) + + for idx in trainable_layer_ids: + trainable_layers.append(".{:d}.{}".format(idx, module_name if module_name != "all" else "")) + + if finetuning_args.freeze_extra_modules: + for module_name in finetuning_args.freeze_extra_modules: + if module_name not in non_hidden_modules: + raise ValueError( + "Module {} is not found, please choose from {}".format(module_name, ", ".join(non_hidden_modules)) + ) + + trainable_layers.append(module_name) + + forbidden_modules = set() + if model_args.visual_inputs and finetuning_args.freeze_vision_tower: + forbidden_modules.add("vision_tower") + + for name, param in model.named_parameters(): + if any(trainable_layer in name for trainable_layer in trainable_layers) and not any( + forbidden_module in name for forbidden_module in forbidden_modules + ): + if cast_trainable_params_to_fp32: + param.data = param.data.to(torch.float32) + else: + param.requires_grad_(False) + + logger.info("Set trainable layers: {}".format(",".join(trainable_layers))) + + +def _setup_lora_tuning( + config: "PretrainedConfig", + model: "PreTrainedModel", + model_args: "ModelArguments", + finetuning_args: "FinetuningArguments", + is_trainable: bool, + cast_trainable_params_to_fp32: bool, +) -> "PeftModel": + logger.info("Fine-tuning method: {}".format("DoRA" if finetuning_args.use_dora else "LoRA")) + adapter_to_resume = None + + if model_args.adapter_name_or_path is not None: + is_mergeable = True + if getattr(model, "quantization_method", None): # merge lora in quantized model is unstable + assert len(model_args.adapter_name_or_path) == 1, "Quantized model only accepts a single adapter." + is_mergeable = False + + if is_deepspeed_zero3_enabled(): + assert len(model_args.adapter_name_or_path) == 1, "Cannot use multiple adapters in DeepSpeed ZeRO-3." + is_mergeable = False + + if model_args.use_unsloth: + assert len(model_args.adapter_name_or_path) == 1, "Unsloth model only accepts a single adapter." 
+ is_mergeable = False + + if (is_trainable and not finetuning_args.create_new_adapter) or (not is_mergeable): + adapter_to_merge = model_args.adapter_name_or_path[:-1] + adapter_to_resume = model_args.adapter_name_or_path[-1] + else: + adapter_to_merge = model_args.adapter_name_or_path + + for adapter in adapter_to_merge: + model: "LoraModel" = PeftModel.from_pretrained(model, adapter, offload_folder=model_args.offload_folder) + model = model.merge_and_unload() + + if len(adapter_to_merge) > 0: + logger.info("Merged {} adapter(s).".format(len(adapter_to_merge))) + + if adapter_to_resume is not None: # resume lora training + if model_args.use_unsloth: + model = load_unsloth_peft_model(config, model_args, is_trainable=is_trainable) + else: + model = PeftModel.from_pretrained( + model, + adapter_to_resume, + is_trainable=is_trainable, + offload_folder=model_args.offload_folder, + ) + + if is_trainable and adapter_to_resume is None: # create new lora weights while training + if len(finetuning_args.lora_target) == 1 and finetuning_args.lora_target[0] == "all": + target_modules = find_all_linear_modules(model, finetuning_args.freeze_vision_tower) + else: + target_modules = finetuning_args.lora_target + + if finetuning_args.use_llama_pro: + target_modules = find_expanded_modules(model, target_modules, finetuning_args.freeze_trainable_layers) + + if model_args.visual_inputs and finetuning_args.freeze_vision_tower: + target_modules = "^(?!.*vision_tower).*(?:{}).*".format("|".join(target_modules)) + + if ( + finetuning_args.use_dora + and getattr(model, "quantization_method", None) is not None + and getattr(model, "quantization_method", None) != QuantizationMethod.BITS_AND_BYTES + ): + raise ValueError("DoRA is not compatible with PTQ-quantized models.") + + if model_args.resize_vocab and finetuning_args.additional_target is None: + input_embeddings = model.get_input_embeddings() + output_embeddings = model.get_output_embeddings() + module_names = set() + for name, module in model.named_modules(): + if module in [input_embeddings, output_embeddings]: + module_names.add(name.split(".")[-1]) + + finetuning_args.additional_target = module_names + logger.warning("Vocab has been resized, add {} to trainable params.".format(",".join(module_names))) + + peft_kwargs = { + "r": finetuning_args.lora_rank, + "target_modules": target_modules, + "lora_alpha": finetuning_args.lora_alpha, + "lora_dropout": finetuning_args.lora_dropout, + "use_rslora": finetuning_args.use_rslora, + "modules_to_save": finetuning_args.additional_target, + } + + if model_args.use_unsloth: + model = get_unsloth_peft_model(model, model_args, peft_kwargs) + else: + lora_config = LoraConfig( + task_type=TaskType.CAUSAL_LM, + inference_mode=False, + use_dora=finetuning_args.use_dora, + **peft_kwargs, + ) + model = get_peft_model(model, lora_config) + + if is_trainable and cast_trainable_params_to_fp32: + for param in filter(lambda p: p.requires_grad, model.parameters()): + param.data = param.data.to(torch.float32) + + if model_args.adapter_name_or_path is not None: + logger.info("Loaded adapter(s): {}".format(",".join(model_args.adapter_name_or_path))) + + return model + + def init_adapter( config: "PretrainedConfig", model: "PreTrainedModel", @@ -35,7 +247,6 @@ def init_adapter( Note that the trainable parameters must be cast to float32. 
""" - if (not is_trainable) and model_args.adapter_name_or_path is None: logger.info("Adapter is not found at evaluation, load the base model.") return model @@ -51,199 +262,14 @@ def init_adapter( cast_trainable_params_to_fp32 = True if is_trainable and finetuning_args.finetuning_type == "full": - logger.info("Fine-tuning method: Full") - - forbidden_modules = set() - if model_args.visual_inputs and finetuning_args.freeze_vision_tower: - forbidden_modules.add("vision_tower") - - if model_args.visual_inputs and finetuning_args.train_mm_proj_only: - forbidden_modules.add("language_model") - - for name, param in model.named_parameters(): - if not any(forbidden_module in name for forbidden_module in forbidden_modules): - if cast_trainable_params_to_fp32: - param.data = param.data.to(torch.float32) - else: - param.requires_grad_(False) + _setup_full_tuning(model, model_args, finetuning_args, cast_trainable_params_to_fp32) if is_trainable and finetuning_args.finetuning_type == "freeze": - logger.info("Fine-tuning method: Freeze") - - if model_args.visual_inputs: - config = model.config.text_config - else: - config = model.config - - num_layers = ( - getattr(config, "num_hidden_layers", None) - or getattr(config, "num_layers", None) - or getattr(config, "n_layer", None) - ) - if not num_layers: - raise ValueError("Current model does not support freeze tuning.") - - if finetuning_args.use_llama_pro: - if num_layers % finetuning_args.freeze_trainable_layers != 0: - raise ValueError( - "`num_layers` {} should be divisible by `num_layer_trainable` {}.".format( - num_layers, finetuning_args.freeze_trainable_layers - ) - ) - - stride = num_layers // finetuning_args.freeze_trainable_layers - trainable_layer_ids = range(stride - 1, num_layers + stride - 1, stride) - elif finetuning_args.freeze_trainable_layers > 0: # fine-tuning the last n layers if num_layer_trainable > 0 - trainable_layer_ids = range(max(0, num_layers - finetuning_args.freeze_trainable_layers), num_layers) - else: # fine-tuning the first n layers if num_layer_trainable < 0 - trainable_layer_ids = range(min(-finetuning_args.freeze_trainable_layers, num_layers)) - - hidden_modules = set() - non_hidden_modules = set() - for name, _ in model.named_parameters(): - if ".0." in name: - hidden_modules.add(name.split(".0.")[-1].split(".")[0]) - elif ".1." 
in name: # MoD starts from layer 1 - hidden_modules.add(name.split(".1.")[-1].split(".")[0]) - - if re.search(r"\.\d+\.", name) is None: - non_hidden_modules.add(name.split(".")[-2]) - - trainable_layers = [] - for module_name in finetuning_args.freeze_trainable_modules: - if module_name != "all" and module_name not in hidden_modules: - raise ValueError( - "Module {} is not found, please choose from {}".format(module_name, ", ".join(hidden_modules)) - ) - - for idx in trainable_layer_ids: - trainable_layers.append(".{:d}.{}".format(idx, module_name if module_name != "all" else "")) - - if finetuning_args.freeze_extra_modules: - for module_name in finetuning_args.freeze_extra_modules: - if module_name not in non_hidden_modules: - raise ValueError( - "Module {} is not found, please choose from {}".format( - module_name, ", ".join(non_hidden_modules) - ) - ) - - trainable_layers.append(module_name) - - forbidden_modules = set() - if model_args.visual_inputs and finetuning_args.freeze_vision_tower: - forbidden_modules.add("vision_tower") - - for name, param in model.named_parameters(): - if any(trainable_layer in name for trainable_layer in trainable_layers) and not any( - forbidden_module in name for forbidden_module in forbidden_modules - ): - if cast_trainable_params_to_fp32: - param.data = param.data.to(torch.float32) - else: - param.requires_grad_(False) - - logger.info("Set trainable layers: {}".format(",".join(map(str, trainable_layer_ids)))) + _setup_freeze_tuning(model, model_args, finetuning_args, cast_trainable_params_to_fp32) if finetuning_args.finetuning_type == "lora": - logger.info("Fine-tuning method: {}".format("DoRA" if finetuning_args.use_dora else "LoRA")) - adapter_to_resume = None - - if model_args.adapter_name_or_path is not None: - is_mergeable = True - if getattr(model, "quantization_method", None): # merge lora in quantized model is unstable - assert len(model_args.adapter_name_or_path) == 1, "Quantized model only accepts a single adapter." - is_mergeable = False - - if is_deepspeed_zero3_enabled(): - assert len(model_args.adapter_name_or_path) == 1, "Cannot use multiple adapters in DeepSpeed ZeRO-3." - is_mergeable = False - - if model_args.use_unsloth: - assert len(model_args.adapter_name_or_path) == 1, "Unsloth model only accepts a single adapter." 
- is_mergeable = False - - if (is_trainable and not finetuning_args.create_new_adapter) or (not is_mergeable): - adapter_to_merge = model_args.adapter_name_or_path[:-1] - adapter_to_resume = model_args.adapter_name_or_path[-1] - else: - adapter_to_merge = model_args.adapter_name_or_path - - for adapter in adapter_to_merge: - model: "LoraModel" = PeftModel.from_pretrained( - model, adapter, offload_folder=model_args.offload_folder - ) - model = model.merge_and_unload() - - if len(adapter_to_merge) > 0: - logger.info("Merged {} adapter(s).".format(len(adapter_to_merge))) - - if adapter_to_resume is not None: # resume lora training - if model_args.use_unsloth: - model = load_unsloth_peft_model(config, model_args, is_trainable=is_trainable) - else: - model = PeftModel.from_pretrained( - model, - adapter_to_resume, - is_trainable=is_trainable, - offload_folder=model_args.offload_folder, - ) - - if is_trainable and adapter_to_resume is None: # create new lora weights while training - if len(finetuning_args.lora_target) == 1 and finetuning_args.lora_target[0] == "all": - target_modules = find_all_linear_modules(model, finetuning_args.freeze_vision_tower) - else: - target_modules = finetuning_args.lora_target - - if finetuning_args.use_llama_pro: - target_modules = find_expanded_modules(model, target_modules, finetuning_args.freeze_trainable_layers) - - if model_args.visual_inputs and finetuning_args.freeze_vision_tower: - target_modules = "^(?!.*vision_tower).*(?:{}).*".format("|".join(target_modules)) - - if ( - finetuning_args.use_dora - and getattr(model, "quantization_method", None) is not None - and getattr(model, "quantization_method", None) != QuantizationMethod.BITS_AND_BYTES - ): - raise ValueError("DoRA is not compatible with PTQ-quantized models.") - - if model_args.resize_vocab and finetuning_args.additional_target is None: - input_embeddings = model.get_input_embeddings() - output_embeddings = model.get_output_embeddings() - module_names = set() - for name, module in model.named_modules(): - if module in [input_embeddings, output_embeddings]: - module_names.add(name.split(".")[-1]) - - finetuning_args.additional_target = module_names - logger.warning("Vocab has been resized, add {} to trainable params.".format(",".join(module_names))) - - peft_kwargs = { - "r": finetuning_args.lora_rank, - "target_modules": target_modules, - "lora_alpha": finetuning_args.lora_alpha, - "lora_dropout": finetuning_args.lora_dropout, - "use_rslora": finetuning_args.use_rslora, - "modules_to_save": finetuning_args.additional_target, - } - - if model_args.use_unsloth: - model = get_unsloth_peft_model(model, model_args, peft_kwargs) - else: - lora_config = LoraConfig( - task_type=TaskType.CAUSAL_LM, - inference_mode=False, - use_dora=finetuning_args.use_dora, - **peft_kwargs, - ) - model = get_peft_model(model, lora_config) - - if is_trainable and cast_trainable_params_to_fp32: - for param in filter(lambda p: p.requires_grad, model.parameters()): - param.data = param.data.to(torch.float32) - - if model_args.adapter_name_or_path is not None: - logger.info("Loaded adapter(s): {}".format(",".join(model_args.adapter_name_or_path))) + model = _setup_lora_tuning( + config, model, model_args, finetuning_args, is_trainable, cast_trainable_params_to_fp32 + ) return model From 9ba1f5958cfbccbc2ecd2421710c3266f961e223 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sat, 8 Jun 2024 00:58:10 +0800 Subject: [PATCH 103/162] Delete .readthedocs.yaml Former-commit-id: dd3ee514216a9a329519c58d79208040adcad126 
--- .readthedocs.yaml | 19 ------------------- 1 file changed, 19 deletions(-) delete mode 100644 .readthedocs.yaml diff --git a/.readthedocs.yaml b/.readthedocs.yaml deleted file mode 100644 index 3a9eaea1..00000000 --- a/.readthedocs.yaml +++ /dev/null @@ -1,19 +0,0 @@ -# Read the Docs configuration file -# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details - -version: 2 - -build: - os: ubuntu-22.04 - tools: - python: "3.8" - -sphinx: - configuration: docs/source/conf.py - -formats: - - pdf - -python: - install: - - requirements: docs/requirements-docs.txt From cf2afe5db0f1d30d0a6376a86ed918d4751f1ec7 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sat, 8 Jun 2024 01:35:58 +0800 Subject: [PATCH 104/162] init unittest Former-commit-id: 1c6f21cb8878ced043fe0b27c72cad2ef6ee990e --- README.md | 1 - README_zh.md | 1 - docker-compose.yml | 2 -- {tests => scripts}/test_toolcall.py | 2 +- tests/model/test_attn.py | 35 +++++++++++++++++++++++++++++ tests/test_throughput.py | 30 ------------------------- 6 files changed, 36 insertions(+), 35 deletions(-) rename {tests => scripts}/test_toolcall.py (97%) create mode 100644 tests/model/test_attn.py delete mode 100644 tests/test_throughput.py diff --git a/README.md b/README.md index 77d9c709..44897420 100644 --- a/README.md +++ b/README.md @@ -430,7 +430,6 @@ docker run --gpus=all \ -v ./hf_cache:/root/.cache/huggingface/ \ -v ./data:/app/data \ -v ./output:/app/output \ - -e CUDA_VISIBLE_DEVICES=0 \ -p 7860:7860 \ --shm-size 16G \ --name llama_factory \ diff --git a/README_zh.md b/README_zh.md index 9a52a963..8321d202 100644 --- a/README_zh.md +++ b/README_zh.md @@ -428,7 +428,6 @@ docker run --gpus=all \ -v ./hf_cache:/root/.cache/huggingface/ \ -v ./data:/app/data \ -v ./output:/app/output \ - -e CUDA_VISIBLE_DEVICES=0 \ -p 7860:7860 \ --shm-size 16G \ --name llama_factory \ diff --git a/docker-compose.yml b/docker-compose.yml index 333dc51e..9602a3e3 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -10,8 +10,6 @@ services: - ./hf_cache:/root/.cache/huggingface/ - ./data:/app/data - ./output:/app/output - environment: - - CUDA_VISIBLE_DEVICES=0 ports: - "7860:7860" ipc: host diff --git a/tests/test_toolcall.py b/scripts/test_toolcall.py similarity index 97% rename from tests/test_toolcall.py rename to scripts/test_toolcall.py index d36e7fec..7e460017 100644 --- a/tests/test_toolcall.py +++ b/scripts/test_toolcall.py @@ -20,7 +20,7 @@ def calculate_gpa(grades: Sequence[str], hours: Sequence[int]) -> float: def main(): client = OpenAI( - api_key="0", + api_key="{}".format(os.environ.get("API_KEY", "0")), base_url="http://localhost:{}/v1".format(os.environ.get("API_PORT", 8000)), ) tools = [ diff --git a/tests/model/test_attn.py b/tests/model/test_attn.py new file mode 100644 index 00000000..12d920ef --- /dev/null +++ b/tests/model/test_attn.py @@ -0,0 +1,35 @@ +import os + +from transformers.utils import is_flash_attn_2_available, is_torch_sdpa_available + +from llamafactory.hparams import get_infer_args +from llamafactory.model import load_model, load_tokenizer + + +TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-LlamaForCausalLM") + + +def test_attention(): + attention_available = ["off"] + if is_torch_sdpa_available(): + attention_available.append("sdpa") + + if is_flash_attn_2_available(): + attention_available.append("fa2") + + llama_attention_classes = { + "off": "LlamaAttention", + "sdpa": "LlamaSdpaAttention", + "fa2": "LlamaFlashAttention2", + } + for requested_attention 
in attention_available: + model_args, _, finetuning_args, _ = get_infer_args({ + "model_name_or_path": TINY_LLAMA, + "template": "llama2", + "flash_attn": requested_attention, + }) + tokenizer = load_tokenizer(model_args) + model = load_model(tokenizer["tokenizer"], model_args, finetuning_args) + for module in model.modules(): + if "Attention" in module.__class__.__name__: + assert module.__class__.__name__ == llama_attention_classes[requested_attention] diff --git a/tests/test_throughput.py b/tests/test_throughput.py deleted file mode 100644 index e8048910..00000000 --- a/tests/test_throughput.py +++ /dev/null @@ -1,30 +0,0 @@ -import os -import time - -from openai import OpenAI -from transformers.utils.versions import require_version - - -require_version("openai>=1.5.0", "To fix: pip install openai>=1.5.0") - - -def main(): - client = OpenAI( - api_key="0", - base_url="http://localhost:{}/v1".format(os.environ.get("API_PORT", 8000)), - ) - messages = [{"role": "user", "content": "Write a long essay about environment protection as long as possible."}] - num_tokens = 0 - start_time = time.time() - for _ in range(8): - result = client.chat.completions.create(messages=messages, model="test") - num_tokens += result.usage.completion_tokens - - elapsed_time = time.time() - start_time - print("Throughput: {:.2f} tokens/s".format(num_tokens / elapsed_time)) - # --infer_backend hf: 27.22 tokens/s (1.0x) - # --infer_backend vllm: 73.03 tokens/s (2.7x) - - -if __name__ == "__main__": - main() From 9e7d1315ad4ddf045251462b04923e51dc349077 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sat, 8 Jun 2024 01:48:30 +0800 Subject: [PATCH 105/162] add ci Former-commit-id: 3ea3acdadaa54abe33d93538580196cfdd91ee56 --- .github/workflows/tests.yml | 46 ++++++++++++++++++++++++++++++------- Makefile | 5 +++- 2 files changed, 42 insertions(+), 9 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index f891f711..a8246986 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -2,28 +2,58 @@ name: tests on: push: - branches: [ "main" ] + branches: + - main + paths: + - "**.py" + - "requirements.txt" + - ".github/workflows/*.yml" pull_request: - branches: [ "main" ] + branches: + - main + paths: + - "**.py" + - "requirements.txt" + - ".github/workflows/*.yml" jobs: check_code_quality: - runs-on: ubuntu-latest - steps: - uses: actions/checkout@v4 - - name: Set up Python uses: actions/setup-python@v5 with: python-version: "3.8" - + cache: "pip" + cache-dependency-path: "setup.py" - name: Install dependencies run: | python -m pip install --upgrade pip - python -m pip install ruff - + python -m pip install .[torch,metrics,quality] - name: Check quality run: | make style && make quality + + pytest: + needs: check_code_quality + strategy: + matrix: + python-version: ["3.8", "3.9"] + os: ["ubuntu-latest", "windows-latest"] + runs-on: ${{ matrix.os }} + steps: + - uses: actions/checkout@v4 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + cache: "pip" + cache-dependency-path: "setup.py" + - name: Install dependencies + run: | + python -m pip install --upgrade pip + python -m pip install .[torch,metrics,quality] + - name: Test with pytest + run: | + make test diff --git a/Makefile b/Makefile index 3a4a12c9..65be047b 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -.PHONY: quality style +.PHONY: quality style test check_dirs := scripts src tests @@ -9,3 +9,6 @@ 
quality: style: ruff check $(check_dirs) --fix ruff format $(check_dirs) + +test: + pytest tests/ From ec97bee94415a0ef1a5a225d9a430bcb3ceb9c28 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sat, 8 Jun 2024 01:57:36 +0800 Subject: [PATCH 106/162] fix ci Former-commit-id: 95aceebd61d195be5c980a919c12c59b56722898 --- .github/workflows/tests.yml | 6 +++--- setup.py | 2 +- tests/model/{test_attn.py => test_attention.py} | 14 ++++++++------ 3 files changed, 12 insertions(+), 10 deletions(-) rename tests/model/{test_attn.py => test_attention.py} (73%) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index a8246986..a66b579b 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -30,10 +30,10 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - python -m pip install .[torch,metrics,quality] + python -m pip install .[torch,dev] - name: Check quality run: | - make style && make quality + make style && make quality pytest: needs: check_code_quality @@ -53,7 +53,7 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - python -m pip install .[torch,metrics,quality] + python -m pip install .[torch,dev] - name: Test with pytest run: | make test diff --git a/setup.py b/setup.py index c32be8af..405ac46e 100644 --- a/setup.py +++ b/setup.py @@ -33,7 +33,7 @@ extra_require = { "aqlm": ["aqlm[gpu]>=1.1.0"], "qwen": ["transformers_stream_generator"], "modelscope": ["modelscope"], - "quality": ["ruff"], + "dev": ["ruff", "pytest"], } diff --git a/tests/model/test_attn.py b/tests/model/test_attention.py similarity index 73% rename from tests/model/test_attn.py rename to tests/model/test_attention.py index 12d920ef..6dd46050 100644 --- a/tests/model/test_attn.py +++ b/tests/model/test_attention.py @@ -23,13 +23,15 @@ def test_attention(): "fa2": "LlamaFlashAttention2", } for requested_attention in attention_available: - model_args, _, finetuning_args, _ = get_infer_args({ - "model_name_or_path": TINY_LLAMA, - "template": "llama2", - "flash_attn": requested_attention, - }) + model_args, _, finetuning_args, _ = get_infer_args( + { + "model_name_or_path": TINY_LLAMA, + "template": "llama2", + "flash_attn": requested_attention, + } + ) tokenizer = load_tokenizer(model_args) model = load_model(tokenizer["tokenizer"], model_args, finetuning_args) for module in model.modules(): if "Attention" in module.__class__.__name__: - assert module.__class__.__name__ == llama_attention_classes[requested_attention] + assert module.__class__.__name__ == llama_attention_classes[requested_attention] From 71d048adb4139836bbbf660e2ad95da1aea3f0f0 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sat, 8 Jun 2024 02:00:44 +0800 Subject: [PATCH 107/162] fix ci Former-commit-id: 3f4d293fd861d765edb2040f80d16f99a5e1e3c6 --- .github/workflows/tests.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index a66b579b..818d58fc 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -39,8 +39,10 @@ jobs: needs: check_code_quality strategy: matrix: - python-version: ["3.8", "3.9"] - os: ["ubuntu-latest", "windows-latest"] + python-version: + - "3.8" + os: + - "ubuntu-latest" runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@v4 From d9aa226c0858c82f9f3c4aa95db1197f061469d4 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sat, 8 Jun 2024 02:42:34 +0800 Subject: [PATCH 108/162] add 
ultrafeedback and fineweb #4085 #4132 Former-commit-id: 968e4992e2f2a3ccba73e8668f1654ddc6eb0034 --- .github/workflows/tests.yml | 24 +----------------------- README.md | 3 +++ README_zh.md | 3 +++ data/dataset_info.json | 31 +++++++++++++++++++++++++++++++ 4 files changed, 38 insertions(+), 23 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 818d58fc..32edf6a8 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -17,7 +17,7 @@ on: - ".github/workflows/*.yml" jobs: - check_code_quality: + tests: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 @@ -34,28 +34,6 @@ jobs: - name: Check quality run: | make style && make quality - - pytest: - needs: check_code_quality - strategy: - matrix: - python-version: - - "3.8" - os: - - "ubuntu-latest" - runs-on: ${{ matrix.os }} - steps: - - uses: actions/checkout@v4 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - cache: "pip" - cache-dependency-path: "setup.py" - - name: Install dependencies - run: | - python -m pip install --upgrade pip - python -m pip install .[torch,dev] - name: Test with pytest run: | make test diff --git a/README.md b/README.md index 44897420..fb6c5782 100644 --- a/README.md +++ b/README.md @@ -214,6 +214,8 @@ You also can add a custom chat template to [template.py](src/llamafactory/data/t - [Wikipedia (zh)](https://huggingface.co/datasets/pleisto/wikipedia-cn-20230720-filtered) - [Pile (en)](https://huggingface.co/datasets/EleutherAI/pile) - [SkyPile (zh)](https://huggingface.co/datasets/Skywork/SkyPile-150B) +- [FineWeb (en)](https://huggingface.co/datasets/HuggingFaceFW/fineweb) +- [FineWeb-Edu (en)](https://huggingface.co/datasets/HuggingFaceFW/fineweb-edu) - [The Stack (en)](https://huggingface.co/datasets/bigcode/the-stack) - [StarCoder (en)](https://huggingface.co/datasets/bigcode/starcoderdata) @@ -273,6 +275,7 @@ You also can add a custom chat template to [template.py](src/llamafactory/data/t
Preference datasets - [DPO mixed (en&zh)](https://huggingface.co/datasets/hiyouga/DPO-En-Zh-20k) +- [UltraFeedback (en)](https://huggingface.co/datasets/HuggingFaceH4/ultrafeedback_binarized) - [Orca DPO Pairs (en)](https://huggingface.co/datasets/Intel/orca_dpo_pairs) - [HH-RLHF (en)](https://huggingface.co/datasets/Anthropic/hh-rlhf) - [Nectar (en)](https://huggingface.co/datasets/berkeley-nest/Nectar) diff --git a/README_zh.md b/README_zh.md index 8321d202..142254df 100644 --- a/README_zh.md +++ b/README_zh.md @@ -214,6 +214,8 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/ec36a9dd-37f4-4f72-81bd - [Wikipedia (zh)](https://huggingface.co/datasets/pleisto/wikipedia-cn-20230720-filtered) - [Pile (en)](https://huggingface.co/datasets/EleutherAI/pile) - [SkyPile (zh)](https://huggingface.co/datasets/Skywork/SkyPile-150B) +- [FineWeb (en)](https://huggingface.co/datasets/HuggingFaceFW/fineweb) +- [FineWeb-Edu (en)](https://huggingface.co/datasets/HuggingFaceFW/fineweb-edu) - [The Stack (en)](https://huggingface.co/datasets/bigcode/the-stack) - [StarCoder (en)](https://huggingface.co/datasets/bigcode/starcoderdata) @@ -273,6 +275,7 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/ec36a9dd-37f4-4f72-81bd
偏好数据集 - [DPO mixed (en&zh)](https://huggingface.co/datasets/hiyouga/DPO-En-Zh-20k) +- [UltraFeedback (en)](https://huggingface.co/datasets/HuggingFaceH4/ultrafeedback_binarized) - [Orca DPO Pairs (en)](https://huggingface.co/datasets/Intel/orca_dpo_pairs) - [HH-RLHF (en)](https://huggingface.co/datasets/Anthropic/hh-rlhf) - [Nectar (en)](https://huggingface.co/datasets/berkeley-nest/Nectar) diff --git a/data/dataset_info.json b/data/dataset_info.json index 2d9b0c83..8c5cbb45 100644 --- a/data/dataset_info.json +++ b/data/dataset_info.json @@ -391,6 +391,16 @@ "rejected": "rejected" } }, + "ultrafeedback": { + "hf_hub_url": "llamafactory/ultrafeedback_binarized", + "ms_hub_url": "llamafactory/ultrafeedback_binarized", + "ranking": true, + "columns": { + "prompt": "instruction", + "chosen": "chosen", + "rejected": "rejected" + } + }, "orca_pairs": { "hf_hub_url": "Intel/orca_dpo_pairs", "ranking": true, @@ -448,6 +458,15 @@ "assistant_tag": "assistant" } }, + "ultrafeedback_kto": { + "hf_hub_url": "argilla/ultrafeedback-binarized-preferences-cleaned-kto", + "ms_hub_url": "AI-ModelScope/ultrafeedback-binarized-preferences-cleaned-kto", + "columns": { + "prompt": "prompt", + "response": "completion", + "kto_tag": "label" + } + }, "wiki_demo": { "file_name": "wiki_demo.txt", "columns": { @@ -501,6 +520,18 @@ "prompt": "text" } }, + "fileweb": { + "hf_hub_url": "HuggingFaceFW/fineweb", + "columns": { + "prompt": "text" + } + }, + "fileweb_edu": { + "hf_hub_url": "HuggingFaceFW/fineweb-edu", + "columns": { + "prompt": "text" + } + }, "the_stack": { "hf_hub_url": "bigcode/the-stack", "ms_hub_url": "AI-ModelScope/the-stack", From cabe5ca7d0513a1c52ea322aaff7312bce19e322 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sat, 8 Jun 2024 05:20:54 +0800 Subject: [PATCH 109/162] release v0.8.0 Former-commit-id: 004db680b9e3996ec511ee818df6c0c02bf13603 --- src/llamafactory/data/template.py | 11 +--- src/llamafactory/extras/env.py | 2 +- tests/data/test_supervised.py | 44 +++++++++++++ .../model/{ => model_utils}/test_attention.py | 4 +- tests/model/test_freeze.py | 61 +++++++++++++++++++ tests/model/test_full.py | 33 ++++++++++ 6 files changed, 142 insertions(+), 13 deletions(-) create mode 100644 tests/data/test_supervised.py rename tests/model/{ => model_utils}/test_attention.py (88%) create mode 100644 tests/model/test_freeze.py create mode 100644 tests/model/test_full.py diff --git a/src/llamafactory/data/template.py b/src/llamafactory/data/template.py index 3dce5ec6..b600c567 100644 --- a/src/llamafactory/data/template.py +++ b/src/llamafactory/data/template.py @@ -700,17 +700,8 @@ _register_template( _register_template( name="llama2", format_user=StringFormatter(slots=[{"bos_token"}, "[INST] {{content}} [/INST]"]), + format_assistant=StringFormatter(slots=[" {{content}} ", {"eos_token"}]), format_system=StringFormatter(slots=["<>\n{{content}}\n<>\n\n"]), - default_system=( - "You are a helpful, respectful and honest assistant. " - "Always answer as helpfully as possible, while being safe. " - "Your answers should not include any harmful, unethical, " - "racist, sexist, toxic, dangerous, or illegal content. " - "Please ensure that your responses are socially unbiased and positive in nature.\n\n" - "If a question does not make any sense, or is not factually coherent, " - "explain why instead of answering something not correct. " - "If you don't know the answer to a question, please don't share false information." 
- ), ) diff --git a/src/llamafactory/extras/env.py b/src/llamafactory/extras/env.py index fdccf86b..cd81442d 100644 --- a/src/llamafactory/extras/env.py +++ b/src/llamafactory/extras/env.py @@ -12,7 +12,7 @@ from transformers.utils import is_bitsandbytes_available, is_torch_cuda_availabl from .packages import is_vllm_available -VERSION = "0.7.2.dev0" +VERSION = "0.8.0" def print_env() -> None: diff --git a/tests/data/test_supervised.py b/tests/data/test_supervised.py new file mode 100644 index 00000000..bb7f71df --- /dev/null +++ b/tests/data/test_supervised.py @@ -0,0 +1,44 @@ +import os + +import pytest +from datasets import load_dataset + +from llamafactory.data import get_dataset +from llamafactory.hparams import get_train_args +from llamafactory.model import load_tokenizer + + +TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-LlamaForCausalLM") + +TRAINING_ARGS = { + "model_name_or_path": TINY_LLAMA, + "stage": "sft", + "do_train": True, + "finetuning_type": "full", + "dataset": "llamafactory/tiny_dataset", + "dataset_dir": "ONLINE", + "template": "llama3", + "cutoff_len": 1024, + "overwrite_cache": True, + "output_dir": "dummy_dir", + "overwrite_output_dir": True, + "fp16": True, +} + + +@pytest.mark.parametrize("test_num", [5]) +def test_supervised(test_num: int): + model_args, data_args, training_args, _, _ = get_train_args(TRAINING_ARGS) + tokenizer_module = load_tokenizer(model_args) + tokenizer = tokenizer_module["tokenizer"] + tokenized_data = get_dataset(model_args, data_args, training_args, stage="sft", **tokenizer_module) + + original_data = load_dataset(TRAINING_ARGS["dataset"], split="train") + for test_idx in range(test_num): + decode_result = tokenizer.decode(tokenized_data["input_ids"][test_idx]) + messages = [ + {"role": "user", "content": original_data[test_idx]["instruction"]}, + {"role": "assistant", "content": original_data[test_idx]["output"]}, + ] + templated_result = tokenizer.apply_chat_template(messages, tokenize=False) + assert decode_result == templated_result diff --git a/tests/model/test_attention.py b/tests/model/model_utils/test_attention.py similarity index 88% rename from tests/model/test_attention.py rename to tests/model/model_utils/test_attention.py index 6dd46050..4d414289 100644 --- a/tests/model/test_attention.py +++ b/tests/model/model_utils/test_attention.py @@ -30,8 +30,8 @@ def test_attention(): "flash_attn": requested_attention, } ) - tokenizer = load_tokenizer(model_args) - model = load_model(tokenizer["tokenizer"], model_args, finetuning_args) + tokenizer_module = load_tokenizer(model_args) + model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args) for module in model.modules(): if "Attention" in module.__class__.__name__: assert module.__class__.__name__ == llama_attention_classes[requested_attention] diff --git a/tests/model/test_freeze.py b/tests/model/test_freeze.py new file mode 100644 index 00000000..c6cdec78 --- /dev/null +++ b/tests/model/test_freeze.py @@ -0,0 +1,61 @@ +import os + +import torch + +from llamafactory.hparams import get_train_args +from llamafactory.model import load_model, load_tokenizer + + +TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-LlamaForCausalLM") + +TRAINING_ARGS = { + "model_name_or_path": TINY_LLAMA, + "stage": "sft", + "do_train": True, + "finetuning_type": "freeze", + "dataset": "llamafactory/tiny_dataset", + "dataset_dir": "ONLINE", + "template": "llama3", + "cutoff_len": 1024, + "overwrite_cache": True, + "output_dir": "dummy_dir", + 
"overwrite_output_dir": True, + "fp16": True, +} + + +def test_freeze_all_modules(): + model_args, _, _, finetuning_args, _ = get_train_args( + { + "freeze_trainable_layers": 1, + **TRAINING_ARGS, + } + ) + tokenizer_module = load_tokenizer(model_args) + model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) + for name, param in model.named_parameters(): + if name.startswith("model.layers.1."): + assert param.requires_grad is True + assert param.dtype == torch.float32 + else: + assert param.requires_grad is False + assert param.dtype == torch.float16 + + +def test_freeze_extra_modules(): + model_args, _, _, finetuning_args, _ = get_train_args( + { + "freeze_trainable_layers": 1, + "freeze_extra_modules": "embed_tokens,lm_head", + **TRAINING_ARGS, + } + ) + tokenizer_module = load_tokenizer(model_args) + model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) + for name, param in model.named_parameters(): + if name.startswith("model.layers.1.") or any(module in name for module in ["embed_tokens", "lm_head"]): + assert param.requires_grad is True + assert param.dtype == torch.float32 + else: + assert param.requires_grad is False + assert param.dtype == torch.float16 diff --git a/tests/model/test_full.py b/tests/model/test_full.py new file mode 100644 index 00000000..ef57a980 --- /dev/null +++ b/tests/model/test_full.py @@ -0,0 +1,33 @@ +import os + +import torch + +from llamafactory.hparams import get_train_args +from llamafactory.model import load_model, load_tokenizer + + +TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-LlamaForCausalLM") + +TRAINING_ARGS = { + "model_name_or_path": TINY_LLAMA, + "stage": "sft", + "do_train": True, + "finetuning_type": "full", + "dataset": "llamafactory/tiny_dataset", + "dataset_dir": "ONLINE", + "template": "llama3", + "cutoff_len": 1024, + "overwrite_cache": True, + "output_dir": "dummy_dir", + "overwrite_output_dir": True, + "fp16": True, +} + + +def test_full(): + model_args, _, _, finetuning_args, _ = get_train_args(TRAINING_ARGS) + tokenizer_module = load_tokenizer(model_args) + model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) + for param in model.parameters(): + assert param.requires_grad is True + assert param.dtype == torch.float32 From 088292e84a65b4c27750761e84178a5b59ef9024 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sat, 8 Jun 2024 06:46:09 +0800 Subject: [PATCH 110/162] set dev version Former-commit-id: 08b7fe1c452cc99264ff0312e310b579590c6a45 --- src/llamafactory/extras/env.py | 2 +- tests/model/test_lora.py | 72 ++++++++++++++++++++++++++++++++++ 2 files changed, 73 insertions(+), 1 deletion(-) create mode 100644 tests/model/test_lora.py diff --git a/src/llamafactory/extras/env.py b/src/llamafactory/extras/env.py index cd81442d..2b9c6458 100644 --- a/src/llamafactory/extras/env.py +++ b/src/llamafactory/extras/env.py @@ -12,7 +12,7 @@ from transformers.utils import is_bitsandbytes_available, is_torch_cuda_availabl from .packages import is_vllm_available -VERSION = "0.8.0" +VERSION = "0.8.1.dev0" def print_env() -> None: diff --git a/tests/model/test_lora.py b/tests/model/test_lora.py new file mode 100644 index 00000000..1f2c02ae --- /dev/null +++ b/tests/model/test_lora.py @@ -0,0 +1,72 @@ +import os + +import torch + +from llamafactory.hparams import get_train_args +from llamafactory.model import load_model, load_tokenizer + + +TINY_LLAMA = os.environ.get("TINY_LLAMA", 
"llamafactory/tiny-random-LlamaForCausalLM") + +TRAINING_ARGS = { + "model_name_or_path": TINY_LLAMA, + "stage": "sft", + "do_train": True, + "finetuning_type": "lora", + "dataset": "llamafactory/tiny_dataset", + "dataset_dir": "ONLINE", + "template": "llama3", + "cutoff_len": 1024, + "overwrite_cache": True, + "output_dir": "dummy_dir", + "overwrite_output_dir": True, + "fp16": True, +} + + +def test_lora_all_modules(): + model_args, _, _, finetuning_args, _ = get_train_args( + { + "lora_target": "all", + **TRAINING_ARGS, + } + ) + tokenizer_module = load_tokenizer(model_args) + model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) + linear_modules = set() + for name, param in model.named_parameters(): + if any(module in name for module in ["lora_A", "lora_B"]): + linear_modules.add(name.split(".lora_", maxsplit=1)[0].split(".")[-1]) + assert param.requires_grad is True + assert param.dtype == torch.float32 + else: + assert param.requires_grad is False + assert param.dtype == torch.float16 + + assert linear_modules == {"q_proj", "k_proj", "v_proj", "o_proj", "up_proj", "gate_proj", "down_proj"} + + +def test_lora_extra_modules(): + model_args, _, _, finetuning_args, _ = get_train_args( + { + "lora_target": "all", + "additional_target": "embed_tokens,lm_head", + **TRAINING_ARGS, + } + ) + tokenizer_module = load_tokenizer(model_args) + model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) + extra_modules = set() + for name, param in model.named_parameters(): + if any(module in name for module in ["lora_A", "lora_B"]): + assert param.requires_grad is True + assert param.dtype == torch.float32 + elif "modules_to_save" in name: + extra_modules.add(name.split(".modules_to_save", maxsplit=1)[0].split(".")[-1]) + assert param.requires_grad is True + assert param.dtype == torch.float32 + else: + assert param.requires_grad is False + assert param.dtype == torch.float16 + + assert extra_modules == {"embed_tokens", "lm_head"} From a2acefea6eef0603aff2639e29dcafeaf4a93c10 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sat, 8 Jun 2024 07:15:45 +0800 Subject: [PATCH 111/162] fix llamafactory-cli env Former-commit-id: b0515e5f42831b67d1f4d049999ecb68756e66db --- src/llamafactory/extras/env.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/src/llamafactory/extras/env.py b/src/llamafactory/extras/env.py index 2b9c6458..1d4e43f1 100644 --- a/src/llamafactory/extras/env.py +++ b/src/llamafactory/extras/env.py @@ -6,10 +6,7 @@ import peft import torch import transformers import trl -from transformers.integrations import is_deepspeed_available -from transformers.utils import is_bitsandbytes_available, is_torch_cuda_available, is_torch_npu_available - -from .packages import is_vllm_available +from transformers.utils import is_torch_cuda_available, is_torch_npu_available VERSION = "0.8.1.dev0" @@ -37,19 +34,25 @@ def print_env() -> None: info["NPU type"] = torch.npu.get_device_name() info["CANN version"] = torch.version.cann - if is_deepspeed_available(): + try: import deepspeed # type: ignore info["DeepSpeed version"] = deepspeed.__version__ + except Exception: + pass - if is_bitsandbytes_available(): + try: import bitsandbytes info["Bitsandbytes version"] = bitsandbytes.__version__ + except Exception: + pass - if is_vllm_available(): + try: import vllm info["vLLM version"] = vllm.__version__ + except Exception: + pass print("\n" + "\n".join(["- {}: {}".format(key, 
value) for key, value in info.items()]) + "\n") From 9d88d0bcd861b1383dcbaedde4bcbe20452816fc Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sat, 8 Jun 2024 21:11:32 +0800 Subject: [PATCH 112/162] update git workflows Former-commit-id: 5a3f26bc53433caa98b2a66294becaf156280a4c --- .github/workflows/label_issue.yml | 17 +++++++++++++++++ .github/workflows/tests.yml | 17 ++++++++--------- 2 files changed, 25 insertions(+), 9 deletions(-) create mode 100644 .github/workflows/label_issue.yml diff --git a/.github/workflows/label_issue.yml b/.github/workflows/label_issue.yml new file mode 100644 index 00000000..b9a5543c --- /dev/null +++ b/.github/workflows/label_issue.yml @@ -0,0 +1,17 @@ +name: label_issue + +on: + issues: + types: + - opened + +jobs: + label_issue: + runs-on: ubuntu-latest + + steps: + - env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + ISSUE_URL: ${{ github.event.issue.html_url }} + run: | + gh issue edit $ISSUE_URL --add-label "pending" diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 32edf6a8..6ddcbc05 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -3,14 +3,7 @@ name: tests on: push: branches: - - main - paths: - - "**.py" - - "requirements.txt" - - ".github/workflows/*.yml" - pull_request: - branches: - - main + - $default-branch paths: - "**.py" - "requirements.txt" @@ -19,21 +12,27 @@ on: jobs: tests: runs-on: ubuntu-latest + steps: - - uses: actions/checkout@v4 + - name: Checkout + uses: actions/checkout@v4 + - name: Set up Python uses: actions/setup-python@v5 with: python-version: "3.8" cache: "pip" cache-dependency-path: "setup.py" + - name: Install dependencies run: | python -m pip install --upgrade pip python -m pip install .[torch,dev] + - name: Check quality run: | make style && make quality + - name: Test with pytest run: | make test From d999691d4fba74bc0d7f5474fb3c037ba81a804a Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sat, 8 Jun 2024 21:15:36 +0800 Subject: [PATCH 113/162] Update tests.yml Former-commit-id: e90f0cc30d6bb819246ccc08935c39e714c179a1 --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 6ddcbc05..f3ac96db 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -3,7 +3,7 @@ name: tests on: push: branches: - - $default-branch + - main paths: - "**.py" - "requirements.txt" From fcd42d8e3adeca9b3d4e65e178834d620ce3c451 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sat, 8 Jun 2024 21:25:35 +0800 Subject: [PATCH 114/162] add pr ci Former-commit-id: 9b05bb8540b946d0c74bf804bcafc4a785d22c47 --- .github/workflows/tests.yml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index f3ac96db..96092662 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -8,6 +8,15 @@ on: - "**.py" - "requirements.txt" - ".github/workflows/*.yml" + pull_request: + types: + - review_requested + branches: + - main + paths: + - "**.py" + - "requirements.txt" + - ".github/workflows/*.yml" jobs: tests: From 74764be45affef2b2d1145dd36b26df8ad744b8a Mon Sep 17 00:00:00 2001 From: "-.-" Date: Sat, 8 Jun 2024 23:51:56 +0800 Subject: [PATCH 115/162] fix README Former-commit-id: fa30028c0b83c38610b596209493a748b8ca0928 --- README.md | 2 +- README_zh.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 
fb6c5782..4dea65b9 100644 --- a/README.md +++ b/README.md @@ -335,7 +335,7 @@ huggingface-cli login ```bash git clone --depth 1 https://github.com/hiyouga/LLaMA-Factory.git cd LLaMA-Factory -pip install -e '.[torch,metrics]' +pip install -e ".[torch,metrics]" ``` Extra dependencies available: torch, torch_npu, metrics, deepspeed, bitsandbytes, vllm, galore, badam, gptq, awq, aqlm, qwen, modelscope, quality diff --git a/README_zh.md b/README_zh.md index 142254df..ab0e8cb7 100644 --- a/README_zh.md +++ b/README_zh.md @@ -335,7 +335,7 @@ huggingface-cli login ```bash git clone --depth 1 https://github.com/hiyouga/LLaMA-Factory.git cd LLaMA-Factory -pip install -e '.[torch,metrics]' +pip install -e ".[torch,metrics]" ``` 可选的额外依赖项:torch、torch_npu、metrics、deepspeed、bitsandbytes、vllm、galore、badam、gptq、awq、aqlm、qwen、modelscope、quality From 7474e8035fcdbb08a3a047d0e81354229c785dbc Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Mon, 10 Jun 2024 21:24:15 +0800 Subject: [PATCH 116/162] fix #2666 Former-commit-id: f121d5c4f94af9f165132c4309cb9bdc8217d985 --- src/llamafactory/model/adapter.py | 2 +- tests/data/test_supervised.py | 32 ++++++++++++++--------- tests/model/model_utils/test_attention.py | 15 +++++------ tests/model/test_freeze.py | 19 ++++---------- tests/model/test_full.py | 8 +++--- tests/model/test_lora.py | 19 ++++---------- 6 files changed, 41 insertions(+), 54 deletions(-) diff --git a/src/llamafactory/model/adapter.py b/src/llamafactory/model/adapter.py index f4e501a7..34b9eda6 100644 --- a/src/llamafactory/model/adapter.py +++ b/src/llamafactory/model/adapter.py @@ -209,6 +209,7 @@ def _setup_lora_tuning( "lora_alpha": finetuning_args.lora_alpha, "lora_dropout": finetuning_args.lora_dropout, "use_rslora": finetuning_args.use_rslora, + "use_dora": finetuning_args.use_dora, "modules_to_save": finetuning_args.additional_target, } @@ -218,7 +219,6 @@ def _setup_lora_tuning( lora_config = LoraConfig( task_type=TaskType.CAUSAL_LM, inference_mode=False, - use_dora=finetuning_args.use_dora, **peft_kwargs, ) model = get_peft_model(model, lora_config) diff --git a/tests/data/test_supervised.py b/tests/data/test_supervised.py index bb7f71df..63a3453f 100644 --- a/tests/data/test_supervised.py +++ b/tests/data/test_supervised.py @@ -1,4 +1,5 @@ import os +import random import pytest from datasets import load_dataset @@ -8,17 +9,17 @@ from llamafactory.hparams import get_train_args from llamafactory.model import load_tokenizer -TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-LlamaForCausalLM") +TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-Llama-3") -TRAINING_ARGS = { +TRAIN_ARGS = { "model_name_or_path": TINY_LLAMA, "stage": "sft", "do_train": True, "finetuning_type": "full", - "dataset": "llamafactory/tiny_dataset", + "dataset": "llamafactory/tiny-supervised-dataset", "dataset_dir": "ONLINE", "template": "llama3", - "cutoff_len": 1024, + "cutoff_len": 8192, "overwrite_cache": True, "output_dir": "dummy_dir", "overwrite_output_dir": True, @@ -26,19 +27,24 @@ TRAINING_ARGS = { } -@pytest.mark.parametrize("test_num", [5]) -def test_supervised(test_num: int): - model_args, data_args, training_args, _, _ = get_train_args(TRAINING_ARGS) +@pytest.mark.parametrize("num_samples", [10]) +def test_supervised(num_samples: int): + model_args, data_args, training_args, _, _ = get_train_args(TRAIN_ARGS) tokenizer_module = load_tokenizer(model_args) tokenizer = tokenizer_module["tokenizer"] tokenized_data = get_dataset(model_args, data_args, 
training_args, stage="sft", **tokenizer_module) - original_data = load_dataset(TRAINING_ARGS["dataset"], split="train") - for test_idx in range(test_num): - decode_result = tokenizer.decode(tokenized_data["input_ids"][test_idx]) + original_data = load_dataset(TRAIN_ARGS["dataset"], split="train") + indexes = random.choices(range(len(original_data)), k=num_samples) + for index in indexes: + decoded_result = tokenizer.decode(tokenized_data["input_ids"][index]) + prompt = original_data[index]["instruction"] + if original_data[index]["input"]: + prompt += "\n" + original_data[index]["input"] + messages = [ - {"role": "user", "content": original_data[test_idx]["instruction"]}, - {"role": "assistant", "content": original_data[test_idx]["output"]}, + {"role": "user", "content": prompt}, + {"role": "assistant", "content": original_data[index]["output"]}, ] templated_result = tokenizer.apply_chat_template(messages, tokenize=False) - assert decode_result == templated_result + assert decoded_result == templated_result diff --git a/tests/model/model_utils/test_attention.py b/tests/model/model_utils/test_attention.py index 4d414289..751adda4 100644 --- a/tests/model/model_utils/test_attention.py +++ b/tests/model/model_utils/test_attention.py @@ -6,7 +6,12 @@ from llamafactory.hparams import get_infer_args from llamafactory.model import load_model, load_tokenizer -TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-LlamaForCausalLM") +TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-Llama-3") + +INFER_ARGS = { + "model_name_or_path": TINY_LLAMA, + "template": "llama3", +} def test_attention(): @@ -23,13 +28,7 @@ def test_attention(): "fa2": "LlamaFlashAttention2", } for requested_attention in attention_available: - model_args, _, finetuning_args, _ = get_infer_args( - { - "model_name_or_path": TINY_LLAMA, - "template": "llama2", - "flash_attn": requested_attention, - } - ) + model_args, _, finetuning_args, _ = get_infer_args({"flash_attn": requested_attention, **INFER_ARGS}) tokenizer_module = load_tokenizer(model_args) model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args) for module in model.modules(): diff --git a/tests/model/test_freeze.py b/tests/model/test_freeze.py index c6cdec78..97800696 100644 --- a/tests/model/test_freeze.py +++ b/tests/model/test_freeze.py @@ -6,14 +6,14 @@ from llamafactory.hparams import get_train_args from llamafactory.model import load_model, load_tokenizer -TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-LlamaForCausalLM") +TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-Llama-3") -TRAINING_ARGS = { +TRAIN_ARGS = { "model_name_or_path": TINY_LLAMA, "stage": "sft", "do_train": True, "finetuning_type": "freeze", - "dataset": "llamafactory/tiny_dataset", + "dataset": "llamafactory/tiny-supervised-dataset", "dataset_dir": "ONLINE", "template": "llama3", "cutoff_len": 1024, @@ -25,12 +25,7 @@ TRAINING_ARGS = { def test_freeze_all_modules(): - model_args, _, _, finetuning_args, _ = get_train_args( - { - "freeze_trainable_layers": 1, - **TRAINING_ARGS, - } - ) + model_args, _, _, finetuning_args, _ = get_train_args({"freeze_trainable_layers": 1, **TRAIN_ARGS}) tokenizer_module = load_tokenizer(model_args) model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) for name, param in model.named_parameters(): @@ -44,11 +39,7 @@ def test_freeze_all_modules(): def test_freeze_extra_modules(): model_args, _, _, finetuning_args, _ = 
get_train_args( - { - "freeze_trainable_layers": 1, - "freeze_extra_modules": "embed_tokens,lm_head", - **TRAINING_ARGS, - } + {"freeze_trainable_layers": 1, "freeze_extra_modules": "embed_tokens,lm_head", **TRAIN_ARGS} ) tokenizer_module = load_tokenizer(model_args) model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) diff --git a/tests/model/test_full.py b/tests/model/test_full.py index ef57a980..6cb78f37 100644 --- a/tests/model/test_full.py +++ b/tests/model/test_full.py @@ -6,14 +6,14 @@ from llamafactory.hparams import get_train_args from llamafactory.model import load_model, load_tokenizer -TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-LlamaForCausalLM") +TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-Llama-3") -TRAINING_ARGS = { +TRAIN_ARGS = { "model_name_or_path": TINY_LLAMA, "stage": "sft", "do_train": True, "finetuning_type": "full", - "dataset": "llamafactory/tiny_dataset", + "dataset": "llamafactory/tiny-supervised-dataset", "dataset_dir": "ONLINE", "template": "llama3", "cutoff_len": 1024, @@ -25,7 +25,7 @@ TRAINING_ARGS = { def test_full(): - model_args, _, _, finetuning_args, _ = get_train_args(TRAINING_ARGS) + model_args, _, _, finetuning_args, _ = get_train_args(TRAIN_ARGS) tokenizer_module = load_tokenizer(model_args) model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) for param in model.parameters(): diff --git a/tests/model/test_lora.py b/tests/model/test_lora.py index 1f2c02ae..2e2b89d9 100644 --- a/tests/model/test_lora.py +++ b/tests/model/test_lora.py @@ -6,14 +6,14 @@ from llamafactory.hparams import get_train_args from llamafactory.model import load_model, load_tokenizer -TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-LlamaForCausalLM") +TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-Llama-3") -TRAINING_ARGS = { +TRAIN_ARGS = { "model_name_or_path": TINY_LLAMA, "stage": "sft", "do_train": True, "finetuning_type": "lora", - "dataset": "llamafactory/tiny_dataset", + "dataset": "llamafactory/tiny-supervised-dataset", "dataset_dir": "ONLINE", "template": "llama3", "cutoff_len": 1024, @@ -25,12 +25,7 @@ TRAINING_ARGS = { def test_lora_all_modules(): - model_args, _, _, finetuning_args, _ = get_train_args( - { - "lora_target": "all", - **TRAINING_ARGS, - } - ) + model_args, _, _, finetuning_args, _ = get_train_args({"lora_target": "all", **TRAIN_ARGS}) tokenizer_module = load_tokenizer(model_args) model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) linear_modules = set() @@ -48,11 +43,7 @@ def test_lora_all_modules(): def test_lora_extra_modules(): model_args, _, _, finetuning_args, _ = get_train_args( - { - "lora_target": "all", - "additional_target": "embed_tokens,lm_head", - **TRAINING_ARGS, - } + {"lora_target": "all", "additional_target": "embed_tokens,lm_head", **TRAIN_ARGS} ) tokenizer_module = load_tokenizer(model_args) model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) From bc86e70af9b30c3aa96d0162b21b292ca79e252e Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Mon, 10 Jun 2024 23:56:00 +0800 Subject: [PATCH 117/162] update evaluator Former-commit-id: bb8661e62481ff7027b8969f3d8a6a17290c9da3 --- src/llamafactory/eval/evaluator.py | 4 +- src/llamafactory/eval/template.py | 9 ++-- tests/eval/test_eval_template.py | 77 ++++++++++++++++++++++++++++++ 3 files changed, 81 
insertions(+), 9 deletions(-) create mode 100644 tests/eval/test_eval_template.py diff --git a/src/llamafactory/eval/evaluator.py b/src/llamafactory/eval/evaluator.py index 192f4815..5c6fb104 100644 --- a/src/llamafactory/eval/evaluator.py +++ b/src/llamafactory/eval/evaluator.py @@ -26,9 +26,7 @@ class Evaluator: self.template = get_template_and_fix_tokenizer(self.tokenizer, self.data_args.template) self.model = load_model(self.tokenizer, self.model_args, finetuning_args) self.eval_template = get_eval_template(self.eval_args.lang) - self.choice_inputs = [ - self.tokenizer.encode(self.eval_template.prefix + ch, add_special_tokens=False)[-1] for ch in CHOICES - ] + self.choice_inputs = [self.tokenizer.encode(ch, add_special_tokens=False)[-1] for ch in CHOICES] @torch.inference_mode() def batch_inference(self, batch_input: Dict[str, torch.Tensor]) -> List[str]: diff --git a/src/llamafactory/eval/template.py b/src/llamafactory/eval/template.py index a4a6ef0e..2cbb5aaf 100644 --- a/src/llamafactory/eval/template.py +++ b/src/llamafactory/eval/template.py @@ -10,7 +10,6 @@ class EvalTemplate: system: str choice: str answer: str - prefix: str def _parse_example(self, example: Dict[str, str]) -> Tuple[str, str]: r""" @@ -42,8 +41,8 @@ class EvalTemplate: eval_templates: Dict[str, "EvalTemplate"] = {} -def _register_eval_template(name: str, system: str, choice: str, answer: str, prefix: str) -> None: - eval_templates[name] = EvalTemplate(system=system, choice=choice, answer=answer, prefix=prefix) +def _register_eval_template(name: str, system: str, choice: str, answer: str) -> None: + eval_templates[name] = EvalTemplate(system=system, choice=choice, answer=answer) def get_eval_template(name: str) -> "EvalTemplate": @@ -56,8 +55,7 @@ _register_eval_template( name="en", system="The following are multiple choice questions (with answers) about {subject}.\n\n", choice="\n{choice}. {content}", - answer="\nAnswer: ", - prefix=" ", + answer="\nAnswer:", ) @@ -66,5 +64,4 @@ _register_eval_template( system="以下是中国关于{subject}考试的单项选择题,请选出其中的正确答案。\n\n", choice="\n{choice}. {content}", answer="\n答案:", - prefix=" ", ) diff --git a/tests/eval/test_eval_template.py b/tests/eval/test_eval_template.py new file mode 100644 index 00000000..f6a91a67 --- /dev/null +++ b/tests/eval/test_eval_template.py @@ -0,0 +1,77 @@ +from llamafactory.eval.template import get_eval_template + + +def test_eval_template_en(): + support_set = [ + { + "question": "Fewshot question", + "A": "Fewshot1", + "B": "Fewshot2", + "C": "Fewshot3", + "D": "Fewshot4", + "answer": "B", + } + ] + example = { + "question": "Target question", + "A": "Target1", + "B": "Target2", + "C": "Target3", + "D": "Target4", + "answer": "C", + } + template = get_eval_template(name="en") + messages = template.format_example(example, support_set=support_set, subject_name="SubName") + assert messages == [ + { + "role": "user", + "content": ( + "The following are multiple choice questions (with answers) about SubName.\n\n" + "Fewshot question\nA. Fewshot1\nB. Fewshot2\nC. Fewshot3\nD. Fewshot4\nAnswer:" + ), + }, + {"role": "assistant", "content": "B"}, + { + "role": "user", + "content": "Target question\nA. Target1\nB. Target2\nC. Target3\nD. 
Target4\nAnswer:", + }, + {"role": "assistant", "content": "C"}, + ] + + +def test_eval_template_zh(): + support_set = [ + { + "question": "示例问题", + "A": "示例答案1", + "B": "示例答案2", + "C": "示例答案3", + "D": "示例答案4", + "answer": "B", + } + ] + example = { + "question": "目标问题", + "A": "目标答案1", + "B": "目标答案2", + "C": "目标答案3", + "D": "目标答案4", + "answer": "C", + } + template = get_eval_template(name="zh") + messages = template.format_example(example, support_set=support_set, subject_name="主题") + assert messages == [ + { + "role": "user", + "content": ( + "以下是中国关于主题考试的单项选择题,请选出其中的正确答案。\n\n" + "示例问题\nA. 示例答案1\nB. 示例答案2\nC. 示例答案3\nD. 示例答案4\n答案:" + ), + }, + {"role": "assistant", "content": "B"}, + { + "role": "user", + "content": "目标问题\nA. 目标答案1\nB. 目标答案2\nC. 目标答案3\nD. 目标答案4\n答案:", + }, + {"role": "assistant", "content": "C"}, + ] From 0e7c15d2bd422dc952ce782fbd61b726ab828f3e Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Tue, 11 Jun 2024 00:19:17 +0800 Subject: [PATCH 118/162] fix #4145 Fix the docker image Former-commit-id: a9838281156fe870bfcde5d1f7afc15264fd4aad --- Dockerfile | 38 ++++++++++++++++++++++++++++++++++---- README.md | 36 ++++++++++++++++++------------------ README_zh.md | 34 ++++++++++++++++++---------------- docker-compose.yml | 10 ++++++++-- 4 files changed, 78 insertions(+), 40 deletions(-) diff --git a/Dockerfile b/Dockerfile index 0a35e355..45849601 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,14 +1,44 @@ -FROM nvcr.io/nvidia/pytorch:24.01-py3 +# Use the NVIDIA official image with PyTorch 2.3.0 +# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-24-02.html +FROM nvcr.io/nvidia/pytorch:24.02-py3 +# Define installation arguments +ARG INSTALL_BNB=false +ARG INSTALL_VLLM=false +ARG INSTALL_DEEPSPEED=false +ARG PIP_INDEX=https://pypi.org/simple + +# Set the working directory WORKDIR /app +# Install the requirements COPY requirements.txt /app/ -RUN pip install -r requirements.txt +RUN pip config set global.index-url $PIP_INDEX +RUN python -m pip install --upgrade pip +RUN python -m pip install -r requirements.txt +# Copy the rest of the application into the image COPY . /app/ -RUN pip install -e .[metrics,bitsandbytes,qwen] +# Install the LLaMA Factory +RUN EXTRA_PACKAGES="metrics"; \ + if [ "$INSTALL_BNB" = "true" ]; then \ + EXTRA_PACKAGES="${EXTRA_PACKAGES},bitsandbytes"; \ + fi; \ + if [ "$INSTALL_VLLM" = "true" ]; then \ + EXTRA_PACKAGES="${EXTRA_PACKAGES},vllm"; \ + fi; \ + if [ "$INSTALL_DEEPSPEED" = "true" ]; then \ + EXTRA_PACKAGES="${EXTRA_PACKAGES},deepspeed"; \ + fi; \ + pip install -e .[$EXTRA_PACKAGES] && \ + pip uninstall -y transformer-engine + +# Set up volumes VOLUME [ "/root/.cache/huggingface/", "/app/data", "/app/output" ] + +# Expose port 7860 for the LLaMA Board EXPOSE 7860 -CMD [ "llamafactory-cli", "webui" ] +# Expose port 8000 for the API service +EXPOSE 8000 diff --git a/README.md b/README.md index 4dea65b9..35dacd2e 100644 --- a/README.md +++ b/README.md @@ -405,9 +405,9 @@ Please refer to [data/README.md](data/README.md) for checking the details about Use the following 3 commands to run LoRA **fine-tuning**, **inference** and **merging** of the Llama3-8B-Instruct model, respectively. 
```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_sft.yaml -CUDA_VISIBLE_DEVICES=0 llamafactory-cli chat examples/inference/llama3_lora_sft.yaml -CUDA_VISIBLE_DEVICES=0 llamafactory-cli export examples/merge_lora/llama3_lora_sft.yaml +llamafactory-cli train examples/lora_single_gpu/llama3_lora_sft.yaml +llamafactory-cli chat examples/inference/llama3_lora_sft.yaml +llamafactory-cli export examples/merge_lora/llama3_lora_sft.yaml ``` See [examples/README.md](examples/README.md) for advanced usage (including distributed training). @@ -417,33 +417,33 @@ See [examples/README.md](examples/README.md) for advanced usage (including distr ### Fine-Tuning with LLaMA Board GUI (powered by [Gradio](https://github.com/gradio-app/gradio)) -#### Use local environment - ```bash -CUDA_VISIBLE_DEVICES=0 GRADIO_SHARE=1 llamafactory-cli webui +llamafactory-cli webui ``` -
- -#### Use Docker +### Build Docker ```bash -docker build -f ./Dockerfile -t llama-factory:latest . -docker run --gpus=all \ +docker build -f ./Dockerfile \ + --build-arg INSTALL_BNB=false \ + --build-arg INSTALL_VLLM=false \ + --build-arg INSTALL_DEEPSPEED=false \ + --build-arg PIP_INDEX=https://pypi.org/simple \ + -t llamafactory:latest . + +docker run -it --gpus=all \ -v ./hf_cache:/root/.cache/huggingface/ \ -v ./data:/app/data \ -v ./output:/app/output \ -p 7860:7860 \ + -p 8000:8000 \ --shm-size 16G \ - --name llama_factory \ - -d llama-factory:latest + --name llamafactory \ + llamafactory:latest ``` -#### Use Docker Compose - -```bash -docker compose -f ./docker-compose.yml up -d -``` +> [!TIP] +> Use Docker Compose to build image via `docker compose up -d`.
Details about volume diff --git a/README_zh.md b/README_zh.md index ab0e8cb7..0ddb8b19 100644 --- a/README_zh.md +++ b/README_zh.md @@ -405,9 +405,9 @@ Docker 镜像: 下面三行命令分别对 Llama3-8B-Instruct 模型进行 LoRA **微调**、**推理**和**合并**。 ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_sft.yaml -CUDA_VISIBLE_DEVICES=0 llamafactory-cli chat examples/inference/llama3_lora_sft.yaml -CUDA_VISIBLE_DEVICES=0 llamafactory-cli export examples/merge_lora/llama3_lora_sft.yaml +llamafactory-cli train examples/lora_single_gpu/llama3_lora_sft.yaml +llamafactory-cli chat examples/inference/llama3_lora_sft.yaml +llamafactory-cli export examples/merge_lora/llama3_lora_sft.yaml ``` 高级用法请参考 [examples/README_zh.md](examples/README_zh.md)(包括多 GPU 微调)。 @@ -417,31 +417,33 @@ CUDA_VISIBLE_DEVICES=0 llamafactory-cli export examples/merge_lora/llama3_lora_s ### LLaMA Board 可视化微调(由 [Gradio](https://github.com/gradio-app/gradio) 驱动) -#### 使用本地环境 - ```bash -CUDA_VISIBLE_DEVICES=0 GRADIO_SHARE=1 llamafactory-cli webui +llamafactory-cli webui ``` -#### 使用 Docker +### 构建 Docker ```bash -docker build -f ./Dockerfile -t llama-factory:latest . -docker run --gpus=all \ +docker build -f ./Dockerfile \ + --build-arg INSTALL_BNB=false \ + --build-arg INSTALL_VLLM=false \ + --build-arg INSTALL_DEEPSPEED=false \ + --build-arg PIP_INDEX=https://pypi.org/simple \ + -t llamafactory:latest . + +docker run -it --gpus=all \ -v ./hf_cache:/root/.cache/huggingface/ \ -v ./data:/app/data \ -v ./output:/app/output \ -p 7860:7860 \ + -p 8000:8000 \ --shm-size 16G \ - --name llama_factory \ - -d llama-factory:latest + --name llamafactory \ + llamafactory:latest ``` -#### 使用 Docker Compose - -```bash -docker compose -f ./docker-compose.yml up -d -``` +> [!TIP] +> 通过 `docker compose up -d` 使用 Docker Compose 构建镜像。
数据卷详情 diff --git a/docker-compose.yml b/docker-compose.yml index 9602a3e3..b3e4a34d 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,17 +1,23 @@ version: '3.8' services: - llama-factory: + llamafactory: build: dockerfile: Dockerfile context: . - container_name: llama_factory + args: + INSTALL_BNB: false + INSTALL_VLLM: false + INSTALL_DEEPSPEED: false + PIP_INDEX: https://pypi.org/simple + container_name: llamafactory volumes: - ./hf_cache:/root/.cache/huggingface/ - ./data:/app/data - ./output:/app/output ports: - "7860:7860" + - "8000:8000" ipc: host deploy: resources: From 68df064c1fd485ef4ce64a0162b93b76eb1a8061 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Tue, 11 Jun 2024 00:37:17 +0800 Subject: [PATCH 119/162] fix #4160 The split heads should be concatenated in dim=2 Former-commit-id: 4b3f247f270d44df9fe226cfe0dabfb7fcd2deda --- src/llamafactory/model/model_utils/longlora.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/llamafactory/model/model_utils/longlora.py b/src/llamafactory/model/model_utils/longlora.py index c8dc52f5..cd468979 100644 --- a/src/llamafactory/model/model_utils/longlora.py +++ b/src/llamafactory/model/model_utils/longlora.py @@ -96,7 +96,8 @@ def llama_attention_forward( ( attn_output[:, :, : self.num_heads // 2], attn_output[:, :, self.num_heads // 2 :].roll(groupsz // 2, dims=1), - ) + ), + dim=2, ) attn_output = attn_output.reshape(bsz, q_len, self.hidden_size) @@ -194,7 +195,8 @@ def llama_flash_attention_2_forward( ( attn_output[:, :, : self.num_heads // 2], attn_output[:, :, self.num_heads // 2 :].roll(groupsz // 2, dims=1), - ) + ), + dim=2, ) attn_output = attn_output.reshape(bsz, q_len, self.hidden_size).contiguous() @@ -293,7 +295,8 @@ def llama_sdpa_attention_forward( ( attn_output[:, :, : self.num_heads // 2], attn_output[:, :, self.num_heads // 2 :].roll(groupsz // 2, dims=1), - ) + ), + dim=2, ) attn_output = attn_output.reshape(bsz, q_len, self.hidden_size) @@ -303,7 +306,7 @@ def llama_sdpa_attention_forward( def _apply_llama_patch() -> None: - require_version("transformers==4.40.2", "To fix: pip install transformers==4.40.2") + require_version("transformers==4.41.2", "To fix: pip install transformers==4.41.2") LlamaAttention.forward = llama_attention_forward LlamaFlashAttention2.forward = llama_flash_attention_2_forward LlamaSdpaAttention.forward = llama_sdpa_attention_forward From 41eadf54599323b8c64c54cfd848f60972cf0dd0 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Tue, 11 Jun 2024 00:44:26 +0800 Subject: [PATCH 120/162] release v0.8.1 Former-commit-id: 875a34f492701d1c644facbe9ede411af2931513 --- src/llamafactory/extras/env.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llamafactory/extras/env.py b/src/llamafactory/extras/env.py index 1d4e43f1..8c34fd96 100644 --- a/src/llamafactory/extras/env.py +++ b/src/llamafactory/extras/env.py @@ -9,7 +9,7 @@ import trl from transformers.utils import is_torch_cuda_available, is_torch_npu_available -VERSION = "0.8.1.dev0" +VERSION = "0.8.1" def print_env() -> None: From e540759f4f31756dd35497b2956cc8c729ebc6ab Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Tue, 11 Jun 2024 00:50:53 +0800 Subject: [PATCH 121/162] set dev version Former-commit-id: 16c47cc15226119e33e46ba0f2f6ccb37072257f --- src/llamafactory/extras/env.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llamafactory/extras/env.py b/src/llamafactory/extras/env.py index 8c34fd96..a8cb799d 
100644 --- a/src/llamafactory/extras/env.py +++ b/src/llamafactory/extras/env.py @@ -9,7 +9,7 @@ import trl from transformers.utils import is_torch_cuda_available, is_torch_npu_available -VERSION = "0.8.1" +VERSION = "0.8.2.dev0" def print_env() -> None: From 8c7943c4de5c6a4d695407304b2fdf37798caf00 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Tue, 11 Jun 2024 01:04:16 +0800 Subject: [PATCH 122/162] tiny fix Former-commit-id: b5e9711ef375cc323fc083e742cccfc974550416 --- src/llamafactory/model/model_utils/longlora.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/llamafactory/model/model_utils/longlora.py b/src/llamafactory/model/model_utils/longlora.py index cd468979..4a8c562a 100644 --- a/src/llamafactory/model/model_utils/longlora.py +++ b/src/llamafactory/model/model_utils/longlora.py @@ -182,11 +182,9 @@ def llama_flash_attention_2_forward( query_states, key_states, value_states = shift(query_states), shift(key_states), shift(value_states) if attention_mask is not None: attention_mask = attention_mask[:, :groupsz].repeat(num_groups, 1) - else: - groupsz = q_len attn_output: torch.Tensor = self._flash_attention_forward( - query_states, key_states, value_states, attention_mask, groupsz, dropout=dropout_rate + query_states, key_states, value_states, attention_mask, query_states.size(1), dropout=dropout_rate ) if getattr(self.config, "group_size_ratio", None) and self.training: # shift back From 4f33de245c5e00ea4f4457676ea92779d1ab3533 Mon Sep 17 00:00:00 2001 From: Alfredo Luque Date: Tue, 11 Jun 2024 00:07:06 +0000 Subject: [PATCH 123/162] add manifest so requirements.txt in sdist Former-commit-id: b501a3c56c51786c3006a2aca15a145641a4556c --- MANIFEST.in | 1 + 1 file changed, 1 insertion(+) create mode 100644 MANIFEST.in diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 00000000..82c51f63 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1 @@ +include LICENSE requirements.txt From b7458a24b6adbc958839b4a27803069055c3fbf4 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Tue, 11 Jun 2024 12:48:53 +0800 Subject: [PATCH 124/162] tiny fix Former-commit-id: c4b2e263d9cefbad0fbc5de72422e4ef8edbcb54 --- src/llamafactory/hparams/parser.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/llamafactory/hparams/parser.py b/src/llamafactory/hparams/parser.py index ff1fbf5d..ec5dd62c 100644 --- a/src/llamafactory/hparams/parser.py +++ b/src/llamafactory/hparams/parser.py @@ -171,9 +171,6 @@ def get_train_args(args: Optional[Dict[str, Any]] = None) -> _TRAIN_CLS: if training_args.do_train and model_args.quantization_device_map == "auto": raise ValueError("Cannot use device map for quantized models in training.") - if finetuning_args.use_dora and model_args.use_unsloth: - raise ValueError("Unsloth does not support DoRA.") - if finetuning_args.pure_bf16: if not is_torch_bf16_gpu_available(): raise ValueError("This device does not support `pure_bf16`.") From 95f95bef609cf3f44bd4c939ee9d7201ae4749cb Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Tue, 11 Jun 2024 15:38:38 +0800 Subject: [PATCH 125/162] fix #4198 Former-commit-id: 945d2c6cc73542adf9272ebd9aa332ea2c1c7361 --- src/llamafactory/hparams/model_args.py | 12 ++++++++++ src/llamafactory/model/patcher.py | 2 +- src/llamafactory/train/trainer_utils.py | 32 +++++++++++-------------- 3 files changed, 27 insertions(+), 19 deletions(-) diff --git a/src/llamafactory/hparams/model_args.py b/src/llamafactory/hparams/model_args.py index 6352a420..71467770 100644 --- 
a/src/llamafactory/hparams/model_args.py +++ b/src/llamafactory/hparams/model_args.py @@ -1,6 +1,8 @@ from dataclasses import asdict, dataclass, field from typing import Any, Dict, Literal, Optional +from typing_extensions import Self + @dataclass class ModelArguments: @@ -216,3 +218,13 @@ class ModelArguments: def to_dict(self) -> Dict[str, Any]: return asdict(self) + + @classmethod + def copyfrom(cls, old_arg: Self, **kwargs) -> Self: + arg_dict = old_arg.to_dict() + arg_dict.update(**kwargs) + new_arg = cls(**arg_dict) + new_arg.compute_dtype = old_arg.compute_dtype + new_arg.device_map = old_arg.device_map + new_arg.model_max_length = old_arg.model_max_length + return new_arg diff --git a/src/llamafactory/model/patcher.py b/src/llamafactory/model/patcher.py index 87c92315..18221a10 100644 --- a/src/llamafactory/model/patcher.py +++ b/src/llamafactory/model/patcher.py @@ -79,7 +79,7 @@ def patch_config( if "device_map" not in init_kwargs and model_args.device_map: init_kwargs["device_map"] = model_args.device_map - if init_kwargs["device_map"] == "auto": + if init_kwargs.get("device_map", None) == "auto": init_kwargs["offload_folder"] = model_args.offload_folder diff --git a/src/llamafactory/train/trainer_utils.py b/src/llamafactory/train/trainer_utils.py index 0ddcdb11..7e9cc881 100644 --- a/src/llamafactory/train/trainer_utils.py +++ b/src/llamafactory/train/trainer_utils.py @@ -83,15 +83,12 @@ def create_ref_model( The valuehead parameter is randomly initialized since it is useless for PPO training. """ if finetuning_args.ref_model is not None: - ref_model_args_dict = model_args.to_dict() - ref_model_args_dict.update( - dict( - model_name_or_path=finetuning_args.ref_model, - adapter_name_or_path=finetuning_args.ref_model_adapters, - quantization_bit=finetuning_args.ref_model_quantization_bit, - ) + ref_model_args = ModelArguments.copyfrom( + model_args, + model_name_or_path=finetuning_args.ref_model, + adapter_name_or_path=finetuning_args.ref_model_adapters, + quantization_bit=finetuning_args.ref_model_quantization_bit, ) - ref_model_args = ModelArguments(**ref_model_args_dict) ref_finetuning_args = FinetuningArguments() tokenizer = load_tokenizer(ref_model_args)["tokenizer"] ref_model = load_model( @@ -102,9 +99,11 @@ def create_ref_model( if finetuning_args.finetuning_type == "lora": ref_model = None else: - tokenizer = load_tokenizer(model_args)["tokenizer"] + ref_model_args = ModelArguments.copyfrom(model_args) + ref_finetuning_args = FinetuningArguments() + tokenizer = load_tokenizer(ref_model_args)["tokenizer"] ref_model = load_model( - tokenizer, model_args, finetuning_args, is_trainable=False, add_valuehead=add_valuehead + tokenizer, ref_model_args, ref_finetuning_args, is_trainable=False, add_valuehead=add_valuehead ) logger.info("Created reference model from the model itself.") @@ -139,15 +138,12 @@ def create_reward_model( logger.info("Loaded adapter weights of reward model from {}".format(finetuning_args.reward_model)) return None else: - reward_model_args_dict = model_args.to_dict() - reward_model_args_dict.update( - dict( - model_name_or_path=finetuning_args.reward_model, - adapter_name_or_path=finetuning_args.reward_model_adapters, - quantization_bit=finetuning_args.reward_model_quantization_bit, - ) + reward_model_args = ModelArguments.copyfrom( + model_args, + model_name_or_path=finetuning_args.reward_model, + adapter_name_or_path=finetuning_args.reward_model_adapters, + quantization_bit=finetuning_args.reward_model_quantization_bit, ) - reward_model_args = 
ModelArguments(**reward_model_args_dict) reward_finetuning_args = FinetuningArguments() tokenizer = load_tokenizer(reward_model_args)["tokenizer"] reward_model = load_model( From 3f52c233b1b1c2502e199d4f3a1204215d1bd0cc Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Tue, 11 Jun 2024 15:40:21 +0800 Subject: [PATCH 126/162] Update bug-report.yml Former-commit-id: bb022cd867ebf2593e40fc6ba43b768603b129a3 --- .github/ISSUE_TEMPLATE/bug-report.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/ISSUE_TEMPLATE/bug-report.yml b/.github/ISSUE_TEMPLATE/bug-report.yml index 1d962200..768adea6 100644 --- a/.github/ISSUE_TEMPLATE/bug-report.yml +++ b/.github/ISSUE_TEMPLATE/bug-report.yml @@ -38,7 +38,9 @@ body: 请合理使用 Markdown 标签来格式化您的文本。 placeholder: | + ```bash llamafactory-cli train ... + ``` - type: textarea id: expected-behavior From dfac202c7d87551e9eb13fb1a40d47d3c904eb96 Mon Sep 17 00:00:00 2001 From: d <913015993@qq.com> Date: Tue, 11 Jun 2024 16:21:48 +0800 Subject: [PATCH 127/162] =?UTF-8?q?=E7=BB=8F=E8=BF=87=E5=A4=A7=E9=87=8F?= =?UTF-8?q?=E7=9A=84=E5=A2=9E=E9=87=8F=E9=A2=84=E8=AE=AD=E7=BB=83=EF=BC=8C?= =?UTF-8?q?=E8=BF=9B=E8=A1=8C=E5=AF=B9=E6=AF=94=E8=AF=95=E9=AA=8C=EF=BC=8C?= =?UTF-8?q?=E5=8F=91=E7=8E=B0=E8=BF=99=E4=B8=AAbug=EF=BC=9Allama3=E5=9C=A8?= =?UTF-8?q?=E9=A2=84=E8=AE=AD=E7=BB=83=E6=97=B6=E4=BD=BF=E7=94=A8=E7=9A=84?= =?UTF-8?q?tokenizer.eos=5Ftoke=E6=98=AF'<|end=5Fof=5Ftext|>'=20=EF=BC=8C?= =?UTF-8?q?=E8=BF=99=E9=87=8C=E5=9C=A8=E6=AF=8F=E6=9D=A1=E6=95=B0=E6=8D=AE?= =?UTF-8?q?=E5=90=8E=E9=9D=A2=E4=B9=9F=E5=BE=97=E7=94=A8=E8=BF=99=E4=B8=AA?= =?UTF-8?q?=EF=BC=8C=E8=80=8C=E4=B8=8D=E6=98=AF'<|eot=5Fid|>'=EF=BC=8C?= =?UTF-8?q?=E5=90=A6=E5=88=99=E5=BE=88=E5=AE=B9=E6=98=93=E5=AF=BC=E8=87=B4?= =?UTF-8?q?=E4=B8=A5=E9=87=8D=E7=9A=84=E6=80=A7=E8=83=BD=E4=B8=8B=E9=99=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Former-commit-id: ef470561f742b16eaa0f99c4cadecd7c84ce6bd2 --- src/llamafactory/data/processors/pretrain.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/llamafactory/data/processors/pretrain.py b/src/llamafactory/data/processors/pretrain.py index 87727b55..4050f74c 100644 --- a/src/llamafactory/data/processors/pretrain.py +++ b/src/llamafactory/data/processors/pretrain.py @@ -12,7 +12,8 @@ def preprocess_pretrain_dataset( examples: Dict[str, List[Any]], tokenizer: "PreTrainedTokenizer", data_args: "DataArguments" ) -> Dict[str, List[List[int]]]: # build grouped texts with format `X1 X2 X3 ...` if packing is enabled - text_examples = [messages[0]["content"] + tokenizer.eos_token for messages in examples["prompt"]] + eos_token = '<|end_of_text|>' if data_args.template == 'llama3' else tokenizer.eos_token + text_examples = [messages[0]["content"] + eos_token for messages in examples["prompt"]] if not data_args.packing: if data_args.template == "gemma": From a7233181f28bb6e9008c8c67654c04621e8bc8ea Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Tue, 11 Jun 2024 16:52:36 +0800 Subject: [PATCH 128/162] fix deepspeed version Former-commit-id: 938a69bb07d4de7d82928ff01c582032162c1480 --- src/llamafactory/model/model_utils/moe.py | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/src/llamafactory/model/model_utils/moe.py b/src/llamafactory/model/model_utils/moe.py index e554e45a..8a73c844 100644 --- a/src/llamafactory/model/model_utils/moe.py +++ b/src/llamafactory/model/model_utils/moe.py @@ -1,5 +1,6 @@ -from typing import 
TYPE_CHECKING +from typing import TYPE_CHECKING, Sequence +import torch from transformers.integrations import is_deepspeed_zero3_enabled from transformers.utils.versions import require_version @@ -10,6 +11,13 @@ if TYPE_CHECKING: from ...hparams import ModelArguments +def _set_z3_leaf_modules(model: "PreTrainedModel", leaf_modules: Sequence["torch.nn.Module"]) -> None: + require_version("deepspeed>=0.13.0", "To fix: pip install deepspeed>=0.13.0") + from deepspeed.utils import set_z3_leaf_modules # type: ignore + + set_z3_leaf_modules(model, leaf_modules) + + def add_z3_leaf_module(model: "PreTrainedModel") -> None: r""" Sets module as a leaf module to skip partitioning in deepspeed zero3. @@ -17,33 +25,30 @@ def add_z3_leaf_module(model: "PreTrainedModel") -> None: if not is_deepspeed_zero3_enabled(): return - require_version("deepspeed>=0.13.0", "To fix: pip install deepspeed>=0.13.0") - from deepspeed.utils import set_z3_leaf_modules # type: ignore - if getattr(model.config, "model_type", None) == "dbrx": from transformers.models.dbrx.modeling_dbrx import DbrxFFN - set_z3_leaf_modules(model, [DbrxFFN]) + _set_z3_leaf_modules(model, [DbrxFFN]) if getattr(model.config, "model_type", None) == "jamba": from transformers.models.jamba.modeling_jamba import JambaSparseMoeBlock - set_z3_leaf_modules(model, [JambaSparseMoeBlock]) + _set_z3_leaf_modules(model, [JambaSparseMoeBlock]) if getattr(model.config, "model_type", None) == "jetmoe": from transformers.models.jetmoe.modeling_jetmoe import JetMoeMoA, JetMoeMoE - set_z3_leaf_modules(model, [JetMoeMoA, JetMoeMoE]) + _set_z3_leaf_modules(model, [JetMoeMoA, JetMoeMoE]) if getattr(model.config, "model_type", None) == "mixtral": from transformers.models.mixtral.modeling_mixtral import MixtralSparseMoeBlock - set_z3_leaf_modules(model, [MixtralSparseMoeBlock]) + _set_z3_leaf_modules(model, [MixtralSparseMoeBlock]) if getattr(model.config, "model_type", None) == "qwen2moe": from transformers.models.qwen2_moe.modeling_qwen2_moe import Qwen2MoeSparseMoeBlock - set_z3_leaf_modules(model, [Qwen2MoeSparseMoeBlock]) + _set_z3_leaf_modules(model, [Qwen2MoeSparseMoeBlock]) def configure_moe(config: "PretrainedConfig", model_args: "ModelArguments", is_trainable: bool) -> None: From 6625bf6b332a52d09be21c0de3b989468d1ecf6c Mon Sep 17 00:00:00 2001 From: hoshi-hiyouga Date: Tue, 11 Jun 2024 17:02:14 +0800 Subject: [PATCH 129/162] Update pretrain.py Former-commit-id: e2317b2a84149e39fddfd6366be3de23dfb71f82 --- src/llamafactory/data/processors/pretrain.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llamafactory/data/processors/pretrain.py b/src/llamafactory/data/processors/pretrain.py index 4050f74c..832c987e 100644 --- a/src/llamafactory/data/processors/pretrain.py +++ b/src/llamafactory/data/processors/pretrain.py @@ -12,7 +12,7 @@ def preprocess_pretrain_dataset( examples: Dict[str, List[Any]], tokenizer: "PreTrainedTokenizer", data_args: "DataArguments" ) -> Dict[str, List[List[int]]]: # build grouped texts with format `X1 X2 X3 ...` if packing is enabled - eos_token = '<|end_of_text|>' if data_args.template == 'llama3' else tokenizer.eos_token + eos_token = "<|end_of_text|>" if data_args.template == "llama3" else tokenizer.eos_token text_examples = [messages[0]["content"] + eos_token for messages in examples["prompt"]] if not data_args.packing: From 45712c6251414024413eb2f669214e93c693f8c6 Mon Sep 17 00:00:00 2001 From: Arthur Kim Date: Wed, 12 Jun 2024 16:49:12 +0900 Subject: [PATCH 130/162] Support vllm==0.5.0 Former-commit-id: 
e7a8ffd7af21bc3759f055033ba2209fa7a1be0e --- src/llamafactory/chat/vllm_engine.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/llamafactory/chat/vllm_engine.py b/src/llamafactory/chat/vllm_engine.py index 87ce8684..d096f6eb 100644 --- a/src/llamafactory/chat/vllm_engine.py +++ b/src/llamafactory/chat/vllm_engine.py @@ -13,7 +13,10 @@ from .base_engine import BaseEngine, Response if is_vllm_available(): from vllm import AsyncEngineArgs, AsyncLLMEngine, RequestOutput, SamplingParams from vllm.lora.request import LoRARequest - from vllm.sequence import MultiModalData + try: + from vllm.multimodal import MultiModalData # vllm==0.5.0 + except ImportError: + from vllm.sequence import MultiModalData # vllm<0.5.0 if TYPE_CHECKING: From f7b66af7aff84177d6310b01e15bf58f2d0ca170 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Wed, 12 Jun 2024 16:50:11 +0800 Subject: [PATCH 131/162] fix #4242 Former-commit-id: cf260e7af03f49aa5e3d6daf3b27738ff9b9bcb8 --- Dockerfile | 2 +- src/llamafactory/chat/vllm_engine.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index 45849601..3932ff30 100644 --- a/Dockerfile +++ b/Dockerfile @@ -32,7 +32,7 @@ RUN EXTRA_PACKAGES="metrics"; \ EXTRA_PACKAGES="${EXTRA_PACKAGES},deepspeed"; \ fi; \ pip install -e .[$EXTRA_PACKAGES] && \ - pip uninstall -y transformer-engine + pip uninstall -y transformer-engine flash-attn # Set up volumes VOLUME [ "/root/.cache/huggingface/", "/app/data", "/app/output" ] diff --git a/src/llamafactory/chat/vllm_engine.py b/src/llamafactory/chat/vllm_engine.py index d096f6eb..d2850a6e 100644 --- a/src/llamafactory/chat/vllm_engine.py +++ b/src/llamafactory/chat/vllm_engine.py @@ -13,10 +13,11 @@ from .base_engine import BaseEngine, Response if is_vllm_available(): from vllm import AsyncEngineArgs, AsyncLLMEngine, RequestOutput, SamplingParams from vllm.lora.request import LoRARequest + try: - from vllm.multimodal import MultiModalData # vllm==0.5.0 + from vllm.multimodal import MultiModalData # type: ignore (for vllm>=0.5.0) except ImportError: - from vllm.sequence import MultiModalData # vllm<0.5.0 + from vllm.sequence import MultiModalData # for vllm<0.5.0 if TYPE_CHECKING: From 8ad6a70a57f2892a748a94db80c7185a3ba8b7d8 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Wed, 12 Jun 2024 17:39:12 +0800 Subject: [PATCH 132/162] update readme Former-commit-id: a436aaa83f0cf12c8f404459e5486f9369d538ec --- README.md | 2 +- README_zh.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 35dacd2e..a773a5f4 100644 --- a/README.md +++ b/README.md @@ -443,7 +443,7 @@ docker run -it --gpus=all \ ``` > [!TIP] -> Use Docker Compose to build image via `docker compose up -d`. +> Use Docker Compose to build image via `docker-compose up -d`.
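A minimal, self-contained sketch of the idea behind the pretrain.py changes in PATCH 127 and PATCH 129 above: for the llama3 template, packed pre-training documents should be terminated with `<|end_of_text|>` rather than the chat `eos_token` (`<|eot_id|>`). The helper name and sample strings below are hypothetical; only the token choice follows the patch.

```python
from typing import List


def terminate_documents(docs: List[str], template: str, chat_eos_token: str) -> List[str]:
    # Pick the pre-training EOS per template, mirroring the pretrain.py change above.
    eos_token = "<|end_of_text|>" if template == "llama3" else chat_eos_token
    return [doc + eos_token for doc in docs]


print(terminate_documents(["doc one", "doc two"], template="llama3", chat_eos_token="<|eot_id|>"))
# -> ['doc one<|end_of_text|>', 'doc two<|end_of_text|>']
```

The commit message of PATCH 127 reports that appending the wrong terminator during continued pre-training easily leads to a severe performance drop in comparison experiments, which is why the template-specific token is used here.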
Details about volume diff --git a/README_zh.md b/README_zh.md index 0ddb8b19..7a9cb159 100644 --- a/README_zh.md +++ b/README_zh.md @@ -443,7 +443,7 @@ docker run -it --gpus=all \ ``` > [!TIP] -> 通过 `docker compose up -d` 使用 Docker Compose 构建镜像。 +> 通过 `docker-compose up -d` 使用 Docker Compose 构建镜像。
数据卷详情 From d98900d804b94bc0a972483c2d91c6883547c18d Mon Sep 17 00:00:00 2001 From: hzhaoy Date: Wed, 12 Jun 2024 18:29:03 +0800 Subject: [PATCH 133/162] adapt vllm==0.5.0 Former-commit-id: 02afd9ff64f23e6707ac739ae1269f41bd70c340 --- src/llamafactory/chat/vllm_engine.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/src/llamafactory/chat/vllm_engine.py b/src/llamafactory/chat/vllm_engine.py index d2850a6e..34126adf 100644 --- a/src/llamafactory/chat/vllm_engine.py +++ b/src/llamafactory/chat/vllm_engine.py @@ -1,10 +1,12 @@ import uuid from typing import TYPE_CHECKING, AsyncGenerator, AsyncIterator, Dict, List, Optional, Sequence, Union +from packaging import version + from ..data import get_template_and_fix_tokenizer from ..extras.logging import get_logger from ..extras.misc import get_device_count -from ..extras.packages import is_vllm_available +from ..extras.packages import is_vllm_available, _get_package_version from ..model import load_config, load_tokenizer from ..model.model_utils.visual import LlavaMultiModalProjectorForYiVLForVLLM from .base_engine import BaseEngine, Response @@ -14,10 +16,10 @@ if is_vllm_available(): from vllm import AsyncEngineArgs, AsyncLLMEngine, RequestOutput, SamplingParams from vllm.lora.request import LoRARequest - try: - from vllm.multimodal import MultiModalData # type: ignore (for vllm>=0.5.0) - except ImportError: - from vllm.sequence import MultiModalData # for vllm<0.5.0 + if _get_package_version("vllm") >= version.parse("0.5.0"): + from vllm.multimodal.image import ImagePixelData + else: + from vllm.sequence import MultiModalData if TYPE_CHECKING: @@ -110,7 +112,10 @@ class VllmEngine(BaseEngine): if self.processor is not None and image is not None: # add image features image_processor: "BaseImageProcessor" = getattr(self.processor, "image_processor") pixel_values = image_processor(image, return_tensors="pt")["pixel_values"] - multi_modal_data = MultiModalData(type=MultiModalData.Type.IMAGE, data=pixel_values) + if _get_package_version("vllm") >= version.parse("0.5.0"): + multi_modal_data = ImagePixelData(pixel_values) + else: + multi_modal_data = MultiModalData(type=MultiModalData.Type.IMAGE, data=pixel_values) else: multi_modal_data = None From 7d3a9b10b70d1fc1af3c218bb21f7c3fe42b59b9 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Thu, 13 Jun 2024 00:07:48 +0800 Subject: [PATCH 134/162] fix docker compose usage Former-commit-id: 59a5bd5d5c8d2a44e2dad26b74e77a45e109c8d6 --- README.md | 10 ++++++++-- README_zh.md | 10 ++++++++-- docker-compose.yml | 5 +++-- 3 files changed, 19 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index a773a5f4..65964560 100644 --- a/README.md +++ b/README.md @@ -423,6 +423,8 @@ llamafactory-cli webui ### Build Docker +#### Use Docker + ```bash docker build -f ./Dockerfile \ --build-arg INSTALL_BNB=false \ @@ -442,8 +444,12 @@ docker run -it --gpus=all \ llamafactory:latest ``` -> [!TIP] -> Use Docker Compose to build image via `docker-compose up -d`. +#### Use Docker Compose + +```bash +docker-compose up -d +docker-compose exec -it llamafactory bash +```
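The vllm_engine.py change in PATCH 133 above replaces the ImportError fallback introduced in PATCH 130/131 with an explicit version check before choosing the import path. A standalone sketch of that pattern, assuming vLLM and `packaging` are installed (the helper name is hypothetical; the module paths are the ones used in the patch):

```python
import importlib.metadata

from packaging import version


def vllm_version_at_least(spec: str) -> bool:
    # Compare the installed vllm version against a required minimum.
    return version.parse(importlib.metadata.version("vllm")) >= version.parse(spec)


if vllm_version_at_least("0.5.0"):
    from vllm.multimodal.image import ImagePixelData  # image inputs moved here in vllm 0.5.0
else:
    from vllm.sequence import MultiModalData  # location in vllm < 0.5.0
```

Checking the version explicitly keeps the two code paths unambiguous, whereas the earlier try/except treats any ImportError, including one caused by a broken install, as "old vLLM".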
Details about volume diff --git a/README_zh.md b/README_zh.md index 7a9cb159..7962a6d1 100644 --- a/README_zh.md +++ b/README_zh.md @@ -423,6 +423,8 @@ llamafactory-cli webui ### 构建 Docker +#### 使用 Docker + ```bash docker build -f ./Dockerfile \ --build-arg INSTALL_BNB=false \ @@ -442,8 +444,12 @@ docker run -it --gpus=all \ llamafactory:latest ``` -> [!TIP] -> 通过 `docker-compose up -d` 使用 Docker Compose 构建镜像。 +#### 使用 Docker Compose + +```bash +docker-compose up -d +docker-compose exec -it llamafactory bash +```
数据卷详情 diff --git a/docker-compose.yml b/docker-compose.yml index b3e4a34d..c5dc34e9 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,5 +1,3 @@ -version: '3.8' - services: llamafactory: build: @@ -19,6 +17,9 @@ services: - "7860:7860" - "8000:8000" ipc: host + tty: true + stdin_open: true + command: bash deploy: resources: reservations: From f4c95557609699ff36d483beb49d0f792fbff146 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Thu, 13 Jun 2024 00:48:44 +0800 Subject: [PATCH 135/162] fix lint Former-commit-id: b170165679317af2b3f03633afac27661b3deb06 --- README.md | 2 +- README_zh.md | 2 +- src/llamafactory/hparams/finetuning_args.py | 17 ++++++++--------- src/llamafactory/hparams/model_args.py | 12 ++++++++---- 4 files changed, 18 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 65964560..994a62c6 100644 --- a/README.md +++ b/README.md @@ -448,7 +448,7 @@ docker run -it --gpus=all \ ```bash docker-compose up -d -docker-compose exec -it llamafactory bash +docker-compose exec llamafactory bash ```
Details about volume diff --git a/README_zh.md b/README_zh.md index 7962a6d1..fa395c6b 100644 --- a/README_zh.md +++ b/README_zh.md @@ -448,7 +448,7 @@ docker run -it --gpus=all \ ```bash docker-compose up -d -docker-compose exec -it llamafactory bash +docker-compose exec llamafactory bash ```
数据卷详情 diff --git a/src/llamafactory/hparams/finetuning_args.py b/src/llamafactory/hparams/finetuning_args.py index 08af31e4..facbe792 100644 --- a/src/llamafactory/hparams/finetuning_args.py +++ b/src/llamafactory/hparams/finetuning_args.py @@ -1,5 +1,5 @@ from dataclasses import dataclass, field -from typing import Literal, Optional +from typing import List, Literal, Optional @dataclass @@ -319,20 +319,19 @@ class FinetuningArguments(FreezeArguments, LoraArguments, RLHFArguments, GaloreA return [item.strip() for item in arg.split(",")] return arg - self.freeze_trainable_modules = split_arg(self.freeze_trainable_modules) - self.freeze_extra_modules = split_arg(self.freeze_extra_modules) - self.lora_alpha = self.lora_alpha or self.lora_rank * 2 - self.lora_target = split_arg(self.lora_target) - self.additional_target = split_arg(self.additional_target) - self.galore_target = split_arg(self.galore_target) + self.freeze_trainable_modules: List[str] = split_arg(self.freeze_trainable_modules) + self.freeze_extra_modules: Optional[List[str]] = split_arg(self.freeze_extra_modules) + self.lora_alpha: int = self.lora_alpha or self.lora_rank * 2 + self.lora_target: List[str] = split_arg(self.lora_target) + self.additional_target: Optional[List[str]] = split_arg(self.additional_target) + self.galore_target: List[str] = split_arg(self.galore_target) self.freeze_vision_tower = self.freeze_vision_tower or self.train_mm_proj_only + self.use_ref_model = self.pref_loss not in ["orpo", "simpo"] assert self.finetuning_type in ["lora", "freeze", "full"], "Invalid fine-tuning method." assert self.ref_model_quantization_bit in [None, 8, 4], "We only accept 4-bit or 8-bit quantization." assert self.reward_model_quantization_bit in [None, 8, 4], "We only accept 4-bit or 8-bit quantization." 
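The `__post_init__` hunk above passes several arguments through a `split_arg` helper before the now type-annotated assignments. A self-contained sketch of that normalization, reconstructed from the context lines shown here rather than copied from the repository:

```python
from typing import List, Optional, Union


def split_arg(arg: Optional[Union[str, List[str]]]) -> Optional[List[str]]:
    # Accept either a comma-separated string (as given on the CLI or in YAML)
    # or an already-parsed list, and normalize to a list of stripped names.
    if isinstance(arg, str):
        return [item.strip() for item in arg.split(",")]
    return arg


print(split_arg("q_proj, v_proj"))       # ['q_proj', 'v_proj']
print(split_arg(["q_proj", "v_proj"]))   # returned unchanged
print(split_arg(None))                   # None passes through for optional fields
```

This lets fields such as `lora_target` or `freeze_extra_modules` be written as a single string in config files while the rest of the code always sees a list (or `None`).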
- self.use_ref_model = self.pref_loss not in ["orpo", "simpo"] - if self.stage == "ppo" and self.reward_model is None: raise ValueError("`reward_model` is necessary for PPO training.") diff --git a/src/llamafactory/hparams/model_args.py b/src/llamafactory/hparams/model_args.py index 71467770..359beafd 100644 --- a/src/llamafactory/hparams/model_args.py +++ b/src/llamafactory/hparams/model_args.py @@ -1,9 +1,13 @@ from dataclasses import asdict, dataclass, field -from typing import Any, Dict, Literal, Optional +from typing import TYPE_CHECKING, Any, Dict, Literal, Optional, Union from typing_extensions import Self +if TYPE_CHECKING: + import torch + + @dataclass class ModelArguments: r""" @@ -194,9 +198,9 @@ class ModelArguments: ) def __post_init__(self): - self.compute_dtype = None - self.device_map = None - self.model_max_length = None + self.compute_dtype: Optional["torch.dtype"] = None + self.device_map: Optional[Union[str, Dict[str, Any]]] = None + self.model_max_length: Optional[int] = None if self.split_special_tokens and self.use_fast_tokenizer: raise ValueError("`split_special_tokens` is only supported for slow tokenizers.") From 39e3d3fed63a5185806802e0b7de2eaf22451c22 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Thu, 13 Jun 2024 01:00:56 +0800 Subject: [PATCH 136/162] add neo-sft dataset Former-commit-id: 34863fa7cb641ceca92e3a2eec914126db537b62 --- README.md | 1 + README_zh.md | 1 + data/dataset_info.json | 4 ++++ 3 files changed, 6 insertions(+) diff --git a/README.md b/README.md index 994a62c6..5bbaf2d7 100644 --- a/README.md +++ b/README.md @@ -259,6 +259,7 @@ You also can add a custom chat template to [template.py](src/llamafactory/data/t - [Cosmopedia (en)](https://huggingface.co/datasets/HuggingFaceTB/cosmopedia) - [STEM (zh)](https://huggingface.co/datasets/hfl/stem_zh_instruction) - [Ruozhiba (zh)](https://huggingface.co/datasets/hfl/ruozhiba_gpt4_turbo) +- [Neo-sft (zh)](https://huggingface.co/datasets/m-a-p/neo_sft_phase2) - [LLaVA mixed (en&zh)](https://huggingface.co/datasets/BUAADreamer/llava-en-zh-300k) - [Open Assistant (de)](https://huggingface.co/datasets/mayflowergmbh/oasst_de) - [Dolly 15k (de)](https://huggingface.co/datasets/mayflowergmbh/dolly-15k_de) diff --git a/README_zh.md b/README_zh.md index fa395c6b..fb616909 100644 --- a/README_zh.md +++ b/README_zh.md @@ -259,6 +259,7 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/ec36a9dd-37f4-4f72-81bd - [Cosmopedia (en)](https://huggingface.co/datasets/HuggingFaceTB/cosmopedia) - [STEM (zh)](https://huggingface.co/datasets/hfl/stem_zh_instruction) - [Ruozhiba (zh)](https://huggingface.co/datasets/hfl/ruozhiba_gpt4_turbo) +- [Neo-sft (zh)](https://huggingface.co/datasets/m-a-p/neo_sft_phase2) - [LLaVA mixed (en&zh)](https://huggingface.co/datasets/BUAADreamer/llava-en-zh-300k) - [Open Assistant (de)](https://huggingface.co/datasets/mayflowergmbh/oasst_de) - [Dolly 15k (de)](https://huggingface.co/datasets/mayflowergmbh/dolly-15k_de) diff --git a/data/dataset_info.json b/data/dataset_info.json index 8c5cbb45..1d226b3a 100644 --- a/data/dataset_info.json +++ b/data/dataset_info.json @@ -248,6 +248,10 @@ "ruozhiba_gpt4": { "hf_hub_url": "hfl/ruozhiba_gpt4_turbo" }, + "neo_sft": { + "hf_hub_url": "m-a-p/neo_sft_phase2", + "formatting": "sharegpt" + }, "llava_1k_en": { "hf_hub_url": "BUAADreamer/llava-en-zh-2k", "subset": "en", From 344d1192acfbfb2aba7cbb8bbe45900546618e20 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Thu, 13 Jun 2024 01:58:16 +0800 Subject: [PATCH 
137/162] clean code Former-commit-id: f54cafd5c7f0383370d1a2f357834a61a97397ce --- src/llamafactory/chat/vllm_engine.py | 12 +++++----- src/llamafactory/extras/packages.py | 22 +++++-------------- .../model/model_utils/attention.py | 7 +++--- src/llamafactory/train/sft/metric.py | 3 ++- 4 files changed, 17 insertions(+), 27 deletions(-) diff --git a/src/llamafactory/chat/vllm_engine.py b/src/llamafactory/chat/vllm_engine.py index 34126adf..e4c05478 100644 --- a/src/llamafactory/chat/vllm_engine.py +++ b/src/llamafactory/chat/vllm_engine.py @@ -1,12 +1,10 @@ import uuid from typing import TYPE_CHECKING, AsyncGenerator, AsyncIterator, Dict, List, Optional, Sequence, Union -from packaging import version - from ..data import get_template_and_fix_tokenizer from ..extras.logging import get_logger from ..extras.misc import get_device_count -from ..extras.packages import is_vllm_available, _get_package_version +from ..extras.packages import is_vllm_available, is_vllm_version_greater_than_0_5 from ..model import load_config, load_tokenizer from ..model.model_utils.visual import LlavaMultiModalProjectorForYiVLForVLLM from .base_engine import BaseEngine, Response @@ -16,7 +14,7 @@ if is_vllm_available(): from vllm import AsyncEngineArgs, AsyncLLMEngine, RequestOutput, SamplingParams from vllm.lora.request import LoRARequest - if _get_package_version("vllm") >= version.parse("0.5.0"): + if is_vllm_version_greater_than_0_5(): from vllm.multimodal.image import ImagePixelData else: from vllm.sequence import MultiModalData @@ -112,9 +110,9 @@ class VllmEngine(BaseEngine): if self.processor is not None and image is not None: # add image features image_processor: "BaseImageProcessor" = getattr(self.processor, "image_processor") pixel_values = image_processor(image, return_tensors="pt")["pixel_values"] - if _get_package_version("vllm") >= version.parse("0.5.0"): - multi_modal_data = ImagePixelData(pixel_values) - else: + if is_vllm_version_greater_than_0_5(): + multi_modal_data = ImagePixelData(image=pixel_values) + else: # TODO: remove vllm 0.4.3 support multi_modal_data = MultiModalData(type=MultiModalData.Type.IMAGE, data=pixel_values) else: multi_modal_data = None diff --git a/src/llamafactory/extras/packages.py b/src/llamafactory/extras/packages.py index 4c9e6492..0746bb4f 100644 --- a/src/llamafactory/extras/packages.py +++ b/src/llamafactory/extras/packages.py @@ -1,5 +1,6 @@ import importlib.metadata import importlib.util +from functools import lru_cache from typing import TYPE_CHECKING from packaging import version @@ -24,10 +25,6 @@ def is_fastapi_available(): return _is_package_available("fastapi") -def is_flash_attn2_available(): - return _is_package_available("flash_attn") and _get_package_version("flash_attn") > version.parse("2.0.0") - - def is_galore_available(): return _is_package_available("galore_torch") @@ -36,18 +33,10 @@ def is_gradio_available(): return _is_package_available("gradio") -def is_jieba_available(): - return _is_package_available("jieba") - - def is_matplotlib_available(): return _is_package_available("matplotlib") -def is_nltk_available(): - return _is_package_available("nltk") - - def is_pillow_available(): return _is_package_available("PIL") @@ -60,10 +49,6 @@ def is_rouge_available(): return _is_package_available("rouge_chinese") -def is_sdpa_available(): - return _get_package_version("torch") > version.parse("2.1.1") - - def is_starlette_available(): return _is_package_available("sse_starlette") @@ -74,3 +59,8 @@ def is_uvicorn_available(): def is_vllm_available(): 
return _is_package_available("vllm") + + +@lru_cache +def is_vllm_version_greater_than_0_5(): + return _get_package_version("vllm") >= version.parse("0.5.0") diff --git a/src/llamafactory/model/model_utils/attention.py b/src/llamafactory/model/model_utils/attention.py index b52ddc86..2bd36fdc 100644 --- a/src/llamafactory/model/model_utils/attention.py +++ b/src/llamafactory/model/model_utils/attention.py @@ -1,7 +1,8 @@ from typing import TYPE_CHECKING +from transformers.utils import is_flash_attn_2_available, is_torch_sdpa_available + from ...extras.logging import get_logger -from ...extras.packages import is_flash_attn2_available, is_sdpa_available if TYPE_CHECKING: @@ -21,13 +22,13 @@ def configure_attn_implementation(config: "PretrainedConfig", model_args: "Model requested_attn_implementation = "eager" elif model_args.flash_attn == "sdpa": - if not is_sdpa_available(): + if not is_torch_sdpa_available(): logger.warning("torch>=2.1.1 is required for SDPA attention.") return requested_attn_implementation = "sdpa" elif model_args.flash_attn == "fa2": - if not is_flash_attn2_available(): + if not is_flash_attn_2_available(): logger.warning("FlashAttention-2 is not installed.") return diff --git a/src/llamafactory/train/sft/metric.py b/src/llamafactory/train/sft/metric.py index b135fcfb..6ed356c1 100644 --- a/src/llamafactory/train/sft/metric.py +++ b/src/llamafactory/train/sft/metric.py @@ -2,9 +2,10 @@ from dataclasses import dataclass from typing import TYPE_CHECKING, Dict, Sequence, Tuple, Union import numpy as np +from transformers.utils import is_jieba_available, is_nltk_available from ...extras.constants import IGNORE_INDEX -from ...extras.packages import is_jieba_available, is_nltk_available, is_rouge_available +from ...extras.packages import is_rouge_available if TYPE_CHECKING: From 045cef901ea3fb1516f242ae0b9faac9a99185df Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Thu, 13 Jun 2024 02:25:50 +0800 Subject: [PATCH 138/162] fix #4209 DeepSpeed ZeRO3 has inflight param error when calling model.eval() Former-commit-id: 4be013f18ea6a35b5a11db98db5f0670ffb41619 --- src/llamafactory/train/dpo/trainer.py | 7 +++++-- src/llamafactory/train/kto/trainer.py | 7 +++++-- src/llamafactory/train/ppo/trainer.py | 2 ++ src/llamafactory/train/trainer_utils.py | 13 ------------- 4 files changed, 12 insertions(+), 17 deletions(-) diff --git a/src/llamafactory/train/dpo/trainer.py b/src/llamafactory/train/dpo/trainer.py index d860b29a..5bdb9c43 100644 --- a/src/llamafactory/train/dpo/trainer.py +++ b/src/llamafactory/train/dpo/trainer.py @@ -1,3 +1,4 @@ +import warnings from collections import defaultdict from contextlib import nullcontext from types import MethodType @@ -10,7 +11,7 @@ from trl import DPOTrainer from trl.trainer import disable_dropout_in_model from ...extras.constants import IGNORE_INDEX -from ..trainer_utils import create_custom_optimzer, create_custom_scheduler, get_batch_logps, get_ref_context +from ..trainer_utils import create_custom_optimzer, create_custom_scheduler, get_batch_logps if TYPE_CHECKING: @@ -61,6 +62,8 @@ class CustomDPOTrainer(DPOTrainer): if not hasattr(self, "accelerator"): raise AttributeError("Please update `transformers`.") + warnings.simplefilter("ignore") # remove gc warnings on ref model + if ref_model is not None: if self.is_deepspeed_enabled: if not ( @@ -176,7 +179,7 @@ class CustomDPOTrainer(DPOTrainer): if self.ref_model is None: ref_model = model - ref_context = get_ref_context(self.accelerator, model) + ref_context = 
self.accelerator.unwrap_model(model).disable_adapter() else: ref_model = self.ref_model ref_context = nullcontext() diff --git a/src/llamafactory/train/kto/trainer.py b/src/llamafactory/train/kto/trainer.py index 22a84e4a..3b4488fc 100644 --- a/src/llamafactory/train/kto/trainer.py +++ b/src/llamafactory/train/kto/trainer.py @@ -1,3 +1,4 @@ +import warnings from collections import defaultdict from contextlib import nullcontext from types import MethodType @@ -9,7 +10,7 @@ from trl import KTOTrainer from trl.trainer import disable_dropout_in_model from ...extras.constants import IGNORE_INDEX -from ..trainer_utils import create_custom_optimzer, create_custom_scheduler, get_batch_logps, get_ref_context +from ..trainer_utils import create_custom_optimzer, create_custom_scheduler, get_batch_logps if TYPE_CHECKING: @@ -60,6 +61,8 @@ class CustomKTOTrainer(KTOTrainer): if not hasattr(self, "accelerator"): raise AttributeError("Please update `transformers`.") + warnings.simplefilter("ignore") # remove gc warnings on ref model + if ref_model is not None: if self.is_deepspeed_enabled: if not ( @@ -143,7 +146,7 @@ class CustomKTOTrainer(KTOTrainer): """ if self.ref_model is None: ref_model = model - ref_context = get_ref_context(self.accelerator, model) + ref_context = self.accelerator.unwrap_model(model).disable_adapter() else: ref_model = self.ref_model ref_context = nullcontext() diff --git a/src/llamafactory/train/ppo/trainer.py b/src/llamafactory/train/ppo/trainer.py index 2e1288e4..737c45a3 100644 --- a/src/llamafactory/train/ppo/trainer.py +++ b/src/llamafactory/train/ppo/trainer.py @@ -1,6 +1,7 @@ import math import os import sys +import warnings from types import MethodType from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple @@ -136,6 +137,7 @@ class CustomPPOTrainer(PPOTrainer, Trainer): device_type = unwrapped_model.pretrained_model.device.type self.amp_context = torch.autocast(device_type, dtype=model_args.compute_dtype) + warnings.simplefilter("ignore") # remove gc warnings on ref model if finetuning_args.reward_model_type == "full": if self.is_deepspeed_enabled: diff --git a/src/llamafactory/train/trainer_utils.py b/src/llamafactory/train/trainer_utils.py index 7e9cc881..48944a63 100644 --- a/src/llamafactory/train/trainer_utils.py +++ b/src/llamafactory/train/trainer_utils.py @@ -1,4 +1,3 @@ -from contextlib import contextmanager from typing import TYPE_CHECKING, Callable, Dict, List, Optional, Tuple, Union import torch @@ -19,7 +18,6 @@ if is_galore_available(): if TYPE_CHECKING: - from accelerate import Accelerator from transformers import PreTrainedModel, Seq2SeqTrainingArguments from trl import AutoModelForCausalLMWithValueHead @@ -154,17 +152,6 @@ def create_reward_model( return reward_model -@contextmanager -def get_ref_context(accelerator: "Accelerator", model: "PreTrainedModel"): - r""" - Gets adapter context for the reference model. - """ - with accelerator.unwrap_model(model).disable_adapter(): - model.eval() - yield - model.train() - - def _get_decay_parameter_names(model: "PreTrainedModel") -> List[str]: r""" Returns a list of names of parameters with weight decay. 
(weights in non-layernorm layers) From e8885443a9edae3452c9fd26cfc0e1feef1c2fe5 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Thu, 13 Jun 2024 02:48:21 +0800 Subject: [PATCH 139/162] fix #4221 Former-commit-id: 05a3be4853b941909e7d193c31e8d62c8c5f879b --- src/llamafactory/data/aligner.py | 8 ++++++-- src/llamafactory/data/loader.py | 11 ++++++----- src/llamafactory/data/preprocess.py | 3 +-- src/llamafactory/data/processors/feedback.py | 3 +-- src/llamafactory/data/processors/pairwise.py | 3 +-- src/llamafactory/data/processors/pretrain.py | 2 +- src/llamafactory/data/processors/supervised.py | 3 +-- src/llamafactory/data/processors/unsupervised.py | 3 +-- src/llamafactory/train/sft/metric.py | 2 +- 9 files changed, 19 insertions(+), 19 deletions(-) diff --git a/src/llamafactory/data/aligner.py b/src/llamafactory/data/aligner.py index 434956af..3e9d5c46 100644 --- a/src/llamafactory/data/aligner.py +++ b/src/llamafactory/data/aligner.py @@ -10,6 +10,7 @@ from .data_utils import Role if TYPE_CHECKING: from datasets import Dataset, IterableDataset + from transformers import Seq2SeqTrainingArguments from ..hparams import DataArguments from .parser import DatasetAttr @@ -175,7 +176,10 @@ def convert_sharegpt( def align_dataset( - dataset: Union["Dataset", "IterableDataset"], dataset_attr: "DatasetAttr", data_args: "DataArguments" + dataset: Union["Dataset", "IterableDataset"], + dataset_attr: "DatasetAttr", + data_args: "DataArguments", + training_args: "Seq2SeqTrainingArguments", ) -> Union["Dataset", "IterableDataset"]: r""" Aligned dataset: @@ -208,7 +212,7 @@ def align_dataset( if not data_args.streaming: kwargs = dict( num_proc=data_args.preprocessing_num_workers, - load_from_cache_file=(not data_args.overwrite_cache), + load_from_cache_file=(not data_args.overwrite_cache) or (training_args.local_process_index != 0), desc="Converting format of dataset", ) diff --git a/src/llamafactory/data/loader.py b/src/llamafactory/data/loader.py index 2c236c76..ba426f81 100644 --- a/src/llamafactory/data/loader.py +++ b/src/llamafactory/data/loader.py @@ -18,8 +18,7 @@ from .template import get_template_and_fix_tokenizer if TYPE_CHECKING: from datasets import Dataset, IterableDataset - from transformers import ProcessorMixin, Seq2SeqTrainingArguments - from transformers.tokenization_utils import PreTrainedTokenizer + from transformers import PreTrainedTokenizer, ProcessorMixin, Seq2SeqTrainingArguments from ..hparams import DataArguments, ModelArguments from .parser import DatasetAttr @@ -32,6 +31,7 @@ def load_single_dataset( dataset_attr: "DatasetAttr", model_args: "ModelArguments", data_args: "DataArguments", + training_args: "Seq2SeqTrainingArguments", ) -> Union["Dataset", "IterableDataset"]: logger.info("Loading dataset {}...".format(dataset_attr)) data_path, data_name, data_dir, data_files = None, None, None, None @@ -123,7 +123,7 @@ def load_single_dataset( max_samples = min(data_args.max_samples, len(dataset)) dataset = dataset.select(range(max_samples)) - return align_dataset(dataset, dataset_attr, data_args) + return align_dataset(dataset, dataset_attr, data_args, training_args) def get_dataset( @@ -157,7 +157,8 @@ def get_dataset( if (stage == "rm" and dataset_attr.ranking is False) or (stage != "rm" and dataset_attr.ranking is True): raise ValueError("The dataset is not applicable in the current training stage.") - all_datasets.append(load_single_dataset(dataset_attr, model_args, data_args)) + all_datasets.append(load_single_dataset(dataset_attr, model_args, data_args, 
training_args)) + dataset = merge_dataset(all_datasets, data_args, training_args) with training_args.main_process_first(desc="pre-process dataset"): @@ -169,7 +170,7 @@ def get_dataset( if not data_args.streaming: kwargs = dict( num_proc=data_args.preprocessing_num_workers, - load_from_cache_file=(not data_args.overwrite_cache), + load_from_cache_file=(not data_args.overwrite_cache) or (training_args.local_process_index != 0), desc="Running tokenizer on dataset", ) diff --git a/src/llamafactory/data/preprocess.py b/src/llamafactory/data/preprocess.py index 97789c39..875f55d6 100644 --- a/src/llamafactory/data/preprocess.py +++ b/src/llamafactory/data/preprocess.py @@ -13,8 +13,7 @@ from .processors.unsupervised import preprocess_unsupervised_dataset, print_unsu if TYPE_CHECKING: - from transformers import ProcessorMixin, Seq2SeqTrainingArguments - from transformers.tokenization_utils import PreTrainedTokenizer + from transformers import PreTrainedTokenizer, ProcessorMixin, Seq2SeqTrainingArguments from ..hparams import DataArguments from .template import Template diff --git a/src/llamafactory/data/processors/feedback.py b/src/llamafactory/data/processors/feedback.py index 98d83658..5fba452c 100644 --- a/src/llamafactory/data/processors/feedback.py +++ b/src/llamafactory/data/processors/feedback.py @@ -6,8 +6,7 @@ from .processor_utils import get_paligemma_token_type_ids, get_pixel_values if TYPE_CHECKING: - from transformers import ProcessorMixin - from transformers.tokenization_utils import PreTrainedTokenizer + from transformers import PreTrainedTokenizer, ProcessorMixin from ...hparams import DataArguments from ..template import Template diff --git a/src/llamafactory/data/processors/pairwise.py b/src/llamafactory/data/processors/pairwise.py index fe984efa..db52c6a7 100644 --- a/src/llamafactory/data/processors/pairwise.py +++ b/src/llamafactory/data/processors/pairwise.py @@ -6,8 +6,7 @@ from .processor_utils import get_paligemma_token_type_ids, get_pixel_values if TYPE_CHECKING: - from transformers import ProcessorMixin - from transformers.tokenization_utils import PreTrainedTokenizer + from transformers import PreTrainedTokenizer, ProcessorMixin from ...hparams import DataArguments from ..template import Template diff --git a/src/llamafactory/data/processors/pretrain.py b/src/llamafactory/data/processors/pretrain.py index 832c987e..a10ccabd 100644 --- a/src/llamafactory/data/processors/pretrain.py +++ b/src/llamafactory/data/processors/pretrain.py @@ -3,7 +3,7 @@ from typing import TYPE_CHECKING, Any, Dict, List if TYPE_CHECKING: - from transformers.tokenization_utils import PreTrainedTokenizer + from transformers import PreTrainedTokenizer from ...hparams import DataArguments diff --git a/src/llamafactory/data/processors/supervised.py b/src/llamafactory/data/processors/supervised.py index 19d60280..f59f5371 100644 --- a/src/llamafactory/data/processors/supervised.py +++ b/src/llamafactory/data/processors/supervised.py @@ -7,8 +7,7 @@ from .processor_utils import get_paligemma_token_type_ids, get_pixel_values, gre if TYPE_CHECKING: - from transformers import ProcessorMixin - from transformers.tokenization_utils import PreTrainedTokenizer + from transformers import PreTrainedTokenizer, ProcessorMixin from ...hparams import DataArguments from ..template import Template diff --git a/src/llamafactory/data/processors/unsupervised.py b/src/llamafactory/data/processors/unsupervised.py index f711eeac..38497a15 100644 --- a/src/llamafactory/data/processors/unsupervised.py +++ 
b/src/llamafactory/data/processors/unsupervised.py @@ -6,8 +6,7 @@ from .processor_utils import get_paligemma_token_type_ids, get_pixel_values if TYPE_CHECKING: - from transformers import ProcessorMixin - from transformers.tokenization_utils import PreTrainedTokenizer + from transformers import PreTrainedTokenizer, ProcessorMixin from ...hparams import DataArguments from ..template import Template diff --git a/src/llamafactory/train/sft/metric.py b/src/llamafactory/train/sft/metric.py index 6ed356c1..923238d6 100644 --- a/src/llamafactory/train/sft/metric.py +++ b/src/llamafactory/train/sft/metric.py @@ -9,7 +9,7 @@ from ...extras.packages import is_rouge_available if TYPE_CHECKING: - from transformers.tokenization_utils import PreTrainedTokenizer + from transformers import PreTrainedTokenizer if is_jieba_available(): From 554c84f8d3af480336deb0b3649fde8cb5da766f Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Thu, 13 Jun 2024 03:15:06 +0800 Subject: [PATCH 140/162] update examples Former-commit-id: 19681f93db399d695aa8e35f8ec2a9e720875baa --- README.md | 2 +- README_zh.md | 2 +- examples/README.md | 126 ++++++++--------- examples/README_zh.md | 128 ++++++++---------- .../extras/fsdp_qlora/llama3_lora_sft.yaml | 4 +- .../extras/llama_pro/llama3_freeze_sft.yaml | 1 + examples/extras/loraplus/llama3_lora_sft.yaml | 1 + examples/extras/mod/llama3_full_sft.yaml | 1 + examples/lora_multi_gpu/llama3_lora_sft.yaml | 41 ------ .../llama3_full_predict.yaml | 0 .../llama3_full_sft_ds3.yaml} | 0 .../llama3_lora_dpo.yaml | 1 + .../llama3_lora_eval.yaml | 0 .../llama3_lora_kto.yaml | 2 + .../llama3_lora_ppo.yaml | 1 + .../llama3_lora_predict.yaml | 1 + .../llama3_lora_pretrain.yaml | 1 + .../llama3_lora_reward.yaml | 1 + .../llama3_lora_sft.yaml | 1 + .../llama3_lora_sft_ds0.yaml} | 4 +- .../llama3_lora_sft_ds3.yaml} | 4 +- .../llama3_preprocess.yaml | 0 .../llava1_5_lora_sft.yaml | 1 + .../llama3_lora_sft_aqlm.yaml | 1 + .../llama3_lora_sft_awq.yaml | 1 + .../llama3_lora_sft_bitsandbytes.yaml | 1 + .../llama3_lora_sft_gptq.yaml | 1 + 27 files changed, 128 insertions(+), 199 deletions(-) delete mode 100644 examples/lora_multi_gpu/llama3_lora_sft.yaml rename examples/{full_multi_gpu => train_full}/llama3_full_predict.yaml (100%) rename examples/{full_multi_gpu/llama3_full_sft.yaml => train_full/llama3_full_sft_ds3.yaml} (100%) rename examples/{lora_single_gpu => train_lora}/llama3_lora_dpo.yaml (96%) rename examples/{lora_single_gpu => train_lora}/llama3_lora_eval.yaml (100%) rename examples/{lora_single_gpu => train_lora}/llama3_lora_kto.yaml (94%) rename examples/{lora_single_gpu => train_lora}/llama3_lora_ppo.yaml (96%) rename examples/{lora_single_gpu => train_lora}/llama3_lora_predict.yaml (95%) rename examples/{lora_single_gpu => train_lora}/llama3_lora_pretrain.yaml (96%) rename examples/{lora_single_gpu => train_lora}/llama3_lora_reward.yaml (96%) rename examples/{lora_single_gpu => train_lora}/llama3_lora_sft.yaml (96%) rename examples/{lora_multi_npu/llama3_lora_sft_ds.yaml => train_lora/llama3_lora_sft_ds0.yaml} (98%) rename examples/{lora_multi_gpu/llama3_lora_sft_ds.yaml => train_lora/llama3_lora_sft_ds3.yaml} (98%) rename examples/{lora_single_gpu => train_lora}/llama3_preprocess.yaml (100%) rename examples/{lora_single_gpu => train_lora}/llava1_5_lora_sft.yaml (96%) rename examples/{qlora_single_gpu => train_qlora}/llama3_lora_sft_aqlm.yaml (96%) rename examples/{qlora_single_gpu => train_qlora}/llama3_lora_sft_awq.yaml (96%) rename examples/{qlora_single_gpu => 
train_qlora}/llama3_lora_sft_bitsandbytes.yaml (96%) rename examples/{qlora_single_gpu => train_qlora}/llama3_lora_sft_gptq.yaml (96%) diff --git a/README.md b/README.md index 5bbaf2d7..5dd10d5a 100644 --- a/README.md +++ b/README.md @@ -406,7 +406,7 @@ Please refer to [data/README.md](data/README.md) for checking the details about Use the following 3 commands to run LoRA **fine-tuning**, **inference** and **merging** of the Llama3-8B-Instruct model, respectively. ```bash -llamafactory-cli train examples/lora_single_gpu/llama3_lora_sft.yaml +llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml llamafactory-cli chat examples/inference/llama3_lora_sft.yaml llamafactory-cli export examples/merge_lora/llama3_lora_sft.yaml ``` diff --git a/README_zh.md b/README_zh.md index fb616909..76bd2d89 100644 --- a/README_zh.md +++ b/README_zh.md @@ -406,7 +406,7 @@ Docker 镜像: 下面三行命令分别对 Llama3-8B-Instruct 模型进行 LoRA **微调**、**推理**和**合并**。 ```bash -llamafactory-cli train examples/lora_single_gpu/llama3_lora_sft.yaml +llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml llamafactory-cli chat examples/inference/llama3_lora_sft.yaml llamafactory-cli export examples/merge_lora/llama3_lora_sft.yaml ``` diff --git a/examples/README.md b/examples/README.md index f985d552..3372afb9 100644 --- a/examples/README.md +++ b/examples/README.md @@ -4,59 +4,57 @@ Make sure to execute these commands in the `LLaMA-Factory` directory. ## Table of Contents -- [LoRA Fine-Tuning on A Single GPU](#lora-fine-tuning-on-a-single-gpu) -- [QLoRA Fine-Tuning on a Single GPU](#qlora-fine-tuning-on-a-single-gpu) -- [LoRA Fine-Tuning on Multiple GPUs](#lora-fine-tuning-on-multiple-gpus) -- [LoRA Fine-Tuning on Multiple NPUs](#lora-fine-tuning-on-multiple-npus) -- [Full-Parameter Fine-Tuning on Multiple GPUs](#full-parameter-fine-tuning-on-multiple-gpus) +- [LoRA Fine-Tuning](#lora-fine-tuning) +- [QLoRA Fine-Tuning](#qlora-fine-tuning) +- [Full-Parameter Fine-Tuning](#full-parameter-fine-tuning) - [Merging LoRA Adapters and Quantization](#merging-lora-adapters-and-quantization) - [Inferring LoRA Fine-Tuned Models](#inferring-lora-fine-tuned-models) - [Extras](#extras) ## Examples -### LoRA Fine-Tuning on A Single GPU +### LoRA Fine-Tuning #### (Continuous) Pre-Training ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_pretrain.yaml +llamafactory-cli train examples/train_lora/llama3_lora_pretrain.yaml ``` #### Supervised Fine-Tuning ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_sft.yaml +llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml ``` #### Multimodal Supervised Fine-Tuning ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llava1_5_lora_sft.yaml +llamafactory-cli train examples/train_lora/llava1_5_lora_sft.yaml ``` #### Reward Modeling ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_reward.yaml +llamafactory-cli train examples/train_lora/llama3_lora_reward.yaml ``` #### PPO Training ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_ppo.yaml +llamafactory-cli train examples/train_lora/llama3_lora_ppo.yaml ``` #### DPO/ORPO/SimPO Training ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_dpo.yaml +llamafactory-cli train examples/train_lora/llama3_lora_dpo.yaml ``` #### KTO Training ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train 
examples/lora_single_gpu/llama3_lora_kto.yaml +llamafactory-cli train examples/train_lora/llama3_lora_kto.yaml ``` #### Preprocess Dataset @@ -64,95 +62,79 @@ CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lo It is useful for large dataset, use `tokenized_path` in config to load the preprocessed dataset. ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_preprocess.yaml +llamafactory-cli train examples/train_lora/llama3_preprocess.yaml ``` #### Evaluating on MMLU/CMMLU/C-Eval Benchmarks ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli eval examples/lora_single_gpu/llama3_lora_eval.yaml +llamafactory-cli eval examples/train_lora/llama3_lora_eval.yaml ``` #### Batch Predicting and Computing BLEU and ROUGE Scores ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_predict.yaml -``` - -### QLoRA Fine-Tuning on a Single GPU - -#### Supervised Fine-Tuning with 4/8-bit Bitsandbytes Quantization (Recommended) - -```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/qlora_single_gpu/llama3_lora_sft_bitsandbytes.yaml -``` - -#### Supervised Fine-Tuning with 4/8-bit GPTQ Quantization - -```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/qlora_single_gpu/llama3_lora_sft_gptq.yaml -``` - -#### Supervised Fine-Tuning with 4-bit AWQ Quantization - -```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/qlora_single_gpu/llama3_lora_sft_awq.yaml -``` - -#### Supervised Fine-Tuning with 2-bit AQLM Quantization - -```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/qlora_single_gpu/llama3_lora_sft_aqlm.yaml -``` - -### LoRA Fine-Tuning on Multiple GPUs - -#### Supervised Fine-Tuning on Single Node - -```bash -CUDA_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train examples/lora_multi_gpu/llama3_lora_sft.yaml +llamafactory-cli train examples/train_lora/llama3_lora_predict.yaml ``` #### Supervised Fine-Tuning on Multiple Nodes ```bash -CUDA_VISIBLE_DEVICES=0,1,2,3 NNODES=2 RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/lora_multi_gpu/llama3_lora_sft.yaml -CUDA_VISIBLE_DEVICES=0,1,2,3 NNODES=2 RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/lora_multi_gpu/llama3_lora_sft.yaml +FORCE_TORCHRUN=1 NNODES=2 RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml +FORCE_TORCHRUN=1 NNODES=2 RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml ``` #### Supervised Fine-Tuning with DeepSpeed ZeRO-3 (Weight Sharding) ```bash -CUDA_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train examples/lora_multi_gpu/llama3_lora_sft_ds.yaml +FORCE_TORCHRUN=1 llamafactory-cli train examples/train_lora/llama3_lora_sft_ds.yaml ``` -### LoRA Fine-Tuning on Multiple NPUs +### QLoRA Fine-Tuning -#### Supervised Fine-Tuning with DeepSpeed ZeRO-0 +#### Supervised Fine-Tuning with 4/8-bit Bitsandbytes Quantization (Recommended) ```bash -ASCEND_RT_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train examples/lora_multi_npu/llama3_lora_sft_ds.yaml +CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/train_qlora/llama3_lora_sft_bitsandbytes.yaml ``` -### Full-Parameter Fine-Tuning on Multiple GPUs +#### Supervised Fine-Tuning with 4/8-bit GPTQ Quantization + +```bash +CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/train_qlora/llama3_lora_sft_gptq.yaml +``` + +#### Supervised Fine-Tuning with 4-bit AWQ Quantization + 
+```bash +CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/train_qlora/llama3_lora_sft_awq.yaml +``` + +#### Supervised Fine-Tuning with 2-bit AQLM Quantization + +```bash +CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/train_qlora/llama3_lora_sft_aqlm.yaml +``` + +### Full-Parameter Fine-Tuning #### Supervised Fine-Tuning on Single Node ```bash -CUDA_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train examples/full_multi_gpu/llama3_full_sft.yaml +FORCE_TORCHRUN=1 llamafactory-cli train examples/train_full/llama3_full_sft.yaml ``` #### Supervised Fine-Tuning on Multiple Nodes ```bash -CUDA_VISIBLE_DEVICES=0,1,2,3 NNODES=2 RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/full_multi_gpu/llama3_full_sft.yaml -CUDA_VISIBLE_DEVICES=0,1,2,3 NNODES=2 RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/full_multi_gpu/llama3_full_sft.yaml +FORCE_TORCHRUN=1 NNODES=2 RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_full/llama3_full_sft.yaml +FORCE_TORCHRUN=1 NNODES=2 RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_full/llama3_full_sft.yaml ``` #### Batch Predicting and Computing BLEU and ROUGE Scores ```bash -CUDA_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train examples/full_multi_gpu/llama3_full_predict.yaml +llamafactory-cli train examples/train_full/llama3_full_predict.yaml ``` ### Merging LoRA Adapters and Quantization @@ -162,35 +144,33 @@ CUDA_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train examples/full_multi_gpu/llam Note: DO NOT use quantized model or `quantization_bit` when merging LoRA adapters. ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli export examples/merge_lora/llama3_lora_sft.yaml +llamafactory-cli export examples/merge_lora/llama3_lora_sft.yaml ``` #### Quantizing Model using AutoGPTQ ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli export examples/merge_lora/llama3_gptq.yaml +llamafactory-cli export examples/merge_lora/llama3_gptq.yaml ``` ### Inferring LoRA Fine-Tuned Models -Use `CUDA_VISIBLE_DEVICES=0,1` to infer models on multiple devices. 
- #### Use CLI ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli chat examples/inference/llama3_lora_sft.yaml +llamafactory-cli chat examples/inference/llama3_lora_sft.yaml ``` #### Use Web UI ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli webchat examples/inference/llama3_lora_sft.yaml +llamafactory-cli webchat examples/inference/llama3_lora_sft.yaml ``` #### Launch OpenAI-style API ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli api examples/inference/llama3_lora_sft.yaml +llamafactory-cli api examples/inference/llama3_lora_sft.yaml ``` ### Extras @@ -198,32 +178,32 @@ CUDA_VISIBLE_DEVICES=0 llamafactory-cli api examples/inference/llama3_lora_sft.y #### Full-Parameter Fine-Tuning using GaLore ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/extras/galore/llama3_full_sft.yaml +llamafactory-cli train examples/extras/galore/llama3_full_sft.yaml ``` #### Full-Parameter Fine-Tuning using BAdam ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/extras/badam/llama3_full_sft.yaml +llamafactory-cli train examples/extras/badam/llama3_full_sft.yaml ``` #### LoRA+ Fine-Tuning ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/extras/loraplus/llama3_lora_sft.yaml +llamafactory-cli train examples/extras/loraplus/llama3_lora_sft.yaml ``` #### Mixture-of-Depths Fine-Tuning ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/extras/mod/llama3_full_sft.yaml +llamafactory-cli train examples/extras/mod/llama3_full_sft.yaml ``` #### LLaMA-Pro Fine-Tuning ```bash bash examples/extras/llama_pro/expand.sh -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/extras/llama_pro/llama3_freeze_sft.yaml +llamafactory-cli train examples/extras/llama_pro/llama3_freeze_sft.yaml ``` #### FSDP+QLoRA Fine-Tuning diff --git a/examples/README_zh.md b/examples/README_zh.md index cf5bbf49..64c31fbd 100644 --- a/examples/README_zh.md +++ b/examples/README_zh.md @@ -4,59 +4,57 @@ ## 目录 -- [单 GPU LoRA 微调](#单-gpu-lora-微调) -- [单 GPU QLoRA 微调](#单-gpu-qlora-微调) -- [多 GPU LoRA 微调](#多-gpu-lora-微调) -- [多 NPU LoRA 微调](#多-npu-lora-微调) -- [多 GPU 全参数微调](#多-gpu-全参数微调) +- [LoRA 微调](#lora-微调) +- [QLoRA 微调](#qlora-微调) +- [全参数微调](#全参数微调) - [合并 LoRA 适配器与模型量化](#合并-lora-适配器与模型量化) - [推理 LoRA 模型](#推理-lora-模型) - [杂项](#杂项) ## 示例 -### 单 GPU LoRA 微调 +### LoRA 微调 #### (增量)预训练 ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_pretrain.yaml +llamafactory-cli train examples/train_lora/llama3_lora_pretrain.yaml ``` #### 指令监督微调 ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_sft.yaml +llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml ``` #### 多模态指令监督微调 ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llava1_5_lora_sft.yaml +llamafactory-cli train examples/train_lora/llava1_5_lora_sft.yaml ``` #### 奖励模型训练 ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_reward.yaml +llamafactory-cli train examples/train_lora/llama3_lora_reward.yaml ``` #### PPO 训练 ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_ppo.yaml +llamafactory-cli train examples/train_lora/llama3_lora_ppo.yaml ``` #### DPO/ORPO/SimPO 训练 ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_dpo.yaml +llamafactory-cli train examples/train_lora/llama3_lora_dpo.yaml ``` #### KTO 训练 ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_kto.yaml +llamafactory-cli 
train examples/train_lora/llama3_lora_kto.yaml ``` #### 预处理数据集 @@ -64,95 +62,79 @@ CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lo 对于大数据集有帮助,在配置中使用 `tokenized_path` 以加载预处理后的数据集。 ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_preprocess.yaml +llamafactory-cli train examples/train_lora/llama3_preprocess.yaml ``` #### 在 MMLU/CMMLU/C-Eval 上评估 ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli eval examples/lora_single_gpu/llama3_lora_eval.yaml +llamafactory-cli eval examples/train_lora/llama3_lora_eval.yaml ``` #### 批量预测并计算 BLEU 和 ROUGE 分数 ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_predict.yaml +llamafactory-cli train examples/train_lora/llama3_lora_predict.yaml ``` -### 单 GPU QLoRA 微调 - -#### 基于 4/8 比特 Bitsandbytes 量化进行指令监督微调(推荐) +#### 多机指令监督微调 ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/qlora_single_gpu/llama3_lora_sft_bitsandbytes.yaml -``` - -#### 基于 4/8 比特 GPTQ 量化进行指令监督微调 - -```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/qlora_single_gpu/llama3_lora_sft_gptq.yaml -``` - -#### 基于 4 比特 AWQ 量化进行指令监督微调 - -```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/qlora_single_gpu/llama3_lora_sft_awq.yaml -``` - -#### 基于 2 比特 AQLM 量化进行指令监督微调 - -```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/qlora_single_gpu/llama3_lora_sft_aqlm.yaml -``` - -### 多 GPU LoRA 微调 - -#### 在单机上进行指令监督微调 - -```bash -CUDA_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train examples/lora_multi_gpu/llama3_lora_sft.yaml -``` - -#### 在多机上进行指令监督微调 - -```bash -CUDA_VISIBLE_DEVICES=0,1,2,3 NNODES=2 RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/lora_multi_gpu/llama3_lora_sft.yaml -CUDA_VISIBLE_DEVICES=0,1,2,3 NNODES=2 RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/lora_multi_gpu/llama3_lora_sft.yaml +FORCE_TORCHRUN=1 NNODES=2 RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml +FORCE_TORCHRUN=1 NNODES=2 RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml ``` #### 使用 DeepSpeed ZeRO-3 平均分配显存 ```bash -CUDA_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train examples/lora_multi_gpu/llama3_lora_sft_ds.yaml +FORCE_TORCHRUN=1 llamafactory-cli train examples/train_lora/llama3_lora_sft_ds.yaml ``` -### 多 NPU LoRA 微调 +### QLoRA 微调 -#### 使用 DeepSpeed ZeRO-0 进行指令监督微调 +#### 基于 4/8 比特 Bitsandbytes 量化进行指令监督微调(推荐) ```bash -ASCEND_RT_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train examples/lora_multi_npu/llama3_lora_sft_ds.yaml +llamafactory-cli train examples/train_qlora/llama3_lora_sft_bitsandbytes.yaml ``` -### 多 GPU 全参数微调 +#### 基于 4/8 比特 GPTQ 量化进行指令监督微调 + +```bash +llamafactory-cli train examples/train_qlora/llama3_lora_sft_gptq.yaml +``` + +#### 基于 4 比特 AWQ 量化进行指令监督微调 + +```bash +llamafactory-cli train examples/train_qlora/llama3_lora_sft_awq.yaml +``` + +#### 基于 2 比特 AQLM 量化进行指令监督微调 + +```bash +llamafactory-cli train examples/train_qlora/llama3_lora_sft_aqlm.yaml +``` + +### 全参数微调 #### 在单机上进行指令监督微调 ```bash -CUDA_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train examples/full_multi_gpu/llama3_full_sft.yaml +FORCE_TORCHRUN=1 llamafactory-cli train examples/train_full/llama3_full_sft.yaml ``` #### 在多机上进行指令监督微调 ```bash -CUDA_VISIBLE_DEVICES=0,1,2,3 NNODES=2 RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/full_multi_gpu/llama3_full_sft.yaml 
-CUDA_VISIBLE_DEVICES=0,1,2,3 NNODES=2 RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/full_multi_gpu/llama3_full_sft.yaml +FORCE_TORCHRUN=1 NNODES=2 RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_full/llama3_full_sft.yaml +FORCE_TORCHRUN=1 NNODES=2 RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_full/llama3_full_sft.yaml ``` #### 批量预测并计算 BLEU 和 ROUGE 分数 ```bash -CUDA_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train examples/full_multi_gpu/llama3_full_predict.yaml +llamafactory-cli train examples/train_full/llama3_full_predict.yaml ``` ### 合并 LoRA 适配器与模型量化 @@ -162,35 +144,33 @@ CUDA_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train examples/full_multi_gpu/llam 注:请勿使用量化后的模型或 `quantization_bit` 参数来合并 LoRA 适配器。 ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli export examples/merge_lora/llama3_lora_sft.yaml +llamafactory-cli export examples/merge_lora/llama3_lora_sft.yaml ``` #### 使用 AutoGPTQ 量化模型 ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli export examples/merge_lora/llama3_gptq.yaml +llamafactory-cli export examples/merge_lora/llama3_gptq.yaml ``` ### 推理 LoRA 模型 -使用 `CUDA_VISIBLE_DEVICES=0,1` 进行多卡推理。 - #### 使用命令行接口 ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli chat examples/inference/llama3_lora_sft.yaml +llamafactory-cli chat examples/inference/llama3_lora_sft.yaml ``` #### 使用浏览器界面 ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli webchat examples/inference/llama3_lora_sft.yaml +llamafactory-cli webchat examples/inference/llama3_lora_sft.yaml ``` #### 启动 OpenAI 风格 API ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli api examples/inference/llama3_lora_sft.yaml +llamafactory-cli api examples/inference/llama3_lora_sft.yaml ``` ### 杂项 @@ -198,32 +178,32 @@ CUDA_VISIBLE_DEVICES=0 llamafactory-cli api examples/inference/llama3_lora_sft.y #### 使用 GaLore 进行全参数训练 ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/extras/galore/llama3_full_sft.yaml +llamafactory-cli train examples/extras/galore/llama3_full_sft.yaml ``` #### 使用 BAdam 进行全参数训练 ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/extras/badam/llama3_full_sft.yaml +llamafactory-cli train examples/extras/badam/llama3_full_sft.yaml ``` #### LoRA+ 微调 ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/extras/loraplus/llama3_lora_sft.yaml +llamafactory-cli train examples/extras/loraplus/llama3_lora_sft.yaml ``` #### 深度混合微调 ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/extras/mod/llama3_full_sft.yaml +llamafactory-cli train examples/extras/mod/llama3_full_sft.yaml ``` #### LLaMA-Pro 微调 ```bash bash examples/extras/llama_pro/expand.sh -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/extras/llama_pro/llama3_freeze_sft.yaml +llamafactory-cli train examples/extras/llama_pro/llama3_freeze_sft.yaml ``` #### FSDP+QLoRA 微调 diff --git a/examples/extras/fsdp_qlora/llama3_lora_sft.yaml b/examples/extras/fsdp_qlora/llama3_lora_sft.yaml index 084269ef..cc773991 100644 --- a/examples/extras/fsdp_qlora/llama3_lora_sft.yaml +++ b/examples/extras/fsdp_qlora/llama3_lora_sft.yaml @@ -8,9 +8,6 @@ do_train: true finetuning_type: lora lora_target: all -### ddp -ddp_timeout: 180000000 - ### dataset dataset: identity,alpaca_en_demo template: llama3 @@ -34,6 +31,7 @@ num_train_epochs: 3.0 lr_scheduler_type: cosine warmup_ratio: 0.1 fp16: true +ddp_timeout: 180000000 ### eval val_size: 0.1 diff --git a/examples/extras/llama_pro/llama3_freeze_sft.yaml 
b/examples/extras/llama_pro/llama3_freeze_sft.yaml index 444a1113..f92d6945 100644 --- a/examples/extras/llama_pro/llama3_freeze_sft.yaml +++ b/examples/extras/llama_pro/llama3_freeze_sft.yaml @@ -32,6 +32,7 @@ num_train_epochs: 3.0 lr_scheduler_type: cosine warmup_ratio: 0.1 fp16: true +ddp_timeout: 180000000 ### eval val_size: 0.1 diff --git a/examples/extras/loraplus/llama3_lora_sft.yaml b/examples/extras/loraplus/llama3_lora_sft.yaml index 1ba654ec..57383ae0 100644 --- a/examples/extras/loraplus/llama3_lora_sft.yaml +++ b/examples/extras/loraplus/llama3_lora_sft.yaml @@ -31,6 +31,7 @@ num_train_epochs: 3.0 lr_scheduler_type: cosine warmup_ratio: 0.1 fp16: true +ddp_timeout: 180000000 ### eval val_size: 0.1 diff --git a/examples/extras/mod/llama3_full_sft.yaml b/examples/extras/mod/llama3_full_sft.yaml index df03c1e0..085febfc 100644 --- a/examples/extras/mod/llama3_full_sft.yaml +++ b/examples/extras/mod/llama3_full_sft.yaml @@ -31,6 +31,7 @@ num_train_epochs: 3.0 lr_scheduler_type: cosine warmup_ratio: 0.1 pure_bf16: true +ddp_timeout: 180000000 ### eval val_size: 0.1 diff --git a/examples/lora_multi_gpu/llama3_lora_sft.yaml b/examples/lora_multi_gpu/llama3_lora_sft.yaml deleted file mode 100644 index 348e53b9..00000000 --- a/examples/lora_multi_gpu/llama3_lora_sft.yaml +++ /dev/null @@ -1,41 +0,0 @@ -### model -model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct - -### method -stage: sft -do_train: true -finetuning_type: lora -lora_target: all - -### ddp -ddp_timeout: 180000000 - -### dataset -dataset: identity,alpaca_en_demo -template: llama3 -cutoff_len: 1024 -max_samples: 1000 -overwrite_cache: true -preprocessing_num_workers: 16 - -### output -output_dir: saves/llama3-8b/lora/sft -logging_steps: 10 -save_steps: 500 -plot_loss: true -overwrite_output_dir: true - -### train -per_device_train_batch_size: 1 -gradient_accumulation_steps: 2 -learning_rate: 1.0e-4 -num_train_epochs: 3.0 -lr_scheduler_type: cosine -warmup_ratio: 0.1 -fp16: true - -### eval -val_size: 0.1 -per_device_eval_batch_size: 1 -eval_strategy: steps -eval_steps: 500 diff --git a/examples/full_multi_gpu/llama3_full_predict.yaml b/examples/train_full/llama3_full_predict.yaml similarity index 100% rename from examples/full_multi_gpu/llama3_full_predict.yaml rename to examples/train_full/llama3_full_predict.yaml diff --git a/examples/full_multi_gpu/llama3_full_sft.yaml b/examples/train_full/llama3_full_sft_ds3.yaml similarity index 100% rename from examples/full_multi_gpu/llama3_full_sft.yaml rename to examples/train_full/llama3_full_sft_ds3.yaml diff --git a/examples/lora_single_gpu/llama3_lora_dpo.yaml b/examples/train_lora/llama3_lora_dpo.yaml similarity index 96% rename from examples/lora_single_gpu/llama3_lora_dpo.yaml rename to examples/train_lora/llama3_lora_dpo.yaml index 78344330..db25fb51 100644 --- a/examples/lora_single_gpu/llama3_lora_dpo.yaml +++ b/examples/train_lora/llama3_lora_dpo.yaml @@ -32,6 +32,7 @@ num_train_epochs: 3.0 lr_scheduler_type: cosine warmup_ratio: 0.1 fp16: true +ddp_timeout: 180000000 ### eval val_size: 0.1 diff --git a/examples/lora_single_gpu/llama3_lora_eval.yaml b/examples/train_lora/llama3_lora_eval.yaml similarity index 100% rename from examples/lora_single_gpu/llama3_lora_eval.yaml rename to examples/train_lora/llama3_lora_eval.yaml diff --git a/examples/lora_single_gpu/llama3_lora_kto.yaml b/examples/train_lora/llama3_lora_kto.yaml similarity index 94% rename from examples/lora_single_gpu/llama3_lora_kto.yaml rename to examples/train_lora/llama3_lora_kto.yaml index 
d5234c0a..f730c82e 100644 --- a/examples/lora_single_gpu/llama3_lora_kto.yaml +++ b/examples/train_lora/llama3_lora_kto.yaml @@ -6,6 +6,7 @@ stage: kto do_train: true finetuning_type: lora lora_target: all +pref_beta: 0.1 ### dataset dataset: kto_en_demo @@ -30,6 +31,7 @@ num_train_epochs: 3.0 lr_scheduler_type: cosine warmup_ratio: 0.1 fp16: true +ddp_timeout: 180000000 ### eval val_size: 0.1 diff --git a/examples/lora_single_gpu/llama3_lora_ppo.yaml b/examples/train_lora/llama3_lora_ppo.yaml similarity index 96% rename from examples/lora_single_gpu/llama3_lora_ppo.yaml rename to examples/train_lora/llama3_lora_ppo.yaml index 98c842f9..e574014e 100644 --- a/examples/lora_single_gpu/llama3_lora_ppo.yaml +++ b/examples/train_lora/llama3_lora_ppo.yaml @@ -31,6 +31,7 @@ num_train_epochs: 3.0 lr_scheduler_type: cosine warmup_ratio: 0.1 fp16: true +ddp_timeout: 180000000 ### generate max_new_tokens: 512 diff --git a/examples/lora_single_gpu/llama3_lora_predict.yaml b/examples/train_lora/llama3_lora_predict.yaml similarity index 95% rename from examples/lora_single_gpu/llama3_lora_predict.yaml rename to examples/train_lora/llama3_lora_predict.yaml index a127d248..148c8635 100644 --- a/examples/lora_single_gpu/llama3_lora_predict.yaml +++ b/examples/train_lora/llama3_lora_predict.yaml @@ -22,3 +22,4 @@ overwrite_output_dir: true ### eval per_device_eval_batch_size: 1 predict_with_generate: true +ddp_timeout: 180000000 diff --git a/examples/lora_single_gpu/llama3_lora_pretrain.yaml b/examples/train_lora/llama3_lora_pretrain.yaml similarity index 96% rename from examples/lora_single_gpu/llama3_lora_pretrain.yaml rename to examples/train_lora/llama3_lora_pretrain.yaml index db435ca9..839b3e51 100644 --- a/examples/lora_single_gpu/llama3_lora_pretrain.yaml +++ b/examples/train_lora/llama3_lora_pretrain.yaml @@ -29,6 +29,7 @@ num_train_epochs: 3.0 lr_scheduler_type: cosine warmup_ratio: 0.1 fp16: true +ddp_timeout: 180000000 ### eval val_size: 0.1 diff --git a/examples/lora_single_gpu/llama3_lora_reward.yaml b/examples/train_lora/llama3_lora_reward.yaml similarity index 96% rename from examples/lora_single_gpu/llama3_lora_reward.yaml rename to examples/train_lora/llama3_lora_reward.yaml index 1ce42ea4..79559d19 100644 --- a/examples/lora_single_gpu/llama3_lora_reward.yaml +++ b/examples/train_lora/llama3_lora_reward.yaml @@ -30,6 +30,7 @@ num_train_epochs: 3.0 lr_scheduler_type: cosine warmup_ratio: 0.1 fp16: true +ddp_timeout: 180000000 ### eval val_size: 0.1 diff --git a/examples/lora_single_gpu/llama3_lora_sft.yaml b/examples/train_lora/llama3_lora_sft.yaml similarity index 96% rename from examples/lora_single_gpu/llama3_lora_sft.yaml rename to examples/train_lora/llama3_lora_sft.yaml index 651b636f..fe30c575 100644 --- a/examples/lora_single_gpu/llama3_lora_sft.yaml +++ b/examples/train_lora/llama3_lora_sft.yaml @@ -30,6 +30,7 @@ num_train_epochs: 3.0 lr_scheduler_type: cosine warmup_ratio: 0.1 fp16: true +ddp_timeout: 180000000 ### eval val_size: 0.1 diff --git a/examples/lora_multi_npu/llama3_lora_sft_ds.yaml b/examples/train_lora/llama3_lora_sft_ds0.yaml similarity index 98% rename from examples/lora_multi_npu/llama3_lora_sft_ds.yaml rename to examples/train_lora/llama3_lora_sft_ds0.yaml index a0ec8aa1..08b638e6 100644 --- a/examples/lora_multi_npu/llama3_lora_sft_ds.yaml +++ b/examples/train_lora/llama3_lora_sft_ds0.yaml @@ -6,9 +6,6 @@ stage: sft do_train: true finetuning_type: lora lora_target: all - -### ddp -ddp_timeout: 180000000 deepspeed: examples/deepspeed/ds_z0_config.json ### dataset 
@@ -34,6 +31,7 @@ num_train_epochs: 3.0 lr_scheduler_type: cosine warmup_ratio: 0.1 fp16: true +ddp_timeout: 180000000 ### eval val_size: 0.1 diff --git a/examples/lora_multi_gpu/llama3_lora_sft_ds.yaml b/examples/train_lora/llama3_lora_sft_ds3.yaml similarity index 98% rename from examples/lora_multi_gpu/llama3_lora_sft_ds.yaml rename to examples/train_lora/llama3_lora_sft_ds3.yaml index 1c432fa7..b7266d61 100644 --- a/examples/lora_multi_gpu/llama3_lora_sft_ds.yaml +++ b/examples/train_lora/llama3_lora_sft_ds3.yaml @@ -6,9 +6,6 @@ stage: sft do_train: true finetuning_type: lora lora_target: all - -### ddp -ddp_timeout: 180000000 deepspeed: examples/deepspeed/ds_z3_config.json ### dataset @@ -34,6 +31,7 @@ num_train_epochs: 3.0 lr_scheduler_type: cosine warmup_ratio: 0.1 fp16: true +ddp_timeout: 180000000 ### eval val_size: 0.1 diff --git a/examples/lora_single_gpu/llama3_preprocess.yaml b/examples/train_lora/llama3_preprocess.yaml similarity index 100% rename from examples/lora_single_gpu/llama3_preprocess.yaml rename to examples/train_lora/llama3_preprocess.yaml diff --git a/examples/lora_single_gpu/llava1_5_lora_sft.yaml b/examples/train_lora/llava1_5_lora_sft.yaml similarity index 96% rename from examples/lora_single_gpu/llava1_5_lora_sft.yaml rename to examples/train_lora/llava1_5_lora_sft.yaml index df510a93..55ac31fa 100644 --- a/examples/lora_single_gpu/llava1_5_lora_sft.yaml +++ b/examples/train_lora/llava1_5_lora_sft.yaml @@ -31,6 +31,7 @@ num_train_epochs: 3.0 lr_scheduler_type: cosine warmup_ratio: 0.1 fp16: true +ddp_timeout: 180000000 ### eval val_size: 0.1 diff --git a/examples/qlora_single_gpu/llama3_lora_sft_aqlm.yaml b/examples/train_qlora/llama3_lora_sft_aqlm.yaml similarity index 96% rename from examples/qlora_single_gpu/llama3_lora_sft_aqlm.yaml rename to examples/train_qlora/llama3_lora_sft_aqlm.yaml index d54d6af6..7b6767d5 100644 --- a/examples/qlora_single_gpu/llama3_lora_sft_aqlm.yaml +++ b/examples/train_qlora/llama3_lora_sft_aqlm.yaml @@ -30,6 +30,7 @@ num_train_epochs: 3.0 lr_scheduler_type: cosine warmup_ratio: 0.1 fp16: true +ddp_timeout: 180000000 ### eval val_size: 0.1 diff --git a/examples/qlora_single_gpu/llama3_lora_sft_awq.yaml b/examples/train_qlora/llama3_lora_sft_awq.yaml similarity index 96% rename from examples/qlora_single_gpu/llama3_lora_sft_awq.yaml rename to examples/train_qlora/llama3_lora_sft_awq.yaml index 5cef178a..a2a26e4b 100644 --- a/examples/qlora_single_gpu/llama3_lora_sft_awq.yaml +++ b/examples/train_qlora/llama3_lora_sft_awq.yaml @@ -30,6 +30,7 @@ num_train_epochs: 3.0 lr_scheduler_type: cosine warmup_ratio: 0.1 fp16: true +ddp_timeout: 180000000 ### eval val_size: 0.1 diff --git a/examples/qlora_single_gpu/llama3_lora_sft_bitsandbytes.yaml b/examples/train_qlora/llama3_lora_sft_bitsandbytes.yaml similarity index 96% rename from examples/qlora_single_gpu/llama3_lora_sft_bitsandbytes.yaml rename to examples/train_qlora/llama3_lora_sft_bitsandbytes.yaml index b308dcab..cc773991 100644 --- a/examples/qlora_single_gpu/llama3_lora_sft_bitsandbytes.yaml +++ b/examples/train_qlora/llama3_lora_sft_bitsandbytes.yaml @@ -31,6 +31,7 @@ num_train_epochs: 3.0 lr_scheduler_type: cosine warmup_ratio: 0.1 fp16: true +ddp_timeout: 180000000 ### eval val_size: 0.1 diff --git a/examples/qlora_single_gpu/llama3_lora_sft_gptq.yaml b/examples/train_qlora/llama3_lora_sft_gptq.yaml similarity index 96% rename from examples/qlora_single_gpu/llama3_lora_sft_gptq.yaml rename to examples/train_qlora/llama3_lora_sft_gptq.yaml index b950042e..ad3d854c 100644 
--- a/examples/qlora_single_gpu/llama3_lora_sft_gptq.yaml +++ b/examples/train_qlora/llama3_lora_sft_gptq.yaml @@ -30,6 +30,7 @@ num_train_epochs: 3.0 lr_scheduler_type: cosine warmup_ratio: 0.1 fp16: true +ddp_timeout: 180000000 ### eval val_size: 0.1 From e06568de988db0f6e44e663bbc8457e8aa0f0507 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Thu, 13 Jun 2024 03:16:20 +0800 Subject: [PATCH 141/162] Update llama3_full_sft_ds3.yaml Former-commit-id: e715af62d521112d9c155cfa91fbb42fa0e77710 --- examples/train_full/llama3_full_sft_ds3.yaml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/examples/train_full/llama3_full_sft_ds3.yaml b/examples/train_full/llama3_full_sft_ds3.yaml index 40b62f24..40afd2ee 100644 --- a/examples/train_full/llama3_full_sft_ds3.yaml +++ b/examples/train_full/llama3_full_sft_ds3.yaml @@ -5,9 +5,6 @@ model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct stage: sft do_train: true finetuning_type: full - -### ddp -ddp_timeout: 180000000 deepspeed: examples/deepspeed/ds_z3_config.json ### dataset @@ -33,6 +30,7 @@ num_train_epochs: 3.0 lr_scheduler_type: cosine warmup_ratio: 0.1 fp16: true +ddp_timeout: 180000000 ### eval val_size: 0.1 From 48d87a2decf4dd4d687ef0d2eec8b93109ad7d98 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Thu, 13 Jun 2024 03:19:18 +0800 Subject: [PATCH 142/162] add quant check in webui export tab Former-commit-id: 6455ca07061ae9858cd7bc996b28be1fde697a3d --- src/llamafactory/webui/components/export.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/llamafactory/webui/components/export.py b/src/llamafactory/webui/components/export.py index 7e1493c8..9d756a38 100644 --- a/src/llamafactory/webui/components/export.py +++ b/src/llamafactory/webui/components/export.py @@ -21,6 +21,13 @@ if TYPE_CHECKING: GPTQ_BITS = ["8", "4", "3", "2"] +def can_quantize(checkpoint_path: Union[str, List[str]]) -> "gr.Dropdown": + if isinstance(checkpoint_path, list) and len(checkpoint_path) != 0: + return gr.Dropdown(value="none", interactive=False) + else: + return gr.Dropdown(interactive=True) + + def save_model( lang: str, model_name: str, @@ -96,6 +103,9 @@ def create_export_tab(engine: "Engine") -> Dict[str, "Component"]: export_dir = gr.Textbox() export_hub_model_id = gr.Textbox() + checkpoint_path: gr.Dropdown = engine.manager.get_elem_by_id("top.checkpoint_path") + checkpoint_path.change(can_quantize, [checkpoint_path], [export_quantization_bit], queue=False) + export_btn = gr.Button() info_box = gr.Textbox(show_label=False, interactive=False) From db6e74884c633df80aedcd245a2dd08532ae37c3 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Thu, 13 Jun 2024 03:26:10 +0800 Subject: [PATCH 143/162] update examples Former-commit-id: d6bf6231290d79eb3a63e711f18fa711ef18a4f6 --- examples/README.md | 10 ++++++---- examples/README_zh.md | 10 ++++++---- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/examples/README.md b/examples/README.md index 3372afb9..180d5f7b 100644 --- a/examples/README.md +++ b/examples/README.md @@ -11,6 +11,8 @@ Make sure to execute these commands in the `LLaMA-Factory` directory. - [Inferring LoRA Fine-Tuned Models](#inferring-lora-fine-tuned-models) - [Extras](#extras) +Use `CUDA_VISIBLE_DEVICES` (GPU) or `ASCEND_RT_VISIBLE_DEVICES` (NPU) to choose computing devices. 
+ ## Examples ### LoRA Fine-Tuning @@ -87,7 +89,7 @@ FORCE_TORCHRUN=1 NNODES=2 RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llama #### Supervised Fine-Tuning with DeepSpeed ZeRO-3 (Weight Sharding) ```bash -FORCE_TORCHRUN=1 llamafactory-cli train examples/train_lora/llama3_lora_sft_ds.yaml +FORCE_TORCHRUN=1 llamafactory-cli train examples/train_lora/llama3_lora_sft_ds3.yaml ``` ### QLoRA Fine-Tuning @@ -121,14 +123,14 @@ CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/train_qlora/llama3_lora_s #### Supervised Fine-Tuning on Single Node ```bash -FORCE_TORCHRUN=1 llamafactory-cli train examples/train_full/llama3_full_sft.yaml +FORCE_TORCHRUN=1 llamafactory-cli train examples/train_full/llama3_full_sft_ds3.yaml ``` #### Supervised Fine-Tuning on Multiple Nodes ```bash -FORCE_TORCHRUN=1 NNODES=2 RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_full/llama3_full_sft.yaml -FORCE_TORCHRUN=1 NNODES=2 RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_full/llama3_full_sft.yaml +FORCE_TORCHRUN=1 NNODES=2 RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_full/llama3_full_sft_ds3.yaml +FORCE_TORCHRUN=1 NNODES=2 RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_full/llama3_full_sft_ds3.yaml ``` #### Batch Predicting and Computing BLEU and ROUGE Scores diff --git a/examples/README_zh.md b/examples/README_zh.md index 64c31fbd..b6168a95 100644 --- a/examples/README_zh.md +++ b/examples/README_zh.md @@ -11,6 +11,8 @@ - [推理 LoRA 模型](#推理-lora-模型) - [杂项](#杂项) +使用 `CUDA_VISIBLE_DEVICES`(GPU)或 `ASCEND_RT_VISIBLE_DEVICES`(NPU)选择计算设备。 + ## 示例 ### LoRA 微调 @@ -87,7 +89,7 @@ FORCE_TORCHRUN=1 NNODES=2 RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llama #### 使用 DeepSpeed ZeRO-3 平均分配显存 ```bash -FORCE_TORCHRUN=1 llamafactory-cli train examples/train_lora/llama3_lora_sft_ds.yaml +FORCE_TORCHRUN=1 llamafactory-cli train examples/train_lora/llama3_lora_sft_ds3.yaml ``` ### QLoRA 微调 @@ -121,14 +123,14 @@ llamafactory-cli train examples/train_qlora/llama3_lora_sft_aqlm.yaml #### 在单机上进行指令监督微调 ```bash -FORCE_TORCHRUN=1 llamafactory-cli train examples/train_full/llama3_full_sft.yaml +FORCE_TORCHRUN=1 llamafactory-cli train examples/train_full/llama3_full_sft_ds3.yaml ``` #### 在多机上进行指令监督微调 ```bash -FORCE_TORCHRUN=1 NNODES=2 RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_full/llama3_full_sft.yaml -FORCE_TORCHRUN=1 NNODES=2 RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_full/llama3_full_sft.yaml +FORCE_TORCHRUN=1 NNODES=2 RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_full/llama3_full_sft_ds3.yaml +FORCE_TORCHRUN=1 NNODES=2 RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_full/llama3_full_sft_ds3.yaml ``` #### 批量预测并计算 BLEU 和 ROUGE 分数 From 6306f3e2ead0a80b75997e29bf265710a67c2fd2 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Thu, 13 Jun 2024 16:02:21 +0800 Subject: [PATCH 144/162] Update README.md Former-commit-id: f8d701cd3ce2e56f95b4f5439b8b48d5b62e0d2b --- examples/README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/examples/README.md b/examples/README.md index 180d5f7b..a6d78936 100644 --- a/examples/README.md +++ b/examples/README.md @@ -97,25 +97,25 @@ FORCE_TORCHRUN=1 llamafactory-cli train examples/train_lora/llama3_lora_sft_ds3. 
#### Supervised Fine-Tuning with 4/8-bit Bitsandbytes Quantization (Recommended) ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/train_qlora/llama3_lora_sft_bitsandbytes.yaml +llamafactory-cli train examples/train_qlora/llama3_lora_sft_bitsandbytes.yaml ``` #### Supervised Fine-Tuning with 4/8-bit GPTQ Quantization ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/train_qlora/llama3_lora_sft_gptq.yaml +llamafactory-cli train examples/train_qlora/llama3_lora_sft_gptq.yaml ``` #### Supervised Fine-Tuning with 4-bit AWQ Quantization ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/train_qlora/llama3_lora_sft_awq.yaml +llamafactory-cli train examples/train_qlora/llama3_lora_sft_awq.yaml ``` #### Supervised Fine-Tuning with 2-bit AQLM Quantization ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/train_qlora/llama3_lora_sft_aqlm.yaml +llamafactory-cli train examples/train_qlora/llama3_lora_sft_aqlm.yaml ``` ### Full-Parameter Fine-Tuning From bbeb3b10aacd38261ab0e352ffd3a35ee0027a38 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sat, 15 Jun 2024 04:05:54 +0800 Subject: [PATCH 145/162] add test cases Former-commit-id: 731176ff34cdf0cbf6b41c40c69f4ceb54c2daf6 --- src/llamafactory/chat/vllm_engine.py | 2 +- src/llamafactory/hparams/model_args.py | 8 +-- src/llamafactory/model/adapter.py | 43 +++++++------ src/llamafactory/model/patcher.py | 5 +- src/llamafactory/train/ppo/trainer.py | 3 +- tests/model/test_base.py | 32 ++++++++++ tests/model/test_freeze.py | 22 ++++++- tests/model/test_full.py | 20 ++++++- tests/model/test_lora.py | 83 +++++++++++++++++++++++++- 9 files changed, 184 insertions(+), 34 deletions(-) create mode 100644 tests/model/test_base.py diff --git a/src/llamafactory/chat/vllm_engine.py b/src/llamafactory/chat/vllm_engine.py index e4c05478..f0812a99 100644 --- a/src/llamafactory/chat/vllm_engine.py +++ b/src/llamafactory/chat/vllm_engine.py @@ -52,7 +52,7 @@ class VllmEngine(BaseEngine): "model": model_args.model_name_or_path, "trust_remote_code": True, "download_dir": model_args.cache_dir, - "dtype": model_args.vllm_dtype, + "dtype": model_args.infer_dtype, "max_model_len": model_args.vllm_maxlen, "tensor_parallel_size": get_device_count() or 1, "gpu_memory_utilization": model_args.vllm_gpu_util, diff --git a/src/llamafactory/hparams/model_args.py b/src/llamafactory/hparams/model_args.py index 359beafd..bbac2e4b 100644 --- a/src/llamafactory/hparams/model_args.py +++ b/src/llamafactory/hparams/model_args.py @@ -136,10 +136,6 @@ class ModelArguments: default=8, metadata={"help": "Maximum rank of all LoRAs in the vLLM engine."}, ) - vllm_dtype: Literal["auto", "float16", "bfloat16", "float32"] = field( - default="auto", - metadata={"help": "Data type for model weights and activations in the vLLM engine."}, - ) offload_folder: str = field( default="offload", metadata={"help": "Path to offload model weights."}, @@ -148,6 +144,10 @@ class ModelArguments: default=True, metadata={"help": "Whether or not to use KV cache in generation."}, ) + infer_dtype: Literal["auto", "float16", "bfloat16", "float32"] = field( + default="auto", + metadata={"help": "Data type for model weights and activations at inference."} + ) hf_hub_token: Optional[str] = field( default=None, metadata={"help": "Auth token to log in with Hugging Face Hub."}, diff --git a/src/llamafactory/model/adapter.py b/src/llamafactory/model/adapter.py index 34b9eda6..c37f6009 100644 --- a/src/llamafactory/model/adapter.py +++ 
b/src/llamafactory/model/adapter.py @@ -25,8 +25,12 @@ def _setup_full_tuning( model: "PreTrainedModel", model_args: "ModelArguments", finetuning_args: "FinetuningArguments", + is_trainable: bool, cast_trainable_params_to_fp32: bool, ) -> None: + if not is_trainable: + return + logger.info("Fine-tuning method: Full") forbidden_modules = set() if model_args.visual_inputs and finetuning_args.freeze_vision_tower: @@ -47,8 +51,12 @@ def _setup_freeze_tuning( model: "PreTrainedModel", model_args: "ModelArguments", finetuning_args: "FinetuningArguments", + is_trainable: bool, cast_trainable_params_to_fp32: bool, ) -> None: + if not is_trainable: + return + logger.info("Fine-tuning method: Freeze") if model_args.visual_inputs: config = model.config.text_config @@ -132,7 +140,9 @@ def _setup_lora_tuning( is_trainable: bool, cast_trainable_params_to_fp32: bool, ) -> "PeftModel": - logger.info("Fine-tuning method: {}".format("DoRA" if finetuning_args.use_dora else "LoRA")) + if is_trainable: + logger.info("Fine-tuning method: {}".format("DoRA" if finetuning_args.use_dora else "LoRA")) + adapter_to_resume = None if model_args.adapter_name_or_path is not None: @@ -173,6 +183,8 @@ def _setup_lora_tuning( offload_folder=model_args.offload_folder, ) + logger.info("Loaded adapter(s): {}".format(",".join(model_args.adapter_name_or_path))) + if is_trainable and adapter_to_resume is None: # create new lora weights while training if len(finetuning_args.lora_target) == 1 and finetuning_args.lora_target[0] == "all": target_modules = find_all_linear_modules(model, finetuning_args.freeze_vision_tower) @@ -227,9 +239,6 @@ def _setup_lora_tuning( for param in filter(lambda p: p.requires_grad, model.parameters()): param.data = param.data.to(torch.float32) - if model_args.adapter_name_or_path is not None: - logger.info("Loaded adapter(s): {}".format(",".join(model_args.adapter_name_or_path))) - return model @@ -247,29 +256,27 @@ def init_adapter( Note that the trainable parameters must be cast to float32. 
""" - if (not is_trainable) and model_args.adapter_name_or_path is None: - logger.info("Adapter is not found at evaluation, load the base model.") - return model + if is_trainable and getattr(model, "quantization_method", None) and finetuning_args.finetuning_type != "lora": + raise ValueError("Quantized models can only be used for the LoRA tuning.") - if finetuning_args.finetuning_type != "lora" and getattr(model, "quantization_method", None): - raise ValueError("You can only use lora for quantized models.") - - if is_deepspeed_zero3_enabled() or is_fsdp_enabled() or finetuning_args.pure_bf16 or finetuning_args.use_badam: + if not is_trainable: + cast_trainable_params_to_fp32 = False + elif is_deepspeed_zero3_enabled() or is_fsdp_enabled() or finetuning_args.pure_bf16 or finetuning_args.use_badam: logger.info("ZeRO3/FSDP/PureBF16/BAdam detected, remaining trainable params as their original precision.") cast_trainable_params_to_fp32 = False else: logger.info("Upcasting trainable params to float32.") cast_trainable_params_to_fp32 = True - if is_trainable and finetuning_args.finetuning_type == "full": - _setup_full_tuning(model, model_args, finetuning_args, cast_trainable_params_to_fp32) - - if is_trainable and finetuning_args.finetuning_type == "freeze": - _setup_freeze_tuning(model, model_args, finetuning_args, cast_trainable_params_to_fp32) - - if finetuning_args.finetuning_type == "lora": + if finetuning_args.finetuning_type == "full": + _setup_full_tuning(model, model_args, finetuning_args, is_trainable, cast_trainable_params_to_fp32) + elif finetuning_args.finetuning_type == "freeze": + _setup_freeze_tuning(model, model_args, finetuning_args, is_trainable, cast_trainable_params_to_fp32) + elif finetuning_args.finetuning_type == "lora": model = _setup_lora_tuning( config, model, model_args, finetuning_args, is_trainable, cast_trainable_params_to_fp32 ) + else: + raise NotImplementedError("Unknown finetuning type: {}.".format(finetuning_args.finetuning_type)) return model diff --git a/src/llamafactory/model/patcher.py b/src/llamafactory/model/patcher.py index 18221a10..b97ff433 100644 --- a/src/llamafactory/model/patcher.py +++ b/src/llamafactory/model/patcher.py @@ -44,7 +44,10 @@ def patch_config( is_trainable: bool, ) -> None: if model_args.compute_dtype is None: # priority: bf16 > fp16 > fp32 - model_args.compute_dtype = infer_optim_dtype(model_dtype=getattr(config, "torch_dtype", None)) + if model_args.infer_dtype == "auto": + model_args.compute_dtype = infer_optim_dtype(model_dtype=getattr(config, "torch_dtype", None)) + else: + model_args.compute_dtype = getattr(torch, model_args.infer_dtype) if is_torch_npu_available(): use_jit_compile = os.environ.get("JIT_COMPILE", "0").lower() in ["true", "1"] diff --git a/src/llamafactory/train/ppo/trainer.py b/src/llamafactory/train/ppo/trainer.py index 737c45a3..45f47455 100644 --- a/src/llamafactory/train/ppo/trainer.py +++ b/src/llamafactory/train/ppo/trainer.py @@ -135,8 +135,7 @@ class CustomPPOTrainer(PPOTrainer, Trainer): unwrapped_model: "AutoModelForCausalLMWithValueHead" = self.accelerator.unwrap_model(self.model) self.is_chatglm_model = getattr(unwrapped_model.config, "model_type", None) == "chatglm" - device_type = unwrapped_model.pretrained_model.device.type - self.amp_context = torch.autocast(device_type, dtype=model_args.compute_dtype) + self.amp_context = torch.autocast(self.current_device.type, dtype=self.model_args.compute_dtype) warnings.simplefilter("ignore") # remove gc warnings on ref model if 
finetuning_args.reward_model_type == "full": diff --git a/tests/model/test_base.py b/tests/model/test_base.py new file mode 100644 index 00000000..32a3918e --- /dev/null +++ b/tests/model/test_base.py @@ -0,0 +1,32 @@ +import os + +import torch +from transformers import AutoModelForCausalLM + +from llamafactory.hparams import get_infer_args +from llamafactory.model import load_model, load_tokenizer + + +TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-Llama-3") + +INFER_ARGS = { + "model_name_or_path": TINY_LLAMA, + "template": "llama3", + "infer_dtype": "float16", +} + + +def compare_model(model_a: "torch.nn.Module", model_b: "torch.nn.Module"): + state_dict_a = model_a.state_dict() + state_dict_b = model_b.state_dict() + assert set(state_dict_a.keys()) == set(state_dict_b.keys()) + for name in state_dict_a.keys(): + assert torch.allclose(state_dict_a[name], state_dict_b[name]) is True + + +def test_base(): + model_args, _, finetuning_args, _ = get_infer_args(INFER_ARGS) + tokenizer_module = load_tokenizer(model_args) + model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=False) + ref_model = AutoModelForCausalLM.from_pretrained(TINY_LLAMA, torch_dtype=model.dtype, device_map=model.device) + compare_model(model, ref_model) diff --git a/tests/model/test_freeze.py b/tests/model/test_freeze.py index 97800696..a0618315 100644 --- a/tests/model/test_freeze.py +++ b/tests/model/test_freeze.py @@ -2,7 +2,7 @@ import os import torch -from llamafactory.hparams import get_train_args +from llamafactory.hparams import get_infer_args, get_train_args from llamafactory.model import load_model, load_tokenizer @@ -23,8 +23,15 @@ TRAIN_ARGS = { "fp16": True, } +INFER_ARGS = { + "model_name_or_path": TINY_LLAMA, + "finetuning_type": "freeze", + "template": "llama3", + "infer_dtype": "float16", +} -def test_freeze_all_modules(): + +def test_freeze_train_all_modules(): model_args, _, _, finetuning_args, _ = get_train_args({"freeze_trainable_layers": 1, **TRAIN_ARGS}) tokenizer_module = load_tokenizer(model_args) model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) @@ -37,7 +44,7 @@ def test_freeze_all_modules(): assert param.dtype == torch.float16 -def test_freeze_extra_modules(): +def test_freeze_train_extra_modules(): model_args, _, _, finetuning_args, _ = get_train_args( {"freeze_trainable_layers": 1, "freeze_extra_modules": "embed_tokens,lm_head", **TRAIN_ARGS} ) @@ -50,3 +57,12 @@ def test_freeze_extra_modules(): else: assert param.requires_grad is False assert param.dtype == torch.float16 + + +def test_freeze_inference(): + model_args, _, finetuning_args, _ = get_infer_args(INFER_ARGS) + tokenizer_module = load_tokenizer(model_args) + model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=False) + for param in model.parameters(): + assert param.requires_grad is False + assert param.dtype == torch.float16 diff --git a/tests/model/test_full.py b/tests/model/test_full.py index 6cb78f37..802b987c 100644 --- a/tests/model/test_full.py +++ b/tests/model/test_full.py @@ -2,7 +2,7 @@ import os import torch -from llamafactory.hparams import get_train_args +from llamafactory.hparams import get_infer_args, get_train_args from llamafactory.model import load_model, load_tokenizer @@ -23,11 +23,27 @@ TRAIN_ARGS = { "fp16": True, } +INFER_ARGS = { + "model_name_or_path": TINY_LLAMA, + "finetuning_type": "full", + "template": "llama3", + "infer_dtype": "float16", +} -def 
test_full(): + +def test_full_train(): model_args, _, _, finetuning_args, _ = get_train_args(TRAIN_ARGS) tokenizer_module = load_tokenizer(model_args) model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) for param in model.parameters(): assert param.requires_grad is True assert param.dtype == torch.float32 + + +def test_full_inference(): + model_args, _, finetuning_args, _ = get_infer_args(INFER_ARGS) + tokenizer_module = load_tokenizer(model_args) + model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=False) + for param in model.parameters(): + assert param.requires_grad is False + assert param.dtype == torch.float16 diff --git a/tests/model/test_lora.py b/tests/model/test_lora.py index 2e2b89d9..3e2503f1 100644 --- a/tests/model/test_lora.py +++ b/tests/model/test_lora.py @@ -1,13 +1,18 @@ import os +from typing import Sequence import torch +from peft import LoraModel, PeftModel +from transformers import AutoModelForCausalLM -from llamafactory.hparams import get_train_args +from llamafactory.hparams import get_infer_args, get_train_args from llamafactory.model import load_model, load_tokenizer TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-Llama-3") +TINY_LLAMA_ADAPTER = os.environ.get("TINY_LLAMA_ADAPTER", "llamafactory/tiny-random-Llama-3-lora") + TRAIN_ARGS = { "model_name_or_path": TINY_LLAMA, "stage": "sft", @@ -23,8 +28,32 @@ TRAIN_ARGS = { "fp16": True, } +INFER_ARGS = { + "model_name_or_path": TINY_LLAMA, + "adapter_name_or_path": TINY_LLAMA_ADAPTER, + "finetuning_type": "lora", + "template": "llama3", + "infer_dtype": "float16", +} -def test_lora_all_modules(): + +def load_reference_model() -> "torch.nn.Module": + model = AutoModelForCausalLM.from_pretrained(TINY_LLAMA) + return PeftModel.from_pretrained(model, TINY_LLAMA_ADAPTER) + + +def compare_model(model_a: "torch.nn.Module", model_b: "torch.nn.Module", diff_keys: Sequence[str] = []): + state_dict_a = model_a.state_dict() + state_dict_b = model_b.state_dict() + assert set(state_dict_a.keys()) == set(state_dict_b.keys()) + for name in state_dict_a.keys(): + if any(key in name for key in diff_keys): + assert torch.allclose(state_dict_a[name], state_dict_b[name]) is False + else: + assert torch.allclose(state_dict_a[name], state_dict_b[name]) is True + + +def test_lora_train_all_modules(): model_args, _, _, finetuning_args, _ = get_train_args({"lora_target": "all", **TRAIN_ARGS}) tokenizer_module = load_tokenizer(model_args) model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) @@ -41,7 +70,7 @@ def test_lora_all_modules(): assert linear_modules == {"q_proj", "k_proj", "v_proj", "o_proj", "up_proj", "gate_proj", "down_proj"} -def test_lora_extra_modules(): +def test_lora_train_extra_modules(): model_args, _, _, finetuning_args, _ = get_train_args( {"lora_target": "all", "additional_target": "embed_tokens,lm_head", **TRAIN_ARGS} ) @@ -61,3 +90,51 @@ def test_lora_extra_modules(): assert param.dtype == torch.float16 assert extra_modules == {"embed_tokens", "lm_head"} + + +def test_lora_train_old_adapters(): + model_args, _, _, finetuning_args, _ = get_train_args( + {"adapter_name_or_path": TINY_LLAMA_ADAPTER, "create_new_adapter": False, **TRAIN_ARGS} + ) + tokenizer_module = load_tokenizer(model_args) + model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) + + base_model = AutoModelForCausalLM.from_pretrained(TINY_LLAMA, 
torch_dtype=model.dtype, device_map=model.device) + ref_model = PeftModel.from_pretrained(base_model, TINY_LLAMA_ADAPTER, is_trainable=True) + for param in filter(lambda p: p.requires_grad, ref_model.parameters()): + param.data = param.data.to(torch.float32) + + compare_model(model, ref_model) + + +def test_lora_train_new_adapters(): + model_args, _, _, finetuning_args, _ = get_train_args( + {"adapter_name_or_path": TINY_LLAMA_ADAPTER, "create_new_adapter": True, **TRAIN_ARGS} + ) + tokenizer_module = load_tokenizer(model_args) + model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) + + base_model = AutoModelForCausalLM.from_pretrained(TINY_LLAMA, torch_dtype=model.dtype, device_map=model.device) + ref_model = PeftModel.from_pretrained(base_model, TINY_LLAMA_ADAPTER, is_trainable=True) + for param in filter(lambda p: p.requires_grad, ref_model.parameters()): + param.data = param.data.to(torch.float32) + + compare_model( + model, ref_model, diff_keys=["q_proj", "k_proj", "v_proj", "o_proj", "up_proj", "gate_proj", "down_proj"] + ) + + +def test_lora_inference(): + model_args, _, finetuning_args, _ = get_infer_args(INFER_ARGS) + tokenizer_module = load_tokenizer(model_args) + model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=False) + + base_model = AutoModelForCausalLM.from_pretrained(TINY_LLAMA, torch_dtype=model.dtype, device_map=model.device) + ref_model: "LoraModel" = PeftModel.from_pretrained(base_model, TINY_LLAMA_ADAPTER) + ref_model = ref_model.merge_and_unload() + compare_model(model, ref_model) + + for name, param in model.named_parameters(): + assert param.requires_grad is False + assert param.dtype == torch.float16 + assert "lora" not in name From 43cfbde0d181bb6af1abff4e615cc0a034ba5d55 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sat, 15 Jun 2024 04:34:55 +0800 Subject: [PATCH 146/162] fix #4295 Former-commit-id: 08f657868f9d605b837c5d8c2946a25cc05c8735 --- src/llamafactory/train/sft/trainer.py | 10 +++++++--- src/llamafactory/train/sft/workflow.py | 2 +- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/llamafactory/train/sft/trainer.py b/src/llamafactory/train/sft/trainer.py index c063b214..6bf5b7c0 100644 --- a/src/llamafactory/train/sft/trainer.py +++ b/src/llamafactory/train/sft/trainer.py @@ -13,6 +13,7 @@ from ..trainer_utils import create_custom_optimzer, create_custom_scheduler if TYPE_CHECKING: + from torch.utils.data import Dataset from transformers import ProcessorMixin from transformers.trainer import PredictionOutput @@ -94,7 +95,7 @@ class CustomSeq2SeqTrainer(Seq2SeqTrainer): padded_tensor[:, -src_tensor.shape[-1] :] = src_tensor # adopt left-padding return padded_tensor.contiguous() # in contiguous memory - def save_predictions(self, predict_results: "PredictionOutput") -> None: + def save_predictions(self, dataset: "Dataset", predict_results: "PredictionOutput") -> None: r""" Saves model predictions to `output_dir`. 
@@ -120,6 +121,9 @@ class CustomSeq2SeqTrainer(Seq2SeqTrainer): (preds[i][pad_len[0] :], preds[i][: pad_len[0]]), axis=-1 ) # move pad token to last + decoded_inputs = self.tokenizer.batch_decode( + dataset["input_ids"], skip_special_tokens=True, clean_up_tokenization_spaces=False + ) decoded_labels = self.tokenizer.batch_decode( labels, skip_special_tokens=True, clean_up_tokenization_spaces=False ) @@ -127,6 +131,6 @@ class CustomSeq2SeqTrainer(Seq2SeqTrainer): with open(output_prediction_file, "w", encoding="utf-8") as writer: res: List[str] = [] - for label, pred in zip(decoded_labels, decoded_preds): - res.append(json.dumps({"label": label, "predict": pred}, ensure_ascii=False)) + for text, label, pred in zip(decoded_inputs, decoded_labels, decoded_preds): + res.append(json.dumps({"prompt": text, "label": label, "predict": pred}, ensure_ascii=False)) writer.write("\n".join(res)) diff --git a/src/llamafactory/train/sft/workflow.py b/src/llamafactory/train/sft/workflow.py index f09b5173..a989b3f7 100644 --- a/src/llamafactory/train/sft/workflow.py +++ b/src/llamafactory/train/sft/workflow.py @@ -93,7 +93,7 @@ def run_sft( predict_results.metrics.pop("predict_loss", None) trainer.log_metrics("predict", predict_results.metrics) trainer.save_metrics("predict", predict_results.metrics) - trainer.save_predictions(predict_results) + trainer.save_predictions(dataset, predict_results) # Create model card create_modelcard_and_push(trainer, model_args, data_args, training_args, finetuning_args) From d2137c7099436f0ce42b16f2b838de09942602fb Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sat, 15 Jun 2024 04:47:13 +0800 Subject: [PATCH 147/162] fix #4292 Former-commit-id: 4cd4c179d24eab0fcaec2b29b9dd71970f877fe8 --- src/llamafactory/webui/common.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/llamafactory/webui/common.py b/src/llamafactory/webui/common.py index 37b38df0..3b8d5378 100644 --- a/src/llamafactory/webui/common.py +++ b/src/llamafactory/webui/common.py @@ -39,7 +39,11 @@ def get_save_dir(*paths: str) -> os.PathLike: r""" Gets the path to saved model checkpoints. 
""" - paths = (path.replace(os.path.sep, "").replace(" ", "").strip() for path in paths) + if os.path.sep in paths[-1]: + logger.warning("Found complex path, some features may be not available.") + return paths[-1] + + paths = (path.replace(" ", "").strip() for path in paths) return os.path.join(DEFAULT_SAVE_DIR, *paths) From c9959df3c7f7e0867a322654735357df5c10aa48 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sat, 15 Jun 2024 04:57:19 +0800 Subject: [PATCH 148/162] disable DP Former-commit-id: c18fd609d268389f3e65274992045a6c9f8e6c1f --- src/llamafactory/hparams/parser.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/llamafactory/hparams/parser.py b/src/llamafactory/hparams/parser.py index ec5dd62c..3476a726 100644 --- a/src/llamafactory/hparams/parser.py +++ b/src/llamafactory/hparams/parser.py @@ -8,6 +8,7 @@ import transformers from transformers import HfArgumentParser, Seq2SeqTrainingArguments from transformers.integrations import is_deepspeed_zero3_enabled from transformers.trainer_utils import get_last_checkpoint +from transformers.training_args import ParallelMode from transformers.utils import is_torch_bf16_gpu_available from transformers.utils.versions import require_version @@ -162,6 +163,9 @@ def get_train_args(args: Optional[Dict[str, Any]] = None) -> _TRAIN_CLS: ): raise ValueError("PPO only accepts wandb or tensorboard logger.") + if training_args.parallel_mode == ParallelMode.NOT_DISTRIBUTED: + raise ValueError("Please launch distributed training with `llamafactory-cli` or `torchrun`.") + if training_args.max_steps == -1 and data_args.streaming: raise ValueError("Please specify `max_steps` in streaming mode.") @@ -181,14 +185,14 @@ def get_train_args(args: Optional[Dict[str, Any]] = None) -> _TRAIN_CLS: if ( finetuning_args.use_galore and finetuning_args.galore_layerwise - and training_args.parallel_mode.value == "distributed" + and training_args.parallel_mode == ParallelMode.DISTRIBUTED ): raise ValueError("Distributed training does not support layer-wise GaLore.") if ( finetuning_args.use_badam and finetuning_args.badam_mode == "layer" - and training_args.parallel_mode.value == "distributed" + and training_args.parallel_mode == ParallelMode.DISTRIBUTED ): raise ValueError("Layer-wise BAdam does not yet support distributed training, use ratio-wise BAdam.") @@ -230,7 +234,7 @@ def get_train_args(args: Optional[Dict[str, Any]] = None) -> _TRAIN_CLS: # Post-process training arguments if ( - training_args.parallel_mode.value == "distributed" + training_args.parallel_mode == ParallelMode.DISTRIBUTED and training_args.ddp_find_unused_parameters is None and finetuning_args.finetuning_type == "lora" ): @@ -290,7 +294,7 @@ def get_train_args(args: Optional[Dict[str, Any]] = None) -> _TRAIN_CLS: training_args.local_rank, training_args.device, training_args.n_gpu, - training_args.parallel_mode.value == "distributed", + training_args.parallel_mode == ParallelMode.DISTRIBUTED, str(model_args.compute_dtype), ) ) From ab4b3931fd1b6bef0d6ef691ad070b4504f9246a Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sat, 15 Jun 2024 05:11:33 +0800 Subject: [PATCH 149/162] fix #4271 Former-commit-id: 03707e78d29bfcf5d395a64bb38632bdb3ff47ce --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 405ac46e..1756551e 100644 --- a/setup.py +++ b/setup.py @@ -23,7 +23,7 @@ extra_require = { "torch": ["torch>=1.13.1"], "torch-npu": ["torch==2.1.0", "torch-npu==2.1.0.post3", 
"decorator"], "metrics": ["nltk", "jieba", "rouge-chinese"], - "deepspeed": ["deepspeed>=0.10.0,<=0.14.0"], + "deepspeed": ["deepspeed>=0.10.0"], "bitsandbytes": ["bitsandbytes>=0.39.0"], "vllm": ["vllm>=0.4.3"], "galore": ["galore-torch"], From 8b523554d63c86511c64bd68b5ed6c11849e5fc0 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sat, 15 Jun 2024 05:13:16 +0800 Subject: [PATCH 150/162] update readme Former-commit-id: a43d302aa79cbfb9b0606e855b4c1af6865d8e68 --- README.md | 2 +- README_zh.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 5dd10d5a..cae79694 100644 --- a/README.md +++ b/README.md @@ -463,7 +463,7 @@ docker-compose exec llamafactory bash ### Deploy with OpenAI-style API and vLLM ```bash -CUDA_VISIBLE_DEVICES=0,1 API_PORT=8000 llamafactory-cli api examples/inference/llama3_vllm.yaml +API_PORT=8000 llamafactory-cli api examples/inference/llama3_vllm.yaml ``` > [!TIP] diff --git a/README_zh.md b/README_zh.md index 76bd2d89..af3ff8f0 100644 --- a/README_zh.md +++ b/README_zh.md @@ -463,7 +463,7 @@ docker-compose exec llamafactory bash ### 利用 vLLM 部署 OpenAI API ```bash -CUDA_VISIBLE_DEVICES=0,1 API_PORT=8000 llamafactory-cli api examples/inference/llama3_vllm.yaml +API_PORT=8000 llamafactory-cli api examples/inference/llama3_vllm.yaml ``` > [!TIP] From acfae2e677cc6d507671ea241af0236d91b4f7fd Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sat, 15 Jun 2024 17:54:33 +0800 Subject: [PATCH 151/162] add license Former-commit-id: 69cfc98d7c81756a5ab6bf962240e393e449fef0 --- evaluation/ceval/ceval.py | 1 + evaluation/cmmlu/cmmlu.py | 1 + evaluation/mmlu/mmlu.py | 1 + scripts/cal_flops.py | 23 +++++++++-- scripts/cal_lr.py | 23 +++++++++-- scripts/cal_ppl.py | 19 ++++++++- scripts/length_cdf.py | 19 ++++++++- scripts/llama_pro.py | 23 +++++++++-- scripts/llamafy_baichuan2.py | 22 ++++++++-- scripts/llamafy_qwen.py | 21 ++++++++-- scripts/loftq_init.py | 23 +++++++++-- scripts/test_toolcall.py | 15 +++++++ setup.py | 14 +++++++ src/api.py | 14 +++++++ src/llamafactory/__init__.py | 14 +++++++ src/llamafactory/api/app.py | 14 +++++++ src/llamafactory/api/chat.py | 14 +++++++ src/llamafactory/api/common.py | 14 +++++++ src/llamafactory/api/protocol.py | 14 +++++++ src/llamafactory/chat/__init__.py | 14 +++++++ src/llamafactory/chat/base_engine.py | 14 +++++++ src/llamafactory/chat/chat_model.py | 17 ++++++++ src/llamafactory/chat/hf_engine.py | 14 +++++++ src/llamafactory/chat/vllm_engine.py | 14 +++++++ src/llamafactory/cli.py | 14 +++++++ src/llamafactory/data/__init__.py | 14 +++++++ src/llamafactory/data/aligner.py | 14 +++++++ src/llamafactory/data/collator.py | 14 +++++++ src/llamafactory/data/data_utils.py | 14 +++++++ src/llamafactory/data/formatter.py | 14 +++++++ src/llamafactory/data/loader.py | 14 +++++++ src/llamafactory/data/parser.py | 14 +++++++ src/llamafactory/data/preprocess.py | 14 +++++++ src/llamafactory/data/processors/feedback.py | 14 +++++++ src/llamafactory/data/processors/pairwise.py | 14 +++++++ src/llamafactory/data/processors/pretrain.py | 17 ++++++++ .../data/processors/processor_utils.py | 14 +++++++ .../data/processors/supervised.py | 14 +++++++ .../data/processors/unsupervised.py | 14 +++++++ src/llamafactory/data/template.py | 14 +++++++ src/llamafactory/eval/evaluator.py | 39 +++++++++++++++++- src/llamafactory/eval/template.py | 14 +++++++ src/llamafactory/extras/callbacks.py | 14 +++++++ src/llamafactory/extras/constants.py | 14 +++++++ src/llamafactory/extras/env.py | 
14 +++++++ src/llamafactory/extras/logging.py | 14 +++++++ src/llamafactory/extras/misc.py | 14 +++++++ src/llamafactory/extras/packages.py | 17 ++++++++ src/llamafactory/extras/ploting.py | 14 +++++++ src/llamafactory/hparams/__init__.py | 14 +++++++ src/llamafactory/hparams/data_args.py | 17 ++++++++ src/llamafactory/hparams/evaluation_args.py | 14 +++++++ src/llamafactory/hparams/finetuning_args.py | 14 +++++++ src/llamafactory/hparams/generating_args.py | 14 +++++++ src/llamafactory/hparams/model_args.py | 17 ++++++++ src/llamafactory/hparams/parser.py | 17 ++++++++ src/llamafactory/launcher.py | 14 +++++++ src/llamafactory/model/__init__.py | 14 +++++++ src/llamafactory/model/adapter.py | 14 +++++++ src/llamafactory/model/loader.py | 14 +++++++ .../model/model_utils/attention.py | 14 +++++++ .../model/model_utils/checkpointing.py | 19 ++++++++- .../model/model_utils/embedding.py | 14 +++++++ .../model/model_utils/longlora.py | 17 ++++++++ src/llamafactory/model/model_utils/misc.py | 14 +++++++ src/llamafactory/model/model_utils/mod.py | 14 +++++++ src/llamafactory/model/model_utils/moe.py | 14 +++++++ .../model/model_utils/quantization.py | 18 ++++++++- src/llamafactory/model/model_utils/rope.py | 18 +++++++++ src/llamafactory/model/model_utils/unsloth.py | 14 +++++++ .../model/model_utils/valuehead.py | 14 +++++++ src/llamafactory/model/model_utils/visual.py | 17 ++++++++ src/llamafactory/model/patcher.py | 14 +++++++ src/llamafactory/train/dpo/__init__.py | 14 +++++++ src/llamafactory/train/dpo/trainer.py | 17 ++++++++ src/llamafactory/train/dpo/workflow.py | 17 +++++++- src/llamafactory/train/kto/__init__.py | 14 +++++++ src/llamafactory/train/kto/trainer.py | 17 ++++++++ src/llamafactory/train/kto/workflow.py | 17 ++++++++ src/llamafactory/train/ppo/__init__.py | 14 +++++++ src/llamafactory/train/ppo/ppo_utils.py | 14 +++++++ src/llamafactory/train/ppo/trainer.py | 17 ++++++++ src/llamafactory/train/ppo/workflow.py | 17 +++++++- src/llamafactory/train/pt/__init__.py | 14 +++++++ src/llamafactory/train/pt/trainer.py | 14 +++++++ src/llamafactory/train/pt/workflow.py | 17 +++++++- src/llamafactory/train/rm/__init__.py | 14 +++++++ src/llamafactory/train/rm/metric.py | 14 +++++++ src/llamafactory/train/rm/trainer.py | 40 ++++++++++++++++++- src/llamafactory/train/rm/workflow.py | 39 +++++++++++++++++- src/llamafactory/train/sft/__init__.py | 14 +++++++ src/llamafactory/train/sft/metric.py | 18 +++++++++ src/llamafactory/train/sft/trainer.py | 17 ++++++++ src/llamafactory/train/sft/workflow.py | 17 +++++++- src/llamafactory/train/trainer_utils.py | 19 +++++++++ src/llamafactory/train/tuner.py | 14 +++++++ src/llamafactory/webui/chatter.py | 14 +++++++ src/llamafactory/webui/common.py | 14 +++++++ src/llamafactory/webui/components/__init__.py | 14 +++++++ src/llamafactory/webui/components/chatbot.py | 14 +++++++ src/llamafactory/webui/components/data.py | 14 +++++++ src/llamafactory/webui/components/eval.py | 14 +++++++ src/llamafactory/webui/components/export.py | 14 +++++++ src/llamafactory/webui/components/infer.py | 14 +++++++ src/llamafactory/webui/components/top.py | 14 +++++++ src/llamafactory/webui/components/train.py | 14 +++++++ src/llamafactory/webui/css.py | 14 +++++++ src/llamafactory/webui/engine.py | 14 +++++++ src/llamafactory/webui/interface.py | 14 +++++++ src/llamafactory/webui/locales.py | 14 +++++++ src/llamafactory/webui/manager.py | 14 +++++++ src/llamafactory/webui/runner.py | 14 +++++++ src/llamafactory/webui/utils.py | 14 +++++++ src/train.py | 14 +++++++ 
src/webui.py | 14 +++++++ tests/data/test_supervised.py | 14 +++++++ tests/eval/test_eval_template.py | 14 +++++++ tests/model/model_utils/test_attention.py | 14 +++++++ tests/model/test_base.py | 14 +++++++ tests/model/test_freeze.py | 14 +++++++ tests/model/test_full.py | 14 +++++++ tests/model/test_lora.py | 14 +++++++ 122 files changed, 1848 insertions(+), 32 deletions(-) diff --git a/evaluation/ceval/ceval.py b/evaluation/ceval/ceval.py index 4111d6b4..48442d50 100644 --- a/evaluation/ceval/ceval.py +++ b/evaluation/ceval/ceval.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + import os import datasets diff --git a/evaluation/cmmlu/cmmlu.py b/evaluation/cmmlu/cmmlu.py index 37efb328..5ff548a4 100644 --- a/evaluation/cmmlu/cmmlu.py +++ b/evaluation/cmmlu/cmmlu.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + import os import datasets diff --git a/evaluation/mmlu/mmlu.py b/evaluation/mmlu/mmlu.py index a4530250..1065fb31 100644 --- a/evaluation/mmlu/mmlu.py +++ b/evaluation/mmlu/mmlu.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + import os import datasets diff --git a/scripts/cal_flops.py b/scripts/cal_flops.py index ac87e0ab..627b5534 100644 --- a/scripts/cal_flops.py +++ b/scripts/cal_flops.py @@ -1,7 +1,20 @@ # coding=utf-8 -# Calculates the flops of pre-trained models. -# Usage: python cal_flops.py --model_name_or_path path_to_model --batch_size 1 --seq_length 512 -# Inspired by: https://www.deepspeed.ai/tutorials/flops-profiler/ +# Copyright 2024 Microsoft Corporation and the LlamaFactory team. +# +# This code is inspired by Microsoft's DeepSpeed library. +# https://www.deepspeed.ai/tutorials/flops-profiler/ +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import fire import torch @@ -17,6 +30,10 @@ def calculate_flops( seq_length: int = 256, flash_attn: str = "auto", ): + r""" + Calculates the flops of pre-trained models. + Usage: python cal_flops.py --model_name_or_path path_to_model --batch_size 1 --seq_length 512 + """ with get_accelerator().device(0): chat_model = ChatModel(dict(model_name_or_path=model_name_or_path, template="empty", flash_attn=flash_attn)) fake_input = torch.ones((batch_size, seq_length), dtype=torch.long, device=chat_model.model.device) diff --git a/scripts/cal_lr.py b/scripts/cal_lr.py index bfa32cc9..ff21d27c 100644 --- a/scripts/cal_lr.py +++ b/scripts/cal_lr.py @@ -1,7 +1,20 @@ # coding=utf-8 -# Calculates the optimal learning rate for 7B/13B models using LLaMA's hyper-parameters. 
-# Usage: python cal_lr.py --model_name_or_path path_to_model --dataset alpaca_en --cutoff_len 1024 --batch_size 16 -# Inspired by: https://github.com/imoneoi/openchat/blob/master/ochat/training_deepspeed/train.py +# Copyright 2024 imoneoi and the LlamaFactory team. +# +# This code is inspired by imoneoi's OpenChat library. +# https://github.com/imoneoi/openchat/blob/3.6.0/ochat/training_deepspeed/train.py +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import math from typing import Literal @@ -32,6 +45,10 @@ def calculate_lr( cutoff_len: int = 1024, # i.e. maximum input length during training is_mistral: bool = False, # mistral model uses a smaller learning rate, ): + r""" + Calculates the optimal learning rate for 7B/13B models using LLaMA's hyper-parameters. + Usage: python cal_lr.py --model_name_or_path path_to_model --dataset alpaca_en --cutoff_len 1024 --batch_size 16 + """ model_args, data_args, training_args, _, _ = get_train_args( dict( stage=stage, diff --git a/scripts/cal_ppl.py b/scripts/cal_ppl.py index 387b756c..fb503629 100644 --- a/scripts/cal_ppl.py +++ b/scripts/cal_ppl.py @@ -1,6 +1,17 @@ # coding=utf-8 -# Calculates the ppl on the dataset of the pre-trained models. -# Usage: python cal_ppl.py --model_name_or_path path_to_model --save_name ppl.json +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import json from dataclasses import dataclass @@ -56,6 +67,10 @@ def cal_ppl( max_samples: Optional[int] = None, train_on_prompt: bool = False, ): + r""" + Calculates the ppl on the dataset of the pre-trained models. + Usage: python cal_ppl.py --model_name_or_path path_to_model --save_name ppl.json + """ model_args, data_args, training_args, finetuning_args, _ = get_train_args( dict( stage=stage, diff --git a/scripts/length_cdf.py b/scripts/length_cdf.py index 7739dcf0..4cdf01e6 100644 --- a/scripts/length_cdf.py +++ b/scripts/length_cdf.py @@ -1,6 +1,17 @@ # coding=utf-8 -# Calculates the distribution of the input lengths in the dataset. -# Usage: python length_cdf.py --model_name_or_path path_to_model --dataset alpaca_en --template default +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. from collections import defaultdict @@ -19,6 +30,10 @@ def length_cdf( template: str = "default", interval: int = 1000, ): + r""" + Calculates the distribution of the input lengths in the dataset. + Usage: python length_cdf.py --model_name_or_path path_to_model --dataset alpaca_en --template default + """ model_args, data_args, training_args, _, _ = get_train_args( dict( stage="sft", diff --git a/scripts/llama_pro.py b/scripts/llama_pro.py index 727998ae..f315335a 100644 --- a/scripts/llama_pro.py +++ b/scripts/llama_pro.py @@ -1,7 +1,20 @@ # coding=utf-8 -# Performs block expansion for LLaMA, Mistral, Qwen1.5 or Yi models. -# Usage: python llama_pro.py --model_name_or_path meta-llama/Llama-2-7b-hf --output_dir llama2_pro --num_expand 8 -# Inspired by: https://github.com/TencentARC/LLaMA-Pro/blob/main/scripts/block_expansion.py +# Copyright 2024 Tencent Inc. and the LlamaFactory team. +# +# This code is inspired by Tencent's LLaMA-Pro library. +# https://github.com/TencentARC/LLaMA-Pro/blob/main/scripts/block_expansion.py +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import json import os @@ -37,6 +50,10 @@ def block_expansion( shard_size: Optional[str] = "2GB", save_safetensors: Optional[bool] = False, ): + r""" + Performs block expansion for LLaMA, Mistral, Qwen1.5 or Yi models. + Usage: python llama_pro.py --model_name_or_path meta-llama/Llama-2-7b-hf --output_dir llama2_pro --num_expand 8 + """ config: "PretrainedConfig" = AutoConfig.from_pretrained(model_name_or_path) num_layers = getattr(config, "num_hidden_layers") setattr(config, "num_hidden_layers", num_layers + num_expand) diff --git a/scripts/llamafy_baichuan2.py b/scripts/llamafy_baichuan2.py index 1ae58879..19284f5f 100644 --- a/scripts/llamafy_baichuan2.py +++ b/scripts/llamafy_baichuan2.py @@ -1,8 +1,17 @@ # coding=utf-8 -# Converts the Baichuan2-7B model in the same format as LLaMA2-7B. -# Usage: python llamafy_baichuan2.py --input_dir input --output_dir output -# Inspired by: https://huggingface.co/fireballoon/baichuan-llama-7b/blob/main/convert_baichuan_to_llama.py -# Converted model: https://huggingface.co/hiyouga/Baichuan2-7B-Base-LLaMAfied +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
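The llama_pro.py hunk above moves the block-expansion usage note into a docstring. For context, the LLaMA-Pro recipe it points to duplicates every (num_layers // num_expand)-th transformer block and re-initialises each copy as an identity (zeroed output projections) so the expanded model starts from the original model's behaviour. A small sketch of that layer-index bookkeeping follows; the layer counts are illustrative assumptions, not values from the script.

# Hedged sketch of block-expansion layer mapping (LLaMA-Pro style), not the patched script.
num_layers, num_expand = 32, 8          # e.g. a LLaMA-2 7B model expanded by 8 blocks
split = num_layers // num_expand        # duplicate one block in every group of `split`

layer_map = []  # original-layer index feeding each layer of the expanded model
for i in range(num_layers):
    layer_map.append(i)
    if (i + 1) % split == 0:
        layer_map.append(i)             # the copied block; its output projections would be zeroed
print(len(layer_map), layer_map[:10])   # 40 [0, 1, 2, 3, 3, 4, 5, 6, 7, 7]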
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import json import os @@ -79,6 +88,11 @@ def save_config(input_dir: str, output_dir: str): def llamafy_baichuan2( input_dir: str, output_dir: str, shard_size: Optional[str] = "2GB", save_safetensors: Optional[bool] = False ): + r""" + Converts the Baichuan2-7B model in the same format as LLaMA2-7B. + Usage: python llamafy_baichuan2.py --input_dir input --output_dir output + Converted model: https://huggingface.co/hiyouga/Baichuan2-7B-Base-LLaMAfied + """ try: os.makedirs(output_dir, exist_ok=False) except Exception as e: diff --git a/scripts/llamafy_qwen.py b/scripts/llamafy_qwen.py index 69cf3e8e..e5b59483 100644 --- a/scripts/llamafy_qwen.py +++ b/scripts/llamafy_qwen.py @@ -1,7 +1,17 @@ # coding=utf-8 -# Converts the Qwen models in the same format as LLaMA2. -# Usage: python llamafy_qwen.py --input_dir input --output_dir output -# Converted model: https://huggingface.co/hiyouga/Qwen-14B-Chat-LLaMAfied +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import json import os @@ -131,6 +141,11 @@ def save_config(input_dir: str, output_dir: str, torch_dtype: str): def llamafy_qwen( input_dir: str, output_dir: str, shard_size: Optional[str] = "2GB", save_safetensors: Optional[bool] = False ): + r""" + Converts the Qwen models in the same format as LLaMA2. + Usage: python llamafy_qwen.py --input_dir input --output_dir output + Converted model: https://huggingface.co/hiyouga/Qwen-14B-Chat-LLaMAfied + """ try: os.makedirs(output_dir, exist_ok=False) except Exception as e: diff --git a/scripts/loftq_init.py b/scripts/loftq_init.py index 7f244316..159dea06 100644 --- a/scripts/loftq_init.py +++ b/scripts/loftq_init.py @@ -1,7 +1,20 @@ # coding=utf-8 -# Initializes LoRA weights with LoRA-fine-tuning-aware Quantization (LoftQ) -# Usage: python loftq_init.py --model_name_or_path path_to_model --save_dir output_dir -# Inspired by: https://github.com/huggingface/peft/blob/main/examples/loftq_finetuning/quantize_save_load.py +# Copyright 2024 HuggingFace Inc. and the LlamaFactory team. +# +# This code is inspired by HuggingFace's PEFT library. +# https://github.com/huggingface/peft/blob/v0.10.0/examples/loftq_finetuning/quantize_save_load.py +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
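The llamafy_baichuan2.py and llamafy_qwen.py hunks above add license headers and relocate the conversion notes into docstrings. The core of such a conversion is tensor renaming and splitting; as one hedged example, Baichuan2's fused "W_pack" QKV projection has to be split into the separate q/k/v projections that LLaMA-style checkpoints expect. The key names and shapes below are assumptions for illustration.

# Hedged sketch: splitting a fused QKV weight into LLaMA-style q/k/v projections.
import torch

hidden_size = 4096
w_pack = torch.randn(3 * hidden_size, hidden_size)  # stand-in for ...self_attn.W_pack.weight

q_proj, k_proj, v_proj = torch.chunk(w_pack, 3, dim=0)
converted = {
    "model.layers.0.self_attn.q_proj.weight": q_proj,
    "model.layers.0.self_attn.k_proj.weight": k_proj,
    "model.layers.0.self_attn.v_proj.weight": v_proj,
}
print({k: tuple(v.shape) for k, v in converted.items()})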
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import os from typing import TYPE_CHECKING, Optional @@ -49,6 +62,10 @@ def quantize_loftq( lora_target: Optional[str] = "q_proj,v_proj", save_safetensors: Optional[bool] = False, ): + r""" + Initializes LoRA weights with LoRA-fine-tuning-aware Quantization (LoftQ) + Usage: python loftq_init.py --model_name_or_path path_to_model --save_dir output_dir + """ tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, trust_remote_code=True) model = AutoModelForCausalLM.from_pretrained(model_name_or_path, trust_remote_code=True, torch_dtype="auto") loftq_config = LoftQConfig(loftq_bits=loftq_bits, loftq_iter=loftq_iter) diff --git a/scripts/test_toolcall.py b/scripts/test_toolcall.py index 7e460017..6f6fd06c 100644 --- a/scripts/test_toolcall.py +++ b/scripts/test_toolcall.py @@ -1,3 +1,18 @@ +# coding=utf-8 +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import json import os from typing import Sequence diff --git a/setup.py b/setup.py index 1756551e..3d2ac921 100644 --- a/setup.py +++ b/setup.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import os import re diff --git a/src/api.py b/src/api.py index 3655e393..0f925497 100644 --- a/src/api.py +++ b/src/api.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
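The loftq_init.py hunk above documents the LoftQ initialization workflow in its docstring. A hedged sketch of the same idea through PEFT's public API follows; the model name, rank, and target modules are assumptions, not values taken from the script.

# Hedged sketch: LoftQ-initialised LoRA adapters via PEFT.
from peft import LoftQConfig, LoraConfig, get_peft_model
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf", torch_dtype="auto")  # placeholder
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=16,
    lora_alpha=16,
    target_modules=["q_proj", "v_proj"],
    init_lora_weights="loftq",                                # LoRA-fine-tuning-aware quantization
    loftq_config=LoftQConfig(loftq_bits=4, loftq_iter=1),
)
peft_model = get_peft_model(model, lora_config)
peft_model.print_trainable_parameters()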
+ import os import uvicorn diff --git a/src/llamafactory/__init__.py b/src/llamafactory/__init__.py index 78230937..9d732777 100644 --- a/src/llamafactory/__init__.py +++ b/src/llamafactory/__init__.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + # Level: api, webui > chat, eval, train > data, model > hparams > extras from .cli import VERSION diff --git a/src/llamafactory/api/app.py b/src/llamafactory/api/app.py index 21edab2f..c1264617 100644 --- a/src/llamafactory/api/app.py +++ b/src/llamafactory/api/app.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import os from contextlib import asynccontextmanager from typing import Optional diff --git a/src/llamafactory/api/chat.py b/src/llamafactory/api/chat.py index 98957bc1..a2074dbb 100644 --- a/src/llamafactory/api/chat.py +++ b/src/llamafactory/api/chat.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import base64 import io import json diff --git a/src/llamafactory/api/common.py b/src/llamafactory/api/common.py index 5ad9a071..d1ac94de 100644 --- a/src/llamafactory/api/common.py +++ b/src/llamafactory/api/common.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ import json from typing import TYPE_CHECKING, Any, Dict diff --git a/src/llamafactory/api/protocol.py b/src/llamafactory/api/protocol.py index 055fa781..a69132ea 100644 --- a/src/llamafactory/api/protocol.py +++ b/src/llamafactory/api/protocol.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import time from enum import Enum, unique from typing import Any, Dict, List, Optional, Union diff --git a/src/llamafactory/chat/__init__.py b/src/llamafactory/chat/__init__.py index a1a79de6..07276d48 100644 --- a/src/llamafactory/chat/__init__.py +++ b/src/llamafactory/chat/__init__.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from .base_engine import BaseEngine from .chat_model import ChatModel diff --git a/src/llamafactory/chat/base_engine.py b/src/llamafactory/chat/base_engine.py index 65b6c59c..92a51ebe 100644 --- a/src/llamafactory/chat/base_engine.py +++ b/src/llamafactory/chat/base_engine.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from abc import ABC, abstractmethod from dataclasses import dataclass from typing import TYPE_CHECKING, Any, AsyncGenerator, Dict, List, Literal, Optional, Sequence, Union diff --git a/src/llamafactory/chat/chat_model.py b/src/llamafactory/chat/chat_model.py index 281ef0c1..fb800106 100644 --- a/src/llamafactory/chat/chat_model.py +++ b/src/llamafactory/chat/chat_model.py @@ -1,3 +1,20 @@ +# Copyright 2024 THUDM and the LlamaFactory team. +# +# This code is inspired by the THUDM's ChatGLM implementation. +# https://github.com/THUDM/ChatGLM-6B/blob/main/cli_demo.py +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import asyncio from threading import Thread from typing import TYPE_CHECKING, Any, AsyncGenerator, Dict, Generator, List, Optional, Sequence diff --git a/src/llamafactory/chat/hf_engine.py b/src/llamafactory/chat/hf_engine.py index 28e6a409..a7ff7015 100644 --- a/src/llamafactory/chat/hf_engine.py +++ b/src/llamafactory/chat/hf_engine.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import asyncio import concurrent.futures import os diff --git a/src/llamafactory/chat/vllm_engine.py b/src/llamafactory/chat/vllm_engine.py index f0812a99..d488a039 100644 --- a/src/llamafactory/chat/vllm_engine.py +++ b/src/llamafactory/chat/vllm_engine.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import uuid from typing import TYPE_CHECKING, AsyncGenerator, AsyncIterator, Dict, List, Optional, Sequence, Union diff --git a/src/llamafactory/cli.py b/src/llamafactory/cli.py index 5042e53c..c7f136b3 100644 --- a/src/llamafactory/cli.py +++ b/src/llamafactory/cli.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import os import random import subprocess diff --git a/src/llamafactory/data/__init__.py b/src/llamafactory/data/__init__.py index b08691d3..307853bc 100644 --- a/src/llamafactory/data/__init__.py +++ b/src/llamafactory/data/__init__.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from .collator import KTODataCollatorWithPadding, PairwiseDataCollatorWithPadding from .data_utils import Role, split_dataset from .loader import get_dataset diff --git a/src/llamafactory/data/aligner.py b/src/llamafactory/data/aligner.py index 3e9d5c46..299bdca3 100644 --- a/src/llamafactory/data/aligner.py +++ b/src/llamafactory/data/aligner.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import os from functools import partial from typing import TYPE_CHECKING, Any, Dict, List, Union diff --git a/src/llamafactory/data/collator.py b/src/llamafactory/data/collator.py index 1dc8dd8d..e4859ff5 100644 --- a/src/llamafactory/data/collator.py +++ b/src/llamafactory/data/collator.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from dataclasses import dataclass from typing import Any, Dict, Sequence diff --git a/src/llamafactory/data/data_utils.py b/src/llamafactory/data/data_utils.py index 9b313112..cc9761b1 100644 --- a/src/llamafactory/data/data_utils.py +++ b/src/llamafactory/data/data_utils.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ from enum import Enum, unique from typing import TYPE_CHECKING, Dict, List, Tuple, Union diff --git a/src/llamafactory/data/formatter.py b/src/llamafactory/data/formatter.py index 0cd3d6c1..590e682b 100644 --- a/src/llamafactory/data/formatter.py +++ b/src/llamafactory/data/formatter.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import json import re from abc import ABC, abstractmethod diff --git a/src/llamafactory/data/loader.py b/src/llamafactory/data/loader.py index ba426f81..f44ef5de 100644 --- a/src/llamafactory/data/loader.py +++ b/src/llamafactory/data/loader.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import inspect import os import sys diff --git a/src/llamafactory/data/parser.py b/src/llamafactory/data/parser.py index ec97bfc1..4bebcd68 100644 --- a/src/llamafactory/data/parser.py +++ b/src/llamafactory/data/parser.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import json import os from dataclasses import dataclass diff --git a/src/llamafactory/data/preprocess.py b/src/llamafactory/data/preprocess.py index 875f55d6..9a8b97f3 100644 --- a/src/llamafactory/data/preprocess.py +++ b/src/llamafactory/data/preprocess.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ from functools import partial from typing import TYPE_CHECKING, Callable, Literal, Optional, Tuple diff --git a/src/llamafactory/data/processors/feedback.py b/src/llamafactory/data/processors/feedback.py index 5fba452c..219ab353 100644 --- a/src/llamafactory/data/processors/feedback.py +++ b/src/llamafactory/data/processors/feedback.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from typing import TYPE_CHECKING, Any, Dict, List, Optional, Sequence, Tuple from ...extras.constants import IGNORE_INDEX diff --git a/src/llamafactory/data/processors/pairwise.py b/src/llamafactory/data/processors/pairwise.py index db52c6a7..b2939348 100644 --- a/src/llamafactory/data/processors/pairwise.py +++ b/src/llamafactory/data/processors/pairwise.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from typing import TYPE_CHECKING, Any, Dict, List, Optional, Sequence, Tuple from ...extras.constants import IGNORE_INDEX diff --git a/src/llamafactory/data/processors/pretrain.py b/src/llamafactory/data/processors/pretrain.py index a10ccabd..fb4c840c 100644 --- a/src/llamafactory/data/processors/pretrain.py +++ b/src/llamafactory/data/processors/pretrain.py @@ -1,3 +1,20 @@ +# Copyright 2024 HuggingFace Inc. and the LlamaFactory team. +# +# This code is inspired by HuggingFace's transformers library. +# https://github.com/huggingface/transformers/blob/v4.40.0/examples/pytorch/language-modeling/run_clm.py +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from itertools import chain from typing import TYPE_CHECKING, Any, Dict, List diff --git a/src/llamafactory/data/processors/processor_utils.py b/src/llamafactory/data/processors/processor_utils.py index 9903a053..93df0cd5 100644 --- a/src/llamafactory/data/processors/processor_utils.py +++ b/src/llamafactory/data/processors/processor_utils.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import bisect from typing import TYPE_CHECKING, List, Sequence diff --git a/src/llamafactory/data/processors/supervised.py b/src/llamafactory/data/processors/supervised.py index f59f5371..eb5ffb1a 100644 --- a/src/llamafactory/data/processors/supervised.py +++ b/src/llamafactory/data/processors/supervised.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from collections import defaultdict from typing import TYPE_CHECKING, Any, Dict, List, Optional, Sequence, Tuple diff --git a/src/llamafactory/data/processors/unsupervised.py b/src/llamafactory/data/processors/unsupervised.py index 38497a15..75ad4d51 100644 --- a/src/llamafactory/data/processors/unsupervised.py +++ b/src/llamafactory/data/processors/unsupervised.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from typing import TYPE_CHECKING, Any, Dict, List, Optional, Sequence, Tuple from ...extras.logging import get_logger diff --git a/src/llamafactory/data/template.py b/src/llamafactory/data/template.py index b600c567..786c679f 100644 --- a/src/llamafactory/data/template.py +++ b/src/llamafactory/data/template.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ from dataclasses import dataclass from typing import TYPE_CHECKING, Dict, List, Optional, Sequence, Tuple, Union diff --git a/src/llamafactory/eval/evaluator.py b/src/llamafactory/eval/evaluator.py index 5c6fb104..bbd7a44b 100644 --- a/src/llamafactory/eval/evaluator.py +++ b/src/llamafactory/eval/evaluator.py @@ -1,4 +1,41 @@ -# Inspired by: https://github.com/hendrycks/test/blob/master/evaluate_flan.py +# Copyright 2024 the LlamaFactory team. +# +# This code is inspired by Dan's test library. +# https://github.com/hendrycks/test/blob/master/evaluate_flan.py +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# MIT License +# +# Copyright (c) 2020 Dan Hendrycks +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. import inspect import json diff --git a/src/llamafactory/eval/template.py b/src/llamafactory/eval/template.py index 2cbb5aaf..7d524e7c 100644 --- a/src/llamafactory/eval/template.py +++ b/src/llamafactory/eval/template.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from dataclasses import dataclass from typing import Dict, List, Sequence, Tuple diff --git a/src/llamafactory/extras/callbacks.py b/src/llamafactory/extras/callbacks.py index 441ebbfd..0dff6a69 100644 --- a/src/llamafactory/extras/callbacks.py +++ b/src/llamafactory/extras/callbacks.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import json import logging import os diff --git a/src/llamafactory/extras/constants.py b/src/llamafactory/extras/constants.py index 7d96fb5f..e31e7419 100644 --- a/src/llamafactory/extras/constants.py +++ b/src/llamafactory/extras/constants.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from collections import OrderedDict, defaultdict from enum import Enum from typing import Dict, Optional diff --git a/src/llamafactory/extras/env.py b/src/llamafactory/extras/env.py index a8cb799d..586c24c0 100644 --- a/src/llamafactory/extras/env.py +++ b/src/llamafactory/extras/env.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import platform import accelerate diff --git a/src/llamafactory/extras/logging.py b/src/llamafactory/extras/logging.py index 430b8a48..67622212 100644 --- a/src/llamafactory/extras/logging.py +++ b/src/llamafactory/extras/logging.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import logging import os import sys diff --git a/src/llamafactory/extras/misc.py b/src/llamafactory/extras/misc.py index fc33f77e..3d969df1 100644 --- a/src/llamafactory/extras/misc.py +++ b/src/llamafactory/extras/misc.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import gc import os from typing import TYPE_CHECKING, Dict, Tuple diff --git a/src/llamafactory/extras/packages.py b/src/llamafactory/extras/packages.py index 0746bb4f..35f546ab 100644 --- a/src/llamafactory/extras/packages.py +++ b/src/llamafactory/extras/packages.py @@ -1,3 +1,20 @@ +# Copyright 2024 HuggingFace Inc. and the LlamaFactory team. +# +# This code is inspired by HuggingFace's transformers library. +# https://github.com/huggingface/transformers/blob/v4.40.0/src/transformers/utils/import_utils.py +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import importlib.metadata import importlib.util from functools import lru_cache diff --git a/src/llamafactory/extras/ploting.py b/src/llamafactory/extras/ploting.py index dea23bbe..596d55e7 100644 --- a/src/llamafactory/extras/ploting.py +++ b/src/llamafactory/extras/ploting.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import json import math import os diff --git a/src/llamafactory/hparams/__init__.py b/src/llamafactory/hparams/__init__.py index d1ee98dd..cfe448c1 100644 --- a/src/llamafactory/hparams/__init__.py +++ b/src/llamafactory/hparams/__init__.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ from .data_args import DataArguments from .evaluation_args import EvaluationArguments from .finetuning_args import FinetuningArguments diff --git a/src/llamafactory/hparams/data_args.py b/src/llamafactory/hparams/data_args.py index 1e0cd08c..95284766 100644 --- a/src/llamafactory/hparams/data_args.py +++ b/src/llamafactory/hparams/data_args.py @@ -1,3 +1,20 @@ +# Copyright 2024 HuggingFace Inc. and the LlamaFactory team. +# +# This code is inspired by HuggingFace's transformers library. +# https://github.com/huggingface/transformers/blob/v4.40.0/examples/pytorch/language-modeling/run_clm.py +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from dataclasses import dataclass, field from typing import Literal, Optional diff --git a/src/llamafactory/hparams/evaluation_args.py b/src/llamafactory/hparams/evaluation_args.py index 5a05f6f6..a7f221ca 100644 --- a/src/llamafactory/hparams/evaluation_args.py +++ b/src/llamafactory/hparams/evaluation_args.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import os from dataclasses import dataclass, field from typing import Literal, Optional diff --git a/src/llamafactory/hparams/finetuning_args.py b/src/llamafactory/hparams/finetuning_args.py index facbe792..52dc299e 100644 --- a/src/llamafactory/hparams/finetuning_args.py +++ b/src/llamafactory/hparams/finetuning_args.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from dataclasses import dataclass, field from typing import List, Literal, Optional diff --git a/src/llamafactory/hparams/generating_args.py b/src/llamafactory/hparams/generating_args.py index 0ee17d1a..7ebb4eed 100644 --- a/src/llamafactory/hparams/generating_args.py +++ b/src/llamafactory/hparams/generating_args.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from dataclasses import asdict, dataclass, field from typing import Any, Dict, Optional diff --git a/src/llamafactory/hparams/model_args.py b/src/llamafactory/hparams/model_args.py index bbac2e4b..0a91f0fa 100644 --- a/src/llamafactory/hparams/model_args.py +++ b/src/llamafactory/hparams/model_args.py @@ -1,3 +1,20 @@ +# Copyright 2024 HuggingFace Inc. and the LlamaFactory team. +# +# This code is inspired by HuggingFace's transformers library. +# https://github.com/huggingface/transformers/blob/v4.40.0/examples/pytorch/language-modeling/run_clm.py +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from dataclasses import asdict, dataclass, field from typing import TYPE_CHECKING, Any, Dict, Literal, Optional, Union diff --git a/src/llamafactory/hparams/parser.py b/src/llamafactory/hparams/parser.py index 3476a726..1c57567c 100644 --- a/src/llamafactory/hparams/parser.py +++ b/src/llamafactory/hparams/parser.py @@ -1,3 +1,20 @@ +# Copyright 2024 HuggingFace Inc. and the LlamaFactory team. +# +# This code is inspired by HuggingFace's transformers library. +# https://github.com/huggingface/transformers/blob/v4.40.0/examples/pytorch/language-modeling/run_clm.py +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import logging import os import sys diff --git a/src/llamafactory/launcher.py b/src/llamafactory/launcher.py index de154db9..65e0b68f 100644 --- a/src/llamafactory/launcher.py +++ b/src/llamafactory/launcher.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ from llamafactory.train.tuner import run_exp diff --git a/src/llamafactory/model/__init__.py b/src/llamafactory/model/__init__.py index 9d23d59f..4abbaa1b 100644 --- a/src/llamafactory/model/__init__.py +++ b/src/llamafactory/model/__init__.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from .loader import load_config, load_model, load_tokenizer from .model_utils.misc import find_all_linear_modules from .model_utils.valuehead import load_valuehead_params diff --git a/src/llamafactory/model/adapter.py b/src/llamafactory/model/adapter.py index c37f6009..dfa71525 100644 --- a/src/llamafactory/model/adapter.py +++ b/src/llamafactory/model/adapter.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import re from typing import TYPE_CHECKING diff --git a/src/llamafactory/model/loader.py b/src/llamafactory/model/loader.py index 697a04e7..69cccd93 100644 --- a/src/llamafactory/model/loader.py +++ b/src/llamafactory/model/loader.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from typing import TYPE_CHECKING, Any, Dict, Optional, TypedDict from transformers import AutoConfig, AutoModelForCausalLM, AutoModelForVision2Seq, AutoProcessor, AutoTokenizer diff --git a/src/llamafactory/model/model_utils/attention.py b/src/llamafactory/model/model_utils/attention.py index 2bd36fdc..8ff3807b 100644 --- a/src/llamafactory/model/model_utils/attention.py +++ b/src/llamafactory/model/model_utils/attention.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from typing import TYPE_CHECKING from transformers.utils import is_flash_attn_2_available, is_torch_sdpa_available diff --git a/src/llamafactory/model/model_utils/checkpointing.py b/src/llamafactory/model/model_utils/checkpointing.py index e0657be8..e4e84b12 100644 --- a/src/llamafactory/model/model_utils/checkpointing.py +++ b/src/llamafactory/model/model_utils/checkpointing.py @@ -1,3 +1,21 @@ +# Copyright 2024 HuggingFace Inc. and the LlamaFactory team. +# +# This code is inspired by HuggingFace's Transformers and PEFT library. +# https://github.com/huggingface/transformers/blob/v4.40.0/src/transformers/modeling_utils.py +# https://github.com/huggingface/peft/blob/v0.10.0/src/peft/utils/other.py +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import inspect from functools import partial from types import MethodType @@ -68,7 +86,6 @@ def prepare_model_for_training( (1) cast the layernorm in fp32 (2) make output embedding layer require grads (3) add the upcasting of the lm_head in fp32 - Inspired by: https://github.com/huggingface/peft/blob/v0.7.1/src/peft/utils/other.py#L72 """ if model_args.upcast_layernorm: logger.info("Upcasting layernorm weights in float32.") diff --git a/src/llamafactory/model/model_utils/embedding.py b/src/llamafactory/model/model_utils/embedding.py index 3d9278e3..3ff79828 100644 --- a/src/llamafactory/model/model_utils/embedding.py +++ b/src/llamafactory/model/model_utils/embedding.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import math from contextlib import nullcontext from typing import TYPE_CHECKING diff --git a/src/llamafactory/model/model_utils/longlora.py b/src/llamafactory/model/model_utils/longlora.py index 4a8c562a..7af43dcf 100644 --- a/src/llamafactory/model/model_utils/longlora.py +++ b/src/llamafactory/model/model_utils/longlora.py @@ -1,3 +1,20 @@ +# Copyright 2024 EleutherAI, HuggingFace Inc., and the LlamaFactory team. +# +# This code is based on the EleutherAI's GPT-NeoX and HuggingFace's Transformers libraries. 
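The checkpointing.py hunk above keeps prepare_model_for_training's three-step summary (cast layer norms to fp32, make the output embedding require grads, upcast lm_head to fp32) while dropping the inline reference URL. A minimal sketch of the first step is given below; matching layer-norm parameters by name substring is an assumption for illustration.

# Hedged sketch: upcasting layer-norm weights to float32 before mixed-precision training.
import torch

LAYERNORM_SUBSTRINGS = ("norm", "ln_f", "layernorm")  # illustrative name patterns

def upcast_layernorm(model: torch.nn.Module) -> None:
    for name, param in model.named_parameters():
        if param.ndim == 1 and any(s in name.lower() for s in LAYERNORM_SUBSTRINGS):
            param.data = param.data.to(torch.float32)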
+# https://github.com/huggingface/transformers/blob/v4.40.0/src/transformers/models/llama/modeling_llama.py +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import math from typing import TYPE_CHECKING, Optional, Tuple diff --git a/src/llamafactory/model/model_utils/misc.py b/src/llamafactory/model/model_utils/misc.py index 4851bd29..a2812228 100644 --- a/src/llamafactory/model/model_utils/misc.py +++ b/src/llamafactory/model/model_utils/misc.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from typing import TYPE_CHECKING, List from ...extras.logging import get_logger diff --git a/src/llamafactory/model/model_utils/mod.py b/src/llamafactory/model/model_utils/mod.py index 5708a1a8..ec73af00 100644 --- a/src/llamafactory/model/model_utils/mod.py +++ b/src/llamafactory/model/model_utils/mod.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from typing import TYPE_CHECKING from ...extras.constants import MOD_SUPPORTED_MODELS diff --git a/src/llamafactory/model/model_utils/moe.py b/src/llamafactory/model/model_utils/moe.py index 8a73c844..5c7473aa 100644 --- a/src/llamafactory/model/model_utils/moe.py +++ b/src/llamafactory/model/model_utils/moe.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ from typing import TYPE_CHECKING, Sequence import torch diff --git a/src/llamafactory/model/model_utils/quantization.py b/src/llamafactory/model/model_utils/quantization.py index 02a54f07..9e6b9da4 100644 --- a/src/llamafactory/model/model_utils/quantization.py +++ b/src/llamafactory/model/model_utils/quantization.py @@ -1,3 +1,20 @@ +# Copyright 2024 HuggingFace Inc. and the LlamaFactory team. +# +# This code is inspired by HuggingFace's Optimum library. +# https://github.com/huggingface/optimum/blob/v1.20.0/optimum/gptq/data.py +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import os import random from enum import Enum, unique @@ -41,7 +58,6 @@ class QuantizationMethod(str, Enum): def _get_quantization_dataset(tokenizer: "PreTrainedTokenizer", model_args: "ModelArguments") -> List[str]: r""" - Inspired by: https://github.com/huggingface/optimum/blob/v1.16.0/optimum/gptq/data.py#L133 TODO: remove tokenizer.decode() https://github.com/huggingface/optimum/pull/1600 """ if os.path.isfile(model_args.export_quantization_dataset): diff --git a/src/llamafactory/model/model_utils/rope.py b/src/llamafactory/model/model_utils/rope.py index 93ab8929..88303c4d 100644 --- a/src/llamafactory/model/model_utils/rope.py +++ b/src/llamafactory/model/model_utils/rope.py @@ -1,3 +1,21 @@ +# Copyright 2024 LMSYS and the LlamaFactory team. +# Copyright 2023 Rohan Taori, Ishaan Gulrajani, Tianyi Zhang, Yann Dubois, Xuechen Li +# +# This code is inspired by the LMSYS's FastChat library. +# https://github.com/lm-sys/FastChat/blob/v0.2.30/fastchat/train/train.py +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import math from typing import TYPE_CHECKING diff --git a/src/llamafactory/model/model_utils/unsloth.py b/src/llamafactory/model/model_utils/unsloth.py index 8a16409d..9cfaec61 100644 --- a/src/llamafactory/model/model_utils/unsloth.py +++ b/src/llamafactory/model/model_utils/unsloth.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ from typing import TYPE_CHECKING, Any, Dict, Optional from ...extras.logging import get_logger diff --git a/src/llamafactory/model/model_utils/valuehead.py b/src/llamafactory/model/model_utils/valuehead.py index 64333688..9ab3d45a 100644 --- a/src/llamafactory/model/model_utils/valuehead.py +++ b/src/llamafactory/model/model_utils/valuehead.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from typing import TYPE_CHECKING, Dict import torch diff --git a/src/llamafactory/model/model_utils/visual.py b/src/llamafactory/model/model_utils/visual.py index c8260b7f..37237485 100644 --- a/src/llamafactory/model/model_utils/visual.py +++ b/src/llamafactory/model/model_utils/visual.py @@ -1,3 +1,20 @@ +# Copyright 2024 HuggingFace Inc. and the LlamaFactory team. +# +# This code is inspired by HuggingFace's TRL library. +# https://github.com/huggingface/transformers/blob/v4.40.0/src/transformers/models/llava/modeling_llava.py +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from typing import TYPE_CHECKING, Tuple import torch diff --git a/src/llamafactory/model/patcher.py b/src/llamafactory/model/patcher.py index b97ff433..053516e4 100644 --- a/src/llamafactory/model/patcher.py +++ b/src/llamafactory/model/patcher.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import os from types import MethodType from typing import TYPE_CHECKING, Any, Dict diff --git a/src/llamafactory/train/dpo/__init__.py b/src/llamafactory/train/dpo/__init__.py index 43fe9420..9ce0d089 100644 --- a/src/llamafactory/train/dpo/__init__.py +++ b/src/llamafactory/train/dpo/__init__.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from .workflow import run_dpo diff --git a/src/llamafactory/train/dpo/trainer.py b/src/llamafactory/train/dpo/trainer.py index 5bdb9c43..475d08c3 100644 --- a/src/llamafactory/train/dpo/trainer.py +++ b/src/llamafactory/train/dpo/trainer.py @@ -1,3 +1,20 @@ +# Copyright 2024 HuggingFace Inc. and the LlamaFactory team. +# +# This code is inspired by HuggingFace's TRL library. +# https://github.com/huggingface/trl/blob/v0.8.0/trl/trainer/dpo_trainer.py +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import warnings from collections import defaultdict from contextlib import nullcontext diff --git a/src/llamafactory/train/dpo/workflow.py b/src/llamafactory/train/dpo/workflow.py index 992985b0..8c3c2eb1 100644 --- a/src/llamafactory/train/dpo/workflow.py +++ b/src/llamafactory/train/dpo/workflow.py @@ -1,4 +1,19 @@ -# Inspired by: https://github.com/huggingface/trl/blob/main/examples/research_projects/stack_llama_2/scripts/dpo_llama2.py +# Copyright 2024 HuggingFace Inc. and the LlamaFactory team. +# +# This code is inspired by HuggingFace's TRL library. +# https://github.com/huggingface/trl/blob/v0.8.0/examples/scripts/dpo.py +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. from typing import TYPE_CHECKING, List, Optional diff --git a/src/llamafactory/train/kto/__init__.py b/src/llamafactory/train/kto/__init__.py index 34c7905a..a1900368 100644 --- a/src/llamafactory/train/kto/__init__.py +++ b/src/llamafactory/train/kto/__init__.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ from .workflow import run_kto diff --git a/src/llamafactory/train/kto/trainer.py b/src/llamafactory/train/kto/trainer.py index 3b4488fc..6e96fc0c 100644 --- a/src/llamafactory/train/kto/trainer.py +++ b/src/llamafactory/train/kto/trainer.py @@ -1,3 +1,20 @@ +# Copyright 2024 HuggingFace Inc. and the LlamaFactory team. +# +# This code is inspired by HuggingFace's TRL library. +# https://github.com/huggingface/trl/blob/v0.8.0/trl/trainer/kto_trainer.py +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import warnings from collections import defaultdict from contextlib import nullcontext diff --git a/src/llamafactory/train/kto/workflow.py b/src/llamafactory/train/kto/workflow.py index c79b160b..8a7af6d4 100644 --- a/src/llamafactory/train/kto/workflow.py +++ b/src/llamafactory/train/kto/workflow.py @@ -1,3 +1,20 @@ +# Copyright 2024 HuggingFace Inc. and the LlamaFactory team. +# +# This code is inspired by HuggingFace's TRL library. +# https://github.com/huggingface/trl/blob/v0.8.0/examples/scripts/kto.py +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from typing import TYPE_CHECKING, List, Optional from ...data import KTODataCollatorWithPadding, get_dataset, split_dataset diff --git a/src/llamafactory/train/ppo/__init__.py b/src/llamafactory/train/ppo/__init__.py index d17336d5..161f6f5d 100644 --- a/src/llamafactory/train/ppo/__init__.py +++ b/src/llamafactory/train/ppo/__init__.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from .workflow import run_ppo diff --git a/src/llamafactory/train/ppo/ppo_utils.py b/src/llamafactory/train/ppo/ppo_utils.py index fec3fc1e..05c40946 100644 --- a/src/llamafactory/train/ppo/ppo_utils.py +++ b/src/llamafactory/train/ppo/ppo_utils.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import json from contextlib import nullcontext from typing import TYPE_CHECKING, Dict, List, Literal, Optional diff --git a/src/llamafactory/train/ppo/trainer.py b/src/llamafactory/train/ppo/trainer.py index 45f47455..61420f3b 100644 --- a/src/llamafactory/train/ppo/trainer.py +++ b/src/llamafactory/train/ppo/trainer.py @@ -1,3 +1,20 @@ +# Copyright 2024 HuggingFace Inc. and the LlamaFactory team. +# +# This code is inspired by HuggingFace's TRL library. +# https://github.com/huggingface/trl/blob/v0.8.0/trl/trainer/ppo_trainer.py +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import math import os import sys diff --git a/src/llamafactory/train/ppo/workflow.py b/src/llamafactory/train/ppo/workflow.py index 111704c6..891d539a 100644 --- a/src/llamafactory/train/ppo/workflow.py +++ b/src/llamafactory/train/ppo/workflow.py @@ -1,4 +1,19 @@ -# Inspired by: https://github.com/lvwerra/trl/blob/main/examples/research_projects/stack_llama/scripts/rl_training.py +# Copyright 2024 HuggingFace Inc. and the LlamaFactory team. +# +# This code is inspired by HuggingFace's TRL library. +# https://github.com/huggingface/trl/blob/v0.8.0/examples/scripts/ppo.py +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. from typing import TYPE_CHECKING, List, Optional diff --git a/src/llamafactory/train/pt/__init__.py b/src/llamafactory/train/pt/__init__.py index bdf397f6..d80e6f22 100644 --- a/src/llamafactory/train/pt/__init__.py +++ b/src/llamafactory/train/pt/__init__.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ from .workflow import run_pt diff --git a/src/llamafactory/train/pt/trainer.py b/src/llamafactory/train/pt/trainer.py index 1d96e82f..09729f2e 100644 --- a/src/llamafactory/train/pt/trainer.py +++ b/src/llamafactory/train/pt/trainer.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from types import MethodType from typing import TYPE_CHECKING, Dict, Optional diff --git a/src/llamafactory/train/pt/workflow.py b/src/llamafactory/train/pt/workflow.py index 8a635567..f1df314e 100644 --- a/src/llamafactory/train/pt/workflow.py +++ b/src/llamafactory/train/pt/workflow.py @@ -1,4 +1,19 @@ -# Inspired by: https://github.com/huggingface/transformers/blob/v4.34.1/examples/pytorch/language-modeling/run_clm.py +# Copyright 2024 HuggingFace Inc. and the LlamaFactory team. +# +# This code is inspired by HuggingFace's transformers library. +# https://github.com/huggingface/transformers/blob/v4.40.0/examples/pytorch/language-modeling/run_clm.py +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import math from typing import TYPE_CHECKING, List, Optional diff --git a/src/llamafactory/train/rm/__init__.py b/src/llamafactory/train/rm/__init__.py index dedac35f..48278315 100644 --- a/src/llamafactory/train/rm/__init__.py +++ b/src/llamafactory/train/rm/__init__.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from .workflow import run_rm diff --git a/src/llamafactory/train/rm/metric.py b/src/llamafactory/train/rm/metric.py index 99dc6ab8..fb880b1c 100644 --- a/src/llamafactory/train/rm/metric.py +++ b/src/llamafactory/train/rm/metric.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from typing import Dict, Sequence, Tuple, Union import numpy as np diff --git a/src/llamafactory/train/rm/trainer.py b/src/llamafactory/train/rm/trainer.py index bfb344dc..14695d7d 100644 --- a/src/llamafactory/train/rm/trainer.py +++ b/src/llamafactory/train/rm/trainer.py @@ -1,3 +1,42 @@ +# Copyright 2024 the LlamaFactory team. +# +# This code is inspired by CarperAI's trlx library. +# https://github.com/CarperAI/trlx/blob/v0.7.0/examples/summarize_rlhf/reward_model/reward_model.py +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# MIT License +# +# Copyright (c) 2022 CarperAI +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + import json import os from types import MethodType @@ -79,7 +118,6 @@ class PairwiseTrainer(Trainer): chosen_scores, rejected_scores = [], [] # Compute pairwise loss. Only backprop on the different tokens before padding - # Inspired by: https://github.com/CarperAI/trlx/blob/main/examples/summarize_rlhf/reward_model/reward_model.py loss = 0 for i in range(batch_size): chosen_length = (chosen_input_ids[i] != self.tokenizer.pad_token_id).nonzero()[-1] + 1 diff --git a/src/llamafactory/train/rm/workflow.py b/src/llamafactory/train/rm/workflow.py index 2e9e194b..75c0a2bf 100644 --- a/src/llamafactory/train/rm/workflow.py +++ b/src/llamafactory/train/rm/workflow.py @@ -1,4 +1,41 @@ -# Inspired by: https://github.com/CarperAI/trlx/blob/main/examples/summarize_rlhf/reward_model/train_reward_model_gptj.py +# Copyright 2024 the LlamaFactory team. +# +# This code is inspired by CarperAI's trlx library. 
+# https://github.com/CarperAI/trlx/blob/v0.7.0/examples/summarize_rlhf/reward_model/train_reward_model_gptj.py +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# MIT License +# +# Copyright (c) 2022 CarperAI +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. from typing import TYPE_CHECKING, List, Optional diff --git a/src/llamafactory/train/sft/__init__.py b/src/llamafactory/train/sft/__init__.py index f2f84e78..475dfe5f 100644 --- a/src/llamafactory/train/sft/__init__.py +++ b/src/llamafactory/train/sft/__init__.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from .workflow import run_sft diff --git a/src/llamafactory/train/sft/metric.py b/src/llamafactory/train/sft/metric.py index 923238d6..d2147c22 100644 --- a/src/llamafactory/train/sft/metric.py +++ b/src/llamafactory/train/sft/metric.py @@ -1,3 +1,21 @@ +# Copyright 2024 HuggingFace Inc., THUDM, and the LlamaFactory team. +# +# This code is inspired by HuggingFace's transformers library and THUDM's ChatGLM implementation. +# https://github.com/huggingface/transformers/blob/v4.40.0/examples/pytorch/summarization/run_summarization.py +# https://github.com/THUDM/ChatGLM-6B/blob/main/ptuning/main.py +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from dataclasses import dataclass from typing import TYPE_CHECKING, Dict, Sequence, Tuple, Union diff --git a/src/llamafactory/train/sft/trainer.py b/src/llamafactory/train/sft/trainer.py index 6bf5b7c0..6ab6914e 100644 --- a/src/llamafactory/train/sft/trainer.py +++ b/src/llamafactory/train/sft/trainer.py @@ -1,3 +1,20 @@ +# Copyright 2024 HuggingFace Inc. and the LlamaFactory team. +# +# This code is inspired by HuggingFace's transformers library. +# https://github.com/huggingface/transformers/blob/v4.40.0/src/transformers/trainer_seq2seq.py +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import json import os from types import MethodType diff --git a/src/llamafactory/train/sft/workflow.py b/src/llamafactory/train/sft/workflow.py index a989b3f7..dfc71cfb 100644 --- a/src/llamafactory/train/sft/workflow.py +++ b/src/llamafactory/train/sft/workflow.py @@ -1,4 +1,19 @@ -# Inspired by: https://github.com/huggingface/transformers/blob/v4.34.1/examples/pytorch/summarization/run_summarization.py +# Copyright 2024 HuggingFace Inc. and the LlamaFactory team. +# +# This code is inspired by HuggingFace's transformers library. +# https://github.com/huggingface/transformers/blob/v4.40.0/examples/pytorch/summarization/run_summarization.py +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. from typing import TYPE_CHECKING, List, Optional diff --git a/src/llamafactory/train/trainer_utils.py b/src/llamafactory/train/trainer_utils.py index 48944a63..5621d5df 100644 --- a/src/llamafactory/train/trainer_utils.py +++ b/src/llamafactory/train/trainer_utils.py @@ -1,3 +1,22 @@ +# Copyright 2024 HuggingFace Inc. and the LlamaFactory team. +# +# This code is inspired by the GaLore's implementation: https://github.com/jiaweizzhao/GaLore +# and the LoRA+'s implementation: https://github.com/nikhil-ghosh-berkeley/loraplus +# and the BAdam's implementation: https://github.com/Ledzy/BAdam +# and the TRL's implementation: https://github.com/huggingface/trl +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from typing import TYPE_CHECKING, Callable, Dict, List, Optional, Tuple, Union import torch diff --git a/src/llamafactory/train/tuner.py b/src/llamafactory/train/tuner.py index eed875e9..788b4c4f 100644 --- a/src/llamafactory/train/tuner.py +++ b/src/llamafactory/train/tuner.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from typing import TYPE_CHECKING, Any, Dict, List, Optional import torch diff --git a/src/llamafactory/webui/chatter.py b/src/llamafactory/webui/chatter.py index c82710d3..864c41c7 100644 --- a/src/llamafactory/webui/chatter.py +++ b/src/llamafactory/webui/chatter.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import json import os from typing import TYPE_CHECKING, Dict, Generator, List, Optional, Sequence, Tuple diff --git a/src/llamafactory/webui/common.py b/src/llamafactory/webui/common.py index 3b8d5378..980428a4 100644 --- a/src/llamafactory/webui/common.py +++ b/src/llamafactory/webui/common.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import json import os from collections import defaultdict diff --git a/src/llamafactory/webui/components/__init__.py b/src/llamafactory/webui/components/__init__.py index 5c1e21b8..715fb6e4 100644 --- a/src/llamafactory/webui/components/__init__.py +++ b/src/llamafactory/webui/components/__init__.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from .chatbot import create_chat_box from .eval import create_eval_tab from .export import create_export_tab diff --git a/src/llamafactory/webui/components/chatbot.py b/src/llamafactory/webui/components/chatbot.py index f83694b1..ad74114b 100644 --- a/src/llamafactory/webui/components/chatbot.py +++ b/src/llamafactory/webui/components/chatbot.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from typing import TYPE_CHECKING, Dict, Tuple from ...data import Role diff --git a/src/llamafactory/webui/components/data.py b/src/llamafactory/webui/components/data.py index 232b973d..88e500cf 100644 --- a/src/llamafactory/webui/components/data.py +++ b/src/llamafactory/webui/components/data.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import json import os from typing import TYPE_CHECKING, Any, Dict, List, Tuple diff --git a/src/llamafactory/webui/components/eval.py b/src/llamafactory/webui/components/eval.py index 0a7a0f44..b522913e 100644 --- a/src/llamafactory/webui/components/eval.py +++ b/src/llamafactory/webui/components/eval.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ from typing import TYPE_CHECKING, Dict from ...extras.packages import is_gradio_available diff --git a/src/llamafactory/webui/components/export.py b/src/llamafactory/webui/components/export.py index 9d756a38..14257949 100644 --- a/src/llamafactory/webui/components/export.py +++ b/src/llamafactory/webui/components/export.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from typing import TYPE_CHECKING, Dict, Generator, List, Union from ...extras.constants import PEFT_METHODS diff --git a/src/llamafactory/webui/components/infer.py b/src/llamafactory/webui/components/infer.py index 970f4629..03bccd7f 100644 --- a/src/llamafactory/webui/components/infer.py +++ b/src/llamafactory/webui/components/infer.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from typing import TYPE_CHECKING, Dict from ...extras.packages import is_gradio_available diff --git a/src/llamafactory/webui/components/top.py b/src/llamafactory/webui/components/top.py index fd0ead3d..2515a83d 100644 --- a/src/llamafactory/webui/components/top.py +++ b/src/llamafactory/webui/components/top.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from typing import TYPE_CHECKING, Dict from ...data import TEMPLATES diff --git a/src/llamafactory/webui/components/train.py b/src/llamafactory/webui/components/train.py index 72dfc858..673f6bf4 100644 --- a/src/llamafactory/webui/components/train.py +++ b/src/llamafactory/webui/components/train.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from typing import TYPE_CHECKING, Dict from transformers.trainer_utils import SchedulerType diff --git a/src/llamafactory/webui/css.py b/src/llamafactory/webui/css.py index 36e3d4c2..53982119 100644 --- a/src/llamafactory/webui/css.py +++ b/src/llamafactory/webui/css.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + CSS = r""" .duplicate-button { margin: auto !important; diff --git a/src/llamafactory/webui/engine.py b/src/llamafactory/webui/engine.py index eb6142d3..04893215 100644 --- a/src/llamafactory/webui/engine.py +++ b/src/llamafactory/webui/engine.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from typing import TYPE_CHECKING, Any, Dict from .chatter import WebChatModel diff --git a/src/llamafactory/webui/interface.py b/src/llamafactory/webui/interface.py index bae3ba76..d25f4d38 100644 --- a/src/llamafactory/webui/interface.py +++ b/src/llamafactory/webui/interface.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import os from ..extras.packages import is_gradio_available diff --git a/src/llamafactory/webui/locales.py b/src/llamafactory/webui/locales.py index e30feab2..427f01b8 100644 --- a/src/llamafactory/webui/locales.py +++ b/src/llamafactory/webui/locales.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + LOCALES = { "lang": { "en": { diff --git a/src/llamafactory/webui/manager.py b/src/llamafactory/webui/manager.py index 326fdb8d..7e9b801a 100644 --- a/src/llamafactory/webui/manager.py +++ b/src/llamafactory/webui/manager.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from typing import TYPE_CHECKING, Dict, Generator, List, Set, Tuple diff --git a/src/llamafactory/webui/runner.py b/src/llamafactory/webui/runner.py index 35014628..76982934 100644 --- a/src/llamafactory/webui/runner.py +++ b/src/llamafactory/webui/runner.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import os from copy import deepcopy from subprocess import Popen, TimeoutExpired diff --git a/src/llamafactory/webui/utils.py b/src/llamafactory/webui/utils.py index e39f2aa4..6ce2a8e7 100644 --- a/src/llamafactory/webui/utils.py +++ b/src/llamafactory/webui/utils.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import json import os import signal diff --git a/src/train.py b/src/train.py index b20aa9d2..6703ffdb 100644 --- a/src/train.py +++ b/src/train.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from llamafactory.train.tuner import run_exp diff --git a/src/webui.py b/src/webui.py index bbefb54e..99370af2 100644 --- a/src/webui.py +++ b/src/webui.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import os from llamafactory.webui.interface import create_ui diff --git a/tests/data/test_supervised.py b/tests/data/test_supervised.py index 63a3453f..a72800d2 100644 --- a/tests/data/test_supervised.py +++ b/tests/data/test_supervised.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import os import random diff --git a/tests/eval/test_eval_template.py b/tests/eval/test_eval_template.py index f6a91a67..f85d9d57 100644 --- a/tests/eval/test_eval_template.py +++ b/tests/eval/test_eval_template.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from llamafactory.eval.template import get_eval_template diff --git a/tests/model/model_utils/test_attention.py b/tests/model/model_utils/test_attention.py index 751adda4..97ac9dcc 100644 --- a/tests/model/model_utils/test_attention.py +++ b/tests/model/model_utils/test_attention.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import os from transformers.utils import is_flash_attn_2_available, is_torch_sdpa_available diff --git a/tests/model/test_base.py b/tests/model/test_base.py index 32a3918e..462e8cfa 100644 --- a/tests/model/test_base.py +++ b/tests/model/test_base.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import os import torch diff --git a/tests/model/test_freeze.py b/tests/model/test_freeze.py index a0618315..ac5a023c 100644 --- a/tests/model/test_freeze.py +++ b/tests/model/test_freeze.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import os import torch diff --git a/tests/model/test_full.py b/tests/model/test_full.py index 802b987c..bcd6480f 100644 --- a/tests/model/test_full.py +++ b/tests/model/test_full.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import os import torch diff --git a/tests/model/test_lora.py b/tests/model/test_lora.py index 3e2503f1..e49c026c 100644 --- a/tests/model/test_lora.py +++ b/tests/model/test_lora.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import os from typing import Sequence From 61aaab22c93fe89145f1bea926b00004352bd7fc Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sat, 15 Jun 2024 17:58:52 +0800 Subject: [PATCH 152/162] add minicpm #4227 Former-commit-id: e1bb18ce60be9a1b203989def30f1b9194286325 --- src/llamafactory/extras/constants.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/llamafactory/extras/constants.py b/src/llamafactory/extras/constants.py index e31e7419..73a9969d 100644 --- a/src/llamafactory/extras/constants.py +++ b/src/llamafactory/extras/constants.py @@ -682,6 +682,21 @@ register_model_group( ) +register_model_group( + models={ + "MiniCPM-2B-SFT-Chat": { + DownloadSource.DEFAULT: "openbmb/MiniCPM-2B-sft-bf16", + DownloadSource.MODELSCOPE: "OpenBMB/miniCPM-bf16", + }, + "MiniCPM-2B-DPO-Chat": { + DownloadSource.DEFAULT: "openbmb/MiniCPM-2B-dpo-bf16", + DownloadSource.MODELSCOPE: "OpenBMB/MiniCPM-2B-dpo-bf16", + }, + }, + template="cpm", +) + + register_model_group( models={ "Mistral-7B-v0.1": { From 4851ef85b795f2c03d1d90f9fb57caa2d1f59258 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sat, 15 Jun 2024 19:51:20 +0800 Subject: [PATCH 153/162] add tests Former-commit-id: 484634ee9c982e82e919ff67d507e0210345182d --- Makefile | 2 +- src/llamafactory/extras/misc.py | 9 ++- tests/data/test_supervised.py | 2 +- tests/model/model_utils/test_checkpointing.py | 74 +++++++++++++++++++ tests/model/test_base.py | 30 +++++++- tests/model/test_freeze.py | 3 + tests/model/test_full.py | 2 + tests/model/test_lora.py | 58 +++++++++++++-- 8 files changed, 166 insertions(+), 14 deletions(-) create mode 100644 tests/model/model_utils/test_checkpointing.py diff --git a/Makefile b/Makefile index 65be047b..3f13b215 100644 --- a/Makefile +++ b/Makefile @@ -11,4 +11,4 @@ style: ruff format $(check_dirs) test: - pytest tests/ + CUDA_VISIBLE_DEVICES= pytest tests/ diff --git a/src/llamafactory/extras/misc.py b/src/llamafactory/extras/misc.py index 3d969df1..93153b3e 100644 --- a/src/llamafactory/extras/misc.py +++ b/src/llamafactory/extras/misc.py @@ -22,6 +22,7 @@ from transformers import InfNanRemoveLogitsProcessor, LogitsProcessorList, PreTr from transformers.utils import ( SAFE_WEIGHTS_NAME, WEIGHTS_NAME, + is_safetensors_available, is_torch_bf16_gpu_available, is_torch_cuda_available, is_torch_mps_available, @@ -34,6 +35,11 @@ from .constants import V_HEAD_SAFE_WEIGHTS_NAME, V_HEAD_WEIGHTS_NAME from .logging import get_logger +if is_safetensors_available(): + from safetensors import safe_open + from safetensors.torch import save_file + + _is_fp16_available = is_torch_npu_available() or is_torch_cuda_available() try: _is_bf16_available = is_torch_bf16_gpu_available() @@ -128,9 +134,6 @@ def fix_valuehead_checkpoint( return if safe_serialization: - from safetensors import safe_open - from safetensors.torch import save_file - path_to_checkpoint = os.path.join(output_dir, SAFE_WEIGHTS_NAME) with safe_open(path_to_checkpoint, framework="pt", device="cpu") as f: state_dict: Dict[str, torch.Tensor] = {key: f.get_tensor(key) for key in f.keys()} diff --git 
a/tests/data/test_supervised.py b/tests/data/test_supervised.py index a72800d2..9f7b2dbf 100644 --- a/tests/data/test_supervised.py +++ b/tests/data/test_supervised.py @@ -41,7 +41,7 @@ TRAIN_ARGS = { } -@pytest.mark.parametrize("num_samples", [10]) +@pytest.mark.parametrize("num_samples", [16]) def test_supervised(num_samples: int): model_args, data_args, training_args, _, _ = get_train_args(TRAIN_ARGS) tokenizer_module = load_tokenizer(model_args) diff --git a/tests/model/model_utils/test_checkpointing.py b/tests/model/model_utils/test_checkpointing.py new file mode 100644 index 00000000..670e693d --- /dev/null +++ b/tests/model/model_utils/test_checkpointing.py @@ -0,0 +1,74 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +import torch + +from llamafactory.extras.misc import get_current_device +from llamafactory.hparams import get_train_args +from llamafactory.model import load_model, load_tokenizer + + +TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-Llama-3") + +TRAIN_ARGS = { + "model_name_or_path": TINY_LLAMA, + "stage": "sft", + "do_train": True, + "finetuning_type": "lora", + "lora_target": "all", + "dataset": "llamafactory/tiny-supervised-dataset", + "dataset_dir": "ONLINE", + "template": "llama3", + "cutoff_len": 1024, + "overwrite_cache": True, + "output_dir": "dummy_dir", + "overwrite_output_dir": True, + "fp16": True, +} + + +def test_checkpointing_enable(): + model_args, _, _, finetuning_args, _ = get_train_args({"disable_gradient_checkpointing": False, **TRAIN_ARGS}) + tokenizer_module = load_tokenizer(model_args) + model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) + for module in filter(lambda m: hasattr(m, "gradient_checkpointing"), model.modules()): + assert getattr(module, "gradient_checkpointing") is True + + +def test_checkpointing_disable(): + model_args, _, _, finetuning_args, _ = get_train_args({"disable_gradient_checkpointing": True, **TRAIN_ARGS}) + tokenizer_module = load_tokenizer(model_args) + model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) + for module in filter(lambda m: hasattr(m, "gradient_checkpointing"), model.modules()): + assert getattr(module, "gradient_checkpointing") is False + + +def test_upcast_layernorm(): + model_args, _, _, finetuning_args, _ = get_train_args({"upcast_layernorm": True, **TRAIN_ARGS}) + tokenizer_module = load_tokenizer(model_args) + model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) + for name, param in model.named_parameters(): + if param.ndim == 1 and "norm" in name: + assert param.dtype == torch.float32 + + +def test_upcast_lmhead_output(): + model_args, _, _, finetuning_args, _ = get_train_args({"upcast_lmhead_output": True, **TRAIN_ARGS}) + tokenizer_module = load_tokenizer(model_args) + model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) + inputs = 
torch.randn((1, 16), dtype=torch.float16, device=get_current_device()) + outputs: "torch.Tensor" = model.lm_head(inputs) + assert outputs.dtype == torch.float32 diff --git a/tests/model/test_base.py b/tests/model/test_base.py index 462e8cfa..ee0b2886 100644 --- a/tests/model/test_base.py +++ b/tests/model/test_base.py @@ -13,16 +13,21 @@ # limitations under the License. import os +from typing import Dict import torch from transformers import AutoModelForCausalLM +from trl import AutoModelForCausalLMWithValueHead +from llamafactory.extras.misc import get_current_device from llamafactory.hparams import get_infer_args from llamafactory.model import load_model, load_tokenizer TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-Llama-3") +TINY_LLAMA_VALUEHEAD = os.environ.get("TINY_LLAMA_VALUEHEAD", "llamafactory/tiny-random-Llama-3-valuehead") + INFER_ARGS = { "model_name_or_path": TINY_LLAMA, "template": "llama3", @@ -38,9 +43,32 @@ def compare_model(model_a: "torch.nn.Module", model_b: "torch.nn.Module"): assert torch.allclose(state_dict_a[name], state_dict_b[name]) is True +def post_init(self: "AutoModelForCausalLMWithValueHead", state_dict: Dict[str, "torch.Tensor"]): + state_dict = {k[7:]: state_dict[k] for k in state_dict.keys() if k.startswith("v_head.")} + self.v_head.load_state_dict(state_dict, strict=False) + del state_dict + + def test_base(): model_args, _, finetuning_args, _ = get_infer_args(INFER_ARGS) tokenizer_module = load_tokenizer(model_args) model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=False) - ref_model = AutoModelForCausalLM.from_pretrained(TINY_LLAMA, torch_dtype=model.dtype, device_map=model.device) + + ref_model = AutoModelForCausalLM.from_pretrained( + TINY_LLAMA, torch_dtype=torch.float16, device_map=get_current_device() + ) + compare_model(model, ref_model) + + +def test_valuehead(): + AutoModelForCausalLMWithValueHead.post_init = post_init # patch for CPU test + model_args, _, finetuning_args, _ = get_infer_args(INFER_ARGS) + tokenizer_module = load_tokenizer(model_args) + model = load_model( + tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=False, add_valuehead=True + ) + + ref_model = AutoModelForCausalLMWithValueHead.from_pretrained( + TINY_LLAMA_VALUEHEAD, torch_dtype=torch.float16, device_map=get_current_device() + ) compare_model(model, ref_model) diff --git a/tests/model/test_freeze.py b/tests/model/test_freeze.py index ac5a023c..5f478af6 100644 --- a/tests/model/test_freeze.py +++ b/tests/model/test_freeze.py @@ -49,6 +49,7 @@ def test_freeze_train_all_modules(): model_args, _, _, finetuning_args, _ = get_train_args({"freeze_trainable_layers": 1, **TRAIN_ARGS}) tokenizer_module = load_tokenizer(model_args) model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) + for name, param in model.named_parameters(): if name.startswith("model.layers.1."): assert param.requires_grad is True @@ -64,6 +65,7 @@ def test_freeze_train_extra_modules(): ) tokenizer_module = load_tokenizer(model_args) model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) + for name, param in model.named_parameters(): if name.startswith("model.layers.1.") or any(module in name for module in ["embed_tokens", "lm_head"]): assert param.requires_grad is True @@ -77,6 +79,7 @@ def test_freeze_inference(): model_args, _, finetuning_args, _ = get_infer_args(INFER_ARGS) tokenizer_module = load_tokenizer(model_args) model = 
load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=False) + for param in model.parameters(): assert param.requires_grad is False assert param.dtype == torch.float16 diff --git a/tests/model/test_full.py b/tests/model/test_full.py index bcd6480f..0a6e0743 100644 --- a/tests/model/test_full.py +++ b/tests/model/test_full.py @@ -49,6 +49,7 @@ def test_full_train(): model_args, _, _, finetuning_args, _ = get_train_args(TRAIN_ARGS) tokenizer_module = load_tokenizer(model_args) model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) + for param in model.parameters(): assert param.requires_grad is True assert param.dtype == torch.float32 @@ -58,6 +59,7 @@ def test_full_inference(): model_args, _, finetuning_args, _ = get_infer_args(INFER_ARGS) tokenizer_module = load_tokenizer(model_args) model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=False) + for param in model.parameters(): assert param.requires_grad is False assert param.dtype == torch.float16 diff --git a/tests/model/test_lora.py b/tests/model/test_lora.py index e49c026c..4923c8ad 100644 --- a/tests/model/test_lora.py +++ b/tests/model/test_lora.py @@ -18,7 +18,9 @@ from typing import Sequence import torch from peft import LoraModel, PeftModel from transformers import AutoModelForCausalLM +from trl import AutoModelForCausalLMWithValueHead +from llamafactory.extras.misc import get_current_device from llamafactory.hparams import get_infer_args, get_train_args from llamafactory.model import load_model, load_tokenizer @@ -27,6 +29,8 @@ TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-Llama-3") TINY_LLAMA_ADAPTER = os.environ.get("TINY_LLAMA_ADAPTER", "llamafactory/tiny-random-Llama-3-lora") +TINY_LLAMA_VALUEHEAD = os.environ.get("TINY_LLAMA_VALUEHEAD", "llamafactory/tiny-random-Llama-3-valuehead") + TRAIN_ARGS = { "model_name_or_path": TINY_LLAMA, "stage": "sft", @@ -67,10 +71,29 @@ def compare_model(model_a: "torch.nn.Module", model_b: "torch.nn.Module", diff_k assert torch.allclose(state_dict_a[name], state_dict_b[name]) is True +def test_lora_train_qv_modules(): + model_args, _, _, finetuning_args, _ = get_train_args({"lora_target": "q_proj,v_proj", **TRAIN_ARGS}) + tokenizer_module = load_tokenizer(model_args) + model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) + + linear_modules = set() + for name, param in model.named_parameters(): + if any(module in name for module in ["lora_A", "lora_B"]): + linear_modules.add(name.split(".lora_", maxsplit=1)[0].split(".")[-1]) + assert param.requires_grad is True + assert param.dtype == torch.float32 + else: + assert param.requires_grad is False + assert param.dtype == torch.float16 + + assert linear_modules == {"q_proj", "v_proj"} + + def test_lora_train_all_modules(): model_args, _, _, finetuning_args, _ = get_train_args({"lora_target": "all", **TRAIN_ARGS}) tokenizer_module = load_tokenizer(model_args) model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) + linear_modules = set() for name, param in model.named_parameters(): if any(module in name for module in ["lora_A", "lora_B"]): @@ -90,6 +113,7 @@ def test_lora_train_extra_modules(): ) tokenizer_module = load_tokenizer(model_args) model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) + extra_modules = set() for name, param in model.named_parameters(): if any(module in name for 
module in ["lora_A", "lora_B"]): @@ -113,7 +137,9 @@ def test_lora_train_old_adapters(): tokenizer_module = load_tokenizer(model_args) model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) - base_model = AutoModelForCausalLM.from_pretrained(TINY_LLAMA, torch_dtype=model.dtype, device_map=model.device) + base_model = AutoModelForCausalLM.from_pretrained( + TINY_LLAMA, torch_dtype=torch.float16, device_map=get_current_device() + ) ref_model = PeftModel.from_pretrained(base_model, TINY_LLAMA_ADAPTER, is_trainable=True) for param in filter(lambda p: p.requires_grad, ref_model.parameters()): param.data = param.data.to(torch.float32) @@ -128,7 +154,9 @@ def test_lora_train_new_adapters(): tokenizer_module = load_tokenizer(model_args) model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) - base_model = AutoModelForCausalLM.from_pretrained(TINY_LLAMA, torch_dtype=model.dtype, device_map=model.device) + base_model = AutoModelForCausalLM.from_pretrained( + TINY_LLAMA, torch_dtype=torch.float16, device_map=get_current_device() + ) ref_model = PeftModel.from_pretrained(base_model, TINY_LLAMA_ADAPTER, is_trainable=True) for param in filter(lambda p: p.requires_grad, ref_model.parameters()): param.data = param.data.to(torch.float32) @@ -138,17 +166,31 @@ def test_lora_train_new_adapters(): ) +def test_lora_train_valuehead(): + model_args, _, finetuning_args, _ = get_infer_args(INFER_ARGS) + tokenizer_module = load_tokenizer(model_args) + model = load_model( + tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True, add_valuehead=True + ) + + ref_model: "AutoModelForCausalLMWithValueHead" = AutoModelForCausalLMWithValueHead.from_pretrained( + TINY_LLAMA_VALUEHEAD, torch_dtype=torch.float16, device_map=get_current_device() + ) + state_dict = model.state_dict() + ref_state_dict = ref_model.state_dict() + + assert torch.allclose(state_dict["v_head.summary.weight"], ref_state_dict["v_head.summary.weight"]) + assert torch.allclose(state_dict["v_head.summary.bias"], ref_state_dict["v_head.summary.bias"]) + + def test_lora_inference(): model_args, _, finetuning_args, _ = get_infer_args(INFER_ARGS) tokenizer_module = load_tokenizer(model_args) model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=False) - base_model = AutoModelForCausalLM.from_pretrained(TINY_LLAMA, torch_dtype=model.dtype, device_map=model.device) + base_model = AutoModelForCausalLM.from_pretrained( + TINY_LLAMA, torch_dtype=torch.float16, device_map=get_current_device() + ) ref_model: "LoraModel" = PeftModel.from_pretrained(base_model, TINY_LLAMA_ADAPTER) ref_model = ref_model.merge_and_unload() compare_model(model, ref_model) - - for name, param in model.named_parameters(): - assert param.requires_grad is False - assert param.dtype == torch.float16 - assert "lora" not in name From f9653ac29c36aa44284d07c102d2fe979b796391 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sat, 15 Jun 2024 20:06:17 +0800 Subject: [PATCH 154/162] use fixture Former-commit-id: 10761985691b9f934f7689c1f82aa6dd68febcca --- src/llamafactory/hparams/model_args.py | 2 +- tests/model/test_base.py | 15 ++++++++++----- tests/model/test_lora.py | 14 +++++++++++++- 3 files changed, 24 insertions(+), 7 deletions(-) diff --git a/src/llamafactory/hparams/model_args.py b/src/llamafactory/hparams/model_args.py index 0a91f0fa..53bdbdf2 100644 --- a/src/llamafactory/hparams/model_args.py +++ 
b/src/llamafactory/hparams/model_args.py @@ -163,7 +163,7 @@ class ModelArguments: ) infer_dtype: Literal["auto", "float16", "bfloat16", "float32"] = field( default="auto", - metadata={"help": "Data type for model weights and activations at inference."} + metadata={"help": "Data type for model weights and activations at inference."}, ) hf_hub_token: Optional[str] = field( default=None, diff --git a/tests/model/test_base.py b/tests/model/test_base.py index ee0b2886..2deedde2 100644 --- a/tests/model/test_base.py +++ b/tests/model/test_base.py @@ -15,6 +15,7 @@ import os from typing import Dict +import pytest import torch from transformers import AutoModelForCausalLM from trl import AutoModelForCausalLMWithValueHead @@ -43,10 +44,14 @@ def compare_model(model_a: "torch.nn.Module", model_b: "torch.nn.Module"): assert torch.allclose(state_dict_a[name], state_dict_b[name]) is True -def post_init(self: "AutoModelForCausalLMWithValueHead", state_dict: Dict[str, "torch.Tensor"]): - state_dict = {k[7:]: state_dict[k] for k in state_dict.keys() if k.startswith("v_head.")} - self.v_head.load_state_dict(state_dict, strict=False) - del state_dict +@pytest.fixture +def fix_valuehead_cpu_loading(): + def post_init(self: "AutoModelForCausalLMWithValueHead", state_dict: Dict[str, "torch.Tensor"]): + state_dict = {k[7:]: state_dict[k] for k in state_dict.keys() if k.startswith("v_head.")} + self.v_head.load_state_dict(state_dict, strict=False) + del state_dict + + AutoModelForCausalLMWithValueHead.post_init = post_init def test_base(): @@ -60,8 +65,8 @@ def test_base(): compare_model(model, ref_model) +@pytest.mark.usefixtures("fix_valuehead_cpu_loading") def test_valuehead(): - AutoModelForCausalLMWithValueHead.post_init = post_init # patch for CPU test model_args, _, finetuning_args, _ = get_infer_args(INFER_ARGS) tokenizer_module = load_tokenizer(model_args) model = load_model( diff --git a/tests/model/test_lora.py b/tests/model/test_lora.py index 4923c8ad..fe032332 100644 --- a/tests/model/test_lora.py +++ b/tests/model/test_lora.py @@ -13,8 +13,9 @@ # limitations under the License. 
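+# These tests load tiny LoRA models through LLaMA-Factory's `load_model` and compare
+# their weights and trainable parameters against reference adapters built directly
+# with PEFT and TRL.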
import os -from typing import Sequence +from typing import Dict, Sequence +import pytest import torch from peft import LoraModel, PeftModel from transformers import AutoModelForCausalLM @@ -71,6 +72,16 @@ def compare_model(model_a: "torch.nn.Module", model_b: "torch.nn.Module", diff_k assert torch.allclose(state_dict_a[name], state_dict_b[name]) is True +@pytest.fixture +def fix_valuehead_cpu_loading(): + def post_init(self: "AutoModelForCausalLMWithValueHead", state_dict: Dict[str, "torch.Tensor"]): + state_dict = {k[7:]: state_dict[k] for k in state_dict.keys() if k.startswith("v_head.")} + self.v_head.load_state_dict(state_dict, strict=False) + del state_dict + + AutoModelForCausalLMWithValueHead.post_init = post_init + + def test_lora_train_qv_modules(): model_args, _, _, finetuning_args, _ = get_train_args({"lora_target": "q_proj,v_proj", **TRAIN_ARGS}) tokenizer_module = load_tokenizer(model_args) @@ -166,6 +177,7 @@ def test_lora_train_new_adapters(): ) +@pytest.mark.usefixtures("fix_valuehead_cpu_loading") def test_lora_train_valuehead(): model_args, _, finetuning_args, _ = get_infer_args(INFER_ARGS) tokenizer_module = load_tokenizer(model_args) From 640372cb66f0c06e5bb3cd02f322fdaf0f67219b Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sun, 16 Jun 2024 01:06:41 +0800 Subject: [PATCH 155/162] tiny fix Former-commit-id: f7f440986b0ae3b38ea9f2da80789629d4f79ea1 --- scripts/cal_flops.py | 2 +- scripts/cal_lr.py | 2 +- scripts/llama_pro.py | 2 +- src/llamafactory/data/processors/pretrain.py | 2 +- src/llamafactory/eval/evaluator.py | 2 +- src/llamafactory/extras/packages.py | 2 +- src/llamafactory/hparams/data_args.py | 2 +- src/llamafactory/model/model_utils/checkpointing.py | 2 +- src/llamafactory/model/model_utils/longlora.py | 6 ++++-- src/llamafactory/model/model_utils/quantization.py | 2 +- src/llamafactory/model/model_utils/visual.py | 2 +- src/llamafactory/train/dpo/workflow.py | 2 +- src/llamafactory/train/kto/trainer.py | 4 ++-- src/llamafactory/train/kto/workflow.py | 2 +- src/llamafactory/train/ppo/trainer.py | 2 +- src/llamafactory/train/ppo/workflow.py | 2 +- src/llamafactory/train/pt/workflow.py | 2 +- src/llamafactory/train/rm/trainer.py | 4 ++-- src/llamafactory/train/rm/workflow.py | 2 +- src/llamafactory/train/sft/metric.py | 2 +- src/llamafactory/train/sft/workflow.py | 2 +- tests/model/test_base.py | 2 +- 22 files changed, 27 insertions(+), 25 deletions(-) diff --git a/scripts/cal_flops.py b/scripts/cal_flops.py index 627b5534..32526d89 100644 --- a/scripts/cal_flops.py +++ b/scripts/cal_flops.py @@ -1,7 +1,7 @@ # coding=utf-8 # Copyright 2024 Microsoft Corporation and the LlamaFactory team. # -# This code is inspired by Microsoft's DeepSpeed library. +# This code is inspired by the Microsoft's DeepSpeed library. # https://www.deepspeed.ai/tutorials/flops-profiler/ # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/scripts/cal_lr.py b/scripts/cal_lr.py index ff21d27c..ad6992cb 100644 --- a/scripts/cal_lr.py +++ b/scripts/cal_lr.py @@ -1,7 +1,7 @@ # coding=utf-8 # Copyright 2024 imoneoi and the LlamaFactory team. # -# This code is inspired by imoneoi's OpenChat library. +# This code is inspired by the imoneoi's OpenChat library. 
# https://github.com/imoneoi/openchat/blob/3.6.0/ochat/training_deepspeed/train.py # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/scripts/llama_pro.py b/scripts/llama_pro.py index f315335a..395375ef 100644 --- a/scripts/llama_pro.py +++ b/scripts/llama_pro.py @@ -1,7 +1,7 @@ # coding=utf-8 # Copyright 2024 Tencent Inc. and the LlamaFactory team. # -# This code is inspired by Tencent's LLaMA-Pro library. +# This code is inspired by the Tencent's LLaMA-Pro library. # https://github.com/TencentARC/LLaMA-Pro/blob/main/scripts/block_expansion.py # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/src/llamafactory/data/processors/pretrain.py b/src/llamafactory/data/processors/pretrain.py index fb4c840c..67d6009b 100644 --- a/src/llamafactory/data/processors/pretrain.py +++ b/src/llamafactory/data/processors/pretrain.py @@ -1,6 +1,6 @@ # Copyright 2024 HuggingFace Inc. and the LlamaFactory team. # -# This code is inspired by HuggingFace's transformers library. +# This code is inspired by the HuggingFace's transformers library. # https://github.com/huggingface/transformers/blob/v4.40.0/examples/pytorch/language-modeling/run_clm.py # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/src/llamafactory/eval/evaluator.py b/src/llamafactory/eval/evaluator.py index bbd7a44b..d3140793 100644 --- a/src/llamafactory/eval/evaluator.py +++ b/src/llamafactory/eval/evaluator.py @@ -1,6 +1,6 @@ # Copyright 2024 the LlamaFactory team. # -# This code is inspired by Dan's test library. +# This code is inspired by the Dan's test library. # https://github.com/hendrycks/test/blob/master/evaluate_flan.py # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/src/llamafactory/extras/packages.py b/src/llamafactory/extras/packages.py index 35f546ab..0a84a293 100644 --- a/src/llamafactory/extras/packages.py +++ b/src/llamafactory/extras/packages.py @@ -1,6 +1,6 @@ # Copyright 2024 HuggingFace Inc. and the LlamaFactory team. # -# This code is inspired by HuggingFace's transformers library. +# This code is inspired by the HuggingFace's transformers library. # https://github.com/huggingface/transformers/blob/v4.40.0/src/transformers/utils/import_utils.py # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/src/llamafactory/hparams/data_args.py b/src/llamafactory/hparams/data_args.py index 95284766..39290e21 100644 --- a/src/llamafactory/hparams/data_args.py +++ b/src/llamafactory/hparams/data_args.py @@ -1,6 +1,6 @@ # Copyright 2024 HuggingFace Inc. and the LlamaFactory team. # -# This code is inspired by HuggingFace's transformers library. +# This code is inspired by the HuggingFace's transformers library. # https://github.com/huggingface/transformers/blob/v4.40.0/examples/pytorch/language-modeling/run_clm.py # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/src/llamafactory/model/model_utils/checkpointing.py b/src/llamafactory/model/model_utils/checkpointing.py index e4e84b12..f5314125 100644 --- a/src/llamafactory/model/model_utils/checkpointing.py +++ b/src/llamafactory/model/model_utils/checkpointing.py @@ -1,6 +1,6 @@ # Copyright 2024 HuggingFace Inc. and the LlamaFactory team. # -# This code is inspired by HuggingFace's Transformers and PEFT library. +# This code is inspired by the HuggingFace's Transformers and PEFT library. 
# https://github.com/huggingface/transformers/blob/v4.40.0/src/transformers/modeling_utils.py # https://github.com/huggingface/peft/blob/v0.10.0/src/peft/utils/other.py # diff --git a/src/llamafactory/model/model_utils/longlora.py b/src/llamafactory/model/model_utils/longlora.py index 7af43dcf..af30bd50 100644 --- a/src/llamafactory/model/model_utils/longlora.py +++ b/src/llamafactory/model/model_utils/longlora.py @@ -1,7 +1,9 @@ -# Copyright 2024 EleutherAI, HuggingFace Inc., and the LlamaFactory team. +# Copyright 2024 EleutherAI, HuggingFace Inc., Yukang Chen, and the LlamaFactory team. # -# This code is based on the EleutherAI's GPT-NeoX and HuggingFace's Transformers libraries. +# This code is based on the EleutherAI's GPT-NeoX and the HuggingFace's Transformers libraries. # https://github.com/huggingface/transformers/blob/v4.40.0/src/transformers/models/llama/modeling_llama.py +# This code is also inspired by the original LongLoRA implementation. +# https://github.com/dvlab-research/LongLoRA/blob/main/llama_attn_replace.py # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/model/model_utils/quantization.py b/src/llamafactory/model/model_utils/quantization.py index 9e6b9da4..0a0fca34 100644 --- a/src/llamafactory/model/model_utils/quantization.py +++ b/src/llamafactory/model/model_utils/quantization.py @@ -1,6 +1,6 @@ # Copyright 2024 HuggingFace Inc. and the LlamaFactory team. # -# This code is inspired by HuggingFace's Optimum library. +# This code is inspired by the HuggingFace's Optimum library. # https://github.com/huggingface/optimum/blob/v1.20.0/optimum/gptq/data.py # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/src/llamafactory/model/model_utils/visual.py b/src/llamafactory/model/model_utils/visual.py index 37237485..700bf470 100644 --- a/src/llamafactory/model/model_utils/visual.py +++ b/src/llamafactory/model/model_utils/visual.py @@ -1,6 +1,6 @@ # Copyright 2024 HuggingFace Inc. and the LlamaFactory team. # -# This code is inspired by HuggingFace's TRL library. +# This code is inspired by the HuggingFace's Transformers library. # https://github.com/huggingface/transformers/blob/v4.40.0/src/transformers/models/llava/modeling_llava.py # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/src/llamafactory/train/dpo/workflow.py b/src/llamafactory/train/dpo/workflow.py index 8c3c2eb1..431b5285 100644 --- a/src/llamafactory/train/dpo/workflow.py +++ b/src/llamafactory/train/dpo/workflow.py @@ -1,6 +1,6 @@ # Copyright 2024 HuggingFace Inc. and the LlamaFactory team. # -# This code is inspired by HuggingFace's TRL library. +# This code is inspired by the HuggingFace's TRL library. # https://github.com/huggingface/trl/blob/v0.8.0/examples/scripts/dpo.py # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/src/llamafactory/train/kto/trainer.py b/src/llamafactory/train/kto/trainer.py index 6e96fc0c..91d68975 100644 --- a/src/llamafactory/train/kto/trainer.py +++ b/src/llamafactory/train/kto/trainer.py @@ -1,6 +1,6 @@ # Copyright 2024 HuggingFace Inc. and the LlamaFactory team. # -# This code is inspired by HuggingFace's TRL library. +# This code is inspired by the HuggingFace's TRL library. 
# https://github.com/huggingface/trl/blob/v0.8.0/trl/trainer/kto_trainer.py # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -114,8 +114,8 @@ class CustomKTOTrainer(KTOTrainer): def _save(self, output_dir: Optional[str] = None, state_dict: Optional[Dict[str, "torch.Tensor"]] = None) -> None: super()._save(output_dir, state_dict) + output_dir = output_dir if output_dir is not None else self.args.output_dir if self.processor is not None: - output_dir = output_dir if output_dir is not None else self.args.output_dir getattr(self.processor, "image_processor").save_pretrained(output_dir) def forward( diff --git a/src/llamafactory/train/kto/workflow.py b/src/llamafactory/train/kto/workflow.py index 8a7af6d4..8182a184 100644 --- a/src/llamafactory/train/kto/workflow.py +++ b/src/llamafactory/train/kto/workflow.py @@ -1,6 +1,6 @@ # Copyright 2024 HuggingFace Inc. and the LlamaFactory team. # -# This code is inspired by HuggingFace's TRL library. +# This code is inspired by the HuggingFace's TRL library. # https://github.com/huggingface/trl/blob/v0.8.0/examples/scripts/kto.py # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/src/llamafactory/train/ppo/trainer.py b/src/llamafactory/train/ppo/trainer.py index 61420f3b..df4a37be 100644 --- a/src/llamafactory/train/ppo/trainer.py +++ b/src/llamafactory/train/ppo/trainer.py @@ -1,6 +1,6 @@ # Copyright 2024 HuggingFace Inc. and the LlamaFactory team. # -# This code is inspired by HuggingFace's TRL library. +# This code is inspired by the HuggingFace's TRL library. # https://github.com/huggingface/trl/blob/v0.8.0/trl/trainer/ppo_trainer.py # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/src/llamafactory/train/ppo/workflow.py b/src/llamafactory/train/ppo/workflow.py index 891d539a..4f4d2820 100644 --- a/src/llamafactory/train/ppo/workflow.py +++ b/src/llamafactory/train/ppo/workflow.py @@ -1,6 +1,6 @@ # Copyright 2024 HuggingFace Inc. and the LlamaFactory team. # -# This code is inspired by HuggingFace's TRL library. +# This code is inspired by the HuggingFace's TRL library. # https://github.com/huggingface/trl/blob/v0.8.0/examples/scripts/ppo.py # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/src/llamafactory/train/pt/workflow.py b/src/llamafactory/train/pt/workflow.py index f1df314e..b84a0e7d 100644 --- a/src/llamafactory/train/pt/workflow.py +++ b/src/llamafactory/train/pt/workflow.py @@ -1,6 +1,6 @@ # Copyright 2024 HuggingFace Inc. and the LlamaFactory team. # -# This code is inspired by HuggingFace's transformers library. +# This code is inspired by the HuggingFace's transformers library. # https://github.com/huggingface/transformers/blob/v4.40.0/examples/pytorch/language-modeling/run_clm.py # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/src/llamafactory/train/rm/trainer.py b/src/llamafactory/train/rm/trainer.py index 14695d7d..7f91e5f5 100644 --- a/src/llamafactory/train/rm/trainer.py +++ b/src/llamafactory/train/rm/trainer.py @@ -1,6 +1,6 @@ # Copyright 2024 the LlamaFactory team. # -# This code is inspired by CarperAI's trlx library. +# This code is inspired by the CarperAI's trlx library. 
# https://github.com/CarperAI/trlx/blob/v0.7.0/examples/summarize_rlhf/reward_model/reward_model.py # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -89,8 +89,8 @@ class PairwiseTrainer(Trainer): def _save(self, output_dir: Optional[str] = None, state_dict: Optional[Dict[str, "torch.Tensor"]] = None) -> None: super()._save(output_dir, state_dict) + output_dir = output_dir if output_dir is not None else self.args.output_dir if self.processor is not None: - output_dir = output_dir if output_dir is not None else self.args.output_dir getattr(self.processor, "image_processor").save_pretrained(output_dir) def compute_loss( diff --git a/src/llamafactory/train/rm/workflow.py b/src/llamafactory/train/rm/workflow.py index 75c0a2bf..6f24e964 100644 --- a/src/llamafactory/train/rm/workflow.py +++ b/src/llamafactory/train/rm/workflow.py @@ -1,6 +1,6 @@ # Copyright 2024 the LlamaFactory team. # -# This code is inspired by CarperAI's trlx library. +# This code is inspired by the CarperAI's trlx library. # https://github.com/CarperAI/trlx/blob/v0.7.0/examples/summarize_rlhf/reward_model/train_reward_model_gptj.py # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/src/llamafactory/train/sft/metric.py b/src/llamafactory/train/sft/metric.py index d2147c22..95bfcb69 100644 --- a/src/llamafactory/train/sft/metric.py +++ b/src/llamafactory/train/sft/metric.py @@ -1,6 +1,6 @@ # Copyright 2024 HuggingFace Inc., THUDM, and the LlamaFactory team. # -# This code is inspired by HuggingFace's transformers library and THUDM's ChatGLM implementation. +# This code is inspired by the HuggingFace's transformers library and the THUDM's ChatGLM implementation. # https://github.com/huggingface/transformers/blob/v4.40.0/examples/pytorch/summarization/run_summarization.py # https://github.com/THUDM/ChatGLM-6B/blob/main/ptuning/main.py # diff --git a/src/llamafactory/train/sft/workflow.py b/src/llamafactory/train/sft/workflow.py index dfc71cfb..885bc7ac 100644 --- a/src/llamafactory/train/sft/workflow.py +++ b/src/llamafactory/train/sft/workflow.py @@ -1,6 +1,6 @@ # Copyright 2024 HuggingFace Inc. and the LlamaFactory team. # -# This code is inspired by HuggingFace's transformers library. +# This code is inspired by the HuggingFace's transformers library. 
# https://github.com/huggingface/transformers/blob/v4.40.0/examples/pytorch/summarization/run_summarization.py # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tests/model/test_base.py b/tests/model/test_base.py index 2deedde2..954492ef 100644 --- a/tests/model/test_base.py +++ b/tests/model/test_base.py @@ -41,7 +41,7 @@ def compare_model(model_a: "torch.nn.Module", model_b: "torch.nn.Module"): state_dict_b = model_b.state_dict() assert set(state_dict_a.keys()) == set(state_dict_b.keys()) for name in state_dict_a.keys(): - assert torch.allclose(state_dict_a[name], state_dict_b[name]) is True + assert torch.allclose(state_dict_a[name], state_dict_b[name]) @pytest.fixture From 0b571f84b457bc6dd35fffa55e5f4658a210f743 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sun, 16 Jun 2024 01:08:12 +0800 Subject: [PATCH 156/162] support pissa Former-commit-id: ef8e45f2eaf466c54e9a671512a2974575677b08 --- README.md | 6 +- README_zh.md | 6 +- examples/README.md | 6 ++ examples/README_zh.md | 6 ++ examples/extras/pissa/llama3_lora_sft.yaml | 42 ++++++++++ scripts/loftq_init.py | 72 +++++++---------- scripts/pissa_init.py | 79 ++++++++++++++++++ src/llamafactory/hparams/finetuning_args.py | 20 ++++- src/llamafactory/hparams/model_args.py | 8 +- src/llamafactory/hparams/parser.py | 5 +- src/llamafactory/model/adapter.py | 25 ++++-- src/llamafactory/train/dpo/trainer.py | 13 ++- src/llamafactory/train/pt/trainer.py | 12 ++- src/llamafactory/train/sft/trainer.py | 13 ++- src/llamafactory/train/trainer_utils.py | 54 ++++++++++++- src/llamafactory/webui/components/train.py | 9 ++- src/llamafactory/webui/locales.py | 14 ++++ src/llamafactory/webui/runner.py | 2 + tests/model/test_pissa.py | 90 +++++++++++++++++++++ 19 files changed, 406 insertions(+), 76 deletions(-) create mode 100644 examples/extras/pissa/llama3_lora_sft.yaml create mode 100644 scripts/pissa_init.py create mode 100644 tests/model/test_pissa.py diff --git a/README.md b/README.md index cae79694..cb9a7222 100644 --- a/README.md +++ b/README.md @@ -49,7 +49,7 @@ Choose your path: - **Various models**: LLaMA, LLaVA, Mistral, Mixtral-MoE, Qwen, Yi, Gemma, Baichuan, ChatGLM, Phi, etc. - **Integrated methods**: (Continuous) pre-training, (multimodal) supervised fine-tuning, reward modeling, PPO, DPO, KTO, ORPO, etc. - **Scalable resources**: 32-bit full-tuning, 16-bit freeze-tuning, 16-bit LoRA and 2/4/8-bit QLoRA via AQLM/AWQ/GPTQ/LLM.int8. -- **Advanced algorithms**: GaLore, BAdam, DoRA, LongLoRA, LLaMA Pro, Mixture-of-Depths, LoRA+, LoftQ and Agent tuning. +- **Advanced algorithms**: GaLore, BAdam, DoRA, LongLoRA, LLaMA Pro, Mixture-of-Depths, LoRA+, LoftQ, PiSSA and Agent tuning. - **Practical tricks**: FlashAttention-2, Unsloth, RoPE scaling, NEFTune and rsLoRA. - **Experiment monitors**: LlamaBoard, TensorBoard, Wandb, MLflow, etc. - **Faster inference**: OpenAI-style API, Gradio UI and CLI with vLLM worker. @@ -71,9 +71,9 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/ ## Changelog -[24/06/07] We supported fine-tuning the **[Qwen-2](https://qwenlm.github.io/blog/qwen2/)** series models. +[24/06/16] We support **[PiSSA](https://arxiv.org/abs/2404.02948)** algorithm. See [examples](examples/README.md) for usage. -[24/06/05] We supported fine-tuning the **[GLM-4-9B/GLM-4-9B-Chat](https://github.com/THUDM/GLM-4)** models. 
+[24/06/07] We supported fine-tuning the **[Qwen2](https://qwenlm.github.io/blog/qwen2/)** and **[GLM-4](https://github.com/THUDM/GLM-4)** models. [24/05/26] We supported **[SimPO](https://arxiv.org/abs/2405.14734)** algorithm for preference learning. See [examples](examples/README.md) for usage. diff --git a/README_zh.md b/README_zh.md index af3ff8f0..5c005f30 100644 --- a/README_zh.md +++ b/README_zh.md @@ -49,7 +49,7 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/ec36a9dd-37f4-4f72-81bd - **多种模型**:LLaMA、LLaVA、Mistral、Mixtral-MoE、Qwen、Yi、Gemma、Baichuan、ChatGLM、Phi 等等。 - **集成方法**:(增量)预训练、(多模态)指令监督微调、奖励模型训练、PPO 训练、DPO 训练、KTO 训练、ORPO 训练等等。 - **多种精度**:32 比特全参数微调、16 比特冻结微调、16 比特 LoRA 微调和基于 AQLM/AWQ/GPTQ/LLM.int8 的 2/4/8 比特 QLoRA 微调。 -- **先进算法**:GaLore、BAdam、DoRA、LongLoRA、LLaMA Pro、Mixture-of-Depths、LoRA+、LoftQ 和 Agent 微调。 +- **先进算法**:GaLore、BAdam、DoRA、LongLoRA、LLaMA Pro、Mixture-of-Depths、LoRA+、LoftQ、PiSSA 和 Agent 微调。 - **实用技巧**:FlashAttention-2、Unsloth、RoPE scaling、NEFTune 和 rsLoRA。 - **实验监控**:LlamaBoard、TensorBoard、Wandb、MLflow 等等。 - **极速推理**:基于 vLLM 的 OpenAI 风格 API、浏览器界面和命令行接口。 @@ -71,9 +71,9 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/ec36a9dd-37f4-4f72-81bd ## 更新日志 -[24/06/07] 我们支持了 **[Qwen-2](https://qwenlm.github.io/blog/qwen2/)** 系列模型的微调。 +[24/06/16] 我们支持了 **[PiSSA](https://arxiv.org/abs/2404.02948)** 算法。详细用法请参照 [examples](examples/README_zh.md)。 -[24/06/05] 我们支持了 **[GLM-4-9B/GLM-4-9B-Chat](https://github.com/THUDM/GLM-4)** 模型的微调。 +[24/06/07] 我们支持了 **[Qwen2](https://qwenlm.github.io/blog/qwen2/)** 和 **[GLM-4](https://github.com/THUDM/GLM-4)** 模型的微调。 [24/05/26] 我们支持了 **[SimPO](https://arxiv.org/abs/2405.14734)** 偏好对齐算法。详细用法请参照 [examples](examples/README_zh.md)。 diff --git a/examples/README.md b/examples/README.md index a6d78936..902d26b1 100644 --- a/examples/README.md +++ b/examples/README.md @@ -213,3 +213,9 @@ llamafactory-cli train examples/extras/llama_pro/llama3_freeze_sft.yaml ```bash bash examples/extras/fsdp_qlora/single_node.sh ``` + +#### PiSSA Fine-Tuning + +```bash +llamafactory-cli train examples/extras/pissa/llama3_lora_sft.yaml +``` diff --git a/examples/README_zh.md b/examples/README_zh.md index b6168a95..586e498c 100644 --- a/examples/README_zh.md +++ b/examples/README_zh.md @@ -213,3 +213,9 @@ llamafactory-cli train examples/extras/llama_pro/llama3_freeze_sft.yaml ```bash bash examples/extras/fsdp_qlora/single_node.sh ``` + +#### PiSSA 微调 + +```bash +llamafactory-cli train examples/extras/pissa/llama3_lora_sft.yaml +``` diff --git a/examples/extras/pissa/llama3_lora_sft.yaml b/examples/extras/pissa/llama3_lora_sft.yaml new file mode 100644 index 00000000..fd4b9f1d --- /dev/null +++ b/examples/extras/pissa/llama3_lora_sft.yaml @@ -0,0 +1,42 @@ +### model +model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct + +### method +stage: sft +do_train: true +finetuning_type: lora +lora_target: all +pissa_init: true +pissa_iter: 4 +pissa_convert: true + +### dataset +dataset: identity,alpaca_en_demo +template: llama3 +cutoff_len: 1024 +max_samples: 1000 +overwrite_cache: true +preprocessing_num_workers: 16 + +### output +output_dir: saves/llama3-8b/lora/sft +logging_steps: 10 +save_steps: 500 +plot_loss: true +overwrite_output_dir: true + +### train +per_device_train_batch_size: 1 +gradient_accumulation_steps: 8 +learning_rate: 1.0e-4 +num_train_epochs: 3.0 +lr_scheduler_type: cosine +warmup_ratio: 0.1 +fp16: true +ddp_timeout: 180000000 + +### eval +val_size: 0.1 +per_device_eval_batch_size: 1 +eval_strategy: steps +eval_steps: 500 diff --git 
a/scripts/loftq_init.py b/scripts/loftq_init.py index 159dea06..556f342c 100644
--- a/scripts/loftq_init.py
+++ b/scripts/loftq_init.py
@@ -1,7 +1,7 @@
# coding=utf-8
# Copyright 2024 HuggingFace Inc. and the LlamaFactory team.
#
-# This code is inspired by HuggingFace's PEFT library.
+# This code is based on the HuggingFace's PEFT library.
# https://github.com/huggingface/peft/blob/v0.10.0/examples/loftq_finetuning/quantize_save_load.py
#
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -17,11 +17,9 @@
# limitations under the License.

import os
-from typing import TYPE_CHECKING, Optional
+from typing import TYPE_CHECKING

import fire
-import torch
-import torch.nn as nn
from peft import LoftQConfig, LoraConfig, TaskType, get_peft_model
from transformers import AutoModelForCausalLM, AutoTokenizer


@@ -30,41 +28,20 @@
if TYPE_CHECKING:
    from transformers import PreTrainedModel


-class Shell(nn.Module):
-    def __init__(self, weight: torch.Tensor, bias: Optional[torch.Tensor] = None):
-        super().__init__()
-        self.weight = nn.Parameter(weight, requires_grad=False)
-        if bias is not None:
-            self.bias = nn.Parameter(bias, requires_grad=False)
-
-
-def unwrap_model(model: nn.Module, pattern=".base_layer") -> None:
-    for name in {k.split(pattern)[0] for k, _ in model.named_modules() if pattern in k}:
-        parent_name = ".".join(name.split(".")[:-1])
-        child_name = name.split(".")[-1]
-        parent_module = model.get_submodule(parent_name)
-        child_module = getattr(parent_module, child_name)
-        base_layer = getattr(child_module, "base_layer")
-        weight = getattr(base_layer, "weight", None)
-        bias = getattr(base_layer, "bias", None)
-        setattr(parent_module, child_name, Shell(weight, bias))
-
-    print("Model unwrapped.")
-
-
def quantize_loftq(
    model_name_or_path: str,
-    save_dir: str,
-    loftq_bits: Optional[int] = 4,
-    loftq_iter: Optional[int] = 1,
-    lora_alpha: Optional[int] = None,
-    lora_rank: Optional[int] = 16,
-    lora_target: Optional[str] = "q_proj,v_proj",
-    save_safetensors: Optional[bool] = False,
+    output_dir: str,
+    loftq_bits: int = 4,
+    loftq_iter: int = 4,
+    lora_alpha: int = None,
+    lora_rank: int = 16,
+    lora_dropout: float = 0,
+    lora_target: str = "q_proj,v_proj",
+    save_safetensors: bool = True,
):
    r"""
    Initializes LoRA weights with LoRA-fine-tuning-aware Quantization (LoftQ)
-    Usage: python loftq_init.py --model_name_or_path path_to_model --save_dir output_dir
+    Usage: python loftq_init.py --model_name_or_path path_to_model --output_dir output_dir
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(model_name_or_path, trust_remote_code=True, torch_dtype="auto")
@@ -74,25 +51,34 @@ def quantize_loftq(
        inference_mode=True,
        r=lora_rank,
        lora_alpha=lora_alpha if lora_alpha is not None else lora_rank * 2,
-        lora_dropout=0.1,
+        lora_dropout=lora_dropout,
        target_modules=[name.strip() for name in lora_target.split(",")],
        init_lora_weights="loftq",
        loftq_config=loftq_config,
    )

    # Init LoftQ model
-    lora_model = get_peft_model(model, lora_config)
-    base_model: "PreTrainedModel" = lora_model.get_base_model()
+    print("Initializing LoftQ weights, it may take several minutes, please wait patiently.")
+    peft_model = get_peft_model(model, lora_config)
+    loftq_dir = os.path.join(output_dir, "loftq_init")

    # Save LoftQ model
-    setattr(lora_model.base_model.peft_config["default"], "base_model_name_or_path", save_dir)
-    setattr(lora_model.base_model.peft_config["default"], "init_lora_weights", True)
-
lora_model.save_pretrained(os.path.join(save_dir, "adapters"), safe_serialization=save_safetensors) + setattr(peft_model.peft_config["default"], "base_model_name_or_path", output_dir) + setattr(peft_model.peft_config["default"], "init_lora_weights", True) # don't apply loftq again + peft_model.save_pretrained(loftq_dir, safe_serialization=save_safetensors) + print("Adapter weights saved in {}".format(loftq_dir)) # Save base model - unwrap_model(base_model) - base_model.save_pretrained(save_dir, safe_serialization=save_safetensors) - tokenizer.save_pretrained(save_dir) + base_model: "PreTrainedModel" = peft_model.unload() + base_model.save_pretrained(output_dir, safe_serialization=save_safetensors) + tokenizer.save_pretrained(output_dir) + print("Model weights saved in {}".format(output_dir)) + + print("Fine-tune this model with:") + print("model_name_or_path: {}".format(output_dir)) + print("adapter_name_or_path: {}".format(loftq_dir)) + print("finetuning_type: lora") + print("quantization_bit: {}".format(loftq_bits)) if __name__ == "__main__": diff --git a/scripts/pissa_init.py b/scripts/pissa_init.py new file mode 100644 index 00000000..1b673c45 --- /dev/null +++ b/scripts/pissa_init.py @@ -0,0 +1,79 @@ +# coding=utf-8 +# Copyright 2024 HuggingFace Inc. and the LlamaFactory team. +# +# This code is based on the HuggingFace's PEFT library. +# https://github.com/huggingface/peft/blob/v0.11.0/examples/pissa_finetuning/preprocess.py +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
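+
+# PiSSA (Principal Singular values and Singular vectors Adaptation) initializes the
+# LoRA matrices from the top singular components of each target weight matrix, so
+# fine-tuning starts from the most informative directions of the pretrained weights
+# rather than from a random adapter. The script below keeps the residual weights as
+# the new base model in `output_dir` and saves the initial adapter to
+# `output_dir/pissa_init`.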
+ +import os +from typing import TYPE_CHECKING + +import fire +from peft import LoraConfig, TaskType, get_peft_model +from transformers import AutoModelForCausalLM, AutoTokenizer + + +if TYPE_CHECKING: + from transformers import PreTrainedModel + + +def quantize_pissa( + model_name_or_path: str, + output_dir: str, + pissa_iter: int = 4, + lora_alpha: int = None, + lora_rank: int = 16, + lora_dropout: float = 0, + lora_target: str = "q_proj,v_proj", + save_safetensors: bool = True, +): + r""" + Initializes LoRA weights with Principal Singular values and Singular vectors Adaptation (PiSSA) + Usage: python pissa_init.py --model_name_or_path path_to_model --output_dir output_dir + """ + tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, trust_remote_code=True) + model = AutoModelForCausalLM.from_pretrained(model_name_or_path, trust_remote_code=True, torch_dtype="auto") + lora_config = LoraConfig( + task_type=TaskType.CAUSAL_LM, + r=lora_rank, + lora_alpha=lora_alpha if lora_alpha is not None else lora_rank * 2, + lora_dropout=lora_dropout, + target_modules=[name.strip() for name in lora_target.split(",")], + init_lora_weights="pissa" if pissa_iter == -1 else "pissa_niter_{}".format(pissa_iter) + ) + + # Init PiSSA model + peft_model = get_peft_model(model, lora_config) + pissa_dir = os.path.join(output_dir, "pissa_init") + + # Save PiSSA model + setattr(peft_model.peft_config["default"], "init_lora_weights", True) # don't apply pissa again + peft_model.save_pretrained(pissa_dir, safe_serialization=save_safetensors) + print("Adapter weights saved in {}".format(pissa_dir)) + + # Save base model + base_model: "PreTrainedModel" = peft_model.unload() + base_model.save_pretrained(output_dir, safe_serialization=save_safetensors) + tokenizer.save_pretrained(output_dir) + print("Model weights saved in {}".format(output_dir)) + + print("Fine-tune this model with:") + print("model_name_or_path: {}".format(output_dir)) + print("adapter_name_or_path: {}".format(pissa_dir)) + print("finetuning_type: lora") + print("pissa_convert: true") + + +if __name__ == "__main__": + fire.Fire(quantize_pissa) diff --git a/src/llamafactory/hparams/finetuning_args.py b/src/llamafactory/hparams/finetuning_args.py index 52dc299e..1ef46eca 100644 --- a/src/llamafactory/hparams/finetuning_args.py +++ b/src/llamafactory/hparams/finetuning_args.py @@ -108,6 +108,18 @@ class LoraArguments: default=False, metadata={"help": "Whether or not to use the weight-decomposed lora method (DoRA)."}, ) + pissa_init: bool = field( + default=False, + metadata={"help": "Whether or not to initialize a PiSSA adapter."}, + ) + pissa_iter: int = field( + default=4, + metadata={"help": "The number of iteration steps performed by FSVD in PiSSA. 
Use -1 to disable it."}, + ) + pissa_convert: bool = field( + default=False, + metadata={"help": "Whether or not to convert the PiSSA adapter to a normal LoRA adapter."}, + ) create_new_adapter: bool = field( default=False, metadata={"help": "Whether or not to create a new adapter with randomly initialized weight."}, @@ -340,7 +352,7 @@ class FinetuningArguments(FreezeArguments, LoraArguments, RLHFArguments, GaloreA self.additional_target: Optional[List[str]] = split_arg(self.additional_target) self.galore_target: List[str] = split_arg(self.galore_target) self.freeze_vision_tower = self.freeze_vision_tower or self.train_mm_proj_only - self.use_ref_model = self.pref_loss not in ["orpo", "simpo"] + self.use_ref_model = (self.stage == "dpo" and self.pref_loss not in ["orpo", "simpo"]) assert self.finetuning_type in ["lora", "freeze", "full"], "Invalid fine-tuning method." assert self.ref_model_quantization_bit in [None, 8, 4], "We only accept 4-bit or 8-bit quantization." @@ -367,5 +379,11 @@ class FinetuningArguments(FreezeArguments, LoraArguments, RLHFArguments, GaloreA if self.loraplus_lr_ratio is not None and self.finetuning_type != "lora": raise ValueError("`loraplus_lr_ratio` is only valid for LoRA training.") + if self.pissa_convert and self.finetuning_type != "lora": + raise ValueError("`pissa_convert` is only valid for LoRA training.") + + if self.pissa_convert and (self.stage in ["rm", "ppo", "kto"] or self.use_ref_model): + raise ValueError("Cannot use PiSSA for current training stage.") + if self.train_mm_proj_only and self.finetuning_type != "full": raise ValueError("`train_mm_proj_only` is only valid for full training.") diff --git a/src/llamafactory/hparams/model_args.py b/src/llamafactory/hparams/model_args.py index 53bdbdf2..996e9130 100644 --- a/src/llamafactory/hparams/model_args.py +++ b/src/llamafactory/hparams/model_args.py @@ -1,6 +1,6 @@ # Copyright 2024 HuggingFace Inc. and the LlamaFactory team. # -# This code is inspired by HuggingFace's transformers library. +# This code is inspired by the HuggingFace's transformers library. # https://github.com/huggingface/transformers/blob/v4.40.0/examples/pytorch/language-modeling/run_clm.py # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -45,6 +45,10 @@ class ModelArguments: ) }, ) + adapter_folder: Optional[str] = field( + default=None, + metadata={"help": "The folder containing the adapter weights to load."}, + ) cache_dir: Optional[str] = field( default=None, metadata={"help": "Where to store the pre-trained models downloaded from huggingface.co or modelscope.cn."}, @@ -150,7 +154,7 @@ class ModelArguments: metadata={"help": "Whether or not to disable CUDA graph in the vLLM engine."}, ) vllm_max_lora_rank: int = field( - default=8, + default=32, metadata={"help": "Maximum rank of all LoRAs in the vLLM engine."}, ) offload_folder: str = field( diff --git a/src/llamafactory/hparams/parser.py b/src/llamafactory/hparams/parser.py index 1c57567c..31a805f6 100644 --- a/src/llamafactory/hparams/parser.py +++ b/src/llamafactory/hparams/parser.py @@ -1,6 +1,6 @@ # Copyright 2024 HuggingFace Inc. and the LlamaFactory team. # -# This code is inspired by HuggingFace's transformers library. +# This code is inspired by the HuggingFace's transformers library. 
# https://github.com/huggingface/transformers/blob/v4.40.0/examples/pytorch/language-modeling/run_clm.py
#
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -90,6 +90,9 @@ def _verify_model_args(model_args: "ModelArguments", finetuning_args: "Finetunin
        if finetuning_args.finetuning_type != "lora":
            raise ValueError("Quantization is only compatible with the LoRA method.")

+        if finetuning_args.pissa_init:
+            raise ValueError("Please use scripts/pissa_init.py for quantized PiSSA.")
+
        if model_args.resize_vocab:
            raise ValueError("Cannot resize embedding layers of a quantized model.")

diff --git a/src/llamafactory/model/adapter.py b/src/llamafactory/model/adapter.py
index dfa71525..a8f3a256 100644
--- a/src/llamafactory/model/adapter.py
+++ b/src/llamafactory/model/adapter.py
@@ -179,8 +179,16 @@ def _setup_lora_tuning(
    else:
        adapter_to_merge = model_args.adapter_name_or_path

+    init_kwargs = {
+        "subfolder": model_args.adapter_folder,
+        "offload_folder": model_args.offload_folder,
+        "cache_dir": model_args.cache_dir,
+        "revision": model_args.model_revision,
+        "token": model_args.hf_hub_token,
+    }
+
    for adapter in adapter_to_merge:
-        model: "LoraModel" = PeftModel.from_pretrained(model, adapter, offload_folder=model_args.offload_folder)
+        model: "LoraModel" = PeftModel.from_pretrained(model, adapter, **init_kwargs)
        model = model.merge_and_unload()

    if len(adapter_to_merge) > 0:
@@ -190,12 +198,7 @@ def _setup_lora_tuning(
        if model_args.use_unsloth:
            model = load_unsloth_peft_model(config, model_args, is_trainable=is_trainable)
        else:
-            model = PeftModel.from_pretrained(
-                model,
-                adapter_to_resume,
-                is_trainable=is_trainable,
-                offload_folder=model_args.offload_folder,
-            )
+            model = PeftModel.from_pretrained(model, adapter_to_resume, is_trainable=is_trainable, **init_kwargs)

        logger.info("Loaded adapter(s): {}".format(",".join(model_args.adapter_name_or_path)))

@@ -242,6 +245,14 @@ def _setup_lora_tuning(
        if model_args.use_unsloth:
            model = get_unsloth_peft_model(model, model_args, peft_kwargs)
        else:
+            if finetuning_args.pissa_init:
+                if finetuning_args.pissa_iter == -1:
+                    logger.info("Using PiSSA initialization.")
+                    peft_kwargs["init_lora_weights"] = "pissa"
+                else:
+                    logger.info("Using PiSSA initialization with FSVD steps {}.".format(finetuning_args.pissa_iter))
+                    peft_kwargs["init_lora_weights"] = "pissa_niter_{}".format(finetuning_args.pissa_iter)
+
            lora_config = LoraConfig(
                task_type=TaskType.CAUSAL_LM,
                inference_mode=False,
diff --git a/src/llamafactory/train/dpo/trainer.py b/src/llamafactory/train/dpo/trainer.py
index 475d08c3..9928d0bc 100644
--- a/src/llamafactory/train/dpo/trainer.py
+++ b/src/llamafactory/train/dpo/trainer.py
@@ -1,6 +1,6 @@
# Copyright 2024 HuggingFace Inc. and the LlamaFactory team.
#
-# This code is inspired by HuggingFace's TRL library.
+# This code is inspired by the HuggingFace's TRL library.
# https://github.com/huggingface/trl/blob/v0.8.0/trl/trainer/dpo_trainer.py
#
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -15,6 +15,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
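+# When `pissa_convert` is enabled, the trainer snapshots the initial PiSSA adapter to
+# `output_dir/pissa_init` before training and converts the trained adapter back into a
+# standard LoRA adapter each time the model is saved.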
+import os import warnings from collections import defaultdict from contextlib import nullcontext @@ -28,7 +29,7 @@ from trl import DPOTrainer from trl.trainer import disable_dropout_in_model from ...extras.constants import IGNORE_INDEX -from ..trainer_utils import create_custom_optimzer, create_custom_scheduler, get_batch_logps +from ..trainer_utils import convert_pissa_adapter, create_custom_optimzer, create_custom_scheduler, get_batch_logps if TYPE_CHECKING: @@ -91,6 +92,9 @@ class CustomDPOTrainer(DPOTrainer): self.ref_model = self.accelerator.prepare_model(self.ref_model, evaluation_mode=True) self.ref_model.eval() + if finetuning_args.pissa_convert: + self.save_model(os.path.join(self.args.output_dir, "pissa_init")) + if finetuning_args.use_badam: from badam import clip_grad_norm_for_sparse_tensor @@ -109,8 +113,11 @@ class CustomDPOTrainer(DPOTrainer): def _save(self, output_dir: Optional[str] = None, state_dict: Optional[Dict[str, "torch.Tensor"]] = None) -> None: super()._save(output_dir, state_dict) + output_dir = output_dir if output_dir is not None else self.args.output_dir + if self.finetuning_args.pissa_convert: + convert_pissa_adapter(output_dir, state_dict, self.accelerator, self.model, self.args) + if self.processor is not None: - output_dir = output_dir if output_dir is not None else self.args.output_dir getattr(self.processor, "image_processor").save_pretrained(output_dir) def odds_ratio_loss(self, chosen_logps: "torch.Tensor", rejected_logps: "torch.Tensor") -> "torch.Tensor": diff --git a/src/llamafactory/train/pt/trainer.py b/src/llamafactory/train/pt/trainer.py index 09729f2e..f9e04cb5 100644 --- a/src/llamafactory/train/pt/trainer.py +++ b/src/llamafactory/train/pt/trainer.py @@ -12,13 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os from types import MethodType from typing import TYPE_CHECKING, Dict, Optional from transformers import Trainer from ...extras.logging import get_logger -from ..trainer_utils import create_custom_optimzer, create_custom_scheduler +from ..trainer_utils import convert_pissa_adapter, create_custom_optimzer, create_custom_scheduler if TYPE_CHECKING: @@ -42,6 +43,10 @@ class CustomTrainer(Trainer): super().__init__(**kwargs) self.finetuning_args = finetuning_args self.processor = processor + + if finetuning_args.pissa_convert: + self.save_model(os.path.join(self.args.output_dir, "pissa_init")) + if finetuning_args.use_badam: from badam import clip_grad_norm_for_sparse_tensor @@ -60,6 +65,9 @@ class CustomTrainer(Trainer): def _save(self, output_dir: Optional[str] = None, state_dict: Optional[Dict[str, "torch.Tensor"]] = None) -> None: super()._save(output_dir, state_dict) + output_dir = output_dir if output_dir is not None else self.args.output_dir + if self.finetuning_args.pissa_convert: + convert_pissa_adapter(output_dir, state_dict, self.accelerator, self.model, self.args) + if self.processor is not None: - output_dir = output_dir if output_dir is not None else self.args.output_dir getattr(self.processor, "image_processor").save_pretrained(output_dir) diff --git a/src/llamafactory/train/sft/trainer.py b/src/llamafactory/train/sft/trainer.py index 6ab6914e..921e49ab 100644 --- a/src/llamafactory/train/sft/trainer.py +++ b/src/llamafactory/train/sft/trainer.py @@ -1,6 +1,6 @@ # Copyright 2024 HuggingFace Inc. and the LlamaFactory team. # -# This code is inspired by HuggingFace's transformers library. +# This code is inspired by the HuggingFace's transformers library. 
# https://github.com/huggingface/transformers/blob/v4.40.0/src/transformers/trainer_seq2seq.py # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -26,7 +26,7 @@ from transformers import Seq2SeqTrainer from ...extras.constants import IGNORE_INDEX from ...extras.logging import get_logger -from ..trainer_utils import create_custom_optimzer, create_custom_scheduler +from ..trainer_utils import convert_pissa_adapter, create_custom_optimzer, create_custom_scheduler if TYPE_CHECKING: @@ -51,6 +51,10 @@ class CustomSeq2SeqTrainer(Seq2SeqTrainer): super().__init__(**kwargs) self.finetuning_args = finetuning_args self.processor = processor + + if finetuning_args.pissa_convert: + self.save_model(os.path.join(self.args.output_dir, "pissa_init")) + if finetuning_args.use_badam: from badam import clip_grad_norm_for_sparse_tensor @@ -69,8 +73,11 @@ class CustomSeq2SeqTrainer(Seq2SeqTrainer): def _save(self, output_dir: Optional[str] = None, state_dict: Optional[Dict[str, "torch.Tensor"]] = None) -> None: super()._save(output_dir, state_dict) + output_dir = output_dir if output_dir is not None else self.args.output_dir + if self.finetuning_args.pissa_convert: + convert_pissa_adapter(output_dir, state_dict, self.accelerator, self.model, self.args) + if self.processor is not None: - output_dir = output_dir if output_dir is not None else self.args.output_dir getattr(self.processor, "image_processor").save_pretrained(output_dir) def prediction_step( diff --git a/src/llamafactory/train/trainer_utils.py b/src/llamafactory/train/trainer_utils.py index 5621d5df..2d6bab24 100644 --- a/src/llamafactory/train/trainer_utils.py +++ b/src/llamafactory/train/trainer_utils.py @@ -1,9 +1,9 @@ # Copyright 2024 HuggingFace Inc. and the LlamaFactory team. # -# This code is inspired by the GaLore's implementation: https://github.com/jiaweizzhao/GaLore -# and the LoRA+'s implementation: https://github.com/nikhil-ghosh-berkeley/loraplus -# and the BAdam's implementation: https://github.com/Ledzy/BAdam -# and the TRL's implementation: https://github.com/huggingface/trl +# This code is inspired by the original GaLore's implementation: https://github.com/jiaweizzhao/GaLore +# and the original LoRA+'s implementation: https://github.com/nikhil-ghosh-berkeley/loraplus +# and the original BAdam's implementation: https://github.com/Ledzy/BAdam +# and the HuggingFace's TRL library: https://github.com/huggingface/trl # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -17,9 +17,11 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os from typing import TYPE_CHECKING, Callable, Dict, List, Optional, Tuple, Union import torch +from peft import PeftModel from transformers import Trainer from transformers.optimization import get_scheduler from transformers.pytorch_utils import ALL_LAYERNORM_LAYERS @@ -37,6 +39,7 @@ if is_galore_available(): if TYPE_CHECKING: + from accelerate import Accelerator from transformers import PreTrainedModel, Seq2SeqTrainingArguments from trl import AutoModelForCausalLMWithValueHead @@ -171,6 +174,49 @@ def create_reward_model( return reward_model +def convert_pissa_adapter( + output_dir: str, + state_dict: Dict[str, "torch.Tensor"], + accelerator: "Accelerator", + model: "PreTrainedModel", + training_args: "Seq2SeqTrainingArguments", +) -> None: + r""" + Converts the PiSSA adapter to a LoRA adapter. 
+    """
+    pissa_init_dir = os.path.join(training_args.output_dir, "pissa_init")
+    pissa_backup_dir = os.path.join(output_dir, "pissa_backup")
+    if output_dir == pissa_init_dir:
+        logger.info("Initial PiSSA adapter will be saved at: {}.".format(pissa_init_dir))
+        unwrapped_model = accelerator.unwrap_model(model)
+        if isinstance(unwrapped_model, PeftModel):
+            init_lora_weights = getattr(unwrapped_model.peft_config["default"], "init_lora_weights")
+            setattr(unwrapped_model.peft_config["default"], "init_lora_weights", True)
+            unwrapped_model.save_pretrained(
+                output_dir,
+                state_dict=state_dict,
+                safe_serialization=training_args.save_safetensors,
+            )
+            setattr(unwrapped_model.peft_config["default"], "init_lora_weights", init_lora_weights)
+    elif output_dir == training_args.output_dir:  # at the end of training
+        logger.info("Converted PiSSA adapter will be saved at: {}.".format(output_dir))
+        unwrapped_model = accelerator.unwrap_model(model)
+        if isinstance(unwrapped_model, PeftModel):  # backup the pissa adapter for further use
+            unwrapped_model.save_pretrained(
+                pissa_backup_dir,
+                state_dict=state_dict,
+                safe_serialization=training_args.save_safetensors,
+            )
+            unwrapped_model.save_pretrained(
+                output_dir,
+                state_dict=state_dict,
+                safe_serialization=training_args.save_safetensors,
+                convert_pissa_to_lora=pissa_init_dir,
+            )
+            unwrapped_model.load_adapter(pissa_backup_dir, "default", is_trainable=True)
+            unwrapped_model.set_adapter("default")
+
+
 def _get_decay_parameter_names(model: "PreTrainedModel") -> List[str]:
     r"""
     Returns a list of names of parameters with weight decay. (weights in non-layernorm layers)
diff --git a/src/llamafactory/webui/components/train.py b/src/llamafactory/webui/components/train.py
index 673f6bf4..874f3c5e 100644
--- a/src/llamafactory/webui/components/train.py
+++ b/src/llamafactory/webui/components/train.py
@@ -163,10 +163,9 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]:
             create_new_adapter = gr.Checkbox()
 
         with gr.Row():
-            with gr.Column(scale=1):
-                use_rslora = gr.Checkbox()
-                use_dora = gr.Checkbox()
-
+            use_rslora = gr.Checkbox()
+            use_dora = gr.Checkbox()
+            use_pissa = gr.Checkbox()
             lora_target = gr.Textbox(scale=2)
             additional_target = gr.Textbox(scale=2)
 
@@ -179,6 +178,7 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]:
             create_new_adapter,
             use_rslora,
             use_dora,
+            use_pissa,
             lora_target,
             additional_target,
         }
@@ -193,6 +193,7 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]:
            create_new_adapter=create_new_adapter,
            use_rslora=use_rslora,
            use_dora=use_dora,
+           use_pissa=use_pissa,
            lora_target=lora_target,
            additional_target=additional_target,
        )
diff --git a/src/llamafactory/webui/locales.py b/src/llamafactory/webui/locales.py
index 427f01b8..8e8d6fce 100644
--- a/src/llamafactory/webui/locales.py
+++ b/src/llamafactory/webui/locales.py
@@ -732,6 +732,20 @@ LOCALES = {
             "info": "使用权重分解的 LoRA。",
         },
     },
+    "use_pissa": {
+        "en": {
+            "label": "Use PiSSA",
+            "info": "Use PiSSA method.",
+        },
+        "ru": {
+            "label": "используйте PiSSA",
+            "info": "Используйте метод PiSSA.",
+        },
+        "zh": {
+            "label": "使用 PiSSA",
+            "info": "使用 PiSSA 方法。",
+        },
+    },
     "lora_target": {
         "en": {
             "label": "LoRA modules (optional)",
diff --git a/src/llamafactory/webui/runner.py b/src/llamafactory/webui/runner.py
index 76982934..13dbba03 100644
--- a/src/llamafactory/webui/runner.py
+++ b/src/llamafactory/webui/runner.py
@@ -173,6 +173,8 @@ class Runner:
             args["create_new_adapter"] = get("train.create_new_adapter")
             args["use_rslora"] = 
get("train.use_rslora") args["use_dora"] = get("train.use_dora") + args["pissa_init"] = get("train.use_pissa") + args["pissa_convert"] = get("train.use_pissa") args["lora_target"] = get("train.lora_target") or "all" args["additional_target"] = get("train.additional_target") or None diff --git a/tests/model/test_pissa.py b/tests/model/test_pissa.py new file mode 100644 index 00000000..70c424fd --- /dev/null +++ b/tests/model/test_pissa.py @@ -0,0 +1,90 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +import torch +from peft import LoraModel, PeftModel +from transformers import AutoModelForCausalLM + +from llamafactory.extras.misc import get_current_device +from llamafactory.hparams import get_infer_args, get_train_args +from llamafactory.model import load_model, load_tokenizer + + +TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-Llama-3") + +TINY_LLAMA_PISSA = os.environ.get("TINY_LLAMA_ADAPTER", "llamafactory/tiny-random-Llama-3-pissa") + +TRAIN_ARGS = { + "model_name_or_path": TINY_LLAMA, + "stage": "sft", + "do_train": True, + "finetuning_type": "lora", + "pissa_init": True, + "pissa_iter": -1, + "dataset": "llamafactory/tiny-supervised-dataset", + "dataset_dir": "ONLINE", + "template": "llama3", + "cutoff_len": 1024, + "overwrite_cache": True, + "output_dir": "dummy_dir", + "overwrite_output_dir": True, + "fp16": True, +} + +INFER_ARGS = { + "model_name_or_path": TINY_LLAMA_PISSA, + "adapter_name_or_path": TINY_LLAMA_PISSA, + "adapter_folder": "pissa_init", + "finetuning_type": "lora", + "template": "llama3", + "infer_dtype": "float16", +} + + +def compare_model(model_a: "torch.nn.Module", model_b: "torch.nn.Module"): + state_dict_a = model_a.state_dict() + state_dict_b = model_b.state_dict() + assert set(state_dict_a.keys()) == set(state_dict_b.keys()) + for name in state_dict_a.keys(): + assert torch.allclose(state_dict_a[name], state_dict_b[name]) + + +def test_pissa_init(): + model_args, _, _, finetuning_args, _ = get_train_args(TRAIN_ARGS) + tokenizer_module = load_tokenizer(model_args) + model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) + + base_model = AutoModelForCausalLM.from_pretrained( + TINY_LLAMA_PISSA, torch_dtype=torch.float16, device_map=get_current_device() + ) + ref_model = PeftModel.from_pretrained(base_model, TINY_LLAMA_PISSA, subfolder="pissa_init", is_trainable=True) + for param in filter(lambda p: p.requires_grad, ref_model.parameters()): + param.data = param.data.to(torch.float32) + + compare_model(model, ref_model) + + +def test_pissa_inference(): + model_args, _, finetuning_args, _ = get_infer_args(INFER_ARGS) + tokenizer_module = load_tokenizer(model_args) + model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=False) + + base_model = AutoModelForCausalLM.from_pretrained( + TINY_LLAMA_PISSA, torch_dtype=torch.float16, device_map=get_current_device() + ) + ref_model: "LoraModel" = 
PeftModel.from_pretrained(base_model, TINY_LLAMA_PISSA, subfolder="pissa_init")
+    ref_model = ref_model.merge_and_unload()
+    compare_model(model, ref_model)

From bf46a8ca3e47057cb481077da778ca77f9f94a7f Mon Sep 17 00:00:00 2001
From: hiyouga <467089858@qq.com>
Date: Sun, 16 Jun 2024 01:21:06 +0800
Subject: [PATCH 157/162] increase tol

Former-commit-id: c29071445e34aed23123fdf883a4d877744a1b0e
---
 tests/model/test_pissa.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/model/test_pissa.py b/tests/model/test_pissa.py
index 70c424fd..41d02752 100644
--- a/tests/model/test_pissa.py
+++ b/tests/model/test_pissa.py
@@ -59,7 +59,7 @@ def compare_model(model_a: "torch.nn.Module", model_b: "torch.nn.Module"):
     state_dict_b = model_b.state_dict()
     assert set(state_dict_a.keys()) == set(state_dict_b.keys())
     for name in state_dict_a.keys():
-        assert torch.allclose(state_dict_a[name], state_dict_b[name])
+        assert torch.allclose(state_dict_a[name], state_dict_b[name], rtol=1e-3, atol=1e-3)
 
 
 def test_pissa_init():

From 1a66bb1438a5dfe86eac73662002a4ea8fee5207 Mon Sep 17 00:00:00 2001
From: hiyouga <467089858@qq.com>
Date: Sun, 16 Jun 2024 01:22:23 +0800
Subject: [PATCH 158/162] Update tests.yml

Former-commit-id: 82e83615a706293abbf266d11c57caedafdd4c5b
---
 .github/workflows/tests.yml | 2 --
 1 file changed, 2 deletions(-)

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 96092662..98bd9455 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -9,8 +9,6 @@ on:
       - "requirements.txt"
       - ".github/workflows/*.yml"
   pull_request:
-    types:
-      - review_requested
     branches:
       - main
     paths:

From d5a0cc93a26e45ed81178ad614259de03bdeb662 Mon Sep 17 00:00:00 2001
From: hiyouga <467089858@qq.com>
Date: Sun, 16 Jun 2024 01:38:44 +0800
Subject: [PATCH 159/162] fix tol

Former-commit-id: bdb54bcb477126687db789bd89f2df84e424a2a3
---
 src/llamafactory/train/trainer_utils.py | 1 +
 tests/model/test_base.py                | 2 +-
 tests/model/test_lora.py                | 4 ++--
 tests/model/test_pissa.py               | 2 +-
 4 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/llamafactory/train/trainer_utils.py b/src/llamafactory/train/trainer_utils.py
index 2d6bab24..9052c96d 100644
--- a/src/llamafactory/train/trainer_utils.py
+++ b/src/llamafactory/train/trainer_utils.py
@@ -213,6 +213,7 @@ def convert_pissa_adapter(
                 safe_serialization=training_args.save_safetensors,
                 convert_pissa_to_lora=pissa_init_dir,
             )
+            # TODO: PiSSA is unexpectedly applied to the model again here
             unwrapped_model.load_adapter(pissa_backup_dir, "default", is_trainable=True)
             unwrapped_model.set_adapter("default")
 
diff --git a/tests/model/test_base.py b/tests/model/test_base.py
index 954492ef..e1991b20 100644
--- a/tests/model/test_base.py
+++ b/tests/model/test_base.py
@@ -41,7 +41,7 @@ def compare_model(model_a: "torch.nn.Module", model_b: "torch.nn.Module"):
     state_dict_b = model_b.state_dict()
     assert set(state_dict_a.keys()) == set(state_dict_b.keys())
     for name in state_dict_a.keys():
-        assert torch.allclose(state_dict_a[name], state_dict_b[name])
+        assert torch.allclose(state_dict_a[name], state_dict_b[name], rtol=1e-4, atol=1e-5)
 
 
 @pytest.fixture
diff --git a/tests/model/test_lora.py b/tests/model/test_lora.py
index fe032332..64566fe8 100644
--- a/tests/model/test_lora.py
+++ b/tests/model/test_lora.py
@@ -67,9 +67,9 @@ def compare_model(model_a: "torch.nn.Module", model_b: "torch.nn.Module", diff_k
     assert set(state_dict_a.keys()) == set(state_dict_b.keys())
     for name in state_dict_a.keys():
         if any(key in name for key in 
diff_keys): - assert torch.allclose(state_dict_a[name], state_dict_b[name]) is False + assert torch.allclose(state_dict_a[name], state_dict_b[name], rtol=1e-4, atol=1e-5) is False else: - assert torch.allclose(state_dict_a[name], state_dict_b[name]) is True + assert torch.allclose(state_dict_a[name], state_dict_b[name], rtol=1e-4, atol=1e-5) is True @pytest.fixture diff --git a/tests/model/test_pissa.py b/tests/model/test_pissa.py index 41d02752..030310d0 100644 --- a/tests/model/test_pissa.py +++ b/tests/model/test_pissa.py @@ -59,7 +59,7 @@ def compare_model(model_a: "torch.nn.Module", model_b: "torch.nn.Module"): state_dict_b = model_b.state_dict() assert set(state_dict_a.keys()) == set(state_dict_b.keys()) for name in state_dict_a.keys(): - assert torch.allclose(state_dict_a[name], state_dict_b[name], rtol=1e-3, atol=1e-3) + assert torch.allclose(state_dict_a[name], state_dict_b[name], rtol=1e-4, atol=1e-5) def test_pissa_init(): From 18af6f0d3b757c4b15e55cb665baa7c05147d120 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sun, 16 Jun 2024 01:43:43 +0800 Subject: [PATCH 160/162] update pr template Former-commit-id: 0b7c29674fda10c0ac87e0a0c75990feabb5a3de --- .github/PULL_REQUEST_TEMPLATE.md | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index b31e9d19..d23d6be3 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -5,3 +5,4 @@ Fixes # (issue) ## Before submitting - [ ] Did you read the [contributor guideline](https://github.com/hiyouga/LLaMA-Factory/blob/main/.github/CONTRIBUTING.md)? +- [ ] Did you write any new necessary tests? From 6b729cccb9f231aa60944f734bd100d2a06cb83f Mon Sep 17 00:00:00 2001 From: hoshi-hiyouga Date: Sun, 16 Jun 2024 02:57:00 +0800 Subject: [PATCH 161/162] Update parser.py Former-commit-id: d10c97193d08bd368aca1a72f0d1d8a96c76765d --- src/llamafactory/hparams/parser.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/llamafactory/hparams/parser.py b/src/llamafactory/hparams/parser.py index 31a805f6..467fc43d 100644 --- a/src/llamafactory/hparams/parser.py +++ b/src/llamafactory/hparams/parser.py @@ -90,8 +90,8 @@ def _verify_model_args(model_args: "ModelArguments", finetuning_args: "Finetunin if finetuning_args.finetuning_type != "lora": raise ValueError("Quantization is only compatible with the LoRA method.") - if finetuning_args.use_pissa: - raise ValueError("Please use scripts/pissa_init.py for quantized PiSSA.") + if finetuning_args.pissa_init: + raise ValueError("Please use scripts/pissa_init.py to initialize PiSSA for a quantized model.") if model_args.resize_vocab: raise ValueError("Cannot resize embedding layers of a quantized model.") From aea2e4083fe4117a8b394b8ad649d4838dd31e54 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Mon, 17 Jun 2024 17:47:25 +0800 Subject: [PATCH 162/162] tiny fix Former-commit-id: 2289436567a7860d25d9da0afb39e4a3e5e83839 --- examples/README.md | 14 ++++---- examples/README_zh.md | 14 ++++---- .../fsdp_qlora/{single_node.sh => train.sh} | 0 scripts/llama_pro.py | 2 +- scripts/loftq_init.py | 2 +- scripts/pissa_init.py | 5 ++- tests/model/test_lora.py | 34 +++++++------------ 7 files changed, 32 insertions(+), 39 deletions(-) rename examples/extras/fsdp_qlora/{single_node.sh => train.sh} (100%) diff --git a/examples/README.md b/examples/README.md index 902d26b1..007a81ab 100644 --- a/examples/README.md +++ b/examples/README.md @@ -195,6 +195,12 @@ llamafactory-cli 
train examples/extras/badam/llama3_full_sft.yaml llamafactory-cli train examples/extras/loraplus/llama3_lora_sft.yaml ``` +#### PiSSA Fine-Tuning + +```bash +llamafactory-cli train examples/extras/pissa/llama3_lora_sft.yaml +``` + #### Mixture-of-Depths Fine-Tuning ```bash @@ -211,11 +217,5 @@ llamafactory-cli train examples/extras/llama_pro/llama3_freeze_sft.yaml #### FSDP+QLoRA Fine-Tuning ```bash -bash examples/extras/fsdp_qlora/single_node.sh -``` - -#### PiSSA Fine-Tuning - -```bash -llamafactory-cli train examples/extras/pissa/llama3_lora_sft.yaml +bash examples/extras/fsdp_qlora/train.sh ``` diff --git a/examples/README_zh.md b/examples/README_zh.md index 586e498c..b9d90f25 100644 --- a/examples/README_zh.md +++ b/examples/README_zh.md @@ -195,6 +195,12 @@ llamafactory-cli train examples/extras/badam/llama3_full_sft.yaml llamafactory-cli train examples/extras/loraplus/llama3_lora_sft.yaml ``` +#### PiSSA 微调 + +```bash +llamafactory-cli train examples/extras/pissa/llama3_lora_sft.yaml +``` + #### 深度混合微调 ```bash @@ -211,11 +217,5 @@ llamafactory-cli train examples/extras/llama_pro/llama3_freeze_sft.yaml #### FSDP+QLoRA 微调 ```bash -bash examples/extras/fsdp_qlora/single_node.sh -``` - -#### PiSSA 微调 - -```bash -llamafactory-cli train examples/extras/pissa/llama3_lora_sft.yaml +bash examples/extras/fsdp_qlora/train.sh ``` diff --git a/examples/extras/fsdp_qlora/single_node.sh b/examples/extras/fsdp_qlora/train.sh similarity index 100% rename from examples/extras/fsdp_qlora/single_node.sh rename to examples/extras/fsdp_qlora/train.sh diff --git a/scripts/llama_pro.py b/scripts/llama_pro.py index 395375ef..17bf6fc2 100644 --- a/scripts/llama_pro.py +++ b/scripts/llama_pro.py @@ -120,7 +120,7 @@ def block_expansion( json.dump(index, f, indent=2, sort_keys=True) print("Model weights saved in {}".format(output_dir)) - print("Fine-tune this model with:") + print("- Fine-tune this model with:") print("model_name_or_path: {}".format(output_dir)) print("finetuning_type: freeze") print("freeze_trainable_layers: {}".format(num_expand)) diff --git a/scripts/loftq_init.py b/scripts/loftq_init.py index 556f342c..b9506fa3 100644 --- a/scripts/loftq_init.py +++ b/scripts/loftq_init.py @@ -74,7 +74,7 @@ def quantize_loftq( tokenizer.save_pretrained(output_dir) print("Model weights saved in {}".format(output_dir)) - print("Fine-tune this model with:") + print("- Fine-tune this model with:") print("model_name_or_path: {}".format(output_dir)) print("adapter_name_or_path: {}".format(loftq_dir)) print("finetuning_type: lora") diff --git a/scripts/pissa_init.py b/scripts/pissa_init.py index 1b673c45..10b81efc 100644 --- a/scripts/pissa_init.py +++ b/scripts/pissa_init.py @@ -68,11 +68,14 @@ def quantize_pissa( tokenizer.save_pretrained(output_dir) print("Model weights saved in {}".format(output_dir)) - print("Fine-tune this model with:") + print("- Fine-tune this model with:") print("model_name_or_path: {}".format(output_dir)) print("adapter_name_or_path: {}".format(pissa_dir)) print("finetuning_type: lora") + print("pissa_init: false") print("pissa_convert: true") + print("- and optionally with:") + print("quantization_bit: 4") if __name__ == "__main__": diff --git a/tests/model/test_lora.py b/tests/model/test_lora.py index 64566fe8..630e5f75 100644 --- a/tests/model/test_lora.py +++ b/tests/model/test_lora.py @@ -56,9 +56,15 @@ INFER_ARGS = { } -def load_reference_model() -> "torch.nn.Module": - model = AutoModelForCausalLM.from_pretrained(TINY_LLAMA) - return PeftModel.from_pretrained(model, 
TINY_LLAMA_ADAPTER) +def load_reference_model(is_trainable: bool = False) -> "LoraModel": + model = AutoModelForCausalLM.from_pretrained( + TINY_LLAMA, torch_dtype=torch.float16, device_map=get_current_device() + ) + lora_model = PeftModel.from_pretrained(model, TINY_LLAMA_ADAPTER, is_trainable=is_trainable) + for param in filter(lambda p: p.requires_grad, lora_model.parameters()): + param.data = param.data.to(torch.float32) + + return lora_model def compare_model(model_a: "torch.nn.Module", model_b: "torch.nn.Module", diff_keys: Sequence[str] = []): @@ -148,13 +154,7 @@ def test_lora_train_old_adapters(): tokenizer_module = load_tokenizer(model_args) model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) - base_model = AutoModelForCausalLM.from_pretrained( - TINY_LLAMA, torch_dtype=torch.float16, device_map=get_current_device() - ) - ref_model = PeftModel.from_pretrained(base_model, TINY_LLAMA_ADAPTER, is_trainable=True) - for param in filter(lambda p: p.requires_grad, ref_model.parameters()): - param.data = param.data.to(torch.float32) - + ref_model = load_reference_model(is_trainable=True) compare_model(model, ref_model) @@ -165,13 +165,7 @@ def test_lora_train_new_adapters(): tokenizer_module = load_tokenizer(model_args) model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) - base_model = AutoModelForCausalLM.from_pretrained( - TINY_LLAMA, torch_dtype=torch.float16, device_map=get_current_device() - ) - ref_model = PeftModel.from_pretrained(base_model, TINY_LLAMA_ADAPTER, is_trainable=True) - for param in filter(lambda p: p.requires_grad, ref_model.parameters()): - param.data = param.data.to(torch.float32) - + ref_model = load_reference_model(is_trainable=True) compare_model( model, ref_model, diff_keys=["q_proj", "k_proj", "v_proj", "o_proj", "up_proj", "gate_proj", "down_proj"] ) @@ -200,9 +194,5 @@ def test_lora_inference(): tokenizer_module = load_tokenizer(model_args) model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=False) - base_model = AutoModelForCausalLM.from_pretrained( - TINY_LLAMA, torch_dtype=torch.float16, device_map=get_current_device() - ) - ref_model: "LoraModel" = PeftModel.from_pretrained(base_model, TINY_LLAMA_ADAPTER) - ref_model = ref_model.merge_and_unload() + ref_model = load_reference_model().merge_and_unload() compare_model(model, ref_model)
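
Note: the `pissa_convert` path added in these patches boils down to the `convert_pissa_to_lora` argument that `convert_pissa_adapter` passes to PEFT's `save_pretrained`. As a rough standalone illustration only — the paths below are placeholders, and this assumes a PEFT release that accepts `convert_pissa_to_lora` as the patches above do — the same conversion can be sketched as:

```python
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM

# Placeholder paths: a base model, an adapter trained with init_lora_weights="pissa",
# and the directory holding the initial (untrained) PiSSA weights ("pissa_init").
base = AutoModelForCausalLM.from_pretrained("path/to/base_model", torch_dtype=torch.float16)
model = PeftModel.from_pretrained(base, "path/to/trained_pissa_adapter")

# Saving with convert_pissa_to_lora rewrites the residual PiSSA update into an
# equivalent plain LoRA adapter that can be loaded or merged like any other adapter.
model.save_pretrained("path/to/converted_lora_adapter", convert_pissa_to_lora="path/to/pissa_init")
```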