From 1c43fb6a415121930a273bd18e184b541295dda1 Mon Sep 17 00:00:00 2001
From: hiyouga
Date: Thu, 30 Nov 2023 19:16:13 +0800
Subject: [PATCH 01/12] add models

Former-commit-id: 509abe8864ada29ac7fa0f636b662531c8dd3a33
---
 README.md                        |  6 +++---
 README_zh.md                     |  6 +++---
 src/llmtuner/data/template.py    | 19 ++++++++++++++++---
 src/llmtuner/extras/constants.py | 32 +++++++++++++++++++++++++++++++-
 4 files changed, 53 insertions(+), 10 deletions(-)

diff --git a/README.md b/README.md
index 7ef3b6f1..65c53c18 100644
--- a/README.md
+++ b/README.md
@@ -92,7 +92,7 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/
 | [LLaMA-2](https://huggingface.co/meta-llama) | 7B/13B/70B | q_proj,v_proj | llama2 |
 | [Mistral](https://huggingface.co/mistralai) | 7B | q_proj,v_proj | mistral |
 | [Phi-1.5](https://huggingface.co/microsoft/phi-1_5) | 1.3B | Wqkv | - |
-| [Qwen](https://github.com/QwenLM/Qwen) | 7B/14B | c_attn | qwen |
+| [Qwen](https://github.com/QwenLM/Qwen) | 1.8B/7B/14B/72B | c_attn | qwen |
 | [XVERSE](https://github.com/xverse-ai) | 7B/13B/65B | q_proj,v_proj | xverse |
 
 > [!NOTE]
@@ -199,8 +199,8 @@ huggingface-cli login
 | Full | 16 | 140GB | 240GB | 520GB | 1200GB |
 | Freeze | 16 | 20GB | 40GB | 120GB | 240GB |
 | LoRA | 16 | 16GB | 32GB | 80GB | 160GB |
-| LoRA | 8 | 10GB | 16GB | 40GB | 80GB |
-| LoRA | 4 | 6GB | 12GB | 24GB | 48GB |
+| QLoRA | 8 | 10GB | 16GB | 40GB | 80GB |
+| QLoRA | 4 | 6GB | 12GB | 24GB | 48GB |
 
 ## Getting Started

diff --git a/README_zh.md b/README_zh.md
index d6f01d2f..7bb3c16f 100644
--- a/README_zh.md
+++ b/README_zh.md
@@ -92,7 +92,7 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/6ba60acc-e2e2-4bec-b846
 | [LLaMA-2](https://huggingface.co/meta-llama) | 7B/13B/70B | q_proj,v_proj | llama2 |
 | [Mistral](https://huggingface.co/mistralai) | 7B | q_proj,v_proj | mistral |
 | [Phi-1.5](https://huggingface.co/microsoft/phi-1_5) | 1.3B | Wqkv | - |
-| [Qwen](https://github.com/QwenLM/Qwen) | 7B/14B | c_attn | qwen |
+| [Qwen](https://github.com/QwenLM/Qwen) | 1.8B/7B/14B/72B | c_attn | qwen |
 | [XVERSE](https://github.com/xverse-ai) | 7B/13B/65B | q_proj,v_proj | xverse |
 
 > [!NOTE]
@@ -199,8 +199,8 @@ huggingface-cli login
 | 全参数 | 16 | 140GB | 240GB | 520GB | 1200GB |
 | 部分参数 | 16 | 20GB | 40GB | 120GB | 240GB |
 | LoRA | 16 | 16GB | 32GB | 80GB | 160GB |
-| LoRA | 8 | 10GB | 16GB | 40GB | 80GB |
-| LoRA | 4 | 6GB | 12GB | 24GB | 48GB |
+| QLoRA | 8 | 10GB | 16GB | 40GB | 80GB |
+| QLoRA | 4 | 6GB | 12GB | 24GB | 48GB |
 
 ## 如何使用

diff --git a/src/llmtuner/data/template.py b/src/llmtuner/data/template.py
index ac275c50..ebb633c5 100644
--- a/src/llmtuner/data/template.py
+++ b/src/llmtuner/data/template.py
@@ -408,18 +408,31 @@ register_template(
         "{{system}}"
     ],
     prompt=[
-        "### Instruction:\n{{query}}\n\n### Response:\n"
+        "User: {{query}}\n\nAssistant:"
+    ],
+    system="",
+    sep=[]
+)
+
+
+register_template(
+    name="deepseekcoder",
+    prefix=[
+        "{{system}}"
+    ],
+    prompt=[
+        "### Instruction:\n{{query}}\n### Response:\n"
     ],
     system=(
         "You are an AI programming assistant, utilizing the Deepseek Coder model, "
        "developed by Deepseek Company, and you only answer questions related to computer science. "
         "For politically sensitive questions, security and privacy issues, "
-        "and other non-computer science questions, you will refuse to answer."
+        "and other non-computer science questions, you will refuse to answer\n"
     ),
     sep=[
         "\n",
         {"token": "<|EOT|>"},
-        "\n\n"
+        "\n"
     ],
     stop_words=[
         "<|EOT|>"

diff --git a/src/llmtuner/extras/constants.py b/src/llmtuner/extras/constants.py
index 3257678e..f1f4d283 100644
--- a/src/llmtuner/extras/constants.py
+++ b/src/llmtuner/extras/constants.py
@@ -131,6 +131,28 @@ register_model_group(
 )
 
 
+register_model_group(
+    models={
+        "DeepseekLLM-7B-Base": "deepseek-ai/deepseek-llm-7b-base",
+        "DeepseekLLM-67B-Base": "deepseek-ai/deepseek-llm-67b-base",
+        "DeepseekLLM-7B-Chat": "deepseek-ai/deepseek-llm-7b-chat",
+        "DeepseekLLM-67B-Chat": "deepseek-ai/deepseek-llm-67b-chat"
+    },
+    template="deepseek"
+)
+
+
+register_model_group(
+    models={
+        "DeepseekCoder-6.7B-Base": "deepseek-ai/deepseek-coder-6.7b-base",
+        "DeepseekCoder-6.7B-Chat": "deepseek-ai/deepseek-coder-6.7b-instruct",
+        "DeepseekCoder-33B-Base": "deepseek-ai/deepseek-coder-33b-base",
+        "DeepseekCoder-33B-Chat": "deepseek-ai/deepseek-coder-33b-instruct"
+    },
+    template="deepseekcoder"
+)
+
+
 register_model_group(
     models={
         "Falcon-7B": "tiiuae/falcon-7b",
@@ -214,14 +236,22 @@
 register_model_group(
     models={
+        "Qwen-1.8B": "Qwen/Qwen-1_8B",
         "Qwen-7B": "Qwen/Qwen-7B",
         "Qwen-14B": "Qwen/Qwen-14B",
+        "Qwen-72B": "Qwen/Qwen-72B",
+        "Qwen-1.8B-Chat": "Qwen/Qwen-1_8B-Chat",
         "Qwen-7B-Chat": "Qwen/Qwen-7B-Chat",
         "Qwen-14B-Chat": "Qwen/Qwen-14B-Chat",
+        "Qwen-72B-Chat": "Qwen/Qwen-72B-Chat",
+        "Qwen-1.8B-int8-Chat": "Qwen/Qwen-1_8B-Chat-Int8",
+        "Qwen-1.8B-int4-Chat": "Qwen/Qwen-1_8B-Chat-Int4",
         "Qwen-7B-int8-Chat": "Qwen/Qwen-7B-Chat-Int8",
         "Qwen-7B-int4-Chat": "Qwen/Qwen-7B-Chat-Int4",
         "Qwen-14B-int8-Chat": "Qwen/Qwen-14B-Chat-Int8",
-        "Qwen-14B-int4-Chat": "Qwen/Qwen-14B-Chat-Int4"
+        "Qwen-14B-int4-Chat": "Qwen/Qwen-14B-Chat-Int4",
+        "Qwen-72B-int8-Chat": "Qwen/Qwen-72B-Chat-Int8",
+        "Qwen-72B-int4-Chat": "Qwen/Qwen-72B-Chat-Int4"
     },
     module="c_attn",
     template="qwen"
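For reference, a rough sketch of the single-turn prompt the new "deepseekcoder" template produces. This is not part of the patch: the field names mirror the register_template call above, but the exact assembly logic lives in template.py, so treat it as an approximation.

    # Illustrative sketch: the prefix carries the system prompt and the prompt
    # field wraps each user query; generation is expected to terminate at the
    # "<|EOT|>" stop word.
    system = (
        "You are an AI programming assistant, utilizing the Deepseek Coder model, "
        "developed by Deepseek Company, and you only answer questions related to computer science. "
        "For politically sensitive questions, security and privacy issues, "
        "and other non-computer science questions, you will refuse to answer\n"
    )
    query = "Write a binary search in Python."
    prompt_text = system + "### Instruction:\n{}\n### Response:\n".format(query)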
+ "and other non-computer science questions, you will refuse to answer\n" ), sep=[ "\n", {"token": "<|EOT|>"}, - "\n\n" + "\n" ], stop_words=[ "<|EOT|>" diff --git a/src/llmtuner/extras/constants.py b/src/llmtuner/extras/constants.py index 3257678e..f1f4d283 100644 --- a/src/llmtuner/extras/constants.py +++ b/src/llmtuner/extras/constants.py @@ -131,6 +131,28 @@ register_model_group( ) +register_model_group( + models={ + "DeepseekLLM-7B-Base": "deepseek-ai/deepseek-llm-7b-base", + "DeepseekLLM-67B-Base": "deepseek-ai/deepseek-llm-67b-base", + "DeepseekLLM-7B-Chat": "deepseek-ai/deepseek-llm-7b-chat", + "DeepseekLLM-67B-Chat": "deepseek-ai/deepseek-llm-67b-chat" + }, + template="deepseek" +) + + +register_model_group( + models={ + "DeepseekCoder-6.7B-Base": "deepseek-ai/deepseek-coder-6.7b-base", + "DeepseekCoder-6.7B-Chat": "deepseek-ai/deepseek-coder-6.7b-instruct", + "DeepseekCoder-33B-Base": "deepseek-ai/deepseek-coder-33b-base", + "DeepseekCoder-33B-Chat": "deepseek-ai/deepseek-coder-33b-instruct" + }, + template="deepseekcoder" +) + + register_model_group( models={ "Falcon-7B": "tiiuae/falcon-7b", @@ -214,14 +236,22 @@ register_model_group( register_model_group( models={ + "Qwen-1.8B": "Qwen/Qwen-1_8B", "Qwen-7B": "Qwen/Qwen-7B", "Qwen-14B": "Qwen/Qwen-14B", + "Qwen-72B": "Qwen/Qwen-72B", + "Qwen-1.8B-Chat": "Qwen/Qwen-1_8B-Chat", "Qwen-7B-Chat": "Qwen/Qwen-7B-Chat", "Qwen-14B-Chat": "Qwen/Qwen-14B-Chat", + "Qwen-72B-Chat": "Qwen/Qwen-72B-Chat", + "Qwen-1.8B-int8-Chat": "Qwen/Qwen-1_8B-Chat-Int8", + "Qwen-1.8B-int4-Chat": "Qwen/Qwen-1_8B-Chat-Int4", "Qwen-7B-int8-Chat": "Qwen/Qwen-7B-Chat-Int8", "Qwen-7B-int4-Chat": "Qwen/Qwen-7B-Chat-Int4", "Qwen-14B-int8-Chat": "Qwen/Qwen-14B-Chat-Int8", - "Qwen-14B-int4-Chat": "Qwen/Qwen-14B-Chat-Int4" + "Qwen-14B-int4-Chat": "Qwen/Qwen-14B-Chat-Int4", + "Qwen-72B-int8-Chat": "Qwen/Qwen-72B-Chat-Int8", + "Qwen-72B-int4-Chat": "Qwen/Qwen-72B-Chat-Int4" }, module="c_attn", template="qwen" From bb6b4823add4cd4818587ac1a2f427ad075adbce Mon Sep 17 00:00:00 2001 From: hiyouga Date: Thu, 30 Nov 2023 20:03:32 +0800 Subject: [PATCH 02/12] fix #1682 Former-commit-id: a38dbf55e32a18838eea7f254fd9022fe33bca08 --- src/llmtuner/webui/components/eval.py | 5 +++-- src/llmtuner/webui/engine.py | 5 ++++- src/llmtuner/webui/locales.py | 10 +++++----- src/llmtuner/webui/runner.py | 19 ++++++++----------- 4 files changed, 20 insertions(+), 19 deletions(-) diff --git a/src/llmtuner/webui/components/eval.py b/src/llmtuner/webui/components/eval.py index 36c994a6..0718c63e 100644 --- a/src/llmtuner/webui/components/eval.py +++ b/src/llmtuner/webui/components/eval.py @@ -38,10 +38,11 @@ def create_eval_tab(engine: "Engine") -> Dict[str, "Component"]: max_new_tokens = gr.Slider(10, 2048, value=128, step=1) top_p = gr.Slider(0.01, 1, value=0.7, step=0.01) temperature = gr.Slider(0.01, 1.5, value=0.95, step=0.01) + output_dir = gr.Textbox() - input_elems.update({max_new_tokens, top_p, temperature}) + input_elems.update({max_new_tokens, top_p, temperature, output_dir}) elem_dict.update(dict( - max_new_tokens=max_new_tokens, top_p=top_p, temperature=temperature + max_new_tokens=max_new_tokens, top_p=top_p, temperature=temperature, output_dir=output_dir )) with gr.Row(): diff --git a/src/llmtuner/webui/engine.py b/src/llmtuner/webui/engine.py index bdac09dd..991b281c 100644 --- a/src/llmtuner/webui/engine.py +++ b/src/llmtuner/webui/engine.py @@ -49,7 +49,10 @@ class Engine: else: yield self._form_dict({"eval.resume_btn": {"value": True}}) else: - yield 
self._form_dict({"train.output_dir": {"value": get_time()}}) + yield self._form_dict({ + "train.output_dir": {"value": "train_" + get_time()}, + "eval.output_dir": {"value": "eval_" + get_time()}, + }) def change_lang(self, lang: str) -> Dict[Component, Dict[str, Any]]: return { diff --git a/src/llmtuner/webui/locales.py b/src/llmtuner/webui/locales.py index 769cf15d..5f5609d8 100644 --- a/src/llmtuner/webui/locales.py +++ b/src/llmtuner/webui/locales.py @@ -132,7 +132,7 @@ LOCALES = { "dataset_dir": { "en": { "label": "Data dir", - "info": "Path of the data directory." + "info": "Path to the data directory." }, "zh": { "label": "数据路径", @@ -475,12 +475,12 @@ LOCALES = { }, "output_dir": { "en": { - "label": "Checkpoint name", - "info": "Directory to save checkpoint." + "label": "Output dir", + "info": "Directory for saving results." }, "zh": { - "label": "断点名称", - "info": "保存模型断点的文件夹名称。" + "label": "输出目录", + "info": "保存结果的路径。" } }, "output_box": { diff --git a/src/llmtuner/webui/runner.py b/src/llmtuner/webui/runner.py index 7789fc4d..664f3354 100644 --- a/src/llmtuner/webui/runner.py +++ b/src/llmtuner/webui/runner.py @@ -87,9 +87,9 @@ class Runner: user_config = load_config() if get("top.checkpoints"): - checkpoint_dir = ",".join([get_save_dir( - get("top.model_name"), get("top.finetuning_type"), ckpt - ) for ckpt in get("top.checkpoints")]) + checkpoint_dir = ",".join([ + get_save_dir(get("top.model_name"), get("top.finetuning_type"), ckpt) for ckpt in get("top.checkpoints") + ]) else: checkpoint_dir = None @@ -160,15 +160,11 @@ class Runner: user_config = load_config() if get("top.checkpoints"): - checkpoint_dir = ",".join([get_save_dir( - get("top.model_name"), get("top.finetuning_type"), ckpt - ) for ckpt in get("top.checkpoints")]) - output_dir = get_save_dir( - get("top.model_name"), get("top.finetuning_type"), "eval_" + "_".join(get("top.checkpoints")) - ) + checkpoint_dir = ",".join([ + get_save_dir(get("top.model_name"), get("top.finetuning_type"), ckpt) for ckpt in get("top.checkpoints") + ]) else: checkpoint_dir = None - output_dir = get_save_dir(get("top.model_name"), get("top.finetuning_type"), "eval_base") args = dict( stage="sft", @@ -192,7 +188,7 @@ class Runner: max_new_tokens=get("eval.max_new_tokens"), top_p=get("eval.top_p"), temperature=get("eval.temperature"), - output_dir=output_dir + output_dir=get_save_dir(get("top.model_name"), get("top.finetuning_type"), get("eval.output_dir")) ) if get("eval.predict"): @@ -242,6 +238,7 @@ class Runner: output_dir = get_save_dir(get("top.model_name"), get("top.finetuning_type"), get( "{}.output_dir".format("train" if self.do_train else "eval") )) + while self.thread.is_alive(): time.sleep(2) if self.aborted: From ba6d290d0bcdfa9a9d37828a911804dc4b77e308 Mon Sep 17 00:00:00 2001 From: hiyouga Date: Thu, 30 Nov 2023 21:02:00 +0800 Subject: [PATCH 03/12] fix #1668 Former-commit-id: 1585962eb7ed042890d4c56422aae749c669dda8 --- src/llmtuner/train/ppo/trainer.py | 3 ++- src/llmtuner/train/rm/trainer.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/llmtuner/train/ppo/trainer.py b/src/llmtuner/train/ppo/trainer.py index e6c3d0e3..1bba733b 100644 --- a/src/llmtuner/train/ppo/trainer.py +++ b/src/llmtuner/train/ppo/trainer.py @@ -298,7 +298,8 @@ class CustomPPOTrainer(PPOTrainer, Trainer): with torch.cuda.amp.autocast(dtype=self.model_args.compute_dtype): # support bf16 logits, _, values = model(**input_kwargs) - if getattr(model.config, "model_type", None) == "chatglm": + unwrapped_model: 
"AutoModelForCausalLMWithValueHead" = self.accelerator.unwrap_model(self.model) + if getattr(unwrapped_model.config, "model_type", None) == "chatglm": values = torch.transpose(values, 0, 1) logprobs = logprobs_from_logits(logits[:, :-1, :], input_ids[:, 1:]) diff --git a/src/llmtuner/train/rm/trainer.py b/src/llmtuner/train/rm/trainer.py index 9be64264..b018a8c4 100644 --- a/src/llmtuner/train/rm/trainer.py +++ b/src/llmtuner/train/rm/trainer.py @@ -40,7 +40,8 @@ class PairwiseTrainer(Trainer): # Compute rewards _, _, values = model(**inputs, output_hidden_states=True, return_dict=True) - if getattr(model.config, "model_type", None) == "chatglm": + unwrapped_model: "PreTrainedModel" = self.accelerator.unwrap_model(self.model) + if getattr(unwrapped_model.config, "model_type", None) == "chatglm": values = torch.transpose(values, 0, 1) # Split the inputs and rewards into two parts, chosen and rejected From 3d291a82d34a09d0fc221a246962bcbb376b4c6b Mon Sep 17 00:00:00 2001 From: hiyouga Date: Thu, 30 Nov 2023 21:47:06 +0800 Subject: [PATCH 04/12] fix #1597 Former-commit-id: 327d7f7efe1fefe4bf4646c07fc4917a42c13383 --- src/llmtuner/train/ppo/trainer.py | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/src/llmtuner/train/ppo/trainer.py b/src/llmtuner/train/ppo/trainer.py index 1bba733b..b81aa7ff 100644 --- a/src/llmtuner/train/ppo/trainer.py +++ b/src/llmtuner/train/ppo/trainer.py @@ -6,7 +6,9 @@ from tqdm import tqdm from typing import TYPE_CHECKING, List, Optional, Tuple from transformers import BatchEncoding, GenerationConfig, Trainer, TrainerState, TrainerControl +from transformers.utils import WEIGHTS_NAME, SAFE_WEIGHTS_NAME from transformers.trainer_utils import PREFIX_CHECKPOINT_DIR +from transformers.trainer_pt_utils import remove_dummy_checkpoint from trl import PPOTrainer from trl.core import PPODecorators, logprobs_from_logits @@ -55,6 +57,9 @@ class CustomPPOTrainer(PPOTrainer, Trainer): self.state = TrainerState() self.control = TrainerControl() + self.is_deepspeed_enabled = self.accelerator.distributed_type == "DEEPSPEED" and hasattr( + self.accelerator.state, "deepspeed_plugin" + ) self.log_callback, self.save_callback = callbacks[0], callbacks[1] assert isinstance(self.log_callback, LogCallback) and isinstance(self.save_callback, SavePeftModelCallback) @@ -62,10 +67,7 @@ class CustomPPOTrainer(PPOTrainer, Trainer): logger.info("max_steps is given, it will override any value given in num_train_epochs") if reward_model is not None: - is_deepspeed_enabled = self.accelerator.distributed_type == "DEEPSPEED" and hasattr( - self.accelerator.state, "deepspeed_plugin" - ) - if is_deepspeed_enabled: + if self.is_deepspeed_enabled: if not ( getattr(reward_model.pretrained_model, "is_loaded_in_8bit", False) or getattr(reward_model.pretrained_model, "is_loaded_in_4bit", False) @@ -345,4 +347,13 @@ class CustomPPOTrainer(PPOTrainer, Trainer): Subclass and override to inject custom behavior. """ if self.args.should_save: - self._save(output_dir) + try: + self._save(output_dir, state_dict=self.accelerator.get_state_dict(self.model)) + except ValueError: + logger.warning( + " stage3_gather_16bit_weights_on_model_save=false. 
From e400f2e8ad667ec979451eda7a308635a7e855d5 Mon Sep 17 00:00:00 2001
From: billvsme <994171686@qq.com>
Date: Thu, 30 Nov 2023 22:40:35 +0800
Subject: [PATCH 05/12] improve get_current_device

Former-commit-id: 40dfcbc3d4571ce022b6aa39db581c8b88a75b8d
---
 src/llmtuner/extras/misc.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/llmtuner/extras/misc.py b/src/llmtuner/extras/misc.py
index 672110cf..2e8d16a8 100644
--- a/src/llmtuner/extras/misc.py
+++ b/src/llmtuner/extras/misc.py
@@ -69,11 +69,11 @@ def count_parameters(model: torch.nn.Module) -> Tuple[int, int]:
 
 def get_current_device() -> str:
     import accelerate
-    dummy_accelerator = accelerate.Accelerator()
+    local_rank = int(os.environ.get('LOCAL_RANK', '0'))
     if accelerate.utils.is_xpu_available():
-        return "xpu:{}".format(dummy_accelerator.local_process_index)
+        return "xpu:{}".format(local_rank)
     else:
-        return dummy_accelerator.local_process_index if torch.cuda.is_available() else "cpu"
+        return local_rank if torch.cuda.is_available() else "cpu"
 
 
 def get_logits_processor() -> "LogitsProcessorList":

From a26f68ba47ddb96164a92010d9ada898edce927d Mon Sep 17 00:00:00 2001
From: Marco <121761685+mlinmg@users.noreply.github.com>
Date: Thu, 30 Nov 2023 16:21:34 +0100
Subject: [PATCH 06/12] Update dataset_info.json

Added the Nectar dataset, already preprocessed and divided into SFT and RL
subsets, to which I added a pre-prompt for each instruction, since this has
been shown to increase instruction following.

Former-commit-id: 9468ee9012bfe7124fc5cc2acebcfe03a6d0cdee
---
 data/dataset_info.json | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/data/dataset_info.json b/data/dataset_info.json
index faa7931d..78d6a922 100644
--- a/data/dataset_info.json
+++ b/data/dataset_info.json
@@ -266,6 +266,12 @@
     "columns": {
       "prompt": "content"
     }
+  "nectar_rlaif": {
+    "hf_hub_url": "mlinmg/RLAIF-Nectar",
+    "ranking": true
+  },
+  "nectar_sft": {
+    "hf_hub_url": "mlinmg/SFT-Nectar"
   },
   "starcoder": {
     "hf_hub_url": "bigcode/starcoderdata",

From 7cf4e3b9c66c4d86c4b8b5ec92d547b43b857b1f Mon Sep 17 00:00:00 2001
From: samge
Date: Fri, 1 Dec 2023 11:35:02 +0800
Subject: [PATCH 07/12] Improve: "CUDA_VISIBLE_DEVICES" read from the env
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Former-commit-id: 421d4de604493e1e26ec8348dab3eae138f46b86
---
 src/llmtuner/webui/utils.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/llmtuner/webui/utils.py b/src/llmtuner/webui/utils.py
index 7c624db4..fc2c5c2c 100644
--- a/src/llmtuner/webui/utils.py
+++ b/src/llmtuner/webui/utils.py
@@ -44,7 +44,8 @@ def can_quantize(finetuning_type: str) -> Dict[str, Any]:
 def gen_cmd(args: Dict[str, Any]) -> str:
     args.pop("disable_tqdm", None)
     args["plot_loss"] = args.get("do_train", None)
-    cmd_lines = ["CUDA_VISIBLE_DEVICES=0 python src/train_bash.py "]
+    cuda_visible_devices = os.environ.get('CUDA_VISIBLE_DEVICES') or "0"
+    cmd_lines = [f"CUDA_VISIBLE_DEVICES={cuda_visible_devices} python src/train_bash.py "]
     for k, v in args.items():
         if v is not None and v != "":
             cmd_lines.append("    --{} {} ".format(k, str(v)))
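The device and command-preview changes above key off standard launcher environment variables: LOCAL_RANK is exported per process by launchers such as torchrun, and CUDA_VISIBLE_DEVICES selects the visible GPUs. A usage sketch, assuming the package is importable as laid out in this tree:

    import os
    from llmtuner.webui.utils import gen_cmd

    # CUDA_VISIBLE_DEVICES now propagates into the previewed command instead
    # of being hard-coded to "0".
    os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"
    print(gen_cmd({"stage": "sft", "do_train": True}))
    # roughly: CUDA_VISIBLE_DEVICES=0,1 python src/train_bash.py --stage sft ...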
From 9a6b694e122924886cc483f093656011d0e6479c Mon Sep 17 00:00:00 2001
From: hiyouga
Date: Fri, 1 Dec 2023 15:34:50 +0800
Subject: [PATCH 08/12] fix #1696

Former-commit-id: bf6f6aeefe65b4949633648b8711525c0029c001
---
 README.md                        |  2 ++
 README_zh.md                     |  2 ++
 data/dataset_info.json           | 13 +++++++------
 src/llmtuner/extras/callbacks.py | 29 ++++++++++++++++-------------
 4 files changed, 27 insertions(+), 19 deletions(-)

diff --git a/README.md b/README.md
index 65c53c18..e2b10f3a 100644
--- a/README.md
+++ b/README.md
@@ -156,6 +156,7 @@
 - [Firefly 1.1M (zh)](https://huggingface.co/datasets/YeungNLP/firefly-train-1.1M)
 - [Web QA (zh)](https://huggingface.co/datasets/suolyer/webqa)
 - [WebNovel (zh)](https://huggingface.co/datasets/zxbsmk/webnovel_cn)
+- [Nectar (en)](https://huggingface.co/datasets/berkeley-nest/Nectar)
 - [Ad Gen (zh)](https://huggingface.co/datasets/HasturOfficial/adgen)
 - [ShareGPT Hyperfiltered (en)](https://huggingface.co/datasets/totally-not-an-llm/sharegpt-hyperfiltered-3k)
 - [ShareGPT4 (en&zh)](https://huggingface.co/datasets/shibing624/sharegpt_gpt4)
@@ -171,6 +172,7 @@
 - [HH-RLHF (en)](https://huggingface.co/datasets/Anthropic/hh-rlhf)
 - [Open Assistant (multilingual)](https://huggingface.co/datasets/OpenAssistant/oasst1)
 - [GPT-4 Generated Data (en&zh)](https://github.com/Instruction-Tuning-with-GPT-4/GPT-4-LLM)
+- [Nectar (en)](https://huggingface.co/datasets/berkeley-nest/Nectar)

diff --git a/README_zh.md b/README_zh.md
index 7bb3c16f..9e3c0833 100644
--- a/README_zh.md
+++ b/README_zh.md
@@ -156,6 +156,7 @@
 - [Firefly 1.1M (zh)](https://huggingface.co/datasets/YeungNLP/firefly-train-1.1M)
 - [Web QA (zh)](https://huggingface.co/datasets/suolyer/webqa)
 - [WebNovel (zh)](https://huggingface.co/datasets/zxbsmk/webnovel_cn)
+- [Nectar (en)](https://huggingface.co/datasets/berkeley-nest/Nectar)
 - [Ad Gen (zh)](https://huggingface.co/datasets/HasturOfficial/adgen)
 - [ShareGPT Hyperfiltered (en)](https://huggingface.co/datasets/totally-not-an-llm/sharegpt-hyperfiltered-3k)
 - [ShareGPT4 (en&zh)](https://huggingface.co/datasets/shibing624/sharegpt_gpt4)
@@ -171,6 +172,7 @@
 - [HH-RLHF (en)](https://huggingface.co/datasets/Anthropic/hh-rlhf)
 - [Open Assistant (multilingual)](https://huggingface.co/datasets/OpenAssistant/oasst1)
 - [GPT-4 Generated Data (en&zh)](https://github.com/Instruction-Tuning-with-GPT-4/GPT-4-LLM)
+- [Nectar (en)](https://huggingface.co/datasets/berkeley-nest/Nectar)

diff --git a/data/dataset_info.json b/data/dataset_info.json
index 78d6a922..2b3f4eb7 100644
--- a/data/dataset_info.json
+++ b/data/dataset_info.json
@@ -134,6 +134,9 @@
   "webnovel": {
     "hf_hub_url": "zxbsmk/webnovel_cn"
   },
+  "nectar_sft": {
+    "hf_hub_url": "mlinmg/SFT-Nectar"
+  },
   "adgen": {
     "hf_hub_url": "HasturOfficial/adgen",
     "columns": {
@@ -216,6 +219,10 @@
     "file_sha1": "515b18ed497199131ddcc1af950345c11dc5c7fd",
     "ranking": true
   },
+  "nectar_rm": {
+    "hf_hub_url": "mlinmg/RLAIF-Nectar",
+    "ranking": true
+  },
   "wiki_demo": {
     "file_name": "wiki_demo.txt",
     "file_sha1": "e70375e28eda542a90c68213640cc371898ce181",
@@ -266,12 +273,6 @@
     "columns": {
       "prompt": "content"
     }
-  "nectar_rlaif": {
-    "hf_hub_url": "mlinmg/RLAIF-Nectar",
-    "ranking": true
-  },
-  "nectar_sft": {
-    "hf_hub_url": "mlinmg/SFT-Nectar"
   },
   "starcoder": {
     "hf_hub_url": "bigcode/starcoderdata",

diff --git a/src/llmtuner/extras/callbacks.py b/src/llmtuner/extras/callbacks.py
index 5cf62cdc..fd78391d 100644
--- a/src/llmtuner/extras/callbacks.py
+++ b/src/llmtuner/extras/callbacks.py
@@ -5,6 +5,7 @@ from typing import TYPE_CHECKING
 from datetime import timedelta
 
 from transformers import TrainerCallback
+from transformers.modeling_utils import custom_object_save, unwrap_model
 from transformers.trainer_utils import has_length, PREFIX_CHECKPOINT_DIR
 
 from llmtuner.extras.constants import LOG_FILE_NAME
@@ -18,6 +19,16 @@ if TYPE_CHECKING:
 logger = get_logger(__name__)
 
 
+def _save_model_with_valuehead(model: "AutoModelForCausalLMWithValueHead", output_dir: str) -> None:
+    model.pretrained_model.config.save_pretrained(output_dir)
+    if model.pretrained_model.can_generate():
+        model.pretrained_model.generation_config.save_pretrained(output_dir)
+    if getattr(model, "is_peft_model", False):
+        model.pretrained_model.save_pretrained(output_dir)
+    elif getattr(model.pretrained_model, "_auto_class", None): # must not be a peft model
+        custom_object_save(model.pretrained_model, output_dir, config=model.pretrained_model.config)
+
+
 class SavePeftModelCallback(TrainerCallback):
 
     def on_save(self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", **kwargs):
@@ -25,25 +36,17 @@
         r"""
         Event called after a checkpoint save.
         """
         if args.should_save:
-            output_dir = os.path.join(args.output_dir, "{}-{}".format(PREFIX_CHECKPOINT_DIR, state.global_step))
-            model: "AutoModelForCausalLMWithValueHead" = kwargs.pop("model")
-            model.pretrained_model.config.save_pretrained(output_dir)
-            if model.pretrained_model.can_generate():
-                model.pretrained_model.generation_config.save_pretrained(output_dir)
-            if getattr(model, "is_peft_model", False):
-                model.pretrained_model.save_pretrained(output_dir)
+            _save_model_with_valuehead(
+                model=unwrap_model(kwargs.pop("model")),
+                output_dir=os.path.join(args.output_dir, "{}-{}".format(PREFIX_CHECKPOINT_DIR, state.global_step))
+            )
 
     def on_train_end(self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", **kwargs):
         r"""
         Event called at the end of training.
         """
         if args.should_save:
-            model: "AutoModelForCausalLMWithValueHead" = kwargs.pop("model")
-            model.pretrained_model.config.save_pretrained(args.output_dir)
-            if model.pretrained_model.can_generate():
-                model.pretrained_model.generation_config.save_pretrained(args.output_dir)
-            if getattr(model, "is_peft_model", False):
-                model.pretrained_model.save_pretrained(args.output_dir)
+            _save_model_with_valuehead(model=unwrap_model(kwargs.pop("model")), output_dir=args.output_dir)
 
 
 class LogCallback(TrainerCallback):
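After this fix, each dataset entry is again a well-formed JSON object. A minimal sketch of the two repaired Nectar entries expressed as Python data, matching the hunks above:

    import json

    entries = {
        "nectar_sft": {"hf_hub_url": "mlinmg/SFT-Nectar"},
        "nectar_rm": {"hf_hub_url": "mlinmg/RLAIF-Nectar", "ranking": True},
    }
    # The shape expected by data/dataset_info.json: a name mapping to a HF Hub
    # dataset, optionally marked "ranking" for reward-model training.
    print(json.dumps(entries, indent=2))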
"bigcode/starcoderdata", diff --git a/src/llmtuner/extras/callbacks.py b/src/llmtuner/extras/callbacks.py index 5cf62cdc..fd78391d 100644 --- a/src/llmtuner/extras/callbacks.py +++ b/src/llmtuner/extras/callbacks.py @@ -5,6 +5,7 @@ from typing import TYPE_CHECKING from datetime import timedelta from transformers import TrainerCallback +from transformers.modeling_utils import custom_object_save, unwrap_model from transformers.trainer_utils import has_length, PREFIX_CHECKPOINT_DIR from llmtuner.extras.constants import LOG_FILE_NAME @@ -18,6 +19,16 @@ if TYPE_CHECKING: logger = get_logger(__name__) +def _save_model_with_valuehead(model: "AutoModelForCausalLMWithValueHead", output_dir: str) -> None: + model.pretrained_model.config.save_pretrained(output_dir) + if model.pretrained_model.can_generate(): + model.pretrained_model.generation_config.save_pretrained(output_dir) + if getattr(model, "is_peft_model", False): + model.pretrained_model.save_pretrained(output_dir) + elif getattr(model.pretrained_model, "_auto_class", None): # must not a peft model + custom_object_save(model.pretrained_model, output_dir, config=model.pretrained_model.config) + + class SavePeftModelCallback(TrainerCallback): def on_save(self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", **kwargs): @@ -25,25 +36,17 @@ class SavePeftModelCallback(TrainerCallback): Event called after a checkpoint save. """ if args.should_save: - output_dir = os.path.join(args.output_dir, "{}-{}".format(PREFIX_CHECKPOINT_DIR, state.global_step)) - model: "AutoModelForCausalLMWithValueHead" = kwargs.pop("model") - model.pretrained_model.config.save_pretrained(output_dir) - if model.pretrained_model.can_generate(): - model.pretrained_model.generation_config.save_pretrained(output_dir) - if getattr(model, "is_peft_model", False): - model.pretrained_model.save_pretrained(output_dir) + _save_model_with_valuehead( + model=unwrap_model(kwargs.pop("model")), + output_dir=os.path.join(args.output_dir, "{}-{}".format(PREFIX_CHECKPOINT_DIR, state.global_step)) + ) def on_train_end(self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", **kwargs): r""" Event called at the end of training. 
""" if args.should_save: - model: "AutoModelForCausalLMWithValueHead" = kwargs.pop("model") - model.pretrained_model.config.save_pretrained(args.output_dir) - if model.pretrained_model.can_generate(): - model.pretrained_model.generation_config.save_pretrained(args.output_dir) - if getattr(model, "is_peft_model", False): - model.pretrained_model.save_pretrained(args.output_dir) + _save_model_with_valuehead(model=unwrap_model(kwargs.pop("model")), output_dir=args.output_dir) class LogCallback(TrainerCallback): From a44ba7a2b82415272566e0460da1535115e5f428 Mon Sep 17 00:00:00 2001 From: hiyouga Date: Fri, 1 Dec 2023 15:58:50 +0800 Subject: [PATCH 09/12] tiny fix Former-commit-id: e597d3c084c8700e247bad6e26d2ee40fc3c316b --- src/llmtuner/extras/misc.py | 7 ++++--- src/llmtuner/webui/utils.py | 4 ++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/llmtuner/extras/misc.py b/src/llmtuner/extras/misc.py index 2e8d16a8..4f754c5c 100644 --- a/src/llmtuner/extras/misc.py +++ b/src/llmtuner/extras/misc.py @@ -69,11 +69,12 @@ def count_parameters(model: torch.nn.Module) -> Tuple[int, int]: def get_current_device() -> str: import accelerate - local_rank = int(os.environ.get('LOCAL_RANK', '0')) if accelerate.utils.is_xpu_available(): - return "xpu:{}".format(local_rank) + return "xpu:{}".format(os.environ.get("LOCAL_RANK", "0")) + elif accelerate.utils.is_npu_available() or torch.cuda.is_available(): + return os.environ.get("LOCAL_RANK", "0") else: - return local_rank if torch.cuda.is_available() else "cpu" + return "cpu" def get_logits_processor() -> "LogitsProcessorList": diff --git a/src/llmtuner/webui/utils.py b/src/llmtuner/webui/utils.py index fc2c5c2c..34dd1804 100644 --- a/src/llmtuner/webui/utils.py +++ b/src/llmtuner/webui/utils.py @@ -44,8 +44,8 @@ def can_quantize(finetuning_type: str) -> Dict[str, Any]: def gen_cmd(args: Dict[str, Any]) -> str: args.pop("disable_tqdm", None) args["plot_loss"] = args.get("do_train", None) - cuda_visible_devices = os.environ.get('CUDA_VISIBLE_DEVICES') or "0" - cmd_lines = [f"CUDA_VISIBLE_DEVICES={cuda_visible_devices} python src/train_bash.py "] + current_devices = os.environ.get("CUDA_VISIBLE_DEVICES", "0") + cmd_lines = ["CUDA_VISIBLE_DEVICES={} python src/train_bash.py ".format(current_devices)] for k, v in args.items(): if v is not None and v != "": cmd_lines.append(" --{} {} ".format(k, str(v))) From c5842fcd964e066992a3102b33141eda3850ba5d Mon Sep 17 00:00:00 2001 From: SamgeShao Date: Fri, 1 Dec 2023 16:37:41 +0800 Subject: [PATCH 10/12] Update .gitignore Former-commit-id: 7cabb9903dd3830ead03c786e494d51cc09a3b66 --- .gitignore | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 68bc17f9..47762394 100644 --- a/.gitignore +++ b/.gitignore @@ -157,4 +157,10 @@ cython_debug/ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore # and can be added to the global gitignore or merged into this file. For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. -#.idea/ +# The ".idea" folder can generally be ignored directly. 
From dbb8342ec056b698de0932eda47631efac78f358 Mon Sep 17 00:00:00 2001
From: hiyouga
Date: Fri, 1 Dec 2023 17:04:37 +0800
Subject: [PATCH 11/12] add err hint

Former-commit-id: a51b8ec620e52cbfcad91d12f0acd7c73f448444
---
 .gitignore                   | 1 -
 src/llmtuner/webui/common.py | 6 +++---
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/.gitignore b/.gitignore
index 47762394..0355c666 100644
--- a/.gitignore
+++ b/.gitignore
@@ -157,7 +157,6 @@ cython_debug/
 # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
 # and can be added to the global gitignore or merged into this file. For a more nuclear
 # option (not recommended) you can uncomment the following to ignore the entire idea folder.
-# The ".idea" folder can generally be ignored directly.
 .idea/
 
 # custom .gitignore
 user.config
 saves/
 cache/

diff --git a/src/llmtuner/webui/common.py b/src/llmtuner/webui/common.py
index 55d8942b..8788754a 100644
--- a/src/llmtuner/webui/common.py
+++ b/src/llmtuner/webui/common.py
@@ -89,12 +89,12 @@ def list_checkpoint(model_name: str, finetuning_type: str) -> Dict[str, Any]:
     return gr.update(value=[], choices=checkpoints)
 
 
-def load_dataset_info(dataset_dir: str) -> Dict[str, Any]:
+def load_dataset_info(dataset_dir: str) -> Dict[str, Dict[str, Any]]:
     try:
         with open(os.path.join(dataset_dir, DATA_CONFIG), "r", encoding="utf-8") as f:
             return json.load(f)
-    except:
-        print("Cannot find {} in {}.".format(DATA_CONFIG, dataset_dir))
+    except Exception as err:
+        print("Cannot open {} in {} due to {}.".format(DATA_CONFIG, dataset_dir, str(err)))
         return {}
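The effect of the new hint, sketched with an illustrative (hypothetical) directory name:

    from llmtuner.webui.common import load_dataset_info

    info = load_dataset_info("nonexistent_dir")
    # prints something like:
    #   Cannot open dataset_info.json in nonexistent_dir due to [Errno 2]
    #   No such file or directory: 'nonexistent_dir/dataset_info.json'.
    # and returns {} instead of raising.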
From e964fa7df7a2f0551455b3d475b2cccd24c7447c Mon Sep 17 00:00:00 2001
From: hiyouga
Date: Fri, 1 Dec 2023 17:13:22 +0800
Subject: [PATCH 12/12] fix err hint

Former-commit-id: a5a248d569f8bf97cb9be71221783d97c666583c
---
 src/llmtuner/hparams/data_args.py | 11 +++++++----
 src/llmtuner/webui/common.py      | 11 ++++++++---
 2 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/src/llmtuner/hparams/data_args.py b/src/llmtuner/hparams/data_args.py
index 0b74c3cb..cea89198 100644
--- a/src/llmtuner/hparams/data_args.py
+++ b/src/llmtuner/hparams/data_args.py
@@ -4,6 +4,9 @@ from typing import List, Literal, Optional
 from dataclasses import dataclass, field
 
 
+DATA_CONFIG = "dataset_info.json"
+
+
 @dataclass
 class DatasetAttr:
 
@@ -130,11 +133,11 @@ class DataArguments:
         self.seed = seed
         dataset_names = [ds.strip() for ds in self.dataset.split(",")] if self.dataset is not None else []
         try:
-            with open(os.path.join(self.dataset_dir, "dataset_info.json"), "r") as f:
+            with open(os.path.join(self.dataset_dir, DATA_CONFIG), "r") as f:
                 dataset_info = json.load(f)
-        except Exception:
+        except Exception as err:
             if self.dataset is not None:
-                raise ValueError("Cannot find dataset_info.json in `dataset_dir`.")
+                raise ValueError("Cannot open {} due to {}.".format(os.path.join(self.dataset_dir, DATA_CONFIG), str(err)))
             dataset_info = None
 
         prompt_list = self.system_prompt.split("|") if self.system_prompt else [None]
@@ -147,7 +150,7 @@ class DataArguments:
         self.dataset_list: List[DatasetAttr] = []
         for i, name in enumerate(dataset_names):
             if name not in dataset_info:
-                raise ValueError("Undefined dataset {} in dataset_info.json.".format(name))
+                raise ValueError("Undefined dataset {} in {}.".format(name, DATA_CONFIG))
 
             if "hf_hub_url" in dataset_info[name]:
                 dataset_attr = DatasetAttr("hf_hub", dataset_name=dataset_info[name]["hf_hub_url"])

diff --git a/src/llmtuner/webui/common.py b/src/llmtuner/webui/common.py
index 8788754a..40f10c6a 100644
--- a/src/llmtuner/webui/common.py
+++ b/src/llmtuner/webui/common.py
@@ -11,14 +11,19 @@ from transformers.utils import (
     ADAPTER_SAFE_WEIGHTS_NAME
 )
 
-from llmtuner.extras.constants import DEFAULT_MODULE, DEFAULT_TEMPLATE, SUPPORTED_MODELS, TRAINING_STAGES
+from llmtuner.extras.constants import (
+    DEFAULT_MODULE,
+    DEFAULT_TEMPLATE,
+    SUPPORTED_MODELS,
+    TRAINING_STAGES
+)
+from llmtuner.hparams.data_args import DATA_CONFIG
 
 DEFAULT_CACHE_DIR = "cache"
 DEFAULT_DATA_DIR = "data"
 DEFAULT_SAVE_DIR = "saves"
 USER_CONFIG = "user.config"
-DATA_CONFIG = "dataset_info.json"
 
 CKPT_NAMES = [
     WEIGHTS_NAME,
     WEIGHTS_INDEX_NAME,
@@ -94,7 +99,7 @@ def load_dataset_info(dataset_dir: str) -> Dict[str, Dict[str, Any]]:
     try:
         with open(os.path.join(dataset_dir, DATA_CONFIG), "r", encoding="utf-8") as f:
             return json.load(f)
     except Exception as err:
-        print("Cannot open {} in {} due to {}.".format(DATA_CONFIG, dataset_dir, str(err)))
+        print("Cannot open {} due to {}.".format(os.path.join(dataset_dir, DATA_CONFIG), str(err)))
         return {}
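With the constant hoisted into data_args.py, the CLI argument parser and the web UI resolve the same config file and emit the same "Cannot open ... due to ..." hint on failure. A closing sketch:

    import os
    from llmtuner.hparams.data_args import DATA_CONFIG

    # Both entry points now build identical paths from the shared constant.
    print(os.path.join("data", DATA_CONFIG))  # data/dataset_info.json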