From 4adec327de1937eefbfaa7c928edf44d549e3c93 Mon Sep 17 00:00:00 2001
From: hiyouga <467089858@qq.com>
Date: Tue, 21 May 2024 17:53:06 +0800
Subject: [PATCH 1/4] fix #3847: raise minimum trl version to 0.8.2

Former-commit-id: d206b306ca4eadc8b3d4feaf490ad12f9452e562
---
 README.md                       | 2 +-
 README_zh.md                    | 2 +-
 src/llamafactory/extras/misc.py | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/README.md b/README.md
index de846645..b543afaa 100644
--- a/README.md
+++ b/README.md
@@ -289,7 +289,7 @@ huggingface-cli login
 | datasets     | 2.14.3  | 2.19.1    |
 | accelerate   | 0.27.2  | 0.30.1    |
 | peft         | 0.9.0   | 0.11.1    |
-| trl          | 0.8.1   | 0.8.6     |
+| trl          | 0.8.2   | 0.8.6     |
 
 | Optional     | Minimum | Recommend |
 | ------------ | ------- | --------- |
diff --git a/README_zh.md b/README_zh.md
index 26db95d7..2eb2cec0 100644
--- a/README_zh.md
+++ b/README_zh.md
@@ -289,7 +289,7 @@ huggingface-cli login
 | datasets     | 2.14.3  | 2.19.1    |
 | accelerate   | 0.27.2  | 0.30.1    |
 | peft         | 0.9.0   | 0.11.1    |
-| trl          | 0.8.1   | 0.8.6     |
+| trl          | 0.8.2   | 0.8.6     |
 
 | 可选项       | 至少     | 推荐      |
 | ------------ | ------- | --------- |
diff --git a/src/llamafactory/extras/misc.py b/src/llamafactory/extras/misc.py
index 0addf315..0dc07d28 100644
--- a/src/llamafactory/extras/misc.py
+++ b/src/llamafactory/extras/misc.py
@@ -65,7 +65,7 @@ def check_dependencies() -> None:
         require_version("datasets>=2.14.3", "To fix: pip install datasets>=2.14.3")
         require_version("accelerate>=0.27.2", "To fix: pip install accelerate>=0.27.2")
         require_version("peft>=0.10.0", "To fix: pip install peft>=0.10.0")
-        require_version("trl>=0.8.1", "To fix: pip install trl>=0.8.1")
+        require_version("trl>=0.8.2", "To fix: pip install trl>=0.8.2")
 
 
 def count_parameters(model: torch.nn.Module) -> Tuple[int, int]:

From cccce564bd4bb17e789caa1a666bb205fbc2e7dc Mon Sep 17 00:00:00 2001
From: hiyouga <467089858@qq.com>
Date: Tue, 21 May 2024 18:22:32 +0800
Subject: [PATCH 2/4] update wechat: add NPU user group link to READMEs

Former-commit-id: 6613349562194b48c5fc57aa68e620b8fa83fc0a
---
 README.md    | 2 ++
 README_zh.md | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/README.md b/README.md
index b543afaa..ecb87e2f 100644
--- a/README.md
+++ b/README.md
@@ -345,6 +345,8 @@ To enable FlashAttention-2 on the Windows platform, you need to install the prec
 
 <details><summary>For Ascend NPU users</summary>
 
+Join the [NPU user group](assets/wechat_npu.jpg).
+
 To utilize Ascend NPU devices for (distributed) training and inference, you need to install the **[torch-npu](https://gitee.com/ascend/pytorch)** library and the **[Ascend CANN Kernels](https://www.hiascend.com/developer/download/community/result?module=cann)**.
 
 | Requirement  | Minimum | Recommend |
diff --git a/README_zh.md b/README_zh.md
index 2eb2cec0..3ef92697 100644
--- a/README_zh.md
+++ b/README_zh.md
@@ -345,6 +345,8 @@ pip install https://github.com/jllllll/bitsandbytes-windows-webui/releases/downl
 
 <details><summary>昇腾 NPU 用户指南</summary>
 
+加入 [NPU 用户群](assets/wechat_npu.jpg)。
+
 如果使用昇腾 NPU 设备进行（分布式）训练或推理，需要安装 **[torch-npu](https://gitee.com/ascend/pytorch)** 库和 **[Ascend CANN Kernels](https://www.hiascend.com/developer/download/community/result?module=cann)**。
 
 | 依赖项       | 至少     | 推荐      |

From 09e78272c2bfed79a72f4ebcda1fb9da7f2980e4 Mon Sep 17 00:00:00 2001
From: hiyouga <467089858@qq.com>
Date: Tue, 21 May 2024 18:30:59 +0800
Subject: [PATCH 3/4] Update data/README_zh.md: file_name may be a folder or a file

Former-commit-id: 34c4ba6bf9bb89170446fb396aa06ae44d251de0
---
 data/README_zh.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/data/README_zh.md b/data/README_zh.md
index d8a2419e..aff6fdb1 100644
--- a/data/README_zh.md
+++ b/data/README_zh.md
@@ -7,7 +7,7 @@
   "hf_hub_url": "Hugging Face 的数据集仓库地址（若指定，则忽略 script_url 和 file_name）",
   "ms_hub_url": "ModelScope 的数据集仓库地址（若指定，则忽略 script_url 和 file_name）",
   "script_url": "包含数据加载脚本的本地文件夹名称（若指定，则忽略 file_name）",
-  "file_name": "该目录下数据集文件的名称（若上述参数未指定，则此项必需）",
+  "file_name": "该目录下数据集文件夹或文件的名称（若上述参数未指定，则此项必需）",
   "formatting": "数据集格式（可选，默认：alpaca，可以为 alpaca 或 sharegpt）",
   "ranking": "是否为偏好数据集（可选，默认：False）",
   "subset": "数据集子集的名称（可选，默认：None）",

From 2b65f8bd5cc1807ab5ed1c2cb9cedf09b9138d9a Mon Sep 17 00:00:00 2001
From: hiyouga <467089858@qq.com>
Date: Tue, 21 May 2024 20:03:09 +0800
Subject: [PATCH 4/4] fix paligemma sft: use IGNORE_INDEX for image token labels

Former-commit-id: 60682d04414be37e611d6470618a8d599703942b
---
 src/llamafactory/data/preprocess.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/llamafactory/data/preprocess.py b/src/llamafactory/data/preprocess.py
index 4bc5ad3c..7bf9d4bc 100644
--- a/src/llamafactory/data/preprocess.py
+++ b/src/llamafactory/data/preprocess.py
@@ -89,7 +89,7 @@ def preprocess_supervised_dataset(
         if processor is not None and hasattr(processor, "image_seq_length"):  # paligemma case
             image_token_id = tokenizer.convert_tokens_to_ids(IMAGE_TOKEN)
             input_ids += [image_token_id] * getattr(processor, "image_seq_length")
-            labels += [image_token_id] * getattr(processor, "image_seq_length")
+            labels += [IGNORE_INDEX] * getattr(processor, "image_seq_length")
 
         for turn_idx, (source_ids, target_ids) in enumerate(
             template.encode_multiturn(