From 4adec327de1937eefbfaa7c928edf44d549e3c93 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Tue, 21 May 2024 17:53:06 +0800 Subject: [PATCH 1/4] fix #3847 Former-commit-id: d206b306ca4eadc8b3d4feaf490ad12f9452e562 --- README.md | 2 +- README_zh.md | 2 +- src/llamafactory/extras/misc.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index de846645..b543afaa 100644 --- a/README.md +++ b/README.md @@ -289,7 +289,7 @@ huggingface-cli login | datasets | 2.14.3 | 2.19.1 | | accelerate | 0.27.2 | 0.30.1 | | peft | 0.9.0 | 0.11.1 | -| trl | 0.8.1 | 0.8.6 | +| trl | 0.8.2 | 0.8.6 | | Optional | Minimum | Recommend | | ------------ | ------- | --------- | diff --git a/README_zh.md b/README_zh.md index 26db95d7..2eb2cec0 100644 --- a/README_zh.md +++ b/README_zh.md @@ -289,7 +289,7 @@ huggingface-cli login | datasets | 2.14.3 | 2.19.1 | | accelerate | 0.27.2 | 0.30.1 | | peft | 0.9.0 | 0.11.1 | -| trl | 0.8.1 | 0.8.6 | +| trl | 0.8.2 | 0.8.6 | | 可选项 | 至少 | 推荐 | | ------------ | ------- | --------- | diff --git a/src/llamafactory/extras/misc.py b/src/llamafactory/extras/misc.py index 0addf315..0dc07d28 100644 --- a/src/llamafactory/extras/misc.py +++ b/src/llamafactory/extras/misc.py @@ -65,7 +65,7 @@ def check_dependencies() -> None: require_version("datasets>=2.14.3", "To fix: pip install datasets>=2.14.3") require_version("accelerate>=0.27.2", "To fix: pip install accelerate>=0.27.2") require_version("peft>=0.10.0", "To fix: pip install peft>=0.10.0") - require_version("trl>=0.8.1", "To fix: pip install trl>=0.8.1") + require_version("trl>=0.8.2", "To fix: pip install trl>=0.8.2") def count_parameters(model: torch.nn.Module) -> Tuple[int, int]: From cccce564bd4bb17e789caa1a666bb205fbc2e7dc Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Tue, 21 May 2024 18:22:32 +0800 Subject: [PATCH 2/4] update wechat Former-commit-id: 6613349562194b48c5fc57aa68e620b8fa83fc0a --- README.md | 2 ++ README_zh.md | 2 ++ 2 files changed, 4 insertions(+) diff --git a/README.md b/README.md index b543afaa..ecb87e2f 100644 --- a/README.md +++ b/README.md @@ -345,6 +345,8 @@ To enable FlashAttention-2 on the Windows platform, you need to install the prec
For Ascend NPU users +Join [NPU user group](assets/wechat_npu.jpg). + To utilize Ascend NPU devices for (distributed) training and inference, you need to install the **[torch-npu](https://gitee.com/ascend/pytorch)** library and the **[Ascend CANN Kernels](https://www.hiascend.com/developer/download/community/result?module=cann)**. | Requirement | Minimum | Recommend | diff --git a/README_zh.md b/README_zh.md index 2eb2cec0..3ef92697 100644 --- a/README_zh.md +++ b/README_zh.md @@ -345,6 +345,8 @@ pip install https://github.com/jllllll/bitsandbytes-windows-webui/releases/downl
昇腾 NPU 用户指南 +加入 [NPU 用户群](assets/wechat_npu.jpg)。 + 如果使用昇腾 NPU 设备进行(分布式)训练或推理,需要安装 **[torch-npu](https://gitee.com/ascend/pytorch)** 库和 **[Ascend CANN Kernels](https://www.hiascend.com/developer/download/community/result?module=cann)**。 | 依赖项 | 至少 | 推荐 | From 09e78272c2bfed79a72f4ebcda1fb9da7f2980e4 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Tue, 21 May 2024 18:30:59 +0800 Subject: [PATCH 3/4] Update README_zh.md Former-commit-id: 34c4ba6bf9bb89170446fb396aa06ae44d251de0 --- data/README_zh.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data/README_zh.md b/data/README_zh.md index d8a2419e..aff6fdb1 100644 --- a/data/README_zh.md +++ b/data/README_zh.md @@ -7,7 +7,7 @@ "hf_hub_url": "Hugging Face 的数据集仓库地址(若指定,则忽略 script_url 和 file_name)", "ms_hub_url": "ModelScope 的数据集仓库地址(若指定,则忽略 script_url 和 file_name)", "script_url": "包含数据加载脚本的本地文件夹名称(若指定,则忽略 file_name)", - "file_name": "该目录下数据集文件的名称(若上述参数未指定,则此项必需)", + "file_name": "该目录下数据集文件夹或文件的名称(若上述参数未指定,则此项必需)", "formatting": "数据集格式(可选,默认:alpaca,可以为 alpaca 或 sharegpt)", "ranking": "是否为偏好数据集(可选,默认:False)", "subset": "数据集子集的名称(可选,默认:None)", From 2b65f8bd5cc1807ab5ed1c2cb9cedf09b9138d9a Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Tue, 21 May 2024 20:03:09 +0800 Subject: [PATCH 4/4] fix paligemma sft Former-commit-id: 60682d04414be37e611d6470618a8d599703942b --- src/llamafactory/data/preprocess.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llamafactory/data/preprocess.py b/src/llamafactory/data/preprocess.py index 4bc5ad3c..7bf9d4bc 100644 --- a/src/llamafactory/data/preprocess.py +++ b/src/llamafactory/data/preprocess.py @@ -89,7 +89,7 @@ def preprocess_supervised_dataset( if processor is not None and hasattr(processor, "image_seq_length"): # paligemma case image_token_id = tokenizer.convert_tokens_to_ids(IMAGE_TOKEN) input_ids += [image_token_id] * getattr(processor, "image_seq_length") - labels += [image_token_id] * getattr(processor, "image_seq_length") + labels += [IGNORE_INDEX] * getattr(processor, "image_seq_length") for turn_idx, (source_ids, target_ids) in enumerate( template.encode_multiturn(