diff --git a/README.md b/README.md index de846645..ecb87e2f 100644 --- a/README.md +++ b/README.md @@ -289,7 +289,7 @@ huggingface-cli login | datasets | 2.14.3 | 2.19.1 | | accelerate | 0.27.2 | 0.30.1 | | peft | 0.9.0 | 0.11.1 | -| trl | 0.8.1 | 0.8.6 | +| trl | 0.8.2 | 0.8.6 | | Optional | Minimum | Recommend | | ------------ | ------- | --------- | @@ -345,6 +345,8 @@ To enable FlashAttention-2 on the Windows platform, you need to install the prec
For Ascend NPU users +Join the [NPU user group](assets/wechat_npu.jpg). + To utilize Ascend NPU devices for (distributed) training and inference, you need to install the **[torch-npu](https://gitee.com/ascend/pytorch)** library and the **[Ascend CANN Kernels](https://www.hiascend.com/developer/download/community/result?module=cann)**. | Requirement | Minimum | Recommend | diff --git a/README_zh.md b/README_zh.md index 26db95d7..3ef92697 100644 --- a/README_zh.md +++ b/README_zh.md @@ -289,7 +289,7 @@ huggingface-cli login | datasets | 2.14.3 | 2.19.1 | | accelerate | 0.27.2 | 0.30.1 | | peft | 0.9.0 | 0.11.1 | -| trl | 0.8.1 | 0.8.6 | +| trl | 0.8.2 | 0.8.6 | | 可选项 | 至少 | 推荐 | | ------------ | ------- | --------- | @@ -345,6 +345,8 @@ pip install https://github.com/jllllll/bitsandbytes-windows-webui/releases/downl
昇腾 NPU 用户指南 +加入 [NPU 用户群](assets/wechat_npu.jpg)。 + 如果使用昇腾 NPU 设备进行(分布式)训练或推理,需要安装 **[torch-npu](https://gitee.com/ascend/pytorch)** 库和 **[Ascend CANN Kernels](https://www.hiascend.com/developer/download/community/result?module=cann)**。 | 依赖项 | 至少 | 推荐 | diff --git a/assets/wechat.jpg b/assets/wechat.jpg index 63a44b5e..a5d44ade 100644 Binary files a/assets/wechat.jpg and b/assets/wechat.jpg differ diff --git a/assets/wechat_npu.jpg b/assets/wechat_npu.jpg new file mode 100644 index 00000000..353e7603 Binary files /dev/null and b/assets/wechat_npu.jpg differ diff --git a/data/README_zh.md b/data/README_zh.md index d8a2419e..aff6fdb1 100644 --- a/data/README_zh.md +++ b/data/README_zh.md @@ -7,7 +7,7 @@ "hf_hub_url": "Hugging Face 的数据集仓库地址(若指定,则忽略 script_url 和 file_name)", "ms_hub_url": "ModelScope 的数据集仓库地址(若指定,则忽略 script_url 和 file_name)", "script_url": "包含数据加载脚本的本地文件夹名称(若指定,则忽略 file_name)", - "file_name": "该目录下数据集文件的名称(若上述参数未指定,则此项必需)", + "file_name": "该目录下数据集文件夹或文件的名称(若上述参数未指定,则此项必需)", "formatting": "数据集格式(可选,默认:alpaca,可以为 alpaca 或 sharegpt)", "ranking": "是否为偏好数据集(可选,默认:False)", "subset": "数据集子集的名称(可选,默认:None)", diff --git a/src/llamafactory/data/preprocess.py b/src/llamafactory/data/preprocess.py index 4bc5ad3c..7bf9d4bc 100644 --- a/src/llamafactory/data/preprocess.py +++ b/src/llamafactory/data/preprocess.py @@ -89,7 +89,7 @@ def preprocess_supervised_dataset( if processor is not None and hasattr(processor, "image_seq_length"): # paligemma case image_token_id = tokenizer.convert_tokens_to_ids(IMAGE_TOKEN) input_ids += [image_token_id] * getattr(processor, "image_seq_length") - labels += [image_token_id] * getattr(processor, "image_seq_length") + labels += [IGNORE_INDEX] * getattr(processor, "image_seq_length") for turn_idx, (source_ids, target_ids) in enumerate( template.encode_multiturn( diff --git a/src/llamafactory/extras/misc.py b/src/llamafactory/extras/misc.py index 0addf315..0dc07d28 100644 --- a/src/llamafactory/extras/misc.py +++ 
b/src/llamafactory/extras/misc.py @@ -65,7 +65,7 @@ def check_dependencies() -> None: require_version("datasets>=2.14.3", "To fix: pip install datasets>=2.14.3") require_version("accelerate>=0.27.2", "To fix: pip install accelerate>=0.27.2") require_version("peft>=0.10.0", "To fix: pip install peft>=0.10.0") - require_version("trl>=0.8.1", "To fix: pip install trl>=0.8.1") + require_version("trl>=0.8.2", "To fix: pip install trl>=0.8.2") def count_parameters(model: torch.nn.Module) -> Tuple[int, int]: