diff --git a/README.md b/README.md
index 13ead458..92bbcc88 100644
--- a/README.md
+++ b/README.md
@@ -26,7 +26,7 @@ https://github.com/user-attachments/assets/7c96b465-9df7-45f4-8053-bf03e58386d3

 Choose your path:

 - **Colab**: https://colab.research.google.com/drive/1eRTPn37ltBbYsISy9Aw2NuI2Aq5CQrD9?usp=sharing
-- **PAI-DSW**: https://gallery.pai-ml.com/#/preview/deepLearning/nlp/llama_factory
+- **PAI-DSW**: [Llama3 Example](https://gallery.pai-ml.com/#/preview/deepLearning/nlp/llama_factory) | [Qwen2-VL Example](https://gallery.pai-ml.com/#/preview/deepLearning/nlp/llama_factory_qwen2vl)
 - **Local machine**: Please refer to [usage](#getting-started)
 - **Documentation (WIP)**: https://llamafactory.readthedocs.io/zh-cn/latest/
@@ -175,7 +175,7 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/
 | [InternLM2/InternLM2.5](https://huggingface.co/internlm) | 7B/20B | intern2 |
 | [Llama](https://github.com/facebookresearch/llama) | 7B/13B/33B/65B | - |
 | [Llama 2](https://huggingface.co/meta-llama) | 7B/13B/70B | llama2 |
-| [Llama 3/Llama 3.1](https://huggingface.co/meta-llama) | 8B/70B | llama3 |
+| [Llama 3/Llama 3.1/Llama 3.2](https://huggingface.co/meta-llama) | 1B/3B/8B/70B | llama3 |
 | [LLaVA-1.5](https://huggingface.co/llava-hf) | 7B/13B | llava |
 | [MiniCPM](https://huggingface.co/openbmb) | 1B/2B/4B | cpm/cpm3 |
 | [Mistral/Mixtral](https://huggingface.co/mistralai) | 7B/8x7B/8x22B | mistral |
@@ -183,8 +183,7 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/
 | [PaliGemma](https://huggingface.co/google) | 3B | paligemma |
 | [Phi-1.5/Phi-2](https://huggingface.co/microsoft) | 1.3B/2.7B | - |
 | [Phi-3](https://huggingface.co/microsoft) | 4B/7B/14B | phi |
-| [Qwen/Qwen1.5/Qwen2 (Code/Math/MoE)](https://huggingface.co/Qwen) | 0.5B/1.5B/4B/7B/14B/32B/72B/110B | qwen |
-| [Qwen2.5 (Code/Math)](https://huggingface.co/Qwen) | 0.5B/1.5B/3B/7B/14B/32B/72B | qwen |
+| [Qwen (1-2.5) (Code/Math/MoE)](https://huggingface.co/Qwen) | 0.5B/1.5B/3B/7B/14B/32B/72B/110B | qwen |
 | [Qwen2-VL](https://huggingface.co/Qwen) | 2B/7B/72B | qwen2_vl |
 | [StarCoder 2](https://huggingface.co/bigcode) | 3B/7B/15B | - |
 | [XVERSE](https://huggingface.co/xverse) | 7B/13B/65B | xverse |
diff --git a/README_zh.md b/README_zh.md
index 0c2f970f..0b02f35f 100644
--- a/README_zh.md
+++ b/README_zh.md
@@ -26,7 +26,7 @@ https://github.com/user-attachments/assets/e6ce34b0-52d5-4f3e-a830-592106c4c272
 选择你的打开方式:

 - **Colab**:https://colab.research.google.com/drive/1d5KQtbemerlSDSxZIfAaWXhKr30QypiK?usp=sharing
-- **PAI-DSW**:https://gallery.pai-ml.com/#/preview/deepLearning/nlp/llama_factory
+- **PAI-DSW**:[Llama3 案例](https://gallery.pai-ml.com/#/preview/deepLearning/nlp/llama_factory) | [Qwen2-VL 案例](https://gallery.pai-ml.com/#/preview/deepLearning/nlp/llama_factory_qwen2vl)
 - **本地机器**:请见[如何使用](#如何使用)
 - **入门教程**:https://zhuanlan.zhihu.com/p/695287607
 - **框架文档**:https://llamafactory.readthedocs.io/zh-cn/latest/
@@ -176,7 +176,7 @@ https://github.com/user-attachments/assets/e6ce34b0-52d5-4f3e-a830-592106c4c272
 | [InternLM2/InternLM2.5](https://huggingface.co/internlm) | 7B/20B | intern2 |
 | [Llama](https://github.com/facebookresearch/llama) | 7B/13B/33B/65B | - |
 | [Llama 2](https://huggingface.co/meta-llama) | 7B/13B/70B | llama2 |
-| [Llama 3/Llama 3.1](https://huggingface.co/meta-llama) | 8B/70B | llama3 |
+| [Llama 3/Llama 3.1/Llama 3.2](https://huggingface.co/meta-llama) | 1B/3B/8B/70B | llama3 |
 | [LLaVA-1.5](https://huggingface.co/llava-hf) | 7B/13B | llava |
 | [MiniCPM](https://huggingface.co/openbmb) | 1B/2B/4B | cpm/cpm3 |
 | [Mistral/Mixtral](https://huggingface.co/mistralai) | 7B/8x7B/8x22B | mistral |
@@ -184,8 +184,7 @@ https://github.com/user-attachments/assets/e6ce34b0-52d5-4f3e-a830-592106c4c272
 | [PaliGemma](https://huggingface.co/google) | 3B | paligemma |
 | [Phi-1.5/Phi-2](https://huggingface.co/microsoft) | 1.3B/2.7B | - |
 | [Phi-3](https://huggingface.co/microsoft) | 4B/7B/14B | phi |
-| [Qwen/Qwen1.5/Qwen2 (Code/Math/MoE)](https://huggingface.co/Qwen) | 0.5B/1.5B/4B/7B/14B/32B/72B/110B | qwen |
-| [Qwen2.5 (Code/Math)](https://huggingface.co/Qwen) | 0.5B/1.5B/3B/7B/14B/32B/72B | qwen |
+| [Qwen (1-2.5) (Code/Math/MoE)](https://huggingface.co/Qwen) | 0.5B/1.5B/3B/7B/14B/32B/72B/110B | qwen |
 | [Qwen2-VL](https://huggingface.co/Qwen) | 2B/7B/72B | qwen2_vl |
 | [StarCoder 2](https://huggingface.co/bigcode) | 3B/7B/15B | - |
 | [XVERSE](https://huggingface.co/xverse) | 7B/13B/65B | xverse |
diff --git a/assets/wechat.jpg b/assets/wechat.jpg
index f9ca1a21..f2d57406 100644
Binary files a/assets/wechat.jpg and b/assets/wechat.jpg differ
diff --git a/assets/wechat_npu.jpg b/assets/wechat_npu.jpg
index 32789dda..7708e35a 100644
Binary files a/assets/wechat_npu.jpg and b/assets/wechat_npu.jpg differ
diff --git a/docker/docker-cuda/Dockerfile b/docker/docker-cuda/Dockerfile
index a43baf21..d03ece88 100644
--- a/docker/docker-cuda/Dockerfile
+++ b/docker/docker-cuda/Dockerfile
@@ -12,6 +12,9 @@ ARG INSTALL_BNB=false
 ARG INSTALL_VLLM=false
 ARG INSTALL_DEEPSPEED=false
 ARG INSTALL_FLASHATTN=false
+ARG INSTALL_LIGER_KERNEL=false
+ARG INSTALL_HQQ=false
+ARG INSTALL_EETQ=false
 ARG PIP_INDEX=https://pypi.org/simple

 # Set the working directory
@@ -38,6 +41,15 @@ RUN EXTRA_PACKAGES="metrics"; \
     if [ "$INSTALL_DEEPSPEED" == "true" ]; then \
         EXTRA_PACKAGES="${EXTRA_PACKAGES},deepspeed"; \
     fi; \
+    if [ "$INSTALL_LIGER_KERNEL" == "true" ]; then \
+        EXTRA_PACKAGES="${EXTRA_PACKAGES},liger-kernel"; \
+    fi; \
+    if [ "$INSTALL_HQQ" == "true" ]; then \
+        EXTRA_PACKAGES="${EXTRA_PACKAGES},hqq"; \
+    fi; \
+    if [ "$INSTALL_EETQ" == "true" ]; then \
+        EXTRA_PACKAGES="${EXTRA_PACKAGES},eetq"; \
+    fi; \
     pip install -e ".[$EXTRA_PACKAGES]"

 # Rebuild flash attention
diff --git a/docker/docker-cuda/docker-compose.yml b/docker/docker-cuda/docker-compose.yml
index 16267dc3..7af06b74 100644
--- a/docker/docker-cuda/docker-compose.yml
+++ b/docker/docker-cuda/docker-compose.yml
@@ -8,6 +8,9 @@ services:
         INSTALL_VLLM: false
         INSTALL_DEEPSPEED: false
         INSTALL_FLASHATTN: false
+        INSTALL_LIGER_KERNEL: false
+        INSTALL_HQQ: false
+        INSTALL_EETQ: false
         PIP_INDEX: https://pypi.org/simple
     container_name: llamafactory
     volumes:
diff --git a/docker/docker-rocm/Dockerfile b/docker/docker-rocm/Dockerfile
index 3ce425fd..62bd78f5 100644
--- a/docker/docker-rocm/Dockerfile
+++ b/docker/docker-rocm/Dockerfile
@@ -10,6 +10,8 @@ ARG INSTALL_BNB=false
 ARG INSTALL_VLLM=false
 ARG INSTALL_DEEPSPEED=false
 ARG INSTALL_FLASHATTN=false
+ARG INSTALL_LIGER_KERNEL=false
+ARG INSTALL_HQQ=false
 ARG PIP_INDEX=https://pypi.org/simple

 # Set the working directory
@@ -36,6 +38,12 @@ RUN EXTRA_PACKAGES="metrics"; \
     if [ "$INSTALL_DEEPSPEED" == "true" ]; then \
         EXTRA_PACKAGES="${EXTRA_PACKAGES},deepspeed"; \
     fi; \
+    if [ "$INSTALL_LIGER_KERNEL" == "true" ]; then \
+        EXTRA_PACKAGES="${EXTRA_PACKAGES},liger-kernel"; \
+    fi; \
+    if [ "$INSTALL_HQQ" == "true" ]; then \
+        EXTRA_PACKAGES="${EXTRA_PACKAGES},hqq"; \
+    fi; \
     pip install -e ".[$EXTRA_PACKAGES]"

 # Rebuild flash attention
diff --git a/docker/docker-rocm/docker-compose.yml b/docker/docker-rocm/docker-compose.yml
index 923bd067..2a4ea960 100644
--- a/docker/docker-rocm/docker-compose.yml
+++ b/docker/docker-rocm/docker-compose.yml
@@ -8,6 +8,8 @@ services:
         INSTALL_VLLM: false
         INSTALL_DEEPSPEED: false
         INSTALL_FLASHATTN: false
+        INSTALL_LIGER_KERNEL: false
+        INSTALL_HQQ: false
         PIP_INDEX: https://pypi.org/simple
     container_name: llamafactory
     volumes:
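The new build arguments are plain Docker `ARG`s wired into the optional-extras install step. As a minimal sketch of enabling them when building the CUDA image directly (the image tag is illustrative; the `--build-arg` names come from the Dockerfile above):

```bash
# Build from the repository root; enable the optional liger-kernel,
# hqq, and eetq extras that the new ARGs gate.
docker build \
  --build-arg INSTALL_LIGER_KERNEL=true \
  --build-arg INSTALL_HQQ=true \
  --build-arg INSTALL_EETQ=true \
  -f docker/docker-cuda/Dockerfile \
  -t llamafactory:latest .
```

The docker-compose files expose the same toggles under `build.args`, so flipping the values there to `true` has the same effect when bringing the service up with `docker compose up -d`.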
diff --git a/src/llamafactory/data/template.py b/src/llamafactory/data/template.py
index 9b844d88..af70259b 100644
--- a/src/llamafactory/data/template.py
+++ b/src/llamafactory/data/template.py
@@ -49,6 +49,7 @@ class Template:
     stop_words: List[str]
     efficient_eos: bool
     replace_eos: bool
+    replace_jinja_template: bool
     mm_plugin: "BasePlugin"

     def encode_oneturn(
@@ -214,6 +215,7 @@ def _register_template(
     stop_words: Sequence[str] = [],
     efficient_eos: bool = False,
     replace_eos: bool = False,
+    replace_jinja_template: bool = True,
     mm_plugin: "BasePlugin" = get_mm_plugin(name="base"),
 ) -> None:
     r"""
@@ -263,6 +265,7 @@ def _register_template(
         stop_words=stop_words,
         efficient_eos=efficient_eos,
         replace_eos=replace_eos,
+        replace_jinja_template=replace_jinja_template,
         mm_plugin=mm_plugin,
     )

@@ -398,10 +401,11 @@ def get_template_and_fix_tokenizer(tokenizer: "PreTrainedTokenizer", data_args:
     if num_added_tokens > 0:
         logger.warning("New tokens have been added, make sure `resize_vocab` is True.")

-    try:
-        tokenizer.chat_template = _get_jinja_template(template, tokenizer)
-    except ValueError:
-        logger.info("Cannot add this chat template to tokenizer.")
+    if template.replace_jinja_template:
+        try:
+            tokenizer.chat_template = _get_jinja_template(template, tokenizer)
+        except ValueError:
+            logger.info("Cannot add this chat template to tokenizer.")

     return template

@@ -664,6 +668,7 @@ _register_template(
     format_separator=EmptyFormatter(slots=["\n"]),
     format_prefix=EmptyFormatter(slots=[{"bos_token"}]),
     efficient_eos=True,
+    replace_jinja_template=False,
 )


@@ -740,6 +745,7 @@ _register_template(
     format_prefix=EmptyFormatter(slots=[{"bos_token"}]),
     stop_words=["<|eot_id|>"],
     replace_eos=True,
+    replace_jinja_template=False,
 )


@@ -838,6 +844,7 @@ _register_template(
     default_system="You are a helpful assistant.",
     stop_words=["<|im_end|>"],
     replace_eos=True,
+    replace_jinja_template=False,
 )


@@ -850,6 +857,7 @@ _register_template(
     default_system="You are a helpful assistant.",
     stop_words=["<|im_end|>"],
     replace_eos=True,
+    replace_jinja_template=False,
     mm_plugin=get_mm_plugin(name="qwen2_vl", image_token="<|image_pad|>", video_token="<|video_pad|>"),
 )
diff --git a/src/llamafactory/extras/constants.py b/src/llamafactory/extras/constants.py
index e88f0da7..ef075cf9 100644
--- a/src/llamafactory/extras/constants.py
+++ b/src/llamafactory/extras/constants.py
@@ -798,6 +798,29 @@ register_model_group(
 )


+register_model_group(
+    models={
+        "LLaMA3.2-1B": {
+            DownloadSource.DEFAULT: "meta-llama/Llama-3.2-1B",
+            DownloadSource.MODELSCOPE: "LLM-Research/Llama-3.2-1B",
+        },
+        "LLaMA3.2-3B": {
+            DownloadSource.DEFAULT: "meta-llama/Llama-3.2-3B",
+            DownloadSource.MODELSCOPE: "LLM-Research/Llama-3.2-3B",
+        },
+        "LLaMA3.2-1B-Instruct": {
+            DownloadSource.DEFAULT: "meta-llama/Llama-3.2-1B-Instruct",
+            DownloadSource.MODELSCOPE: "LLM-Research/Llama-3.2-1B-Instruct",
+        },
+        "LLaMA3.2-3B-Instruct": {
+            DownloadSource.DEFAULT: "meta-llama/Llama-3.2-3B-Instruct",
+            DownloadSource.MODELSCOPE: "LLM-Research/Llama-3.2-3B-Instruct",
+        },
+    },
+    template="llama3",
+)
+
+
 register_model_group(
     models={
         "LLaVA1.5-7B-Chat": {
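With `replace_jinja_template=False`, `get_template_and_fix_tokenizer` no longer overwrites the chat template bundled with the checkpoint (the hunks above opt the llama3 and qwen-family templates out, while the default `True` preserves the old behavior everywhere else). A minimal sketch of the resulting behavior, assuming one of the Llama 3.2 checkpoints registered in constants.py; the snippet is illustrative, not part of the change:

```python
from transformers import AutoTokenizer

from llamafactory.data import get_template_and_fix_tokenizer
from llamafactory.hparams import DataArguments

# Model id taken from the constants.py entries above.
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-1B-Instruct")
shipped_template = tokenizer.chat_template

# "llama3" is now registered with replace_jinja_template=False, so the
# tokenizer keeps the chat template that ships with the checkpoint.
get_template_and_fix_tokenizer(tokenizer, DataArguments(template="llama3"))
assert tokenizer.chat_template == shipped_template
```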
"LLaVA1.5-7B-Chat": { diff --git a/tests/data/test_template.py b/tests/data/test_template.py index a327df22..18d03958 100644 --- a/tests/data/test_template.py +++ b/tests/data/test_template.py @@ -19,6 +19,7 @@ import pytest from transformers import AutoTokenizer from llamafactory.data import get_template_and_fix_tokenizer +from llamafactory.data.template import _get_jinja_template from llamafactory.hparams import DataArguments @@ -117,7 +118,8 @@ def test_encode_multiturn(use_fast: bool): def test_jinja_template(use_fast: bool): tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA, use_fast=use_fast) ref_tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA, use_fast=use_fast) - get_template_and_fix_tokenizer(tokenizer, DataArguments(template="llama3")) + template = get_template_and_fix_tokenizer(tokenizer, DataArguments(template="llama3")) + tokenizer.chat_template = _get_jinja_template(template, tokenizer) # llama3 template no replace assert tokenizer.chat_template != ref_tokenizer.chat_template assert tokenizer.apply_chat_template(MESSAGES) == ref_tokenizer.apply_chat_template(MESSAGES)