Merge branches 'pixtral-patch' and 'pixtral-patch' of https://github.com/Kuangdd01/LLaMA-Factory-X into pixtral-patch

Former-commit-id: 5e64b0c37165a50296036a6e09e09193fb2ad644
Kingsley 2024-09-26 12:18:25 +08:00
commit 35e44143fd
11 changed files with 69 additions and 13 deletions

@@ -26,7 +26,7 @@ https://github.com/user-attachments/assets/7c96b465-9df7-45f4-8053-bf03e58386d3
Choose your path:
- **Colab**: https://colab.research.google.com/drive/1eRTPn37ltBbYsISy9Aw2NuI2Aq5CQrD9?usp=sharing
- **PAI-DSW**: https://gallery.pai-ml.com/#/preview/deepLearning/nlp/llama_factory
- **PAI-DSW**: [Llama3 Example](https://gallery.pai-ml.com/#/preview/deepLearning/nlp/llama_factory) | [Qwen2-VL Example](https://gallery.pai-ml.com/#/preview/deepLearning/nlp/llama_factory_qwen2vl)
- **Local machine**: Please refer to [usage](#getting-started)
- **Documentation (WIP)**: https://llamafactory.readthedocs.io/zh-cn/latest/
@@ -175,7 +175,7 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/
| [InternLM2/InternLM2.5](https://huggingface.co/internlm) | 7B/20B | intern2 |
| [Llama](https://github.com/facebookresearch/llama) | 7B/13B/33B/65B | - |
| [Llama 2](https://huggingface.co/meta-llama) | 7B/13B/70B | llama2 |
| [Llama 3/Llama 3.1](https://huggingface.co/meta-llama) | 8B/70B | llama3 |
| [Llama 3/Llama 3.1/Llama 3.2](https://huggingface.co/meta-llama) | 1B/3B/8B/70B | llama3 |
| [LLaVA-1.5](https://huggingface.co/llava-hf) | 7B/13B | llava |
| [MiniCPM](https://huggingface.co/openbmb) | 1B/2B/4B | cpm/cpm3 |
| [Mistral/Mixtral](https://huggingface.co/mistralai) | 7B/8x7B/8x22B | mistral |
@@ -183,8 +183,7 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/
| [PaliGemma](https://huggingface.co/google) | 3B | paligemma |
| [Phi-1.5/Phi-2](https://huggingface.co/microsoft) | 1.3B/2.7B | - |
| [Phi-3](https://huggingface.co/microsoft) | 4B/7B/14B | phi |
| [Qwen/Qwen1.5/Qwen2 (Code/Math/MoE)](https://huggingface.co/Qwen) | 0.5B/1.5B/4B/7B/14B/32B/72B/110B | qwen |
| [Qwen2.5 (Code/Math)](https://huggingface.co/Qwen) | 0.5B/1.5B/3B/7B/14B/32B/72B | qwen |
| [Qwen (1-2.5) (Code/Math/MoE)](https://huggingface.co/Qwen) | 0.5B/1.5B/3B/7B/14B/32B/72B/110B | qwen |
| [Qwen2-VL](https://huggingface.co/Qwen) | 2B/7B/72B | qwen2_vl |
| [StarCoder 2](https://huggingface.co/bigcode) | 3B/7B/15B | - |
| [XVERSE](https://huggingface.co/xverse) | 7B/13B/65B | xverse |

@@ -26,7 +26,7 @@ https://github.com/user-attachments/assets/e6ce34b0-52d5-4f3e-a830-592106c4c272
Choose your path:
- **Colab**: https://colab.research.google.com/drive/1d5KQtbemerlSDSxZIfAaWXhKr30QypiK?usp=sharing
- **PAI-DSW**: https://gallery.pai-ml.com/#/preview/deepLearning/nlp/llama_factory
- **PAI-DSW**: [Llama3 Example](https://gallery.pai-ml.com/#/preview/deepLearning/nlp/llama_factory) | [Qwen2-VL Example](https://gallery.pai-ml.com/#/preview/deepLearning/nlp/llama_factory_qwen2vl)
- **Local machine**: Please refer to [usage](#如何使用)
- **Getting started tutorial**: https://zhuanlan.zhihu.com/p/695287607
- **Documentation**: https://llamafactory.readthedocs.io/zh-cn/latest/
@@ -176,7 +176,7 @@ https://github.com/user-attachments/assets/e6ce34b0-52d5-4f3e-a830-592106c4c272
| [InternLM2/InternLM2.5](https://huggingface.co/internlm) | 7B/20B | intern2 |
| [Llama](https://github.com/facebookresearch/llama) | 7B/13B/33B/65B | - |
| [Llama 2](https://huggingface.co/meta-llama) | 7B/13B/70B | llama2 |
| [Llama 3/Llama 3.1](https://huggingface.co/meta-llama) | 8B/70B | llama3 |
| [Llama 3/Llama 3.1/Llama 3.2](https://huggingface.co/meta-llama) | 1B/3B/8B/70B | llama3 |
| [LLaVA-1.5](https://huggingface.co/llava-hf) | 7B/13B | llava |
| [MiniCPM](https://huggingface.co/openbmb) | 1B/2B/4B | cpm/cpm3 |
| [Mistral/Mixtral](https://huggingface.co/mistralai) | 7B/8x7B/8x22B | mistral |
@@ -184,8 +184,7 @@ https://github.com/user-attachments/assets/e6ce34b0-52d5-4f3e-a830-592106c4c272
| [PaliGemma](https://huggingface.co/google) | 3B | paligemma |
| [Phi-1.5/Phi-2](https://huggingface.co/microsoft) | 1.3B/2.7B | - |
| [Phi-3](https://huggingface.co/microsoft) | 4B/7B/14B | phi |
| [Qwen/Qwen1.5/Qwen2 (Code/Math/MoE)](https://huggingface.co/Qwen) | 0.5B/1.5B/4B/7B/14B/32B/72B/110B | qwen |
| [Qwen2.5 (Code/Math)](https://huggingface.co/Qwen) | 0.5B/1.5B/3B/7B/14B/32B/72B | qwen |
| [Qwen (1-2.5) (Code/Math/MoE)](https://huggingface.co/Qwen) | 0.5B/1.5B/3B/7B/14B/32B/72B/110B | qwen |
| [Qwen2-VL](https://huggingface.co/Qwen) | 2B/7B/72B | qwen2_vl |
| [StarCoder 2](https://huggingface.co/bigcode) | 3B/7B/15B | - |
| [XVERSE](https://huggingface.co/xverse) | 7B/13B/65B | xverse |

Binary file not shown (image asset updated: 145 KiB → 164 KiB).

Binary file not shown (image asset updated: 149 KiB → 167 KiB).

@@ -12,6 +12,9 @@ ARG INSTALL_BNB=false
ARG INSTALL_VLLM=false
ARG INSTALL_DEEPSPEED=false
ARG INSTALL_FLASHATTN=false
ARG INSTALL_LIGER_KERNEL=false
ARG INSTALL_HQQ=false
ARG INSTALL_EETQ=false
ARG PIP_INDEX=https://pypi.org/simple
# Set the working directory
@@ -38,6 +41,15 @@ RUN EXTRA_PACKAGES="metrics"; \
if [ "$INSTALL_DEEPSPEED" == "true" ]; then \
EXTRA_PACKAGES="${EXTRA_PACKAGES},deepspeed"; \
fi; \
if [ "$INSTALL_LIGER_KERNEL" == "true" ]; then \
EXTRA_PACKAGES="${EXTRA_PACKAGES},liger-kernel"; \
fi; \
if [ "$INSTALL_HQQ" == "true" ]; then \
EXTRA_PACKAGES="${EXTRA_PACKAGES},hqq"; \
fi; \
if [ "$INSTALL_EETQ" == "true" ]; then \
EXTRA_PACKAGES="${EXTRA_PACKAGES},eetq"; \
fi; \
pip install -e ".[$EXTRA_PACKAGES]"
# Rebuild flash attention
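The three new build arguments follow this Dockerfile's existing opt-in pattern: each defaults to `false`, and setting one to `true` at build time (e.g. `docker build --build-arg INSTALL_LIGER_KERNEL=true .`, or via the matching `args:` entries in the compose files below) appends the corresponding extra (`liger-kernel`, `hqq`, `eetq`) to the `pip install -e ".[...]"` invocation.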

@@ -8,6 +8,9 @@ services:
INSTALL_VLLM: false
INSTALL_DEEPSPEED: false
INSTALL_FLASHATTN: false
INSTALL_LIGER_KERNEL: false
INSTALL_HQQ: false
INSTALL_EETQ: false
PIP_INDEX: https://pypi.org/simple
container_name: llamafactory
volumes:

@@ -10,6 +10,8 @@ ARG INSTALL_BNB=false
ARG INSTALL_VLLM=false
ARG INSTALL_DEEPSPEED=false
ARG INSTALL_FLASHATTN=false
ARG INSTALL_LIGER_KERNEL=false
ARG INSTALL_HQQ=false
ARG PIP_INDEX=https://pypi.org/simple
# Set the working directory
@@ -36,6 +38,12 @@ RUN EXTRA_PACKAGES="metrics"; \
if [ "$INSTALL_DEEPSPEED" == "true" ]; then \
EXTRA_PACKAGES="${EXTRA_PACKAGES},deepspeed"; \
fi; \
if [ "$INSTALL_LIGER_KERNEL" == "true" ]; then \
EXTRA_PACKAGES="${EXTRA_PACKAGES},liger-kernel"; \
fi; \
if [ "$INSTALL_HQQ" == "true" ]; then \
EXTRA_PACKAGES="${EXTRA_PACKAGES},hqq"; \
fi; \
pip install -e ".[$EXTRA_PACKAGES]"
# Rebuild flash attention

@@ -8,6 +8,8 @@ services:
INSTALL_VLLM: false
INSTALL_DEEPSPEED: false
INSTALL_FLASHATTN: false
INSTALL_LIGER_KERNEL: false
INSTALL_HQQ: false
PIP_INDEX: https://pypi.org/simple
container_name: llamafactory
volumes:

@@ -49,6 +49,7 @@ class Template:
stop_words: List[str]
efficient_eos: bool
replace_eos: bool
replace_jinja_template: bool
mm_plugin: "BasePlugin"
def encode_oneturn(
@@ -214,6 +215,7 @@ def _register_template(
stop_words: Sequence[str] = [],
efficient_eos: bool = False,
replace_eos: bool = False,
replace_jinja_template: bool = True,
mm_plugin: "BasePlugin" = get_mm_plugin(name="base"),
) -> None:
r"""
@@ -263,6 +265,7 @@
stop_words=stop_words,
efficient_eos=efficient_eos,
replace_eos=replace_eos,
replace_jinja_template=replace_jinja_template,
mm_plugin=mm_plugin,
)
@@ -398,10 +401,11 @@ def get_template_and_fix_tokenizer(tokenizer: "PreTrainedTokenizer", data_args:
if num_added_tokens > 0:
logger.warning("New tokens have been added, make sure `resize_vocab` is True.")
try:
tokenizer.chat_template = _get_jinja_template(template, tokenizer)
except ValueError:
logger.info("Cannot add this chat template to tokenizer.")
if template.replace_jinja_template:
try:
tokenizer.chat_template = _get_jinja_template(template, tokenizer)
except ValueError:
logger.info("Cannot add this chat template to tokenizer.")
return template
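With this guard, `get_template_and_fix_tokenizer` only regenerates `tokenizer.chat_template` for templates registered with `replace_jinja_template=True`; the registrations below opt out (the llama3 template among them, as the updated test notes), so those tokenizers keep the chat template shipped with the checkpoint. A minimal sketch of the resulting behavior, assuming a locally available Llama 3 instruct checkpoint (the repo id here is illustrative):

```python
from transformers import AutoTokenizer

from llamafactory.data import get_template_and_fix_tokenizer
from llamafactory.hparams import DataArguments

# Illustrative checkpoint; any Llama-3-family tokenizer that ships its own
# chat template behaves the same way.
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")
shipped_template = tokenizer.chat_template

template = get_template_and_fix_tokenizer(tokenizer, DataArguments(template="llama3"))

# llama3 is registered with replace_jinja_template=False, so the call above
# leaves the tokenizer's built-in chat template untouched.
assert not template.replace_jinja_template
assert tokenizer.chat_template == shipped_template
```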
@@ -664,6 +668,7 @@ _register_template(
format_separator=EmptyFormatter(slots=["<end_of_turn>\n"]),
format_prefix=EmptyFormatter(slots=[{"bos_token"}]),
efficient_eos=True,
replace_jinja_template=False,
)
@@ -740,6 +745,7 @@
format_prefix=EmptyFormatter(slots=[{"bos_token"}]),
stop_words=["<|eot_id|>"],
replace_eos=True,
replace_jinja_template=False,
)
@@ -838,6 +844,7 @@
default_system="You are a helpful assistant.",
stop_words=["<|im_end|>"],
replace_eos=True,
replace_jinja_template=False,
)
@@ -850,6 +857,7 @@
default_system="You are a helpful assistant.",
stop_words=["<|im_end|>"],
replace_eos=True,
replace_jinja_template=False,
mm_plugin=get_mm_plugin(name="qwen2_vl", image_token="<|image_pad|>", video_token="<|video_pad|>"),
)

@@ -798,6 +798,29 @@ register_model_group(
)
register_model_group(
models={
"LLaMA3.2-1B": {
DownloadSource.DEFAULT: "meta-llama/Llama-3.2-1B",
DownloadSource.MODELSCOPE: "LLM-Research/Llama-3.2-1B",
},
"LLaMA3.2-3B": {
DownloadSource.DEFAULT: "meta-llama/Llama-3.2-3B",
DownloadSource.MODELSCOPE: "LLM-Research/Llama-3.2-3B",
},
"LLaMA3.2-1B-Instruct": {
DownloadSource.DEFAULT: "meta-llama/Llama-3.2-1B-Instruct",
DownloadSource.MODELSCOPE: "LLM-Research/Llama-3.2-1B-Instruct",
},
"LLaMA3.2-3B-Instruct": {
DownloadSource.DEFAULT: "meta-llama/Llama-3.2-3B-Instruct",
DownloadSource.MODELSCOPE: "LLM-Research/Llama-3.2-3B-Instruct",
},
},
template="llama3",
)
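Each entry in this group maps a display name to its Hugging Face repo (`DownloadSource.DEFAULT`) and its ModelScope mirror (`DownloadSource.MODELSCOPE`), and all four share the `llama3` chat template. A hedged sketch of how such a two-source mapping can be resolved; the dict literal mirrors the diff, but the lookup helper is illustrative rather than the project's actual code:

```python
# Illustrative lookup over the kind of mapping registered above; the real
# project resolves sources through its DownloadSource enum and hub settings.
SUPPORTED_MODELS = {
    "LLaMA3.2-1B-Instruct": {
        "default": "meta-llama/Llama-3.2-1B-Instruct",
        "modelscope": "LLM-Research/Llama-3.2-1B-Instruct",
    },
}

def resolve_repo(name: str, use_modelscope: bool = False) -> str:
    """Pick the ModelScope mirror when requested, else the Hugging Face repo."""
    sources = SUPPORTED_MODELS[name]
    return sources["modelscope" if use_modelscope else "default"]

print(resolve_repo("LLaMA3.2-1B-Instruct", use_modelscope=True))
# -> LLM-Research/Llama-3.2-1B-Instruct
```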
register_model_group(
models={
"LLaVA1.5-7B-Chat": {

@@ -19,6 +19,7 @@ import pytest
from transformers import AutoTokenizer
from llamafactory.data import get_template_and_fix_tokenizer
from llamafactory.data.template import _get_jinja_template
from llamafactory.hparams import DataArguments
@@ -117,7 +118,8 @@ def test_encode_multiturn(use_fast: bool):
def test_jinja_template(use_fast: bool):
tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA, use_fast=use_fast)
ref_tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA, use_fast=use_fast)
get_template_and_fix_tokenizer(tokenizer, DataArguments(template="llama3"))
template = get_template_and_fix_tokenizer(tokenizer, DataArguments(template="llama3"))
tokenizer.chat_template = _get_jinja_template(template, tokenizer)  # llama3 no longer replaces the jinja template, so build it explicitly
assert tokenizer.chat_template != ref_tokenizer.chat_template
assert tokenizer.apply_chat_template(MESSAGES) == ref_tokenizer.apply_chat_template(MESSAGES)