Mirror of https://github.com/hiyouga/LLaMA-Factory.git, synced 2025-08-23 06:12:50 +08:00
Merge branches 'pixtral-patch' and 'pixtral-patch' of https://github.com/Kuangdd01/LLaMA-Factory-X into pixtral-patch
Former-commit-id: 5e64b0c37165a50296036a6e09e09193fb2ad644
Commit: 35e44143fd
@@ -26,7 +26,7 @@ https://github.com/user-attachments/assets/7c96b465-9df7-45f4-8053-bf03e58386d3
 Choose your path:
 
 - **Colab**: https://colab.research.google.com/drive/1eRTPn37ltBbYsISy9Aw2NuI2Aq5CQrD9?usp=sharing
-- **PAI-DSW**: https://gallery.pai-ml.com/#/preview/deepLearning/nlp/llama_factory
+- **PAI-DSW**: [Llama3 Example](https://gallery.pai-ml.com/#/preview/deepLearning/nlp/llama_factory) | [Qwen2-VL Example](https://gallery.pai-ml.com/#/preview/deepLearning/nlp/llama_factory_qwen2vl)
 - **Local machine**: Please refer to [usage](#getting-started)
 - **Documentation (WIP)**: https://llamafactory.readthedocs.io/zh-cn/latest/
 
@@ -175,7 +175,7 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/
 | [InternLM2/InternLM2.5](https://huggingface.co/internlm) | 7B/20B | intern2 |
 | [Llama](https://github.com/facebookresearch/llama) | 7B/13B/33B/65B | - |
 | [Llama 2](https://huggingface.co/meta-llama) | 7B/13B/70B | llama2 |
-| [Llama 3/Llama 3.1](https://huggingface.co/meta-llama) | 8B/70B | llama3 |
+| [Llama 3/Llama 3.1/Llama3.2](https://huggingface.co/meta-llama) | 1B/3B/8B/70B | llama3 |
 | [LLaVA-1.5](https://huggingface.co/llava-hf) | 7B/13B | llava |
 | [MiniCPM](https://huggingface.co/openbmb) | 1B/2B/4B | cpm/cpm3 |
 | [Mistral/Mixtral](https://huggingface.co/mistralai) | 7B/8x7B/8x22B | mistral |
@@ -183,8 +183,7 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/
 | [PaliGemma](https://huggingface.co/google) | 3B | paligemma |
 | [Phi-1.5/Phi-2](https://huggingface.co/microsoft) | 1.3B/2.7B | - |
 | [Phi-3](https://huggingface.co/microsoft) | 4B/7B/14B | phi |
-| [Qwen/Qwen1.5/Qwen2 (Code/Math/MoE)](https://huggingface.co/Qwen) | 0.5B/1.5B/4B/7B/14B/32B/72B/110B | qwen |
-| [Qwen2.5 (Code/Math)](https://huggingface.co/Qwen) | 0.5B/1.5B/3B/7B/14B/32B/72B | qwen |
+| [Qwen (1-2.5) (Code/Math/MoE)](https://huggingface.co/Qwen) | 0.5B/1.5B/3B/7B/14B/32B/72B/110B | qwen |
 | [Qwen2-VL](https://huggingface.co/Qwen) | 2B/7B/72B | qwen2_vl |
 | [StarCoder 2](https://huggingface.co/bigcode) | 3B/7B/15B | - |
 | [XVERSE](https://huggingface.co/xverse) | 7B/13B/65B | xverse |
@@ -26,7 +26,7 @@ https://github.com/user-attachments/assets/e6ce34b0-52d5-4f3e-a830-592106c4c272
 Choose your path:
 
 - **Colab**: https://colab.research.google.com/drive/1d5KQtbemerlSDSxZIfAaWXhKr30QypiK?usp=sharing
-- **PAI-DSW**: https://gallery.pai-ml.com/#/preview/deepLearning/nlp/llama_factory
+- **PAI-DSW**: [Llama3 Example](https://gallery.pai-ml.com/#/preview/deepLearning/nlp/llama_factory) | [Qwen2-VL Example](https://gallery.pai-ml.com/#/preview/deepLearning/nlp/llama_factory_qwen2vl)
 - **Local machine**: Please refer to [How to use](#如何使用)
 - **Beginner tutorial**: https://zhuanlan.zhihu.com/p/695287607
 - **Framework documentation**: https://llamafactory.readthedocs.io/zh-cn/latest/
@@ -176,7 +176,7 @@ https://github.com/user-attachments/assets/e6ce34b0-52d5-4f3e-a830-592106c4c272
 | [InternLM2/InternLM2.5](https://huggingface.co/internlm) | 7B/20B | intern2 |
 | [Llama](https://github.com/facebookresearch/llama) | 7B/13B/33B/65B | - |
 | [Llama 2](https://huggingface.co/meta-llama) | 7B/13B/70B | llama2 |
-| [Llama 3/Llama 3.1](https://huggingface.co/meta-llama) | 8B/70B | llama3 |
+| [Llama 3/Llama 3.1/Llama3.2](https://huggingface.co/meta-llama) | 1B/3B/8B/70B | llama3 |
 | [LLaVA-1.5](https://huggingface.co/llava-hf) | 7B/13B | llava |
 | [MiniCPM](https://huggingface.co/openbmb) | 1B/2B/4B | cpm/cpm3 |
 | [Mistral/Mixtral](https://huggingface.co/mistralai) | 7B/8x7B/8x22B | mistral |
@@ -184,8 +184,7 @@ https://github.com/user-attachments/assets/e6ce34b0-52d5-4f3e-a830-592106c4c272
 | [PaliGemma](https://huggingface.co/google) | 3B | paligemma |
 | [Phi-1.5/Phi-2](https://huggingface.co/microsoft) | 1.3B/2.7B | - |
 | [Phi-3](https://huggingface.co/microsoft) | 4B/7B/14B | phi |
-| [Qwen/Qwen1.5/Qwen2 (Code/Math/MoE)](https://huggingface.co/Qwen) | 0.5B/1.5B/4B/7B/14B/32B/72B/110B | qwen |
-| [Qwen2.5 (Code/Math)](https://huggingface.co/Qwen) | 0.5B/1.5B/3B/7B/14B/32B/72B | qwen |
+| [Qwen (1-2.5) (Code/Math/MoE)](https://huggingface.co/Qwen) | 0.5B/1.5B/3B/7B/14B/32B/72B/110B | qwen |
 | [Qwen2-VL](https://huggingface.co/Qwen) | 2B/7B/72B | qwen2_vl |
 | [StarCoder 2](https://huggingface.co/bigcode) | 3B/7B/15B | - |
 | [XVERSE](https://huggingface.co/xverse) | 7B/13B/65B | xverse |
Binary file not shown (size: 145 KiB → 164 KiB).
Binary file not shown (size: 149 KiB → 167 KiB).
@@ -12,6 +12,9 @@ ARG INSTALL_BNB=false
 ARG INSTALL_VLLM=false
 ARG INSTALL_DEEPSPEED=false
 ARG INSTALL_FLASHATTN=false
+ARG INSTALL_LIGER_KERNEL=false
+ARG INSTALL_HQQ=false
+ARG INSTALL_EETQ=false
 ARG PIP_INDEX=https://pypi.org/simple
 
 # Set the working directory
@@ -38,6 +41,15 @@ RUN EXTRA_PACKAGES="metrics"; \
     if [ "$INSTALL_DEEPSPEED" == "true" ]; then \
         EXTRA_PACKAGES="${EXTRA_PACKAGES},deepspeed"; \
     fi; \
+    if [ "$INSTALL_LIGER_KERNEL" == "true" ]; then \
+        EXTRA_PACKAGES="${EXTRA_PACKAGES},liger-kernel"; \
+    fi; \
+    if [ "$INSTALL_HQQ" == "true" ]; then \
+        EXTRA_PACKAGES="${EXTRA_PACKAGES},hqq"; \
+    fi; \
+    if [ "$INSTALL_EETQ" == "true" ]; then \
+        EXTRA_PACKAGES="${EXTRA_PACKAGES},eetq"; \
+    fi; \
     pip install -e ".[$EXTRA_PACKAGES]"
 
 # Rebuild flash attention
@@ -8,6 +8,9 @@ services:
         INSTALL_VLLM: false
         INSTALL_DEEPSPEED: false
         INSTALL_FLASHATTN: false
+        INSTALL_LIGER_KERNEL: false
+        INSTALL_HQQ: false
+        INSTALL_EETQ: false
         PIP_INDEX: https://pypi.org/simple
     container_name: llamafactory
     volumes:
@@ -10,6 +10,8 @@ ARG INSTALL_BNB=false
 ARG INSTALL_VLLM=false
 ARG INSTALL_DEEPSPEED=false
 ARG INSTALL_FLASHATTN=false
+ARG INSTALL_LIGER_KERNEL=false
+ARG INSTALL_HQQ=false
 ARG PIP_INDEX=https://pypi.org/simple
 
 # Set the working directory
@@ -36,6 +38,12 @@ RUN EXTRA_PACKAGES="metrics"; \
     if [ "$INSTALL_DEEPSPEED" == "true" ]; then \
         EXTRA_PACKAGES="${EXTRA_PACKAGES},deepspeed"; \
     fi; \
+    if [ "$INSTALL_LIGER_KERNEL" == "true" ]; then \
+        EXTRA_PACKAGES="${EXTRA_PACKAGES},liger-kernel"; \
+    fi; \
+    if [ "$INSTALL_HQQ" == "true" ]; then \
+        EXTRA_PACKAGES="${EXTRA_PACKAGES},hqq"; \
+    fi; \
     pip install -e ".[$EXTRA_PACKAGES]"
 
 # Rebuild flash attention
@@ -8,6 +8,8 @@ services:
         INSTALL_VLLM: false
         INSTALL_DEEPSPEED: false
         INSTALL_FLASHATTN: false
+        INSTALL_LIGER_KERNEL: false
+        INSTALL_HQQ: false
         PIP_INDEX: https://pypi.org/simple
     container_name: llamafactory
     volumes:
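Taken together, the Dockerfile and docker-compose changes expose liger-kernel, hqq, and eetq as opt-in build flags on both the CUDA and NPU images (the NPU image omits eetq). A minimal Python sketch of the extras string the shell fragment assembles; the flag values below are arbitrary examples, not the defaults:

```python
# Mirrors the Dockerfile's RUN fragment: start from the mandatory "metrics"
# extra and append one pip extra per enabled INSTALL_* build flag.
flags = {
    "deepspeed": False,    # INSTALL_DEEPSPEED
    "liger-kernel": True,  # INSTALL_LIGER_KERNEL
    "hqq": True,           # INSTALL_HQQ
    "eetq": False,         # INSTALL_EETQ (CUDA image only)
}
extras = ["metrics"] + [name for name, enabled in flags.items() if enabled]
joined = ",".join(extras)
print(f'pip install -e ".[{joined}]"')  # -> pip install -e ".[metrics,liger-kernel,hqq]"
```

At build time the same selection is made with standard Docker build args, e.g. `docker build --build-arg INSTALL_HQQ=true .`, or by flipping the matching `INSTALL_*` value in the compose file.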
@@ -49,6 +49,7 @@ class Template:
     stop_words: List[str]
     efficient_eos: bool
     replace_eos: bool
+    replace_jinja_template: bool
     mm_plugin: "BasePlugin"
 
     def encode_oneturn(
@@ -214,6 +215,7 @@ def _register_template(
     stop_words: Sequence[str] = [],
     efficient_eos: bool = False,
     replace_eos: bool = False,
+    replace_jinja_template: bool = True,
     mm_plugin: "BasePlugin" = get_mm_plugin(name="base"),
 ) -> None:
     r"""
@@ -263,6 +265,7 @@ def _register_template(
         stop_words=stop_words,
         efficient_eos=efficient_eos,
         replace_eos=replace_eos,
+        replace_jinja_template=replace_jinja_template,
         mm_plugin=mm_plugin,
     )
 
@@ -398,10 +401,11 @@ def get_template_and_fix_tokenizer(tokenizer: "PreTrainedTokenizer", data_args:
     if num_added_tokens > 0:
         logger.warning("New tokens have been added, make sure `resize_vocab` is True.")
 
-    try:
-        tokenizer.chat_template = _get_jinja_template(template, tokenizer)
-    except ValueError:
-        logger.info("Cannot add this chat template to tokenizer.")
+    if template.replace_jinja_template:
+        try:
+            tokenizer.chat_template = _get_jinja_template(template, tokenizer)
+        except ValueError:
+            logger.info("Cannot add this chat template to tokenizer.")
 
     return template
 
@@ -664,6 +668,7 @@ _register_template(
     format_separator=EmptyFormatter(slots=["<end_of_turn>\n"]),
     format_prefix=EmptyFormatter(slots=[{"bos_token"}]),
     efficient_eos=True,
+    replace_jinja_template=False,
 )
 
 
@@ -740,6 +745,7 @@ _register_template(
     format_prefix=EmptyFormatter(slots=[{"bos_token"}]),
     stop_words=["<|eot_id|>"],
     replace_eos=True,
+    replace_jinja_template=False,
 )
 
 
@@ -838,6 +844,7 @@ _register_template(
     default_system="You are a helpful assistant.",
     stop_words=["<|im_end|>"],
     replace_eos=True,
+    replace_jinja_template=False,
 )
 
 
@@ -850,6 +857,7 @@ _register_template(
     default_system="You are a helpful assistant.",
     stop_words=["<|im_end|>"],
     replace_eos=True,
+    replace_jinja_template=False,
     mm_plugin=get_mm_plugin(name="qwen2_vl", image_token="<|image_pad|>", video_token="<|video_pad|>"),
 )
 
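Since `replace_jinja_template` defaults to True, existing templates keep the old behavior, while gemma, llama3, qwen, and qwen2_vl now opt out: for them, `get_template_and_fix_tokenizer` leaves the tokenizer's stock chat template untouched. A minimal sketch of the observable effect; the checkpoint name is illustrative, and any tokenizer that ships its own chat template behaves the same way:

```python
from transformers import AutoTokenizer

from llamafactory.data import get_template_and_fix_tokenizer
from llamafactory.hparams import DataArguments

# Illustrative checkpoint; substitute any Llama 3 tokenizer you have access to.
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-1B-Instruct")
stock_template = tokenizer.chat_template

template = get_template_and_fix_tokenizer(tokenizer, DataArguments(template="llama3"))

# llama3 registers replace_jinja_template=False, so the Jinja template
# shipped with the tokenizer is no longer overwritten.
assert tokenizer.chat_template == stock_template
```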
@@ -798,6 +798,29 @@ register_model_group(
 )
 
 
+register_model_group(
+    models={
+        "LLaMA3.2-1B": {
+            DownloadSource.DEFAULT: "meta-llama/Llama-3.2-1B",
+            DownloadSource.MODELSCOPE: "LLM-Research/Llama-3.2-1B",
+        },
+        "LLaMA3.2-3B": {
+            DownloadSource.DEFAULT: "meta-llama/Llama-3.2-3B",
+            DownloadSource.MODELSCOPE: "LLM-Research/Llama-3.2-3B",
+        },
+        "LLaMA3.2-1B-Instruct": {
+            DownloadSource.DEFAULT: "meta-llama/Llama-3.2-1B-Instruct",
+            DownloadSource.MODELSCOPE: "LLM-Research/Llama-3.2-1B-Instruct",
+        },
+        "LLaMA3.2-3B-Instruct": {
+            DownloadSource.DEFAULT: "meta-llama/Llama-3.2-3B-Instruct",
+            DownloadSource.MODELSCOPE: "LLM-Research/Llama-3.2-3B-Instruct",
+        },
+    },
+    template="llama3",
+)
+
+
 register_model_group(
     models={
         "LLaVA1.5-7B-Chat": {
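This registers the four Llama 3.2 text checkpoints under the existing llama3 chat template, with both Hugging Face and ModelScope download sources. A rough, self-contained sketch of the registration pattern this follows; the registry names below are assumptions for illustration, not necessarily the module's actual internals:

```python
from collections import OrderedDict
from enum import Enum, unique


@unique
class DownloadSource(str, Enum):
    DEFAULT = "hf"    # Hugging Face Hub repo ID
    MODELSCOPE = "ms"  # ModelScope mirror repo ID


SUPPORTED_MODELS = OrderedDict()  # display name -> {DownloadSource: repo ID}
DEFAULT_TEMPLATE = {}             # display name -> chat template name


def register_model_group(models, template=None):
    # Every model in a group shares one chat template, so registering
    # "LLaMA3.2-1B-Instruct" with template="llama3" is enough for the
    # UI and CLI to pick the right prompt format later.
    for name, path in models.items():
        SUPPORTED_MODELS[name] = path
        if template is not None:
            DEFAULT_TEMPLATE[name] = template
```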
@@ -19,6 +19,7 @@ import pytest
 from transformers import AutoTokenizer
 
 from llamafactory.data import get_template_and_fix_tokenizer
+from llamafactory.data.template import _get_jinja_template
 from llamafactory.hparams import DataArguments
 
 
@@ -117,7 +118,8 @@ def test_encode_multiturn(use_fast: bool):
 def test_jinja_template(use_fast: bool):
     tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA, use_fast=use_fast)
     ref_tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA, use_fast=use_fast)
-    get_template_and_fix_tokenizer(tokenizer, DataArguments(template="llama3"))
+    template = get_template_and_fix_tokenizer(tokenizer, DataArguments(template="llama3"))
+    tokenizer.chat_template = _get_jinja_template(template, tokenizer)  # llama3 template no replace
     assert tokenizer.chat_template != ref_tokenizer.chat_template
     assert tokenizer.apply_chat_template(MESSAGES) == ref_tokenizer.apply_chat_template(MESSAGES)
 
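The reworked test pins down the new contract: for llama3 the fixer no longer touches `tokenizer.chat_template`, so the test builds the Jinja template explicitly and checks that it differs textually from the stock one yet renders identical token IDs. A self-contained sketch of that property; `TINY_LLAMA` and `MESSAGES` stand in for the test module's fixtures and their exact values are assumptions:

```python
from transformers import AutoTokenizer

from llamafactory.data import get_template_and_fix_tokenizer
from llamafactory.data.template import _get_jinja_template
from llamafactory.hparams import DataArguments

TINY_LLAMA = "llamafactory/tiny-random-Llama-3"  # assumed tiny test checkpoint
MESSAGES = [
    {"role": "user", "content": "How are you"},
    {"role": "assistant", "content": "I am fine!"},
]

tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA)
ref_tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA)
template = get_template_and_fix_tokenizer(tokenizer, DataArguments(template="llama3"))
tokenizer.chat_template = _get_jinja_template(template, tokenizer)

# Textually different template, identical rendering of the same conversation.
assert tokenizer.chat_template != ref_tokenizer.chat_template
assert tokenizer.apply_chat_template(MESSAGES) == ref_tokenizer.apply_chat_template(MESSAGES)
```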