diff --git a/README.md b/README.md
index 973aac52..c984cffe 100644
--- a/README.md
+++ b/README.md
@@ -86,7 +86,7 @@ Choose your path:
| Support Date | Model Name |
| ------------ | ---------------------------------------------------------- |
| Day 0 | Qwen2.5 / Qwen2-VL / QwQ / QvQ / InternLM3 / MiniCPM-o-2.6 |
-| Day 1 | Llama 3 / GLM-4 / PaliGemma2 |
+| Day 1 | Llama 3 / GLM-4 / Mistral Small / PaliGemma2 |
## Benchmark
@@ -236,7 +236,9 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/
| [LLaVA-NeXT-Video](https://huggingface.co/llava-hf) | 7B/34B | llava_next_video |
| [MiniCPM](https://huggingface.co/openbmb) | 1B/2B/4B | cpm/cpm3 |
| [MiniCPM-o-2.6/MiniCPM-V-2.6](https://huggingface.co/openbmb) | 8B | minicpm_v |
+| [Ministral/Mistral-Nemo](https://huggingface.co/mistralai) | 8B/12B | ministral |
| [Mistral/Mixtral](https://huggingface.co/mistralai) | 7B/8x7B/8x22B | mistral |
+| [Mistral Small](https://huggingface.co/mistralai) | 24B | mistral_small |
| [OLMo](https://huggingface.co/allenai) | 1B/7B | - |
| [PaliGemma/PaliGemma2](https://huggingface.co/google) | 3B/10B/28B | paligemma |
| [Phi-1.5/Phi-2](https://huggingface.co/microsoft) | 1.3B/2.7B | - |
diff --git a/README_zh.md b/README_zh.md
index 079a272a..2157d11b 100644
--- a/README_zh.md
+++ b/README_zh.md
@@ -88,7 +88,7 @@ https://github.com/user-attachments/assets/e6ce34b0-52d5-4f3e-a830-592106c4c272
| 适配时间 | 模型名称 |
| ------------ | ---------------------------------------------------------- |
| Day 0 | Qwen2.5 / Qwen2-VL / QwQ / QvQ / InternLM3 / MiniCPM-o-2.6 |
-| Day 1 | Llama 3 / GLM-4 / PaliGemma2 |
+| Day 1 | Llama 3 / GLM-4 / Mistral Small / PaliGemma2 |
## 性能指标
@@ -238,7 +238,9 @@ https://github.com/user-attachments/assets/e6ce34b0-52d5-4f3e-a830-592106c4c272
| [LLaVA-NeXT-Video](https://huggingface.co/llava-hf) | 7B/34B | llava_next_video |
| [MiniCPM](https://huggingface.co/openbmb) | 1B/2B/4B | cpm/cpm3 |
| [MiniCPM-o-2.6/MiniCPM-V-2.6](https://huggingface.co/openbmb) | 8B | minicpm_v |
+| [Ministral/Mistral-Nemo](https://huggingface.co/mistralai) | 8B/12B | ministral |
| [Mistral/Mixtral](https://huggingface.co/mistralai) | 7B/8x7B/8x22B | mistral |
+| [Mistral Small](https://huggingface.co/mistralai) | 24B | mistral_small |
| [OLMo](https://huggingface.co/allenai) | 1B/7B | - |
| [PaliGemma/PaliGemma2](https://huggingface.co/google) | 3B/10B/28B | paligemma |
| [Phi-1.5/Phi-2](https://huggingface.co/microsoft) | 1.3B/2.7B | - |
diff --git a/requirements.txt b/requirements.txt
index 903aa1c3..d57ed94b 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,5 @@
transformers>=4.41.2,<=4.45.2;python_version<'3.10'
-transformers>=4.41.2,<=4.48.1,!=4.46.*,!=4.47.*,!=4.48.0;python_version>='3.10'
+transformers>=4.41.2,<=4.48.2,!=4.46.*,!=4.47.*,!=4.48.0;python_version>='3.10'
datasets>=2.16.0,<=3.2.0
accelerate>=0.34.0,<=1.2.1
peft>=0.11.1,<=0.12.0
diff --git a/src/llamafactory/__init__.py b/src/llamafactory/__init__.py
index 0c3363c6..0b5d8cce 100644
--- a/src/llamafactory/__init__.py
+++ b/src/llamafactory/__init__.py
@@ -20,7 +20,7 @@ Level:
Dependency graph:
main:
- transformers>=4.41.2,<=4.48.1,!=4.46.*,!=4.47.*,!=4.48.0
+ transformers>=4.41.2,<=4.48.2,!=4.46.*,!=4.47.*,!=4.48.0
datasets>=2.16.0,<=3.2.0
accelerate>=0.34.0,<=1.2.1
peft>=0.11.1,<=0.12.0
@@ -30,7 +30,7 @@ Dependency graph:
longlora:
transformers>=4.41.2,<4.48.0
packing:
- transformers>=4.43.0,<=4.48.1
+ transformers>=4.43.0,<=4.48.2
Disable version checking: DISABLE_VERSION_CHECK=1
Enable VRAM recording: RECORD_VRAM=1
diff --git a/src/llamafactory/chat/hf_engine.py b/src/llamafactory/chat/hf_engine.py
index 57bbb405..7b5e3bbe 100644
--- a/src/llamafactory/chat/hf_engine.py
+++ b/src/llamafactory/chat/hf_engine.py
@@ -183,8 +183,8 @@ class HuggingfaceEngine(BaseEngine):
if getattr(model.config, "model_type", None) in ["minicpmv", "minicpmo"]:
gen_kwargs["input_ids"] = inputs
- del gen_kwargs["image_sizes"]
gen_kwargs["tokenizer"] = tokenizer
+ gen_kwargs.pop("image_sizes", None)
return gen_kwargs, prompt_length
diff --git a/src/llamafactory/data/mm_plugin.py b/src/llamafactory/data/mm_plugin.py
index 00945923..eb8e7e5c 100644
--- a/src/llamafactory/data/mm_plugin.py
+++ b/src/llamafactory/data/mm_plugin.py
@@ -319,7 +319,7 @@ class LlavaNextPlugin(BasePlugin):
if self.expand_mm_tokens:
orig_height, orig_width = next(image_sizes)
image_seqlen = processor._get_number_of_features(orig_height, orig_width, height, width)
- if getattr(processor, "vision_feature_select_strategy") == "default":
+ if getattr(processor, "vision_feature_select_strategy", "default") == "default":
image_seqlen -= 1
else:
image_seqlen = 1
@@ -370,7 +370,7 @@ class LlavaNextVideoPlugin(BasePlugin):
if self.expand_mm_tokens:
orig_height, orig_width = next(image_sizes)
image_seqlen = processor._get_number_of_features(orig_height, orig_width, height, width)
- if getattr(processor, "vision_feature_select_strategy") == "default":
+ if getattr(processor, "vision_feature_select_strategy", "default") == "default":
image_seqlen -= 1
else:
image_seqlen = 1
@@ -915,7 +915,7 @@ class VideoLlavaPlugin(BasePlugin):
image_seqlen = (height // processor.patch_size) * (width // processor.patch_size) + 1
video_seqlen = image_seqlen * num_frames
- if getattr(processor, "vision_feature_select_strategy") == "default":
+ if getattr(processor, "vision_feature_select_strategy", "default") == "default":
image_seqlen -= 1
else:
image_seqlen, video_seqlen = 1, 1
diff --git a/src/llamafactory/data/template.py b/src/llamafactory/data/template.py
index 5b775db7..e87643da 100644
--- a/src/llamafactory/data/template.py
+++ b/src/llamafactory/data/template.py
@@ -220,6 +220,7 @@ def _register_template(
replace_eos: bool = False,
replace_jinja_template: bool = False,
mm_plugin: "BasePlugin" = get_mm_plugin(name="base"),
+ fuse_system_into_user: bool = False,
) -> None:
r"""
Registers a chat template.
@@ -242,7 +243,7 @@ def _register_template(
)
```
"""
- template_class = Llama2Template if any(k in name for k in ("llama2", "mistral", "pixtral")) else Template
+ template_class = Llama2Template if fuse_system_into_user else Template
default_slots = ["{{content}}"] if efficient_eos else ["{{content}}", {"eos_token"}]
default_user_formatter = StringFormatter(slots=["{{content}}"])
default_assistant_formatter = StringFormatter(slots=default_slots)
@@ -751,6 +752,7 @@ _register_template(
name="llama2",
format_user=StringFormatter(slots=[{"bos_token"}, "[INST] {{content}} [/INST]"]),
format_system=StringFormatter(slots=["<<SYS>>\n{{content}}\n<</SYS>>\n\n"]),
+ fuse_system_into_user=True,
)
@@ -760,6 +762,7 @@ _register_template(
format_user=StringFormatter(slots=[{"bos_token"}, "[INST] {{content}} [/INST]"]),
format_system=StringFormatter(slots=["<<SYS>>\n{{content}}\n<</SYS>>\n\n"]),
default_system="You are a helpful assistant. 你是一个乐于助人的助手。",
+ fuse_system_into_user=True,
)
@@ -878,11 +881,12 @@ _register_template(
format_user=StringFormatter(slots=["[INST] {{content}}[/INST]"]),
format_assistant=StringFormatter(slots=[" {{content}}", {"eos_token"}]),
format_system=StringFormatter(slots=["{{content}}\n\n"]),
- format_function=FunctionFormatter(slots=["[TOOL_CALLS] ", "{{content}}", {"eos_token"}], tool_format="mistral"),
+ format_function=FunctionFormatter(slots=["[TOOL_CALLS] {{content}}", {"eos_token"}], tool_format="mistral"),
format_observation=StringFormatter(slots=["""[TOOL_RESULTS] {"content": {{content}}}[/TOOL_RESULTS]"""]),
format_tools=ToolFormatter(tool_format="mistral"),
format_prefix=EmptyFormatter(slots=[{"bos_token"}]),
mm_plugin=get_mm_plugin(name="llava_next", image_token="<image>"),
+ fuse_system_into_user=True,
)
@@ -932,11 +936,12 @@ _register_template(
format_user=StringFormatter(slots=["[INST] {{content}}[/INST]"]),
format_assistant=StringFormatter(slots=[" {{content}}", {"eos_token"}]),
format_system=StringFormatter(slots=["{{content}}\n\n"]),
- format_function=FunctionFormatter(slots=["[TOOL_CALLS] ", "{{content}}", {"eos_token"}], tool_format="mistral"),
+ format_function=FunctionFormatter(slots=["[TOOL_CALLS] {{content}}", {"eos_token"}], tool_format="mistral"),
format_observation=StringFormatter(slots=["""[TOOL_RESULTS] {"content": {{content}}}[/TOOL_RESULTS]"""]),
format_tools=ToolFormatter(tool_format="mistral"),
format_prefix=EmptyFormatter(slots=[{"bos_token"}]),
mm_plugin=get_mm_plugin(name="llava_next_video", image_token="", video_token="