diff --git a/README.md b/README.md index 973aac52..c984cffe 100644 --- a/README.md +++ b/README.md @@ -86,7 +86,7 @@ Choose your path: | Support Date | Model Name | | ------------ | ---------------------------------------------------------- | | Day 0 | Qwen2.5 / Qwen2-VL / QwQ / QvQ / InternLM3 / MiniCPM-o-2.6 | -| Day 1 | Llama 3 / GLM-4 / PaliGemma2 | +| Day 1 | Llama 3 / GLM-4 / Mistral Small / PaliGemma2 | ## Benchmark @@ -236,7 +236,9 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/ | [LLaVA-NeXT-Video](https://huggingface.co/llava-hf) | 7B/34B | llava_next_video | | [MiniCPM](https://huggingface.co/openbmb) | 1B/2B/4B | cpm/cpm3 | | [MiniCPM-o-2.6/MiniCPM-V-2.6](https://huggingface.co/openbmb) | 8B | minicpm_v | +| [Ministral/Mistral-Nemo](https://huggingface.co/mistralai) | 8B/12B | ministral | | [Mistral/Mixtral](https://huggingface.co/mistralai) | 7B/8x7B/8x22B | mistral | +| [Mistral Small](https://huggingface.co/mistralai) | 24B | mistral_small | | [OLMo](https://huggingface.co/allenai) | 1B/7B | - | | [PaliGemma/PaliGemma2](https://huggingface.co/google) | 3B/10B/28B | paligemma | | [Phi-1.5/Phi-2](https://huggingface.co/microsoft) | 1.3B/2.7B | - | diff --git a/README_zh.md b/README_zh.md index 079a272a..2157d11b 100644 --- a/README_zh.md +++ b/README_zh.md @@ -88,7 +88,7 @@ https://github.com/user-attachments/assets/e6ce34b0-52d5-4f3e-a830-592106c4c272 | 适配时间 | 模型名称 | | ------------ | ---------------------------------------------------------- | | Day 0 | Qwen2.5 / Qwen2-VL / QwQ / QvQ / InternLM3 / MiniCPM-o-2.6 | -| Day 1 | Llama 3 / GLM-4 / PaliGemma2 | +| Day 1 | Llama 3 / GLM-4 / Mistral Small / PaliGemma2 | ## 性能指标 @@ -238,7 +238,9 @@ https://github.com/user-attachments/assets/e6ce34b0-52d5-4f3e-a830-592106c4c272 | [LLaVA-NeXT-Video](https://huggingface.co/llava-hf) | 7B/34B | llava_next_video | | [MiniCPM](https://huggingface.co/openbmb) | 1B/2B/4B | cpm/cpm3 | | 
[MiniCPM-o-2.6/MiniCPM-V-2.6](https://huggingface.co/openbmb) | 8B | minicpm_v | +| [Ministral/Mistral-Nemo](https://huggingface.co/mistralai) | 8B/12B | ministral | | [Mistral/Mixtral](https://huggingface.co/mistralai) | 7B/8x7B/8x22B | mistral | +| [Mistral Small](https://huggingface.co/mistralai) | 24B | mistral_small | | [OLMo](https://huggingface.co/allenai) | 1B/7B | - | | [PaliGemma/PaliGemma2](https://huggingface.co/google) | 3B/10B/28B | paligemma | | [Phi-1.5/Phi-2](https://huggingface.co/microsoft) | 1.3B/2.7B | - | diff --git a/requirements.txt b/requirements.txt index 903aa1c3..d57ed94b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ transformers>=4.41.2,<=4.45.2;python_version<'3.10' -transformers>=4.41.2,<=4.48.1,!=4.46.*,!=4.47.*,!=4.48.0;python_version>='3.10' +transformers>=4.41.2,<=4.48.2,!=4.46.*,!=4.47.*,!=4.48.0;python_version>='3.10' datasets>=2.16.0,<=3.2.0 accelerate>=0.34.0,<=1.2.1 peft>=0.11.1,<=0.12.0 diff --git a/src/llamafactory/__init__.py b/src/llamafactory/__init__.py index 0c3363c6..0b5d8cce 100644 --- a/src/llamafactory/__init__.py +++ b/src/llamafactory/__init__.py @@ -20,7 +20,7 @@ Level: Dependency graph: main: - transformers>=4.41.2,<=4.48.1,!=4.46.*,!=4.47.*,!=4.48.0 + transformers>=4.41.2,<=4.48.2,!=4.46.*,!=4.47.*,!=4.48.0 datasets>=2.16.0,<=3.2.0 accelerate>=0.34.0,<=1.2.1 peft>=0.11.1,<=0.12.0 @@ -30,7 +30,7 @@ Dependency graph: longlora: transformers>=4.41.2,<4.48.0 packing: - transformers>=4.43.0,<=4.48.1 + transformers>=4.43.0,<=4.48.2 Disable version checking: DISABLE_VERSION_CHECK=1 Enable VRAM recording: RECORD_VRAM=1 diff --git a/src/llamafactory/chat/hf_engine.py b/src/llamafactory/chat/hf_engine.py index 57bbb405..7b5e3bbe 100644 --- a/src/llamafactory/chat/hf_engine.py +++ b/src/llamafactory/chat/hf_engine.py @@ -183,8 +183,8 @@ class HuggingfaceEngine(BaseEngine): if getattr(model.config, "model_type", None) in ["minicpmv", "minicpmo"]: gen_kwargs["input_ids"] = inputs - del 
gen_kwargs["image_sizes"] gen_kwargs["tokenizer"] = tokenizer + gen_kwargs.pop("image_sizes", None) return gen_kwargs, prompt_length diff --git a/src/llamafactory/data/mm_plugin.py b/src/llamafactory/data/mm_plugin.py index 00945923..eb8e7e5c 100644 --- a/src/llamafactory/data/mm_plugin.py +++ b/src/llamafactory/data/mm_plugin.py @@ -319,7 +319,7 @@ class LlavaNextPlugin(BasePlugin): if self.expand_mm_tokens: orig_height, orig_width = next(image_sizes) image_seqlen = processor._get_number_of_features(orig_height, orig_width, height, width) - if getattr(processor, "vision_feature_select_strategy") == "default": + if getattr(processor, "vision_feature_select_strategy", "default") == "default": image_seqlen -= 1 else: image_seqlen = 1 @@ -370,7 +370,7 @@ class LlavaNextVideoPlugin(BasePlugin): if self.expand_mm_tokens: orig_height, orig_width = next(image_sizes) image_seqlen = processor._get_number_of_features(orig_height, orig_width, height, width) - if getattr(processor, "vision_feature_select_strategy") == "default": + if getattr(processor, "vision_feature_select_strategy", "default") == "default": image_seqlen -= 1 else: image_seqlen = 1 @@ -915,7 +915,7 @@ class VideoLlavaPlugin(BasePlugin): image_seqlen = (height // processor.patch_size) * (width // processor.patch_size) + 1 video_seqlen = image_seqlen * num_frames - if getattr(processor, "vision_feature_select_strategy") == "default": + if getattr(processor, "vision_feature_select_strategy", "default") == "default": image_seqlen -= 1 else: image_seqlen, video_seqlen = 1, 1 diff --git a/src/llamafactory/data/template.py b/src/llamafactory/data/template.py index 5b775db7..e87643da 100644 --- a/src/llamafactory/data/template.py +++ b/src/llamafactory/data/template.py @@ -220,6 +220,7 @@ def _register_template( replace_eos: bool = False, replace_jinja_template: bool = False, mm_plugin: "BasePlugin" = get_mm_plugin(name="base"), + fuse_system_into_user: bool = False, ) -> None: r""" Registers a chat template. 
@@ -242,7 +243,7 @@ def _register_template( ) ``` """ - template_class = Llama2Template if any(k in name for k in ("llama2", "mistral", "pixtral")) else Template + template_class = Llama2Template if fuse_system_into_user else Template default_slots = ["{{content}}"] if efficient_eos else ["{{content}}", {"eos_token"}] default_user_formatter = StringFormatter(slots=["{{content}}"]) default_assistant_formatter = StringFormatter(slots=default_slots) @@ -751,6 +752,7 @@ _register_template( name="llama2", format_user=StringFormatter(slots=[{"bos_token"}, "[INST] {{content}} [/INST]"]), format_system=StringFormatter(slots=["<<SYS>>\n{{content}}\n<</SYS>>\n\n"]), + fuse_system_into_user=True, ) @@ -760,6 +762,7 @@ _register_template( format_user=StringFormatter(slots=[{"bos_token"}, "[INST] {{content}} [/INST]"]), format_system=StringFormatter(slots=["<<SYS>>\n{{content}}\n<</SYS>>\n\n"]), default_system="You are a helpful assistant. 你是一个乐于助人的助手。", + fuse_system_into_user=True, ) @@ -878,11 +881,12 @@ _register_template( format_user=StringFormatter(slots=["[INST] {{content}}[/INST]"]), format_assistant=StringFormatter(slots=[" {{content}}", {"eos_token"}]), format_system=StringFormatter(slots=["{{content}}\n\n"]), - format_function=FunctionFormatter(slots=["[TOOL_CALLS] ", "{{content}}", {"eos_token"}], tool_format="mistral"), + format_function=FunctionFormatter(slots=["[TOOL_CALLS] {{content}}", {"eos_token"}], tool_format="mistral"), format_observation=StringFormatter(slots=["""[TOOL_RESULTS] {"content": {{content}}}[/TOOL_RESULTS]"""]), format_tools=ToolFormatter(tool_format="mistral"), format_prefix=EmptyFormatter(slots=[{"bos_token"}]), mm_plugin=get_mm_plugin(name="llava_next", image_token="<image>"), + fuse_system_into_user=True, ) @@ -932,11 +936,12 @@ _register_template( format_user=StringFormatter(slots=["[INST] {{content}}[/INST]"]), format_assistant=StringFormatter(slots=[" {{content}}", {"eos_token"}]), format_system=StringFormatter(slots=["{{content}}\n\n"]), - 
format_function=FunctionFormatter(slots=["[TOOL_CALLS] ", "{{content}}", {"eos_token"}], tool_format="mistral"), + format_function=FunctionFormatter(slots=["[TOOL_CALLS] {{content}}", {"eos_token"}], tool_format="mistral"), format_observation=StringFormatter(slots=["""[TOOL_RESULTS] {"content": {{content}}}[/TOOL_RESULTS]"""]), format_tools=ToolFormatter(tool_format="mistral"), format_prefix=EmptyFormatter(slots=[{"bos_token"}]), mm_plugin=get_mm_plugin(name="llava_next_video", image_token="<image>", video_token="