From ce7032e1b39006a0b79e002a18b5b993763afa20 Mon Sep 17 00:00:00 2001
From: hoshi-hiyouga
Date: Sat, 3 May 2025 16:36:51 +0800
Subject: [PATCH] [model] add qwen2 omni 3b (#7945)

---
 README.md                              | 24 ++++++++++++------------
 README_zh.md                           |  3 ++-
 src/llamafactory/chat/hf_engine.py     |  3 +--
 src/llamafactory/chat/sglang_engine.py |  3 +--
 src/llamafactory/chat/vllm_engine.py   |  3 +--
 src/llamafactory/data/template.py      |  6 +++---
 src/llamafactory/extras/constants.py   | 14 +++++++++++++-
 7 files changed, 33 insertions(+), 23 deletions(-)

diff --git a/README.md b/README.md
index 76b8b003..73c8b724 100644
--- a/README.md
+++ b/README.md
@@ -18,26 +18,26 @@
 [![Studios](https://img.shields.io/badge/ModelScope-Open%20in%20Studios-blue)](https://modelscope.cn/studios/hiyouga/LLaMA-Board)
 [![SageMaker](https://img.shields.io/badge/SageMaker-Open%20in%20AWS-blue)](https://aws.amazon.com/cn/blogs/china/a-one-stop-code-free-model-fine-tuning-deployment-platform-based-on-sagemaker-and-llama-factory/)
 
-<div align="center" markdown="1">
-
-### Supporters ❤️
-
-<a href="https://warp.dev/llama-factory">
-    <img alt="Warp sponsorship" src="...">
-</a>
-
-### [Warp, the agentic terminal for developers](https://warp.dev/llama-factory)
-[Available for MacOS, Linux, & Windows](https://warp.dev/llama-factory)
-</div>
+<a href="https://warp.dev/llama-factory">
+    <img alt="Warp sponsorship" src="...">
+</a>
+
+#### [Warp, the agentic terminal for developers](https://warp.dev/llama-factory)
+
+[Available for MacOS, Linux, & Windows](https://warp.dev/llama-factory)
+
+----
+

Easily fine-tune 100+ large language models with zero-code CLI and Web UI

+

 Github trend
@@ -298,7 +298,7 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/
 | [Qwen (1-2.5) (Code/Math/MoE/QwQ)](https://huggingface.co/Qwen) | 0.5B/1.5B/3B/7B/14B/32B/72B/110B | qwen |
 | [Qwen3 (MoE)](https://huggingface.co/Qwen) | 0.6B/1.7B/4B/8B/14B/32B/235B | qwen3 |
 | [Qwen2-Audio](https://huggingface.co/Qwen) | 7B | qwen2_audio |
-| [Qwen2.5-Omni](https://huggingface.co/Qwen)\*\* | 7B | qwen2_omni |
+| [Qwen2.5-Omni](https://huggingface.co/Qwen)\*\* | 3B/7B | qwen2_omni |
 | [Qwen2-VL/Qwen2.5-VL/QVQ](https://huggingface.co/Qwen) | 2B/3B/7B/32B/72B | qwen2_vl |
 | [Skywork o1](https://huggingface.co/Skywork) | 8B | skywork_o1 |
 | [StarCoder 2](https://huggingface.co/bigcode) | 3B/7B/15B | - |
diff --git a/README_zh.md b/README_zh.md
index a3914f7c..f02e27b9 100644
--- a/README_zh.md
+++ b/README_zh.md
@@ -21,6 +21,7 @@

使用零代码命令行与 Web UI 轻松微调百余种大模型

+

 Github trend
@@ -284,7 +285,7 @@ https://github.com/user-attachments/assets/43b700c6-a178-41db-b1f8-8190a5d3fcfc
 | [Qwen (1-2.5) (Code/Math/MoE/QwQ)](https://huggingface.co/Qwen) | 0.5B/1.5B/3B/7B/14B/32B/72B/110B | qwen |
 | [Qwen3 (MoE)](https://huggingface.co/Qwen) | 0.6B/1.7B/4B/8B/14B/32B/235B | qwen3 |
 | [Qwen2-Audio](https://huggingface.co/Qwen) | 7B | qwen2_audio |
-| [Qwen2.5-Omni](https://huggingface.co/Qwen)\*\* | 7B | qwen2_omni |
+| [Qwen2.5-Omni](https://huggingface.co/Qwen)\*\* | 3B/7B | qwen2_omni |
 | [Qwen2-VL/Qwen2.5-VL/QVQ](https://huggingface.co/Qwen) | 2B/3B/7B/32B/72B | qwen2_vl |
 | [Skywork o1](https://huggingface.co/Skywork) | 8B | skywork_o1 |
 | [StarCoder 2](https://huggingface.co/bigcode) | 3B/7B/15B | - |
diff --git a/src/llamafactory/chat/hf_engine.py b/src/llamafactory/chat/hf_engine.py
index 8fb08dee..b2c03c60 100644
--- a/src/llamafactory/chat/hf_engine.py
+++ b/src/llamafactory/chat/hf_engine.py
@@ -103,8 +103,7 @@ class HuggingfaceEngine(BaseEngine):
         messages = template.mm_plugin.process_messages(
             messages, mm_input_dict["images"], mm_input_dict["videos"], mm_input_dict["audios"], processor
         )
-        # add thought words to avoid skipping thinking
-        paired_messages = messages + [{"role": "assistant", "content": template.add_thought("")}]
+        paired_messages = messages + [{"role": "assistant", "content": ""}]
         system = system or generating_args["default_system"]
         enable_thinking = input_kwargs.pop("enable_thinking", None)
         enable_thinking = enable_thinking if enable_thinking is not None else generating_args["enable_thinking"]
diff --git a/src/llamafactory/chat/sglang_engine.py b/src/llamafactory/chat/sglang_engine.py
index 7af561ca..24d60604 100644
--- a/src/llamafactory/chat/sglang_engine.py
+++ b/src/llamafactory/chat/sglang_engine.py
@@ -146,8 +146,7 @@ class SGLangEngine(BaseEngine):
         messages = self.template.mm_plugin.process_messages(
             messages, images or [], videos or [], audios or [], self.processor
         )
-        # add thought words to avoid skipping thinking
-        paired_messages = messages + [{"role": "assistant", "content": self.template.add_thought("")}]
+        paired_messages = messages + [{"role": "assistant", "content": ""}]
         system = system or self.generating_args["default_system"]
         enable_thinking = input_kwargs.pop("enable_thinking", None)
         enable_thinking = enable_thinking if enable_thinking is not None else self.generating_args["enable_thinking"]
diff --git a/src/llamafactory/chat/vllm_engine.py b/src/llamafactory/chat/vllm_engine.py
index dde67dd6..9110ae05 100644
--- a/src/llamafactory/chat/vllm_engine.py
+++ b/src/llamafactory/chat/vllm_engine.py
@@ -123,8 +123,7 @@
         messages = self.template.mm_plugin.process_messages(
             messages, images or [], videos or [], audios or [], self.processor
         )
-        # add thought words to avoid skipping thinking
-        paired_messages = messages + [{"role": "assistant", "content": self.template.add_thought("")}]
+        paired_messages = messages + [{"role": "assistant", "content": ""}]
         system = system or self.generating_args["default_system"]
         enable_thinking = input_kwargs.pop("enable_thinking", None)
         enable_thinking = enable_thinking if enable_thinking is not None else self.generating_args["enable_thinking"]
diff --git a/src/llamafactory/data/template.py b/src/llamafactory/data/template.py
index 5aa1f1be..a5c5eb2b 100644
--- a/src/llamafactory/data/template.py
+++ b/src/llamafactory/data/template.py
@@ -60,7 +60,7 @@ class Template:
         messages: list[dict[str, str]],
         system: Optional[str] = None,
         tools: Optional[str] = None,
-        enable_thinking: bool = True,
+        enable_thinking: bool = False,
     ) -> tuple[list[int], list[int]]:
         r"""Return a single pair of token ids representing prompt and response respectively."""
         encoded_messages = self._encode(tokenizer, messages, system, tools)
@@ -406,7 +406,7 @@ class ReasoningTemplate(Template):
         messages: list[dict[str, str]],
         system: Optional[str] = None,
         tools: Optional[str] = None,
-        enable_thinking: bool = True,
+        enable_thinking: bool = False,
     ) -> tuple[list[int], list[int]]:
         messages = deepcopy(messages)
         for i in range(len(messages)):
@@ -418,7 +418,7 @@ class ReasoningTemplate(Template):
         for encoded_ids in encoded_messages[:-1]:
             prompt_ids += encoded_ids
 
-        if not enable_thinking or (
+        if not enable_thinking and (
             messages[-1]["role"] == Role.ASSISTANT
             and self.thought_words[0] not in messages[-1]["content"]
             and self.thought_words[1] not in messages[-1]["content"]
diff --git a/src/llamafactory/extras/constants.py b/src/llamafactory/extras/constants.py
index 4e4d0760..c3bb2e22 100644
--- a/src/llamafactory/extras/constants.py
+++ b/src/llamafactory/extras/constants.py
@@ -2479,6 +2479,14 @@ register_model_group(
             DownloadSource.DEFAULT: "Qwen/Qwen3-235B-A22B",
             DownloadSource.MODELSCOPE: "Qwen/Qwen3-235B-A22B",
         },
+        "Qwen3-14B-Instruct-AWQ": {
+            DownloadSource.DEFAULT: "Qwen/Qwen3-14B-AWQ",
+            DownloadSource.MODELSCOPE: "Qwen/Qwen3-14B-AWQ",
+        },
+        "Qwen3-32B-Instruct-AWQ": {
+            DownloadSource.DEFAULT: "Qwen/Qwen3-32B-AWQ",
+            DownloadSource.MODELSCOPE: "Qwen/Qwen3-32B-AWQ",
+        },
     },
     template="qwen3",
 )
@@ -2502,10 +2510,14 @@ register_model_group(
 
 register_model_group(
     models={
+        "Qwen2.5-Omni-3B": {
+            DownloadSource.DEFAULT: "Qwen/Qwen2.5-Omni-3B",
+            DownloadSource.MODELSCOPE: "Qwen/Qwen2.5-Omni-3B",
+        },
         "Qwen2.5-Omni-7B": {
             DownloadSource.DEFAULT: "Qwen/Qwen2.5-Omni-7B",
             DownloadSource.MODELSCOPE: "Qwen/Qwen2.5-Omni-7B",
-        }
+        },
     },
     template="qwen2_omni",
     multimodal=True,
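
Taken together, the engine and template hunks move the "skip thinking" decision into the template: all three engines now append a plain empty assistant turn, and `ReasoningTemplate.encode_oneturn` injects an empty thought block only when `enable_thinking` is off and the trailing assistant message carries no thought words. With the old `or`, the block was injected even when thinking was requested, as long as the message lacked thought words. A minimal sketch of the post-patch guard, assuming `<think>`/`</think>` as the template's thought words and a simplified message shape rather than the project's actual API:

```python
# Sketch of the corrected guard; the real logic lives in
# ReasoningTemplate.encode_oneturn and uses self.thought_words.
THOUGHT_WORDS = ("<think>", "</think>")  # assumed delimiters for a Qwen3-style template


def add_empty_thought(content: str) -> str:
    """Prepend an empty thought block so the model skips its reasoning phase."""
    return f"{THOUGHT_WORDS[0]}\n\n{THOUGHT_WORDS[1]}\n\n{content}"


def prepare_last_message(messages: list[dict[str, str]], enable_thinking: bool) -> str:
    last = messages[-1]
    # Inject the empty block only when thinking is disabled AND the trailing
    # assistant message has no thought words yet. The pre-patch `or` injected
    # it even with enable_thinking=True, silently disabling reasoning.
    if not enable_thinking and (
        last["role"] == "assistant"
        and THOUGHT_WORDS[0] not in last["content"]
        and THOUGHT_WORDS[1] not in last["content"]
    ):
        return add_empty_thought(last["content"])

    return last["content"]


messages = [{"role": "user", "content": "hi"}, {"role": "assistant", "content": ""}]
assert prepare_last_message(messages, enable_thinking=False).startswith("<think>")
assert prepare_last_message(messages, enable_thinking=True) == ""
```

Per the engine hunks, `enable_thinking` itself is resolved from the request kwargs with a fallback to `generating_args["enable_thinking"]`, and is then passed through to this guard.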
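The `constants.py` hunk registers the new `Qwen2.5-Omni-3B` checkpoint alongside the 7B release under the same `qwen2_omni` template, matching the README tables' change from 7B to 3B/7B. A rough usage sketch: the repo id and template name come from the registration above, while the `ChatModel` kwargs and the `huggingface` backend value are assumptions, not a verified signature.

```python
from llamafactory.chat import ChatModel

# "Qwen/Qwen2.5-Omni-3B" and template "qwen2_omni" come straight from the
# constants.py registration; the remaining kwargs are illustrative.
chat_model = ChatModel({
    "model_name_or_path": "Qwen/Qwen2.5-Omni-3B",
    "template": "qwen2_omni",
    "infer_backend": "huggingface",
})

messages = [{"role": "user", "content": "Introduce yourself briefly."}]
for response in chat_model.chat(messages):
    print(response.response_text)
```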