From ce7032e1b39006a0b79e002a18b5b993763afa20 Mon Sep 17 00:00:00 2001
From: hoshi-hiyouga
Date: Sat, 3 May 2025 16:36:51 +0800
Subject: [PATCH] [model] add qwen2 omni 3b (#7945)

---
 README.md                              | 24 ++++++++++++------------
 README_zh.md                           |  3 ++-
 src/llamafactory/chat/hf_engine.py     |  3 +--
 src/llamafactory/chat/sglang_engine.py |  3 +--
 src/llamafactory/chat/vllm_engine.py   |  3 +--
 src/llamafactory/data/template.py      |  6 +++---
 src/llamafactory/extras/constants.py   | 14 +++++++++++++-
 7 files changed, 33 insertions(+), 23 deletions(-)

diff --git a/README.md b/README.md
index 76b8b003..73c8b724 100644
--- a/README.md
+++ b/README.md
@@ -18,26 +18,26 @@
 [![Studios](https://img.shields.io/badge/ModelScope-Open%20in%20Studios-blue)](https://modelscope.cn/studios/hiyouga/LLaMA-Board)
 [![SageMaker](https://img.shields.io/badge/SageMaker-Open%20in%20AWS-blue)](https://aws.amazon.com/cn/blogs/china/a-one-stop-code-free-model-fine-tuning-deployment-platform-based-on-sagemaker-and-llama-factory/)
 
-<div align="center" markdown="1">
-
-### Supporters ❤️
-
-<a href="https://warp.dev/llama-factory">
-    <img alt="Warp sponsorship" src="...">
-</a>
-
-### [Warp, the agentic terminal for developers](https://warp.dev/llama-factory)
-[Available for MacOS, Linux, & Windows](https://warp.dev/llama-factory)
-</div>
+<a href="https://warp.dev/llama-factory">
+    <img alt="Warp sponsorship" src="...">
+</a>
+
+#### [Warp, the agentic terminal for developers](https://warp.dev/llama-factory)
+
+[Available for MacOS, Linux, & Windows](https://warp.dev/llama-factory)
+
+----
+

Easily fine-tune 100+ large language models with zero-code CLI and Web UI

+

 Github trend
@@ -298,7 +298,7 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/
 | [Qwen (1-2.5) (Code/Math/MoE/QwQ)](https://huggingface.co/Qwen) | 0.5B/1.5B/3B/7B/14B/32B/72B/110B | qwen |
 | [Qwen3 (MoE)](https://huggingface.co/Qwen) | 0.6B/1.7B/4B/8B/14B/32B/235B | qwen3 |
 | [Qwen2-Audio](https://huggingface.co/Qwen) | 7B | qwen2_audio |
-| [Qwen2.5-Omni](https://huggingface.co/Qwen)\*\* | 7B | qwen2_omni |
+| [Qwen2.5-Omni](https://huggingface.co/Qwen)\*\* | 3B/7B | qwen2_omni |
 | [Qwen2-VL/Qwen2.5-VL/QVQ](https://huggingface.co/Qwen) | 2B/3B/7B/32B/72B | qwen2_vl |
 | [Skywork o1](https://huggingface.co/Skywork) | 8B | skywork_o1 |
 | [StarCoder 2](https://huggingface.co/bigcode) | 3B/7B/15B | - |
diff --git a/README_zh.md b/README_zh.md
index a3914f7c..f02e27b9 100644
--- a/README_zh.md
+++ b/README_zh.md
@@ -21,6 +21,7 @@

使用零代码命令行与 Web UI 轻松微调百余种大模型

+

 Github trend
@@ -284,7 +285,7 @@ https://github.com/user-attachments/assets/43b700c6-a178-41db-b1f8-8190a5d3fcfc
 | [Qwen (1-2.5) (Code/Math/MoE/QwQ)](https://huggingface.co/Qwen) | 0.5B/1.5B/3B/7B/14B/32B/72B/110B | qwen |
 | [Qwen3 (MoE)](https://huggingface.co/Qwen) | 0.6B/1.7B/4B/8B/14B/32B/235B | qwen3 |
 | [Qwen2-Audio](https://huggingface.co/Qwen) | 7B | qwen2_audio |
-| [Qwen2.5-Omni](https://huggingface.co/Qwen)\*\* | 7B | qwen2_omni |
+| [Qwen2.5-Omni](https://huggingface.co/Qwen)\*\* | 3B/7B | qwen2_omni |
 | [Qwen2-VL/Qwen2.5-VL/QVQ](https://huggingface.co/Qwen) | 2B/3B/7B/32B/72B | qwen2_vl |
 | [Skywork o1](https://huggingface.co/Skywork) | 8B | skywork_o1 |
 | [StarCoder 2](https://huggingface.co/bigcode) | 3B/7B/15B | - |
diff --git a/src/llamafactory/chat/hf_engine.py b/src/llamafactory/chat/hf_engine.py
index 8fb08dee..b2c03c60 100644
--- a/src/llamafactory/chat/hf_engine.py
+++ b/src/llamafactory/chat/hf_engine.py
@@ -103,8 +103,7 @@ class HuggingfaceEngine(BaseEngine):
         messages = template.mm_plugin.process_messages(
             messages, mm_input_dict["images"], mm_input_dict["videos"], mm_input_dict["audios"], processor
         )
-        # add thought words to avoid skipping thinking
-        paired_messages = messages + [{"role": "assistant", "content": template.add_thought("")}]
+        paired_messages = messages + [{"role": "assistant", "content": ""}]
         system = system or generating_args["default_system"]
         enable_thinking = input_kwargs.pop("enable_thinking", None)
         enable_thinking = enable_thinking if enable_thinking is not None else generating_args["enable_thinking"]
diff --git a/src/llamafactory/chat/sglang_engine.py b/src/llamafactory/chat/sglang_engine.py
index 7af561ca..24d60604 100644
--- a/src/llamafactory/chat/sglang_engine.py
+++ b/src/llamafactory/chat/sglang_engine.py
@@ -146,8 +146,7 @@ class SGLangEngine(BaseEngine):
         messages = self.template.mm_plugin.process_messages(
             messages, images or [], videos or [], audios or [], self.processor
         )
-        # add thought words to avoid skipping thinking
-        paired_messages = messages + [{"role": "assistant", "content": self.template.add_thought("")}]
+        paired_messages = messages + [{"role": "assistant", "content": ""}]
         system = system or self.generating_args["default_system"]
         enable_thinking = input_kwargs.pop("enable_thinking", None)
         enable_thinking = enable_thinking if enable_thinking is not None else self.generating_args["enable_thinking"]
diff --git a/src/llamafactory/chat/vllm_engine.py b/src/llamafactory/chat/vllm_engine.py
index dde67dd6..9110ae05 100644
--- a/src/llamafactory/chat/vllm_engine.py
+++ b/src/llamafactory/chat/vllm_engine.py
@@ -123,8 +123,7 @@
         messages = self.template.mm_plugin.process_messages(
             messages, images or [], videos or [], audios or [], self.processor
         )
-        # add thought words to avoid skipping thinking
-        paired_messages = messages + [{"role": "assistant", "content": self.template.add_thought("")}]
+        paired_messages = messages + [{"role": "assistant", "content": ""}]
         system = system or self.generating_args["default_system"]
         enable_thinking = input_kwargs.pop("enable_thinking", None)
         enable_thinking = enable_thinking if enable_thinking is not None else self.generating_args["enable_thinking"]
diff --git a/src/llamafactory/data/template.py b/src/llamafactory/data/template.py
index 5aa1f1be..a5c5eb2b 100644
--- a/src/llamafactory/data/template.py
+++ b/src/llamafactory/data/template.py
@@ -60,7 +60,7 @@ class Template:
         messages: list[dict[str, str]],
         system: Optional[str] = None,
         tools: Optional[str] = None,
-        enable_thinking: bool = True,
+        enable_thinking: bool = False,
     ) -> tuple[list[int], list[int]]:
         r"""Return a single pair of token ids representing prompt and response respectively."""
         encoded_messages = self._encode(tokenizer, messages, system, tools)
@@ -406,7 +406,7 @@ class ReasoningTemplate(Template):
         messages: list[dict[str, str]],
         system: Optional[str] = None,
         tools: Optional[str] = None,
-        enable_thinking: bool = True,
+        enable_thinking: bool = False,
     ) -> tuple[list[int], list[int]]:
         messages = deepcopy(messages)
         for i in range(len(messages)):
@@ -418,7 +418,7 @@ class ReasoningTemplate(Template):
         for encoded_ids in encoded_messages[:-1]:
             prompt_ids += encoded_ids
 
-        if not enable_thinking or (
+        if not enable_thinking and (
             messages[-1]["role"] == Role.ASSISTANT
             and self.thought_words[0] not in messages[-1]["content"]
             and self.thought_words[1] not in messages[-1]["content"]
diff --git a/src/llamafactory/extras/constants.py b/src/llamafactory/extras/constants.py
index 4e4d0760..c3bb2e22 100644
--- a/src/llamafactory/extras/constants.py
+++ b/src/llamafactory/extras/constants.py
@@ -2479,6 +2479,14 @@ register_model_group(
             DownloadSource.DEFAULT: "Qwen/Qwen3-235B-A22B",
             DownloadSource.MODELSCOPE: "Qwen/Qwen3-235B-A22B",
         },
+        "Qwen3-14B-Instruct-AWQ": {
+            DownloadSource.DEFAULT: "Qwen/Qwen3-14B-AWQ",
+            DownloadSource.MODELSCOPE: "Qwen/Qwen3-14B-AWQ",
+        },
+        "Qwen3-32B-Instruct-AWQ": {
+            DownloadSource.DEFAULT: "Qwen/Qwen3-32B-AWQ",
+            DownloadSource.MODELSCOPE: "Qwen/Qwen3-32B-AWQ",
+        },
     },
     template="qwen3",
 )
@@ -2502,10 +2510,14 @@ register_model_group(
 
 register_model_group(
     models={
+        "Qwen2.5-Omni-3B": {
+            DownloadSource.DEFAULT: "Qwen/Qwen2.5-Omni-3B",
+            DownloadSource.MODELSCOPE: "Qwen/Qwen2.5-Omni-3B",
+        },
         "Qwen2.5-Omni-7B": {
             DownloadSource.DEFAULT: "Qwen/Qwen2.5-Omni-7B",
             DownloadSource.MODELSCOPE: "Qwen/Qwen2.5-Omni-7B",
-        }
+        },
     },
     template="qwen2_omni",
     multimodal=True,
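
Taken together, the engine and template hunks move the "skip thinking" decision into the template: all three engines now append a plain empty assistant turn, and `ReasoningTemplate.encode_oneturn` injects an empty thought block only when `enable_thinking` is off and the trailing assistant message carries no thought words. With the old `or`, the block was injected even when thinking was requested, as long as the message lacked thought words. A minimal sketch of the post-patch guard, assuming `<think>`/`</think>` as the template's thought words and a simplified message shape rather than the project's actual API:

```python
# Sketch of the corrected guard; the real logic lives in
# ReasoningTemplate.encode_oneturn and uses self.thought_words.
THOUGHT_WORDS = ("<think>", "</think>")  # assumed delimiters for a Qwen3-style template


def add_empty_thought(content: str) -> str:
    """Prepend an empty thought block so the model skips its reasoning phase."""
    return f"{THOUGHT_WORDS[0]}\n\n{THOUGHT_WORDS[1]}\n\n{content}"


def prepare_last_message(messages: list[dict[str, str]], enable_thinking: bool) -> str:
    last = messages[-1]
    # Inject the empty block only when thinking is disabled AND the trailing
    # assistant message has no thought words yet. The pre-patch `or` injected
    # it even with enable_thinking=True, silently disabling reasoning.
    if not enable_thinking and (
        last["role"] == "assistant"
        and THOUGHT_WORDS[0] not in last["content"]
        and THOUGHT_WORDS[1] not in last["content"]
    ):
        return add_empty_thought(last["content"])

    return last["content"]


messages = [{"role": "user", "content": "hi"}, {"role": "assistant", "content": ""}]
assert prepare_last_message(messages, enable_thinking=False).startswith("<think>")
assert prepare_last_message(messages, enable_thinking=True) == ""
```

Per the engine hunks, `enable_thinking` itself is resolved from the request kwargs with a fallback to `generating_args["enable_thinking"]`, and is then passed through to this guard.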
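The `constants.py` hunk registers the new `Qwen2.5-Omni-3B` checkpoint alongside the 7B release under the same `qwen2_omni` template, matching the README tables' change from 7B to 3B/7B. A rough usage sketch: the repo id and template name come from the registration above, while the `ChatModel` kwargs and the `huggingface` backend value are assumptions, not a verified signature.

```python
from llamafactory.chat import ChatModel

# "Qwen/Qwen2.5-Omni-3B" and template "qwen2_omni" come straight from the
# constants.py registration; the remaining kwargs are illustrative.
chat_model = ChatModel({
    "model_name_or_path": "Qwen/Qwen2.5-Omni-3B",
    "template": "qwen2_omni",
    "infer_backend": "huggingface",
})

messages = [{"role": "user", "content": "Introduce yourself briefly."}]
for response in chat_model.chat(messages):
    print(response.response_text)
```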