[model] add qwen2 omni 3b (#7945)

Author: hoshi-hiyouga
Date: 2025-05-03 16:36:51 +08:00
Committed by: GitHub
Parent: c94518718c
Commit: 634efb3ac9
7 changed files with 33 additions and 23 deletions

@@ -103,8 +103,7 @@ class HuggingfaceEngine(BaseEngine):
         messages = template.mm_plugin.process_messages(
             messages, mm_input_dict["images"], mm_input_dict["videos"], mm_input_dict["audios"], processor
         )
-        # add thought words to avoid skipping thinking
-        paired_messages = messages + [{"role": "assistant", "content": template.add_thought("")}]
+        paired_messages = messages + [{"role": "assistant", "content": ""}]
         system = system or generating_args["default_system"]
         enable_thinking = input_kwargs.pop("enable_thinking", None)
         enable_thinking = enable_thinking if enable_thinking is not None else generating_args["enable_thinking"]

@@ -146,8 +146,7 @@ class SGLangEngine(BaseEngine):
         messages = self.template.mm_plugin.process_messages(
             messages, images or [], videos or [], audios or [], self.processor
         )
-        # add thought words to avoid skipping thinking
-        paired_messages = messages + [{"role": "assistant", "content": self.template.add_thought("")}]
+        paired_messages = messages + [{"role": "assistant", "content": ""}]
         system = system or self.generating_args["default_system"]
         enable_thinking = input_kwargs.pop("enable_thinking", None)
         enable_thinking = enable_thinking if enable_thinking is not None else self.generating_args["enable_thinking"]

@@ -123,8 +123,7 @@ class VllmEngine(BaseEngine):
         messages = self.template.mm_plugin.process_messages(
             messages, images or [], videos or [], audios or [], self.processor
         )
-        # add thought words to avoid skipping thinking
-        paired_messages = messages + [{"role": "assistant", "content": self.template.add_thought("")}]
+        paired_messages = messages + [{"role": "assistant", "content": ""}]
         system = system or self.generating_args["default_system"]
         enable_thinking = input_kwargs.pop("enable_thinking", None)
         enable_thinking = enable_thinking if enable_thinking is not None else self.generating_args["enable_thinking"]
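The same two-line edit lands in each of the three inference engines (Transformers, SGLang, vLLM above): the assistant stub appended for encoding is no longer pre-seeded with template.add_thought(""), which injects the template's thought words (an empty think block); whether the thinking block is kept or skipped is now decided by the enable_thinking flag resolved two lines below. A minimal sketch of that resolution logic, assuming plain dicts for the request kwargs and generating args rather than the project's actual classes:

def resolve_enable_thinking(input_kwargs: dict, generating_args: dict) -> bool:
    # Mirrors the two context lines in the diff: a per-request value wins,
    # otherwise fall back to the configured default. pop() keeps the key
    # from leaking into the downstream generate() kwargs.
    enable_thinking = input_kwargs.pop("enable_thinking", None)
    return enable_thinking if enable_thinking is not None else generating_args["enable_thinking"]

if __name__ == "__main__":
    defaults = {"enable_thinking": False}
    assert resolve_enable_thinking({}, defaults) is False                        # fallback
    assert resolve_enable_thinking({"enable_thinking": True}, defaults) is True  # request override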

@@ -60,7 +60,7 @@ class Template:
         messages: list[dict[str, str]],
         system: Optional[str] = None,
         tools: Optional[str] = None,
-        enable_thinking: bool = True,
+        enable_thinking: bool = False,
     ) -> tuple[list[int], list[int]]:
         r"""Return a single pair of token ids representing prompt and response respectively."""
         encoded_messages = self._encode(tokenizer, messages, system, tools)
@@ -406,7 +406,7 @@ class ReasoningTemplate(Template):
         messages: list[dict[str, str]],
         system: Optional[str] = None,
         tools: Optional[str] = None,
-        enable_thinking: bool = True,
+        enable_thinking: bool = False,
     ) -> tuple[list[int], list[int]]:
         messages = deepcopy(messages)
         for i in range(len(messages)):
@@ -418,7 +418,7 @@ class ReasoningTemplate(Template):
         for encoded_ids in encoded_messages[:-1]:
             prompt_ids += encoded_ids
 
-        if not enable_thinking or (
+        if not enable_thinking and (
             messages[-1]["role"] == Role.ASSISTANT
             and self.thought_words[0] not in messages[-1]["content"]
             and self.thought_words[1] not in messages[-1]["content"]
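Two behavioral changes in the template code: enable_thinking now defaults to False in encode_oneturn for both Template and ReasoningTemplate, and the guard in ReasoningTemplate flips from `or` to `and`. Under the old `or`, the guarded branch (not shown, but it presumably prepends an empty thought block to the response) fired whenever the trailing assistant message lacked thought words, even with enable_thinking=True, which silently suppressed the chain of thought; now it fires only when thinking is also disabled. A self-contained sketch of the flipped predicate, assuming Qwen3-style thought words; the helper name is illustrative, not the project's API:

THOUGHT_WORDS = ("<think>", "</think>")

def should_inject_empty_thought(enable_thinking: bool, last_message: dict) -> bool:
    # After this commit, both conditions must hold: thinking is disabled AND
    # the trailing assistant message does not already carry thought words.
    # With the old `or`, a thought-free reply triggered injection even when
    # enable_thinking was True.
    return not enable_thinking and (
        last_message["role"] == "assistant"
        and THOUGHT_WORDS[0] not in last_message["content"]
        and THOUGHT_WORDS[1] not in last_message["content"]
    )

if __name__ == "__main__":
    stub = {"role": "assistant", "content": ""}
    assert should_inject_empty_thought(False, stub) is True   # force an empty CoT block
    assert should_inject_empty_thought(True, stub) is False   # leave room for the model to think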

@@ -2479,6 +2479,14 @@ register_model_group(
             DownloadSource.DEFAULT: "Qwen/Qwen3-235B-A22B",
             DownloadSource.MODELSCOPE: "Qwen/Qwen3-235B-A22B",
         },
+        "Qwen3-14B-Instruct-AWQ": {
+            DownloadSource.DEFAULT: "Qwen/Qwen3-14B-AWQ",
+            DownloadSource.MODELSCOPE: "Qwen/Qwen3-14B-AWQ",
+        },
+        "Qwen3-32B-Instruct-AWQ": {
+            DownloadSource.DEFAULT: "Qwen/Qwen3-32B-AWQ",
+            DownloadSource.MODELSCOPE: "Qwen/Qwen3-32B-AWQ",
+        },
     },
     template="qwen3",
 )
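Both AWQ checkpoints point at the same repo id on Hugging Face and ModelScope. A hedged sketch of how a registry shaped like this can be consumed; DownloadSource mimics the enum in the diff, while resolve_model_path is a hypothetical helper for illustration only:

from enum import Enum

class DownloadSource(str, Enum):
    DEFAULT = "hf"     # Hugging Face Hub
    MODELSCOPE = "ms"  # ModelScope

# Display name -> repo id per download source, shaped like the entries above.
MODEL_REGISTRY: dict[str, dict[DownloadSource, str]] = {
    "Qwen3-14B-Instruct-AWQ": {
        DownloadSource.DEFAULT: "Qwen/Qwen3-14B-AWQ",
        DownloadSource.MODELSCOPE: "Qwen/Qwen3-14B-AWQ",
    },
}

def resolve_model_path(name: str, source: DownloadSource = DownloadSource.DEFAULT) -> str:
    # Hypothetical lookup: map a UI display name to a hub repo id.
    return MODEL_REGISTRY[name][source]

print(resolve_model_path("Qwen3-14B-Instruct-AWQ"))  # Qwen/Qwen3-14B-AWQ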
@@ -2502,10 +2510,14 @@ register_model_group(
 register_model_group(
     models={
+        "Qwen2.5-Omni-3B": {
+            DownloadSource.DEFAULT: "Qwen/Qwen2.5-Omni-3B",
+            DownloadSource.MODELSCOPE: "Qwen/Qwen2.5-Omni-3B",
+        },
         "Qwen2.5-Omni-7B": {
             DownloadSource.DEFAULT: "Qwen/Qwen2.5-Omni-7B",
             DownloadSource.MODELSCOPE: "Qwen/Qwen2.5-Omni-7B",
-        }
+        },
     },
     template="qwen2_omni",
     multimodal=True,
 )
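The new 3B entry slots in ahead of the 7B one, and the 7B entry's closing brace gains a trailing comma; both sizes therefore share the group-level template="qwen2_omni" and multimodal=True. Continuing the toy registry idea above, a sketch of how group metadata might fan out to each member; register_model_group here is a stand-in, not the project's implementation:

from enum import Enum

class DownloadSource(str, Enum):
    DEFAULT = "hf"
    MODELSCOPE = "ms"

MODEL_REGISTRY: dict[str, dict[DownloadSource, str]] = {}
TEMPLATE_BY_MODEL: dict[str, str] = {}
MULTIMODAL_MODELS: set[str] = set()

def register_model_group(models, template=None, multimodal=False):
    # Toy stand-in: record each member and fan the group metadata out to it.
    MODEL_REGISTRY.update(models)
    for name in models:
        if template is not None:
            TEMPLATE_BY_MODEL[name] = template
        if multimodal:
            MULTIMODAL_MODELS.add(name)

register_model_group(
    models={
        "Qwen2.5-Omni-3B": {DownloadSource.DEFAULT: "Qwen/Qwen2.5-Omni-3B"},
        "Qwen2.5-Omni-7B": {DownloadSource.DEFAULT: "Qwen/Qwen2.5-Omni-7B"},
    },
    template="qwen2_omni",
    multimodal=True,
)

assert TEMPLATE_BY_MODEL["Qwen2.5-Omni-3B"] == "qwen2_omni"
assert "Qwen2.5-Omni-3B" in MULTIMODAL_MODELS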