[misc] fix import error (#9296)

[model] support hunyuan-mt model (#9284)
Co-authored-by: wyfdgg <liwenkun0812@163.com>
Co-authored-by: Yaowei Zheng <hiyouga@buaa.edu.cn>
2025-12-28 01:30:36 +08:00 · 2025-10-17 10:54:30 +08:00 · 2025-10-17 10:33:09 +08:00
5 changed files with 14 additions and 8 deletions
--- a/README.md
+++ b/README.md
@@ -283,7 +283,7 @@ Choose your path:
 | [GPT-OSS](https://huggingface.co/openai)                          | 20B/120B                         | gpt                  |
 | [Granite 3.0-3.3](https://huggingface.co/ibm-granite)             | 1B/2B/3B/8B                      | granite3             |
 | [Granite 4](https://huggingface.co/ibm-granite)                   | 7B                               | granite4             |
-| [Hunyuan](https://huggingface.co/tencent/)                        | 7B                               | hunyuan              |
+| [Hunyuan (MT)](https://huggingface.co/tencent/)                   | 7B                               | hunyuan              |
 | [Index](https://huggingface.co/IndexTeam)                         | 1.9B                             | index                |
 | [InternLM 2-3](https://huggingface.co/internlm)                   | 7B/8B/20B                        | intern2              |
 | [InternVL 2.5-3.5](https://huggingface.co/OpenGVLab)              | 1B/2B/4B/8B/14B/30B/38B/78B/241B | intern_vl            |
--- a/README_zh.md
+++ b/README_zh.md
@@ -285,7 +285,7 @@ https://github.com/user-attachments/assets/43b700c6-a178-41db-b1f8-8190a5d3fcfc
 | [GPT-OSS](https://huggingface.co/openai)                          | 20B/120B                         | gpt                  |
 | [Granite 3.0-3.3](https://huggingface.co/ibm-granite)             | 1B/2B/3B/8B                      | granite3             |
 | [Granite 4](https://huggingface.co/ibm-granite)                   | 7B                               | granite4             |
-| [Hunyuan](https://huggingface.co/tencent/)                        | 7B                               | hunyuan              |
+| [Hunyuan (MT)](https://huggingface.co/tencent/)                   | 7B                               | hunyuan              |
 | [Index](https://huggingface.co/IndexTeam)                         | 1.9B                             | index                |
 | [InternLM 2-3](https://huggingface.co/internlm)                   | 7B/8B/20B                        | intern2              |
 | [InternVL 2.5-3.5](https://huggingface.co/OpenGVLab)              | 1B/2B/4B/8B/14B/30B/38B/78B/241B | intern_vl            |
--- a/src/llamafactory/data/template.py
+++ b/src/llamafactory/data/template.py
@@ -1201,10 +1201,10 @@ register_template(

 register_template(
    name="hunyuan",
-    format_user=StringFormatter(slots=["<|bos|>user\n{{content}}<|eos|>\n<|bos|>assistant\n"]),
-    format_assistant=StringFormatter(slots=["{{content}}<|eos|>\n"]),
-    format_system=StringFormatter(slots=["<|bos|>system\n{{content}}<|eos|>\n"]),
-    format_prefix=EmptyFormatter(slots=["<|bos|>"]),
+    format_user=StringFormatter(slots=["{{content}}<|extra_0|>"]),
+    format_assistant=StringFormatter(slots=["{{content}}<|eos|>"]),
+    format_system=StringFormatter(slots=["{{content}}<|extra_4|>"]),
+    format_prefix=EmptyFormatter(slots=["<|startoftext|>"]),
    stop_words=["<|eos|>"],
 )

--- a/src/llamafactory/extras/constants.py
+++ b/src/llamafactory/extras/constants.py
@@ -1152,6 +1152,10 @@ register_model_group(
            DownloadSource.DEFAULT: "tencent/Hunyuan-7B-Instruct",
            DownloadSource.MODELSCOPE: "AI-ModelScope/Hunyuan-7B-Instruct",
        },
+        "Hunyuan-MT-7B-Instruct": {
+            DownloadSource.DEFAULT: "tencent/Hunyuan-MT-7B",
+            DownloadSource.MODELSCOPE: "Tencent-Hunyuan/Hunyuan-MT-7B",
+        },
    },
    template="hunyuan",
 )
--- a/src/llamafactory/model/model_utils/attention.py
+++ b/src/llamafactory/model/model_utils/attention.py
@@ -14,8 +14,6 @@

 from typing import TYPE_CHECKING

-from transformers.utils import is_flash_attn_2_available, is_torch_sdpa_available
-
 from ...extras import logging
 from ...extras.constants import AttentionFunction

@@ -30,6 +28,8 @@ logger = logging.get_logger(__name__)


 def configure_attn_implementation(config: "PretrainedConfig", model_args: "ModelArguments") -> None:
+    from transformers.utils import is_flash_attn_2_available
+
    if getattr(config, "model_type", None) == "gemma2":
        if model_args.flash_attn == AttentionFunction.AUTO or model_args.flash_attn == AttentionFunction.FA2:
            if is_flash_attn_2_available():
@@ -51,6 +51,8 @@ def configure_attn_implementation(config: "PretrainedConfig", model_args: "Model
        requested_attn_implementation = "eager"

    elif model_args.flash_attn == AttentionFunction.SDPA:
+        from transformers.utils import is_torch_sdpa_available
+
        if not is_torch_sdpa_available():
            logger.warning_rank0("torch>=2.1.1 is required for SDPA attention.")
            return
Author	SHA1	Message	Date
Yaowei Zheng	a442fa90ad	[misc] fix import error (#9296)	2025-10-17 10:54:30 +08:00
wyfdgg	8c341cbaae	[model] support hunyuan-mt model (#9284) Co-authored-by: wyfdgg <liwenkun0812@163.com> Co-authored-by: Yaowei Zheng <hiyouga@buaa.edu.cn>	2025-10-17 10:33:09 +08:00