[misc] fix constants (#9008)

2025-12-28 09:40:34 +08:00 · 2025-08-23 23:04:30 +08:00
parent 652e6e92da
commit 514d6d74fa
8 changed files with 40 additions and 16 deletions
--- a/README.md
+++ b/README.md
@@ -5,7 +5,7 @@
 [![GitHub contributors](https://img.shields.io/github/contributors/hiyouga/LLaMA-Factory?color=orange)](https://github.com/hiyouga/LLaMA-Factory/graphs/contributors)
 [![GitHub workflow](https://github.com/hiyouga/LLaMA-Factory/actions/workflows/tests.yml/badge.svg)](https://github.com/hiyouga/LLaMA-Factory/actions/workflows/tests.yml)
 [![PyPI](https://img.shields.io/pypi/v/llamafactory)](https://pypi.org/project/llamafactory/)
-[![Citation](https://img.shields.io/badge/citation-760-green)](https://scholar.google.com/scholar?cites=12620864006390196564)
+[![Citation](https://img.shields.io/badge/citation-818-green)](https://scholar.google.com/scholar?cites=12620864006390196564)
 [![Docker Pulls](https://img.shields.io/docker/pulls/hiyouga/llamafactory)](https://hub.docker.com/r/hiyouga/llamafactory/tags)
 [![Twitter](https://img.shields.io/twitter/follow/llamafactory_ai)](https://twitter.com/llamafactory_ai)
@@ -124,12 +124,12 @@ Choose your path:
 [25/08/06] We supported fine-tuning the **[GPT-OSS](https://github.com/openai/gpt-oss)** models. See [PR #8826](https://github.com/hiyouga/LLaMA-Factory/pull/8826) to get started.
 <details><summary>Full Changelog</summary>
 [25/07/02] We supported fine-tuning the **[GLM-4.1V-9B-Thinking](https://github.com/THUDM/GLM-4.1V-Thinking)** model.
 [25/04/28] We supported fine-tuning the **[Qwen3](https://qwenlm.github.io/blog/qwen3/)** model family.
 <details><summary>Full Changelog</summary>
 [25/04/21] We supported the **[Muon](https://github.com/KellerJordan/Muon)** optimizer. See [examples](examples/README.md) for usage. Thank [@tianshijing](https://github.com/tianshijing)'s PR.
 [25/04/16] We supported fine-tuning the **[InternVL3](https://huggingface.co/OpenGVLab/InternVL3-8B)** model. See [PR #7258](https://github.com/hiyouga/LLaMA-Factory/pull/7258) to get started.
--- a/README_zh.md
+++ b/README_zh.md
@@ -5,7 +5,7 @@
 [![GitHub contributors](https://img.shields.io/github/contributors/hiyouga/LLaMA-Factory?color=orange)](https://github.com/hiyouga/LLaMA-Factory/graphs/contributors)
 [![GitHub workflow](https://github.com/hiyouga/LLaMA-Factory/actions/workflows/tests.yml/badge.svg)](https://github.com/hiyouga/LLaMA-Factory/actions/workflows/tests.yml)
 [![PyPI](https://img.shields.io/pypi/v/llamafactory)](https://pypi.org/project/llamafactory/)
-[![Citation](https://img.shields.io/badge/citation-760-green)](https://scholar.google.com/scholar?cites=12620864006390196564)
+[![Citation](https://img.shields.io/badge/citation-818-green)](https://scholar.google.com/scholar?cites=12620864006390196564)
 [![Docker Pulls](https://img.shields.io/docker/pulls/hiyouga/llamafactory)](https://hub.docker.com/r/hiyouga/llamafactory/tags)
 [![Twitter](https://img.shields.io/twitter/follow/llamafactory_ai)](https://twitter.com/llamafactory_ai)
@@ -126,12 +126,12 @@ https://github.com/user-attachments/assets/43b700c6-a178-41db-b1f8-8190a5d3fcfc
 [25/08/06] 我们支持了 **[GPT-OSS](https://github.com/openai/gpt-oss)** 模型的微调。查看 [PR #8826](https://github.com/hiyouga/LLaMA-Factory/pull/8826) 以使用。
 <details><summary>展开日志</summary>
 [25/07/02] 我们支持了 **[GLM-4.1V-9B-Thinking](https://github.com/THUDM/GLM-4.1V-Thinking)** 模型的微调。
 [25/04/28] 我们支持了 **[Qwen3](https://qwenlm.github.io/blog/qwen3/)** 系列模型的微调。
 <details><summary>展开日志</summary>
 [25/04/21] 我们支持了 **[Muon](https://github.com/KellerJordan/Muon)** 优化器。详细用法请参照 [examples](examples/README_zh.md)。感谢 [@tianshijing](https://github.com/tianshijing) 的 PR。
 [25/04/16] 我们支持了 **[InternVL3](https://huggingface.co/OpenGVLab/InternVL3-8B)** 模型的微调。查看 [PR #7258](https://github.com/hiyouga/LLaMA-Factory/pull/7258) 以使用。
--- a/assets/wechat.jpg
+++ b/assets/wechat.jpg
--- a/assets/wechat_npu.jpg
+++ b/assets/wechat_npu.jpg
--- a/src/llamafactory/data/template.py
+++ b/src/llamafactory/data/template.py
@@ -1862,12 +1862,10 @@ register_template(
        slots=[{"bos_token"}, "user\n{{content}}", {"eos_token"}, {"bos_token"}, "assistant\n"]
    ),
    format_system=StringFormatter(slots=[{"bos_token"}, "system\n{{content}}", {"eos_token"}]),
-    format_function=FunctionFormatter(
+    format_function=FunctionFormatter(slots=[{"bos_token"}, "\n{{content}}", {"eos_token"}], tool_format="seed_oss"),
        slots=[{"bos_token"}, "\n{{content}}", {"eos_token"}], tool_format="seed_oss"
    ),
    format_tools=ToolFormatter(tool_format="seed_oss"),
    template_class=ReasoningTemplate,
-    thought_words=("<seed:think>", "</seed:think>")
+    thought_words=("<seed:think>", "</seed:think>"),
 )
--- a/src/llamafactory/data/tool_utils.py
+++ b/src/llamafactory/data/tool_utils.py
@@ -355,6 +355,7 @@ class GLM4MOEToolUtils(QwenToolUtils):
        return "\n".join(function_texts)
 class SeedToolUtils(ToolUtils):
    r"""Seed tool using template."""
@@ -387,8 +388,7 @@ class SeedToolUtils(ToolUtils):
    def tool_extractor(content: str) -> Union[str, list["FunctionCall"]]:
        results = []
        regex = re.compile(
-            r"<seed:tool_call>\s*<function=\s*([^\s<]+)\s*(.*?)\s*</function>\s*</seed:tool_call>",
+            r"<seed:tool_call>\s*<function=\s*([^\s<]+)\s*(.*?)\s*</function>\s*</seed:tool_call>", re.DOTALL
            re.DOTALL
        )
        for func_name, params_block in re.findall(regex, content):
            args_dict = {}
@@ -405,6 +405,7 @@ class SeedToolUtils(ToolUtils):
        return results
 TOOLS = {
    "default": DefaultToolUtils(),
    "glm4": GLM4ToolUtils(),
--- a/src/llamafactory/extras/constants.py
+++ b/src/llamafactory/extras/constants.py
@@ -645,6 +645,7 @@ register_model_group(
    template="falcon",
 )
 register_model_group(
    models={
        "Falcon-H1-0.5B-Base": {
@@ -1264,6 +1265,7 @@ register_model_group(
    multimodal=True,
 )
 register_model_group(
    models={
        "Intern-S1-mini": {
@@ -1275,6 +1277,7 @@ register_model_group(
    multimodal=True,
 )
 register_model_group(
    models={
        "Jamba-v0.1": {
@@ -3039,18 +3042,40 @@ register_model_group(
    models={
        "Seed-Coder-8B-Base": {
            DownloadSource.DEFAULT: "ByteDance-Seed/Seed-Coder-8B-Base",
            DownloadSource.MODELSCOPE: "ByteDance-Seed/Seed-Coder-8B-Base",
        },
        "Seed-Coder-8B-Instruct": {
            DownloadSource.DEFAULT: "ByteDance-Seed/Seed-Coder-8B-Instruct",
            DownloadSource.MODELSCOPE: "ByteDance-Seed/Seed-Coder-8B-Instruct",
        },
-        "Seed-Coder-8B-Instruct-Reasoning": {
+        "Seed-Coder-8B-Thinking": {
            DownloadSource.DEFAULT: "ByteDance-Seed/Seed-Coder-8B-Reasoning-bf16",
            DownloadSource.MODELSCOPE: "ByteDance-Seed/Seed-Coder-8B-Reasoning-bf16",
        },
    },
    template="seed_coder",
 )
 register_model_group(
    models={
        "Seed-OSS-36B-Base": {
            DownloadSource.DEFAULT: "ByteDance-Seed/Seed-OSS-36B-Base",
            DownloadSource.MODELSCOPE: "ByteDance-Seed/Seed-OSS-36B-Base",
        },
        "Seed-OSS-36B-Base-woSyn": {
            DownloadSource.DEFAULT: "ByteDance-Seed/Seed-OSS-36B-Base-woSyn",
            DownloadSource.MODELSCOPE: "ByteDance-Seed/Seed-OSS-36B-Base-woSyn",
        },
        "Seed-OSS-36B-Instruct": {
            DownloadSource.DEFAULT: "ByteDance-Seed/Seed-OSS-36B-Instruct",
            DownloadSource.MODELSCOPE: "ByteDance-Seed/Seed-OSS-36B-Instruct",
        },
    },
    template="seed_oss",
 )
 register_model_group(
    models={
        "Skywork-13B-Base": {