mirror of
https://github.com/hiyouga/LLaMA-Factory.git
synced 2025-08-19 20:22:48 +08:00
[model] add qwen3 nothink (#8869)
This commit is contained in:
parent
8efa5c9694
commit
a0e9a36a6f
@ -101,6 +101,7 @@ Choose your path:
|
|||||||
|
|
||||||
## Blogs
|
## Blogs
|
||||||
|
|
||||||
|
- [Fine-tune GPT-OSS for Role-Playing using LLaMA-Factory](https://docs.llamafactory.com.cn/docs/documents/best-practice/gptoss/?utm_source=LLaMA-Factory) (Chinese)
|
||||||
- [Fine-tune Llama3.1-70B for Medical Diagnosis using LLaMA-Factory](https://docs.alayanew.com/docs/documents/bestPractice/bigModel/llama70B/?utm_source=LLaMA-Factory) (Chinese)
|
- [Fine-tune Llama3.1-70B for Medical Diagnosis using LLaMA-Factory](https://docs.alayanew.com/docs/documents/bestPractice/bigModel/llama70B/?utm_source=LLaMA-Factory) (Chinese)
|
||||||
- [A One-Stop Code-Free Model Reinforcement Learning and Deployment Platform based on LLaMA-Factory and EasyR1](https://aws.amazon.com/cn/blogs/china/building-llm-model-hub-based-on-llamafactory-and-easyr1/) (Chinese)
|
- [A One-Stop Code-Free Model Reinforcement Learning and Deployment Platform based on LLaMA-Factory and EasyR1](https://aws.amazon.com/cn/blogs/china/building-llm-model-hub-based-on-llamafactory-and-easyr1/) (Chinese)
|
||||||
- [How Apoidea Group enhances visual information extraction from banking documents with multimodal models using LLaMA-Factory on Amazon SageMaker HyperPod](https://aws.amazon.com/cn/blogs/machine-learning/how-apoidea-group-enhances-visual-information-extraction-from-banking-documents-with-multimodal-models-using-llama-factory-on-amazon-sagemaker-hyperpod/) (English)
|
- [How Apoidea Group enhances visual information extraction from banking documents with multimodal models using LLaMA-Factory on Amazon SageMaker HyperPod](https://aws.amazon.com/cn/blogs/machine-learning/how-apoidea-group-enhances-visual-information-extraction-from-banking-documents-with-multimodal-models-using-llama-factory-on-amazon-sagemaker-hyperpod/) (English)
|
||||||
|
@ -103,6 +103,7 @@ https://github.com/user-attachments/assets/43b700c6-a178-41db-b1f8-8190a5d3fcfc
|
|||||||
|
|
||||||
## 官方博客
|
## 官方博客
|
||||||
|
|
||||||
|
- [使用 LLaMA-Factory 构建 GPT-OSS 角色扮演模型](https://docs.llamafactory.com.cn/docs/documents/best-practice/gptoss/?utm_source=LLaMA-Factory)(中文)
|
||||||
- [使用 LLaMA-Factory 微调 Llama3.1-70B 医学诊断模型](https://docs.alayanew.com/docs/documents/bestPractice/bigModel/llama70B/?utm_source=LLaMA-Factory)(中文)
|
- [使用 LLaMA-Factory 微调 Llama3.1-70B 医学诊断模型](https://docs.alayanew.com/docs/documents/bestPractice/bigModel/llama70B/?utm_source=LLaMA-Factory)(中文)
|
||||||
- [基于 LLaMA-Factory 和 EasyR1 打造一站式无代码大模型强化学习和部署平台 LLM Model Hub](https://aws.amazon.com/cn/blogs/china/building-llm-model-hub-based-on-llamafactory-and-easyr1/)(中文)
|
- [基于 LLaMA-Factory 和 EasyR1 打造一站式无代码大模型强化学习和部署平台 LLM Model Hub](https://aws.amazon.com/cn/blogs/china/building-llm-model-hub-based-on-llamafactory-and-easyr1/)(中文)
|
||||||
- [通过亚马逊 SageMaker HyperPod 上的 LLaMA-Factory 增强多模态模型银行文档的视觉信息提取](https://aws.amazon.com/cn/blogs/machine-learning/how-apoidea-group-enhances-visual-information-extraction-from-banking-documents-with-multimodal-models-using-llama-factory-on-amazon-sagemaker-hyperpod/)(英文)
|
- [通过亚马逊 SageMaker HyperPod 上的 LLaMA-Factory 增强多模态模型银行文档的视觉信息提取](https://aws.amazon.com/cn/blogs/machine-learning/how-apoidea-group-enhances-visual-information-extraction-from-banking-documents-with-multimodal-models-using-llama-factory-on-amazon-sagemaker-hyperpod/)(英文)
|
||||||
|
@ -96,7 +96,7 @@ class Template:
|
|||||||
|
|
||||||
def add_thought(self, content: str = "") -> str:
|
def add_thought(self, content: str = "") -> str:
|
||||||
r"""Add empty thought to assistant message."""
|
r"""Add empty thought to assistant message."""
|
||||||
return f"{self.thought_words[0]}\n\n{self.thought_words[1]}\n\n" + content
|
return f"{self.thought_words[0]}{self.thought_words[1]}" + content
|
||||||
|
|
||||||
def remove_thought(self, content: str) -> str:
|
def remove_thought(self, content: str) -> str:
|
||||||
r"""Remove thought from assistant message."""
|
r"""Remove thought from assistant message."""
|
||||||
@ -518,7 +518,7 @@ def register_template(
|
|||||||
format_prefix=format_prefix or default_prefix_formatter,
|
format_prefix=format_prefix or default_prefix_formatter,
|
||||||
default_system=default_system,
|
default_system=default_system,
|
||||||
stop_words=stop_words or [],
|
stop_words=stop_words or [],
|
||||||
thought_words=thought_words or ("<think>", "</think>"),
|
thought_words=thought_words or ("<think>\n", "\n</think>\n\n"),
|
||||||
efficient_eos=efficient_eos,
|
efficient_eos=efficient_eos,
|
||||||
replace_eos=replace_eos,
|
replace_eos=replace_eos,
|
||||||
replace_jinja_template=replace_jinja_template,
|
replace_jinja_template=replace_jinja_template,
|
||||||
@ -579,7 +579,7 @@ def parse_template(tokenizer: "PreTrainedTokenizer") -> "Template":
|
|||||||
format_prefix=EmptyFormatter(slots=[prefix]) if prefix else EmptyFormatter(),
|
format_prefix=EmptyFormatter(slots=[prefix]) if prefix else EmptyFormatter(),
|
||||||
default_system=default_system,
|
default_system=default_system,
|
||||||
stop_words=[],
|
stop_words=[],
|
||||||
thought_words=("<think>", "</think>"),
|
thought_words=("<think>\n", "\n</think>\n\n"),
|
||||||
efficient_eos=False,
|
efficient_eos=False,
|
||||||
replace_eos=False,
|
replace_eos=False,
|
||||||
replace_jinja_template=False,
|
replace_jinja_template=False,
|
||||||
@ -1750,6 +1750,22 @@ register_template(
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# copied from qwen template
|
||||||
|
register_template(
|
||||||
|
name="qwen3_nothink",
|
||||||
|
format_user=StringFormatter(slots=["<|im_start|>user\n{{content}}<|im_end|>\n<|im_start|>assistant\n"]),
|
||||||
|
format_assistant=StringFormatter(slots=["{{content}}<|im_end|>\n"]),
|
||||||
|
format_system=StringFormatter(slots=["<|im_start|>system\n{{content}}<|im_end|>\n"]),
|
||||||
|
format_function=FunctionFormatter(slots=["{{content}}<|im_end|>\n"], tool_format="qwen"),
|
||||||
|
format_observation=StringFormatter(
|
||||||
|
slots=["<|im_start|>user\n<tool_response>\n{{content}}\n</tool_response><|im_end|>\n<|im_start|>assistant\n"]
|
||||||
|
),
|
||||||
|
format_tools=ToolFormatter(tool_format="qwen"),
|
||||||
|
stop_words=["<|im_end|>"],
|
||||||
|
replace_eos=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
# copied from chatml template
|
# copied from chatml template
|
||||||
register_template(
|
register_template(
|
||||||
name="qwen2_audio",
|
name="qwen2_audio",
|
||||||
|
@ -2767,10 +2767,6 @@ register_model_group(
|
|||||||
DownloadSource.DEFAULT: "Qwen/Qwen3-4B",
|
DownloadSource.DEFAULT: "Qwen/Qwen3-4B",
|
||||||
DownloadSource.MODELSCOPE: "Qwen/Qwen3-4B",
|
DownloadSource.MODELSCOPE: "Qwen/Qwen3-4B",
|
||||||
},
|
},
|
||||||
"Qwen3-4B-Instruct-2507": {
|
|
||||||
DownloadSource.DEFAULT: "Qwen/Qwen3-4B-Instruct-2507",
|
|
||||||
DownloadSource.MODELSCOPE: "Qwen/Qwen3-4B-Instruct-2507",
|
|
||||||
},
|
|
||||||
"Qwen3-4B-Thinking-2507": {
|
"Qwen3-4B-Thinking-2507": {
|
||||||
DownloadSource.DEFAULT: "Qwen/Qwen3-4B-Thinking-2507",
|
DownloadSource.DEFAULT: "Qwen/Qwen3-4B-Thinking-2507",
|
||||||
DownloadSource.MODELSCOPE: "Qwen/Qwen3-4B-Thinking-2507",
|
DownloadSource.MODELSCOPE: "Qwen/Qwen3-4B-Thinking-2507",
|
||||||
@ -2791,10 +2787,6 @@ register_model_group(
|
|||||||
DownloadSource.DEFAULT: "Qwen/Qwen3-30B-A3B",
|
DownloadSource.DEFAULT: "Qwen/Qwen3-30B-A3B",
|
||||||
DownloadSource.MODELSCOPE: "Qwen/Qwen3-30B-A3B",
|
DownloadSource.MODELSCOPE: "Qwen/Qwen3-30B-A3B",
|
||||||
},
|
},
|
||||||
"Qwen3-30B-A3B-Instruct-2507": {
|
|
||||||
DownloadSource.DEFAULT: "Qwen/Qwen3-30B-A3B-Instruct-2507",
|
|
||||||
DownloadSource.MODELSCOPE: "Qwen/Qwen3-30B-A3B-Instruct-2507",
|
|
||||||
},
|
|
||||||
"Qwen3-30B-A3B-Thinking-2507": {
|
"Qwen3-30B-A3B-Thinking-2507": {
|
||||||
DownloadSource.DEFAULT: "Qwen/Qwen3-30B-A3B-Thinking-2507",
|
DownloadSource.DEFAULT: "Qwen/Qwen3-30B-A3B-Thinking-2507",
|
||||||
DownloadSource.MODELSCOPE: "Qwen/Qwen3-30B-A3B-Thinking-2507",
|
DownloadSource.MODELSCOPE: "Qwen/Qwen3-30B-A3B-Thinking-2507",
|
||||||
@ -2803,10 +2795,6 @@ register_model_group(
|
|||||||
DownloadSource.DEFAULT: "Qwen/Qwen3-235B-A22B",
|
DownloadSource.DEFAULT: "Qwen/Qwen3-235B-A22B",
|
||||||
DownloadSource.MODELSCOPE: "Qwen/Qwen3-235B-A22B",
|
DownloadSource.MODELSCOPE: "Qwen/Qwen3-235B-A22B",
|
||||||
},
|
},
|
||||||
"Qwen3-235B-A22B-Instruct-2507": {
|
|
||||||
DownloadSource.DEFAULT: "Qwen/Qwen3-235B-A22B-Instruct-2507",
|
|
||||||
DownloadSource.MODELSCOPE: "Qwen/Qwen3-235B-A22B-Instruct-2507",
|
|
||||||
},
|
|
||||||
"Qwen3-235B-A22B-Thinking-2507": {
|
"Qwen3-235B-A22B-Thinking-2507": {
|
||||||
DownloadSource.DEFAULT: "Qwen/Qwen3-235B-A22B-Thinking-2507",
|
DownloadSource.DEFAULT: "Qwen/Qwen3-235B-A22B-Thinking-2507",
|
||||||
DownloadSource.MODELSCOPE: "Qwen/Qwen3-235B-A22B-Thinking-2507",
|
DownloadSource.MODELSCOPE: "Qwen/Qwen3-235B-A22B-Thinking-2507",
|
||||||
@ -2848,6 +2836,25 @@ register_model_group(
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
register_model_group(
|
||||||
|
models={
|
||||||
|
"Qwen3-4B-Instruct-2507": {
|
||||||
|
DownloadSource.DEFAULT: "Qwen/Qwen3-4B-Instruct-2507",
|
||||||
|
DownloadSource.MODELSCOPE: "Qwen/Qwen3-4B-Instruct-2507",
|
||||||
|
},
|
||||||
|
"Qwen3-30B-A3B-Instruct-2507": {
|
||||||
|
DownloadSource.DEFAULT: "Qwen/Qwen3-30B-A3B-Instruct-2507",
|
||||||
|
DownloadSource.MODELSCOPE: "Qwen/Qwen3-30B-A3B-Instruct-2507",
|
||||||
|
},
|
||||||
|
"Qwen3-235B-A22B-Instruct-2507": {
|
||||||
|
DownloadSource.DEFAULT: "Qwen/Qwen3-235B-A22B-Instruct-2507",
|
||||||
|
DownloadSource.MODELSCOPE: "Qwen/Qwen3-235B-A22B-Instruct-2507",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
template="qwen3_nothink",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
register_model_group(
|
register_model_group(
|
||||||
models={
|
models={
|
||||||
"Qwen2-Audio-7B": {
|
"Qwen2-Audio-7B": {
|
||||||
|
@ -16,7 +16,7 @@
|
|||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
from dataclasses import asdict, dataclass, field
|
from dataclasses import asdict, dataclass, field
|
||||||
from typing import Any, Literal, Optional, Union
|
from typing import Any, Literal, Optional
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
|
@ -50,7 +50,7 @@ class RayArguments:
|
|||||||
default="PACK",
|
default="PACK",
|
||||||
metadata={"help": "The placement strategy for Ray training. Default is PACK."},
|
metadata={"help": "The placement strategy for Ray training. Default is PACK."},
|
||||||
)
|
)
|
||||||
ray_init_kwargs: Optional[dict] = field(
|
ray_init_kwargs: Optional[Union[dict, str]] = field(
|
||||||
default=None,
|
default=None,
|
||||||
metadata={"help": "The arguments to pass to ray.init for Ray training. Default is None."},
|
metadata={"help": "The arguments to pass to ray.init for Ray training. Default is None."},
|
||||||
)
|
)
|
||||||
@ -59,10 +59,14 @@ class RayArguments:
|
|||||||
self.use_ray = use_ray()
|
self.use_ray = use_ray()
|
||||||
if isinstance(self.resources_per_worker, str) and self.resources_per_worker.startswith("{"):
|
if isinstance(self.resources_per_worker, str) and self.resources_per_worker.startswith("{"):
|
||||||
self.resources_per_worker = _convert_str_dict(json.loads(self.resources_per_worker))
|
self.resources_per_worker = _convert_str_dict(json.loads(self.resources_per_worker))
|
||||||
|
|
||||||
|
if isinstance(self.ray_init_kwargs, str) and self.ray_init_kwargs.startswith("{"):
|
||||||
|
self.ray_init_kwargs = _convert_str_dict(json.loads(self.ray_init_kwargs))
|
||||||
|
|
||||||
if self.ray_storage_filesystem is not None:
|
if self.ray_storage_filesystem is not None:
|
||||||
if self.ray_storage_filesystem not in ["s3", "gs", "gcs"]:
|
if self.ray_storage_filesystem not in ["s3", "gs", "gcs"]:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
f"ray_storage_filesystem must be one of ['s3', 'gs', 'gcs'], got {self.ray_storage_filesystem}"
|
f"ray_storage_filesystem must be one of ['s3', 'gs', 'gcs'], got {self.ray_storage_filesystem}."
|
||||||
)
|
)
|
||||||
|
|
||||||
import pyarrow.fs as fs
|
import pyarrow.fs as fs
|
||||||
|
Loading…
x
Reference in New Issue
Block a user