Mirror of https://github.com/hiyouga/LLaMA-Factory.git (synced 2025-12-29 02:00:36 +08:00)
[model] support MiniMax-M1&M2 series (#9680)
Co-authored-by: Yaowei Zheng <hiyouga@buaa.edu.cn>
@@ -309,6 +309,7 @@ Read technical notes:
| [MiMo](https://huggingface.co/XiaomiMiMo) | 7B/309B | mimo/mimo_v2 |
| [MiniCPM 1-4.1](https://huggingface.co/openbmb) | 0.5B/1B/2B/4B/8B | cpm/cpm3/cpm4 |
| [MiniCPM-o-2.6/MiniCPM-V-2.6](https://huggingface.co/openbmb) | 8B | minicpm_o/minicpm_v |
| [MiniMax-M1/MiniMax-M2](https://huggingface.co/MiniMaxAI/models) | 229B/456B | minimax1/minimax2 |
| [Ministral 3](https://huggingface.co/mistralai) | 3B/8B/14B | ministral3 |
| [Mistral/Mixtral](https://huggingface.co/mistralai) | 7B/8x7B/8x22B | mistral |
| [OLMo](https://huggingface.co/allenai) | 1B/7B | - |
@@ -311,6 +311,7 @@ https://github.com/user-attachments/assets/43b700c6-a178-41db-b1f8-8190a5d3fcfc
| [MiMo](https://huggingface.co/XiaomiMiMo) | 7B/309B | mimo/mimo_v2 |
| [MiniCPM 1-4.1](https://huggingface.co/openbmb) | 0.5B/1B/2B/4B/8B | cpm/cpm3/cpm4 |
| [MiniCPM-o-2.6/MiniCPM-V-2.6](https://huggingface.co/openbmb) | 8B | minicpm_o/minicpm_v |
| [MiniMax-M1/MiniMax-M2](https://huggingface.co/MiniMaxAI/models) | 229B/456B | minimax1/minimax2 |
| [Ministral 3](https://huggingface.co/mistralai) | 3B/8B/14B | ministral3 |
| [Mistral/Mixtral](https://huggingface.co/mistralai) | 7B/8x7B/8x22B | mistral |
| [OLMo](https://huggingface.co/allenai) | 1B/7B | - |
@@ -1673,6 +1673,39 @@ register_template(
)


register_template(
    name="minimax1",
    format_user=StringFormatter(
        slots=["<beginning_of_sentence>user name=user\n{{content}}<end_of_sentence>\n<beginning_of_sentence>ai name=assistant\n"]
    ),
    format_assistant=StringFormatter(slots=["{{content}}<end_of_sentence>\n"]),
    format_system=StringFormatter(
        slots=["<beginning_of_sentence>system ai_setting=assistant\n{{content}}<end_of_sentence>\n"]
    ),
    format_function=FunctionFormatter(slots=["{{content}}<end_of_sentence>\n"], tool_format="minimax1"),
    format_observation=StringFormatter(
        slots=["<beginning_of_sentence>tool name=tools\n{{content}}<end_of_sentence>\n<beginning_of_sentence>ai name=assistant\n"]
    ),
    format_tools=ToolFormatter(tool_format="minimax1"),
    default_system="You are a helpful assistant.",
    stop_words=["<end_of_sentence>"],
)


register_template(
    name="minimax2",
    format_user=StringFormatter(slots=["]~b]user\n{{content}}[e~[\n]~b]ai\n"]),
    format_assistant=StringFormatter(slots=["{{content}}[e~[\n"]),
    format_system=StringFormatter(slots=["]~!b[]~b]system\n{{content}}[e~[\n"]),
    format_function=FunctionFormatter(slots=["{{content}}[e~[\n"], tool_format="minimax2"),
    format_observation=StringFormatter(slots=["]~b]tool\n<response>{{content}}</response>[e~[\n]~b]ai\n"]),
    format_tools=ToolFormatter(tool_format="minimax2"),
    default_system="You are a helpful assistant. Your name is MiniMax-M2.1 and is built by MiniMax.",
    stop_words=["[e~["],
    template_class=ReasoningTemplate,
)


# mistral tokenizer v3 tekken
register_template(
    name="ministral",
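Illustrative only, not part of the diff: a hand-rendered single exchange using the minimax2 slots registered above, to show the wire format those delimiters produce (minimax1 works the same way, with <beginning_of_sentence>/<end_of_sentence> markers). The example messages are made up, and the real assembly is done by LLaMA-Factory's template engine rather than by f-strings.

    # Manual substitution of {{content}} into the minimax2 slots above.
    system = "You are a helpful assistant. Your name is MiniMax-M2.1 and is built by MiniMax."
    user = "Summarize this commit in one sentence."
    assistant = "It adds MiniMax-M1 and MiniMax-M2 chat templates and tool utilities."

    rendered = (
        f"]~!b[]~b]system\n{system}[e~[\n"  # format_system
        f"]~b]user\n{user}[e~[\n]~b]ai\n"   # format_user
        f"{assistant}[e~[\n"                # format_assistant; "[e~[" is also the stop word
    )
    print(rendered)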
@@ -61,6 +61,21 @@ LLAMA3_TOOL_PROMPT = (
    "Do not use variables.\n\n{tool_text}"
)

MINIMAX_M1_TOOL_PROMPT = (
    "You are provided with these tools:\n<tools>\n{tool_text}</tools>\n\n"
    "If you need to call tools, please respond with <tool_calls></tool_calls> XML tags, and provide tool-name and "
    "json-object of arguments, following the format below:\n<tool_calls>\n"
    "{{\"name\": <tool-name-1>, \"arguments\": <args-json-object-1>}}\n...\n</tool_calls>"
)

MINIMAX_M2_TOOL_PROMPT = (
    "\n\n# Tools\n\nYou may call one or more tools to assist with the user query.\n"
    "Here are the tools available in JSONSchema format:\n\n<tools>\n{tool_text}</tools>\n\n"
    "When making tool calls, use XML format to invoke tools and pass parameters:\n"
    "\n<minimax:tool_call>\n<invoke name=\"tool-name-1\">\n<parameter name=\"param-key-1\">param-value-1</parameter>\n"
    "<parameter name=\"param-key-2\">param-value-2</parameter>\n...\n</invoke>\n</minimax:tool_call>"
)

QWEN_TOOL_PROMPT = (
    "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\n"
    "You are provided with function signatures within <tools></tools> XML tags:\n<tools>{tool_text}"
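Illustrative only, not part of the diff, and assuming it runs in the scope of tool_utils.py where MINIMAX_M1_TOOL_PROMPT is defined: how the {tool_text} placeholder above is filled at runtime, and why the doubled braces in MINIMAX_M1_TOOL_PROMPT are needed (they become literal JSON braces after .format()). The toy schema is hypothetical.

    import json

    # Hypothetical tool schema, serialized the way MiniMaxM1ToolUtils.tool_formatter does below.
    tool = {"name": "get_weather", "parameters": {"type": "object", "properties": {"city": {"type": "string"}}}}
    tool_text = json.dumps(tool, ensure_ascii=False) + "\n"

    prompt = MINIMAX_M1_TOOL_PROMPT.format(tool_text=tool_text)
    # The escaped {{...}} sequence survives as a literal format example shown to the model:
    assert '{"name": <tool-name-1>, "arguments": <args-json-object-1>}' in prompt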
@@ -253,6 +268,111 @@ class Llama3ToolUtils(ToolUtils):
        return content


class MiniMaxM1ToolUtils(ToolUtils):
    r"""MiniMax-M1 tool using template."""

    @override
    @staticmethod
    def tool_formatter(tools: list[dict[str, Any]]) -> str:
        tool_text = ""
        for tool in tools:
            tool = tool.get("function", "") if tool.get("type") == "function" else tool
            tool_text += json.dumps(tool, ensure_ascii=False) + "\n"

        return MINIMAX_M1_TOOL_PROMPT.format(tool_text=tool_text)

    @override
    @staticmethod
    def function_formatter(functions: list["FunctionCall"]) -> str:
        function_texts = []
        for func in functions:
            name, arguments = func.name, json.loads(func.arguments)
            function_texts.append(json.dumps({"name": name, "arguments": arguments}, ensure_ascii=False))

        return "<tool_calls>\n" + "\n".join(function_texts) + "\n</tool_calls>"

    @override
    @staticmethod
    def tool_extractor(content: str) -> Union[str, list["FunctionCall"]]:
        regex = re.compile(r"<tool_calls>\s*(.+?)\s*</tool_calls>", re.DOTALL)
        tool_match = re.search(regex, content)
        if not tool_match:
            return content

        tool_calls_content = tool_match.group(1)
        results = []
        for line in tool_calls_content.split("\n"):
            line = line.strip()
            if not line:
                continue

            try:
                tool_call = json.loads(line)
                results.append(FunctionCall(tool_call["name"], json.dumps(tool_call["arguments"], ensure_ascii=False)))
            except json.JSONDecodeError:
                continue

        return results

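Illustrative only, not part of the diff, and assuming it runs inside tool_utils.py (json, FunctionCall, and the class above in scope; FunctionCall is taken to be the (name, arguments) tuple used elsewhere in this module, with arguments stored as a JSON string): a round trip through the MiniMax-M1 utilities, whose wire format is one JSON object per line inside <tool_calls> tags.

    call = FunctionCall("get_weather", json.dumps({"city": "Shanghai", "unit": "celsius"}))

    text = MiniMaxM1ToolUtils.function_formatter([call])
    # '<tool_calls>\n{"name": "get_weather", "arguments": {"city": "Shanghai", "unit": "celsius"}}\n</tool_calls>'

    parsed = MiniMaxM1ToolUtils.tool_extractor(text)
    assert parsed[0].name == "get_weather"
    assert json.loads(parsed[0].arguments) == {"city": "Shanghai", "unit": "celsius"}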
class MiniMaxM2ToolUtils(ToolUtils):
    r"""MiniMax-M2 tool using template."""

    @override
    @staticmethod
    def tool_formatter(tools: list[dict[str, Any]]) -> str:
        tool_text = ""
        for tool in tools:
            tool = tool.get("function", "") if tool.get("type") == "function" else tool
            tool_text += "<tool>" + json.dumps(tool, ensure_ascii=False) + "</tool>\n"

        return MINIMAX_M2_TOOL_PROMPT.format(tool_text=tool_text)

    @override
    @staticmethod
    def function_formatter(functions: list["FunctionCall"]) -> str:
        function_texts = []
        for func in functions:
            name, arguments = func.name, json.loads(func.arguments)
            prompt = "<invoke name=\"" + name + "\">"
            for key, value in arguments.items():
                prompt += "\n<parameter name=\"" + key + "\">"
                if not isinstance(value, str):
                    value = json.dumps(value, ensure_ascii=False)
                prompt += value + "</parameter>"
            prompt += "\n</invoke>"
            function_texts.append(prompt)

        return "<minimax:tool_call>\n" + "\n".join(function_texts) + "\n</minimax:tool_call>"

    @override
    @staticmethod
    def tool_extractor(content: str) -> Union[str, list["FunctionCall"]]:
        regex = re.compile(r"<minimax:tool_call>\s*(.+?)\s*</minimax:tool_call>", re.DOTALL)
        tool_match = re.search(regex, content)
        if not tool_match:
            return content

        tool_calls_content = tool_match.group(1)
        invoke_regex = re.compile(r"<invoke name=\"(.*?)\">(.*?)</invoke>", re.DOTALL)
        results = []

        for func_name, params_block in re.findall(invoke_regex, tool_calls_content):
            args_dict = {}
            param_pattern = re.compile(r"<parameter name=\"(.*?)\">(.*?)</parameter>", re.DOTALL)
            for key, raw_value in re.findall(param_pattern, params_block):
                value = raw_value.strip()
                try:
                    parsed_value = json.loads(value)
                except json.JSONDecodeError:
                    parsed_value = raw_value
                args_dict[key] = parsed_value

            results.append(FunctionCall(func_name.strip(), json.dumps(args_dict, ensure_ascii=False)))

        return results

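Illustrative only, not part of the diff, with the same scope assumptions as the M1 sketch above: a round trip through the MiniMax-M2 utilities. Here the wire format is XML-style <invoke>/<parameter> tags, and the sketch assumes function_formatter wraps its output in the <minimax:tool_call> tags described by MINIMAX_M2_TOOL_PROMPT, which is what tool_extractor searches for.

    call = FunctionCall("get_weather", json.dumps({"city": "Shanghai", "days": 3}))

    text = MiniMaxM2ToolUtils.function_formatter([call])
    # '<minimax:tool_call>\n<invoke name="get_weather">\n<parameter name="city">Shanghai</parameter>\n'
    # '<parameter name="days">3</parameter>\n</invoke>\n</minimax:tool_call>'

    parsed = MiniMaxM2ToolUtils.tool_extractor(text)
    assert parsed[0].name == "get_weather"
    assert json.loads(parsed[0].arguments) == {"city": "Shanghai", "days": 3}  # non-string values are re-parsed via json.loads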
class MistralToolUtils(ToolUtils):
    r"""Mistral v0.3 tool using template."""
@@ -432,6 +552,8 @@ TOOLS = {
    "default": DefaultToolUtils(),
    "glm4": GLM4ToolUtils(),
    "llama3": Llama3ToolUtils(),
    "minimax1": MiniMaxM1ToolUtils(),
    "minimax2": MiniMaxM2ToolUtils(),
    "mistral": MistralToolUtils(),
    "qwen": QwenToolUtils(),
    "glm4_moe": GLM4MOEToolUtils(),
@@ -1071,6 +1071,40 @@ register_model_group(
)


register_model_group(
    models={
        "MiniMax-Text-01-Instruct": {
            DownloadSource.DEFAULT: "MiniMaxAI/MiniMax-Text-01-hf",
            DownloadSource.MODELSCOPE: "MiniMaxAI/MiniMax-Text-01",
        },
        "MiniMax-M1-40k-Thinking": {
            DownloadSource.DEFAULT: "MiniMaxAI/MiniMax-M1-40k-hf",
            DownloadSource.MODELSCOPE: "MiniMaxAI/MiniMax-M1-40k-hf",
        },
        "MiniMax-M1-80k-Thinking": {
            DownloadSource.DEFAULT: "MiniMaxAI/MiniMax-M1-80k-hf",
            DownloadSource.MODELSCOPE: "MiniMaxAI/MiniMax-M1-80k-hf",
        },
    },
    template="minimax1",
)


register_model_group(
    models={
        "MiniMax-M2-Thinking": {
            DownloadSource.DEFAULT: "MiniMaxAI/MiniMax-M2",
            DownloadSource.MODELSCOPE: "MiniMaxAI/MiniMax-M2",
        },
        "MiniMax-M2.1-Thinking": {
            DownloadSource.DEFAULT: "MiniMaxAI/MiniMax-M2.1",
            DownloadSource.MODELSCOPE: "MiniMaxAI/MiniMax-M2.1",
        },
    },
    template="minimax2",
)


register_model_group(
    models={
        "Granite-3.0-1B-A400M-Base": {
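Illustrative only, not part of the diff: a quick sanity check that the delimiter strings used by the minimax2 template exist in the tokenizer of the MiniMax-M2 repo registered above. Whether they resolve to single special-token ids is an assumption about the upstream tokenizer config, not something this commit asserts.

    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("MiniMaxAI/MiniMax-M2", trust_remote_code=True)
    for token in ("]~!b[", "]~b]", "[e~["):
        # Prints the id if the string is a single vocabulary token, or the unk id otherwise.
        print(token, tokenizer.convert_tokens_to_ids(token))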