[model] support MiniMax-M1&M2 series (#9680)

Author: Hertz
Co-authored-by: Yaowei Zheng <hiyouga@buaa.edu.cn>
Date: 2025-12-28 19:02:05 +08:00 (committed by GitHub)
parent 7ef1fba34a
commit c107cc22d0
5 changed files with 191 additions and 0 deletions

README.md

@@ -309,6 +309,7 @@ Read technical notes:
| [MiMo](https://huggingface.co/XiaomiMiMo) | 7B/309B | mimo/mimo_v2 |
| [MiniCPM 1-4.1](https://huggingface.co/openbmb) | 0.5B/1B/2B/4B/8B | cpm/cpm3/cpm4 |
| [MiniCPM-o-2.6/MiniCPM-V-2.6](https://huggingface.co/openbmb) | 8B | minicpm_o/minicpm_v |
| [MiniMax-M1/MiniMax-M2](https://huggingface.co/MiniMaxAI/models) | 229B/456B | minimax1/minimax2 |
| [Ministral 3](https://huggingface.co/mistralai) | 3B/8B/14B | ministral3 |
| [Mistral/Mixtral](https://huggingface.co/mistralai) | 7B/8x7B/8x22B | mistral |
| [OLMo](https://huggingface.co/allenai) | 1B/7B | - |

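The last column of this table is the template name to pass when loading the model. As a sketch, assuming LLaMA-Factory's standard Python chat API (`ChatModel` takes the same keys as the CLI; the values here are purely illustrative):

# Sketch: chat with MiniMax-M2 using the newly registered template.
from llamafactory.chat import ChatModel

chat_model = ChatModel({"model_name_or_path": "MiniMaxAI/MiniMax-M2", "template": "minimax2"})
messages = [{"role": "user", "content": "Hello"}]
print(chat_model.chat(messages)[0].response_text)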
README_zh.md

@@ -311,6 +311,7 @@ https://github.com/user-attachments/assets/43b700c6-a178-41db-b1f8-8190a5d3fcfc
| [MiMo](https://huggingface.co/XiaomiMiMo) | 7B/309B | mimo/mimo_v2 |
| [MiniCPM 1-4.1](https://huggingface.co/openbmb) | 0.5B/1B/2B/4B/8B | cpm/cpm3/cpm4 |
| [MiniCPM-o-2.6/MiniCPM-V-2.6](https://huggingface.co/openbmb) | 8B | minicpm_o/minicpm_v |
| [MiniMax-M1/MiniMax-M2](https://huggingface.co/MiniMaxAI/models) | 229B/456B | minimax1/minimax2 |
| [Ministral 3](https://huggingface.co/mistralai) | 3B/8B/14B | ministral3 |
| [Mistral/Mixtral](https://huggingface.co/mistralai) | 7B/8x7B/8x22B | mistral |
| [OLMo](https://huggingface.co/allenai) | 1B/7B | - |

src/llamafactory/data/template.py

@@ -1673,6 +1673,39 @@ register_template(
)

register_template(
    name="minimax1",
    format_user=StringFormatter(
        slots=["<beginning_of_sentence>user name=user\n{{content}}<end_of_sentence>\n<beginning_of_sentence>ai name=assistant\n"]
    ),
    format_assistant=StringFormatter(slots=["{{content}}<end_of_sentence>\n"]),
    format_system=StringFormatter(
        slots=["<beginning_of_sentence>system ai_setting=assistant\n{{content}}<end_of_sentence>\n"]
    ),
    format_function=FunctionFormatter(slots=["{{content}}<end_of_sentence>\n"], tool_format="minimax1"),
    format_observation=StringFormatter(
        slots=["<beginning_of_sentence>tool name=tools\n{{content}}<end_of_sentence>\n<beginning_of_sentence>ai name=assistant\n"]
    ),
    format_tools=ToolFormatter(tool_format="minimax1"),
    default_system="You are a helpful assistant.",
    stop_words=["<end_of_sentence>"],
)

register_template(
    name="minimax2",
    format_user=StringFormatter(slots=["]~b]user\n{{content}}[e~[\n]~b]ai\n"]),
    format_assistant=StringFormatter(slots=["{{content}}[e~[\n"]),
    format_system=StringFormatter(slots=["]~!b[]~b]system\n{{content}}[e~[\n"]),
    format_function=FunctionFormatter(slots=["{{content}}[e~[\n"], tool_format="minimax2"),
    format_observation=StringFormatter(slots=["]~b]tool\n<response>{{content}}</response>[e~[\n]~b]ai\n"]),
    format_tools=ToolFormatter(tool_format="minimax2"),
    default_system="You are a helpful assistant. Your name is MiniMax-M2.1 and is built by MiniMax.",
    stop_words=["[e~["],
    template_class=ReasoningTemplate,
)

# mistral tokenizer v3 tekken
register_template(
    name="ministral",

src/llamafactory/data/tool_utils.py

@@ -61,6 +61,21 @@ LLAMA3_TOOL_PROMPT = (
"Do not use variables.\n\n{tool_text}"
)

MINIMAX_M1_TOOL_PROMPT = (
    "You are provided with these tools:\n<tools>\n{tool_text}</tools>\n\n"
    "If you need to call tools, please respond with <tool_calls></tool_calls> XML tags, and provide tool-name and "
    "json-object of arguments, following the format below:\n<tool_calls>\n"
    "{{\"name\": <tool-name-1>, \"arguments\": <args-json-object-1>}}\n...\n</tool_calls>"
)

MINIMAX_M2_TOOL_PROMPT = (
    "\n\n# Tools\n\nYou may call one or more tools to assist with the user query.\n"
    "Here are the tools available in JSONSchema format:\n\n<tools>\n{tool_text}</tools>\n\n"
    "When making tool calls, use XML format to invoke tools and pass parameters:\n"
    "\n<minimax:tool_call>\n<invoke name=\"tool-name-1\">\n<parameter name=\"param-key-1\">param-value-1</parameter>\n"
    "<parameter name=\"param-key-2\">param-value-2</parameter>\n...\n</invoke>\n</minimax:tool_call>"
)
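
As a quick illustration, rendering one tool schema into the M1 prompt is just a .format call on the constant above (the get_weather schema is invented for this example):

# Illustrative only: the get_weather schema is made up.
import json

tool = {"name": "get_weather", "parameters": {"type": "object", "properties": {"city": {"type": "string"}}}}
tool_text = json.dumps(tool, ensure_ascii=False) + "\n"
rendered = MINIMAX_M1_TOOL_PROMPT.format(tool_text=tool_text)
# rendered starts with: 'You are provided with these tools:\n<tools>\n{"name": "get_weather", ...}\n</tools>'
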
QWEN_TOOL_PROMPT = (
"\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\n"
"You are provided with function signatures within <tools></tools> XML tags:\n<tools>{tool_text}"
@@ -253,6 +268,111 @@ class Llama3ToolUtils(ToolUtils):
        return content


class MiniMaxM1ToolUtils(ToolUtils):
    r"""MiniMax-M1 tool using template."""

    @override
    @staticmethod
    def tool_formatter(tools: list[dict[str, Any]]) -> str:
        tool_text = ""
        for tool in tools:
            tool = tool.get("function", "") if tool.get("type") == "function" else tool
            tool_text += json.dumps(tool, ensure_ascii=False) + "\n"

        return MINIMAX_M1_TOOL_PROMPT.format(tool_text=tool_text)

    @override
    @staticmethod
    def function_formatter(functions: list["FunctionCall"]) -> str:
        function_texts = []
        for func in functions:
            name, arguments = func.name, json.loads(func.arguments)
            function_texts.append(json.dumps({"name": name, "arguments": arguments}, ensure_ascii=False))

        return "<tool_calls>\n" + "\n".join(function_texts) + "\n</tool_calls>"

    @override
    @staticmethod
    def tool_extractor(content: str) -> Union[str, list["FunctionCall"]]:
        regex = re.compile(r"<tool_calls>\s*(.+?)\s*</tool_calls>", re.DOTALL)
        tool_match = re.search(regex, content)
        if not tool_match:
            return content

        tool_calls_content = tool_match.group(1)
        results = []
        for line in tool_calls_content.split("\n"):
            line = line.strip()
            if not line:
                continue

            try:
                tool_call = json.loads(line)
                results.append(FunctionCall(tool_call["name"], json.dumps(tool_call["arguments"], ensure_ascii=False)))
            except json.JSONDecodeError:
                continue

        return results
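
A quick round trip through the extractor above, with an invented model response:

# Illustrative: parse a response carrying one M1-style JSON tool call.
text = '<tool_calls>\n{"name": "get_weather", "arguments": {"city": "Paris"}}\n</tool_calls>'
calls = MiniMaxM1ToolUtils.tool_extractor(text)
# -> [FunctionCall(name="get_weather", arguments='{"city": "Paris"}')]
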
class MiniMaxM2ToolUtils(ToolUtils):
    r"""MiniMax-M2 tool using template."""

    @override
    @staticmethod
    def tool_formatter(tools: list[dict[str, Any]]) -> str:
        tool_text = ""
        for tool in tools:
            tool = tool.get("function", "") if tool.get("type") == "function" else tool
            tool_text += "<tool>" + json.dumps(tool, ensure_ascii=False) + "</tool>\n"

        return MINIMAX_M2_TOOL_PROMPT.format(tool_text=tool_text)

    @override
    @staticmethod
    def function_formatter(functions: list["FunctionCall"]) -> str:
        function_texts = []
        for func in functions:
            name, arguments = func.name, json.loads(func.arguments)
            prompt = "<invoke name=\"" + name + "\">"
            for key, value in arguments.items():
                prompt += "\n<parameter name=\"" + key + "\">"
                if not isinstance(value, str):
                    value = json.dumps(value, ensure_ascii=False)

                prompt += value + "</parameter>"

            prompt += "\n</invoke>"
            function_texts.append(prompt)

        # Wrap all invokes in a single XML block, matching MINIMAX_M2_TOOL_PROMPT.
        return "<minimax:tool_call>\n" + "\n".join(function_texts) + "\n</minimax:tool_call>"

    @override
    @staticmethod
    def tool_extractor(content: str) -> Union[str, list["FunctionCall"]]:
        regex = re.compile(r"<minimax:tool_call>\s*(.+?)\s*</minimax:tool_call>", re.DOTALL)
        tool_match = re.search(regex, content)
        if not tool_match:
            return content

        tool_calls_content = tool_match.group(1)
        invoke_regex = re.compile(r"<invoke name=\"(.*?)\">(.*?)</invoke>", re.DOTALL)
        results = []
        for func_name, params_block in re.findall(invoke_regex, tool_calls_content):
            args_dict = {}
            param_pattern = re.compile(r"<parameter name=\"(.*?)\">(.*?)</parameter>", re.DOTALL)
            for key, raw_value in re.findall(param_pattern, params_block):
                value = raw_value.strip()
                try:
                    parsed_value = json.loads(value)
                except json.JSONDecodeError:
                    parsed_value = raw_value

                args_dict[key] = parsed_value

            results.append(FunctionCall(func_name.strip(), json.dumps(args_dict, ensure_ascii=False)))

        return results
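
And the matching round trip for the XML flavor, again with invented content:

# Illustrative: parse a response carrying one M2-style XML tool call.
text = (
    "<minimax:tool_call>\n"
    '<invoke name="get_weather">\n'
    '<parameter name="city">Paris</parameter>\n'
    "</invoke>\n"
    "</minimax:tool_call>"
)
calls = MiniMaxM2ToolUtils.tool_extractor(text)
# -> [FunctionCall(name="get_weather", arguments='{"city": "Paris"}')]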


class MistralToolUtils(ToolUtils):
    r"""Mistral v0.3 tool using template."""
@@ -432,6 +552,8 @@ TOOLS = {
"default": DefaultToolUtils(),
"glm4": GLM4ToolUtils(),
"llama3": Llama3ToolUtils(),
"minimax1": MiniMaxM1ToolUtils(),
"minimax2": MiniMaxM2ToolUtils(),
"mistral": MistralToolUtils(),
"qwen": QwenToolUtils(),
"glm4_moe": GLM4MOEToolUtils(),

src/llamafactory/extras/constants.py

@@ -1071,6 +1071,40 @@ register_model_group(
)

register_model_group(
    models={
        "MiniMax-Text-01-Instruct": {
            DownloadSource.DEFAULT: "MiniMaxAI/MiniMax-Text-01-hf",
            DownloadSource.MODELSCOPE: "MiniMaxAI/MiniMax-Text-01",
        },
        "MiniMax-M1-40k-Thinking": {
            DownloadSource.DEFAULT: "MiniMaxAI/MiniMax-M1-40k-hf",
            DownloadSource.MODELSCOPE: "MiniMaxAI/MiniMax-M1-40k-hf",
        },
        "MiniMax-M1-80k-Thinking": {
            DownloadSource.DEFAULT: "MiniMaxAI/MiniMax-M1-80k-hf",
            DownloadSource.MODELSCOPE: "MiniMaxAI/MiniMax-M1-80k-hf",
        },
    },
    template="minimax1",
)

register_model_group(
    models={
        "MiniMax-M2-Thinking": {
            DownloadSource.DEFAULT: "MiniMaxAI/MiniMax-M2",
            DownloadSource.MODELSCOPE: "MiniMaxAI/MiniMax-M2",
        },
        "MiniMax-M2.1-Thinking": {
            DownloadSource.DEFAULT: "MiniMaxAI/MiniMax-M2.1",
            DownloadSource.MODELSCOPE: "MiniMaxAI/MiniMax-M2.1",
        },
    },
    template="minimax2",
)
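
For context, register_model_group feeds the name-to-checkpoint and name-to-template registries. A simplified illustration of the lookups the entries above enable (not the real loader code; dict names follow constants.py):

# Simplified sketch of the registry lookups driven by the groups above.
SUPPORTED_MODELS = {"MiniMax-M2-Thinking": {"hf": "MiniMaxAI/MiniMax-M2"}}  # one path per download source
DEFAULT_TEMPLATE = {"MiniMax-M2-Thinking": "minimax2"}

path = SUPPORTED_MODELS["MiniMax-M2-Thinking"]["hf"]  # "MiniMaxAI/MiniMax-M2"
template = DEFAULT_TEMPLATE["MiniMax-M2-Thinking"]    # "minimax2"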

register_model_group(
    models={
        "Granite-3.0-1B-A400M-Base": {