mirror of
https://github.com/hiyouga/LLaMA-Factory.git
synced 2026-02-26 15:56:00 +08:00
Compare commits
3 Commits
7ef1fba34a
...
e97d0474fb
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
e97d0474fb | ||
|
|
3f0c3dc84d | ||
|
|
c107cc22d0 |
2
.github/workflows/docker.yml
vendored
2
.github/workflows/docker.yml
vendored
@@ -72,7 +72,7 @@ jobs:
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
|
||||
- name: Login to Quay
|
||||
if: ${{ github.event_name != 'pull_request' && matrix.device == 'npu'}}
|
||||
if: ${{ github.event_name != 'pull_request' && startsWith(matrix.device, 'npu') }}
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
registry: quay.io
|
||||
|
||||
@@ -309,6 +309,7 @@ Read technical notes:
|
||||
| [MiMo](https://huggingface.co/XiaomiMiMo) | 7B/309B | mimo/mimo_v2 |
|
||||
| [MiniCPM 1-4.1](https://huggingface.co/openbmb) | 0.5B/1B/2B/4B/8B | cpm/cpm3/cpm4 |
|
||||
| [MiniCPM-o-2.6/MiniCPM-V-2.6](https://huggingface.co/openbmb) | 8B | minicpm_o/minicpm_v |
|
||||
| [MiniMax-M1/MiniMax-M2](https://huggingface.co/MiniMaxAI/models) | 229B/456B | minimax1/minimax2 |
|
||||
| [Ministral 3](https://huggingface.co/mistralai) | 3B/8B/14B | ministral3 |
|
||||
| [Mistral/Mixtral](https://huggingface.co/mistralai) | 7B/8x7B/8x22B | mistral |
|
||||
| [OLMo](https://huggingface.co/allenai) | 1B/7B | - |
|
||||
@@ -514,7 +515,7 @@ huggingface-cli login
|
||||
```bash
|
||||
git clone --depth 1 https://github.com/hiyouga/LLaMA-Factory.git
|
||||
cd LLaMA-Factory
|
||||
pip install -e ".[metrics]" --no-build-isolation
|
||||
pip install -e ".[metrics]"
|
||||
```
|
||||
|
||||
Optional dependencies available: `metrics`, `deepspeed`. Install with: `pip install -e ".[metrics,deepspeed]"`
|
||||
|
||||
@@ -311,6 +311,7 @@ https://github.com/user-attachments/assets/43b700c6-a178-41db-b1f8-8190a5d3fcfc
|
||||
| [MiMo](https://huggingface.co/XiaomiMiMo) | 7B/309B | mimo/mimo_v2 |
|
||||
| [MiniCPM 1-4.1](https://huggingface.co/openbmb) | 0.5B/1B/2B/4B/8B | cpm/cpm3/cpm4 |
|
||||
| [MiniCPM-o-2.6/MiniCPM-V-2.6](https://huggingface.co/openbmb) | 8B | minicpm_o/minicpm_v |
|
||||
| [MiniMax-M1/MiniMax-M2](https://huggingface.co/MiniMaxAI/models) | 229B/456B | minimax1/minimax2 |
|
||||
| [Ministral 3](https://huggingface.co/mistralai) | 3B/8B/14B | ministral3 |
|
||||
| [Mistral/Mixtral](https://huggingface.co/mistralai) | 7B/8x7B/8x22B | mistral |
|
||||
| [OLMo](https://huggingface.co/allenai) | 1B/7B | - |
|
||||
@@ -516,7 +517,7 @@ huggingface-cli login
|
||||
```bash
|
||||
git clone --depth 1 https://github.com/hiyouga/LLaMA-Factory.git
|
||||
cd LLaMA-Factory
|
||||
pip install -e ".[metrics]" --no-build-isolation
|
||||
pip install -e ".[metrics]"
|
||||
```
|
||||
|
||||
可选的额外依赖项:`metrics`、`deepspeed`。使用 `pip install -e ".[metrics,deepspeed]"` 安装。
|
||||
|
||||
@@ -1673,6 +1673,43 @@ register_template(
|
||||
)
|
||||
|
||||
|
||||
# Chat template registered under the name "minimax1".
# Every turn is wrapped as "<beginning_of_sentence>{role header}\n ... <end_of_sentence>\n".
# Tool rendering is delegated to the "minimax1" tool format
# (presumably resolved through the tool-utils registry -- confirm against the formatter code).
register_template(
    name="minimax1",
    # The user slot also opens the following assistant turn ("ai name=assistant").
    format_user=StringFormatter(
        slots=[
            "<beginning_of_sentence>user name=user\n{{content}}<end_of_sentence>\n<beginning_of_sentence>ai name=assistant\n"
        ]
    ),
    format_assistant=StringFormatter(slots=["{{content}}<end_of_sentence>\n"]),
    format_system=StringFormatter(
        slots=["<beginning_of_sentence>system ai_setting=assistant\n{{content}}<end_of_sentence>\n"]
    ),
    format_function=FunctionFormatter(slots=["{{content}}<end_of_sentence>\n"], tool_format="minimax1"),
    # Tool results use the "tool name=tools" header and, like the user slot,
    # open the next assistant turn.
    format_observation=StringFormatter(
        slots=[
            "<beginning_of_sentence>tool name=tools\n{{content}}<end_of_sentence>\n<beginning_of_sentence>ai name=assistant\n"
        ]
    ),
    format_tools=ToolFormatter(tool_format="minimax1"),
    default_system="You are a helpful assistant.",
    # Same marker that terminates every turn above.
    stop_words=["<end_of_sentence>"],
)
|
||||
|
||||
|
||||
# Chat template registered under the name "minimax2".
# Turns open with "]~b]{role}\n" and close with "[e~[\n"; the system slot is
# additionally prefixed with "]~!b[".
# Tool rendering is delegated to the "minimax2" tool format.
register_template(
    name="minimax2",
    # The user slot also opens the following assistant ("ai") turn.
    format_user=StringFormatter(slots=["]~b]user\n{{content}}[e~[\n]~b]ai\n"]),
    format_assistant=StringFormatter(slots=["{{content}}[e~[\n"]),
    format_system=StringFormatter(slots=["]~!b[]~b]system\n{{content}}[e~[\n"]),
    format_function=FunctionFormatter(slots=["{{content}}[e~[\n"], tool_format="minimax2"),
    # Tool results are wrapped in <response></response> and open the next assistant turn.
    format_observation=StringFormatter(slots=["]~b]tool\n<response>{{content}}</response>[e~[\n]~b]ai\n"]),
    format_tools=ToolFormatter(tool_format="minimax2"),
    default_system="You are a helpful assistant. Your name is MiniMax-M2.1 and is built by MiniMax.",
    # Same marker that terminates every turn above.
    stop_words=["[e~["],
    template_class=ReasoningTemplate,
)
|
||||
|
||||
|
||||
# mistral tokenizer v3 tekken
|
||||
register_template(
|
||||
name="ministral",
|
||||
|
||||
@@ -61,6 +61,21 @@ LLAMA3_TOOL_PROMPT = (
|
||||
"Do not use variables.\n\n{tool_text}"
|
||||
)
|
||||
|
||||
# Tool section of the MiniMax-M1 prompt. This is a str.format template:
# "{tool_text}" is the only substitution field, and the doubled braces in the
# example line render as literal "{" / "}" once formatted.
MINIMAX_M1_TOOL_PROMPT = (
    "You are provided with these tools:\n<tools>\n{tool_text}</tools>\n\n"
    "If you need to call tools, please respond with <tool_calls></tool_calls> XML tags, and provide tool-name and "
    "json-object of arguments, following the format below:\n<tool_calls>\n"
    """{{"name": <tool-name-1>, "arguments": <args-json-object-1>}}\n...\n</tool_calls>"""
)
|
||||
|
||||
# Tool section of the MiniMax-M2 prompt. This is a str.format template whose only
# substitution field is "{tool_text}"; the remainder shows the XML layout
# (<minimax:tool_call> / <invoke> / <parameter>) expected for tool calls.
MINIMAX_M2_TOOL_PROMPT = (
    "\n\n# Tools\n\nYou may call one or more tools to assist with the user query.\n"
    "Here are the tools available in JSONSchema format:\n\n<tools>\n{tool_text}</tools>\n\n"
    "When making tool calls, use XML format to invoke tools and pass parameters:\n"
    """\n<minimax:tool_call>\n<invoke name="tool-name-1">\n<parameter name="param-key-1">param-value-1</parameter>\n"""
    """<parameter name="param-key-2">param-value-2</parameter>\n...\n</invoke>\n</minimax:tool_call>"""
)
|
||||
|
||||
QWEN_TOOL_PROMPT = (
|
||||
"\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\n"
|
||||
"You are provided with function signatures within <tools></tools> XML tags:\n<tools>{tool_text}"
|
||||
@@ -253,6 +268,109 @@ class Llama3ToolUtils(ToolUtils):
|
||||
return content
|
||||
|
||||
|
||||
class MiniMaxM1ToolUtils(ToolUtils):
    r"""MiniMax-M1 tool using template.

    Tool definitions are rendered one JSON object per line inside ``<tools>``.
    Tool calls are rendered, and parsed back, as one JSON object per line inside
    ``<tool_calls></tool_calls>``.
    """

    @override
    @staticmethod
    def tool_formatter(tools: list[dict[str, Any]]) -> str:
        r"""Render the tool definitions into the MiniMax-M1 tool prompt.

        Args:
            tools: Tool definitions. Entries shaped like
                ``{"type": "function", "function": {...}}`` are unwrapped to
                their ``"function"`` payload; other entries are used as-is.

        Returns:
            ``MINIMAX_M1_TOOL_PROMPT`` with one JSON line per tool substituted in.
        """
        tool_text = ""
        for tool in tools:
            tool = tool.get("function", "") if tool.get("type") == "function" else tool
            tool_text += json.dumps(tool, ensure_ascii=False) + "\n"

        return MINIMAX_M1_TOOL_PROMPT.format(tool_text=tool_text)

    @override
    @staticmethod
    def function_formatter(functions: list["FunctionCall"]) -> str:
        r"""Serialize function calls as JSON lines wrapped in ``<tool_calls>`` tags.

        Args:
            functions: Calls whose ``arguments`` attribute is a JSON-encoded string.

        Returns:
            The ``<tool_calls>`` block, one ``{"name": ..., "arguments": ...}`` line per call.

        Raises:
            json.JSONDecodeError: If a call's ``arguments`` is not valid JSON.
        """
        function_texts = []
        for func in functions:
            name, arguments = func.name, json.loads(func.arguments)
            function_texts.append(json.dumps({"name": name, "arguments": arguments}, ensure_ascii=False))

        return "<tool_calls>\n" + "\n".join(function_texts) + "\n</tool_calls>"

    @override
    @staticmethod
    def tool_extractor(content: str) -> Union[str, list["FunctionCall"]]:
        r"""Extract function calls from the first ``<tool_calls>`` block in ``content``.

        Args:
            content: Raw model output.

        Returns:
            ``content`` unchanged when no ``<tool_calls>`` block is present; otherwise the
            list of calls parsed from the block (possibly empty). Lines that are not a
            JSON object carrying both ``"name"`` and ``"arguments"`` are skipped.
        """
        regex = re.compile(r"<tool_calls>\s*(.+?)\s*</tool_calls>", re.DOTALL)
        tool_match = re.search(regex, content)
        if not tool_match:
            return content

        tool_calls_content = tool_match.group(1)
        results = []
        for line in tool_calls_content.split("\n"):
            line = line.strip()
            if not line:
                continue

            try:
                tool_call = json.loads(line)
            except json.JSONDecodeError:
                continue

            # Valid JSON is not necessarily a call object (e.g. a bare list or number, or a
            # dict missing a key). Skip it like any other malformed line instead of letting
            # a KeyError/TypeError escape.
            if not isinstance(tool_call, dict) or "name" not in tool_call or "arguments" not in tool_call:
                continue

            results.append(FunctionCall(tool_call["name"], json.dumps(tool_call["arguments"], ensure_ascii=False)))

        return results
|
||||
|
||||
|
||||
class MiniMaxM2ToolUtils(ToolUtils):
    r"""MiniMax-M2 tool using template.

    Tool definitions are rendered as ``<tool>{json}</tool>`` lines. Tool calls use the XML
    layout ``<minimax:tool_call><invoke name="..."><parameter name="...">value</parameter>``.
    """

    @override
    @staticmethod
    def tool_formatter(tools: list[dict[str, Any]]) -> str:
        r"""Render the tool definitions into the MiniMax-M2 tool prompt.

        Args:
            tools: Tool definitions. Entries shaped like
                ``{"type": "function", "function": {...}}`` are unwrapped to
                their ``"function"`` payload; other entries are used as-is.

        Returns:
            ``MINIMAX_M2_TOOL_PROMPT`` with one ``<tool>...</tool>`` line per tool substituted in.
        """
        tool_text = ""
        for tool in tools:
            tool = tool.get("function", "") if tool.get("type") == "function" else tool
            tool_text += "<tool>" + json.dumps(tool, ensure_ascii=False) + "</tool>\n"

        return MINIMAX_M2_TOOL_PROMPT.format(tool_text=tool_text)

    @override
    @staticmethod
    def function_formatter(functions: list["FunctionCall"]) -> str:
        r"""Serialize function calls as ``<invoke>`` elements inside ``<minimax:tool_call>``.

        Args:
            functions: Calls whose ``arguments`` attribute is a JSON-encoded object.

        Returns:
            The ``<minimax:tool_call>`` block with one ``<invoke>`` element per call. String
            argument values are written verbatim; all other values are JSON-encoded.

        Raises:
            json.JSONDecodeError: If a call's ``arguments`` is not valid JSON.
        """
        function_texts = []
        for func in functions:
            name, arguments = func.name, json.loads(func.arguments)
            prompt = f'<invoke name="{name}">'
            for key, value in arguments.items():
                prompt += f'\n<parameter name="{key}">'
                if not isinstance(value, str):
                    value = json.dumps(value, ensure_ascii=False)
                prompt += value + "</parameter>"
            prompt += "\n</invoke>"
            function_texts.append(prompt)

        # The wrapper tags mirror the example in MINIMAX_M2_TOOL_PROMPT and the pattern that
        # tool_extractor searches for. Without this return the method yields None.
        return "<minimax:tool_call>\n" + "\n".join(function_texts) + "\n</minimax:tool_call>"

    @override
    @staticmethod
    def tool_extractor(content: str) -> Union[str, list["FunctionCall"]]:
        r"""Extract function calls from the first ``<minimax:tool_call>`` block in ``content``.

        Args:
            content: Raw model output.

        Returns:
            ``content`` unchanged when no ``<minimax:tool_call>`` block is present; otherwise
            one call per ``<invoke>`` element found in the block (possibly an empty list).
        """
        regex = re.compile(r"<minimax:tool_call>\s*(.+?)\s*</minimax:tool_call>", re.DOTALL)
        tool_match = re.search(regex, content)
        if not tool_match:
            return content

        tool_calls_content = tool_match.group(1)
        invoke_regex = re.compile(r"<invoke name=\"(.*?)\">(.*?)</invoke>", re.DOTALL)
        # Loop-invariant, so compiled once instead of once per <invoke> element.
        param_pattern = re.compile(r"<parameter name=\"(.*?)\">(.*?)</parameter>", re.DOTALL)
        results = []

        for func_name, params_block in re.findall(invoke_regex, tool_calls_content):
            args_dict = {}
            for key, raw_value in re.findall(param_pattern, params_block):
                value = raw_value.strip()
                try:
                    # Values that are valid JSON (numbers, booleans, objects, ...) are decoded.
                    parsed_value = json.loads(value)
                except json.JSONDecodeError:
                    # Anything else is kept as the original, unstripped text.
                    parsed_value = raw_value
                args_dict[key] = parsed_value

            results.append(FunctionCall(func_name.strip(), json.dumps(args_dict, ensure_ascii=False)))

        return results
|
||||
|
||||
|
||||
class MistralToolUtils(ToolUtils):
|
||||
r"""Mistral v0.3 tool using template."""
|
||||
|
||||
@@ -432,6 +550,8 @@ TOOLS = {
|
||||
"default": DefaultToolUtils(),
|
||||
"glm4": GLM4ToolUtils(),
|
||||
"llama3": Llama3ToolUtils(),
|
||||
"minimax1": MiniMaxM1ToolUtils(),
|
||||
"minimax2": MiniMaxM2ToolUtils(),
|
||||
"mistral": MistralToolUtils(),
|
||||
"qwen": QwenToolUtils(),
|
||||
"glm4_moe": GLM4MOEToolUtils(),
|
||||
|
||||
@@ -1071,6 +1071,40 @@ register_model_group(
|
||||
)
|
||||
|
||||
|
||||
# MiniMax-Text-01 and MiniMax-M1 checkpoints, all bound to the "minimax1" template.
# Each entry maps a display name to one repository id per download source.
register_model_group(
    models={
        # Only this entry uses different repository ids for the two sources.
        "MiniMax-Text-01-Instruct": {
            DownloadSource.DEFAULT: "MiniMaxAI/MiniMax-Text-01-hf",
            DownloadSource.MODELSCOPE: "MiniMaxAI/MiniMax-Text-01",
        },
        "MiniMax-M1-40k-Thinking": {
            DownloadSource.DEFAULT: "MiniMaxAI/MiniMax-M1-40k-hf",
            DownloadSource.MODELSCOPE: "MiniMaxAI/MiniMax-M1-40k-hf",
        },
        "MiniMax-M1-80k-Thinking": {
            DownloadSource.DEFAULT: "MiniMaxAI/MiniMax-M1-80k-hf",
            DownloadSource.MODELSCOPE: "MiniMaxAI/MiniMax-M1-80k-hf",
        },
    },
    template="minimax1",
)
|
||||
|
||||
|
||||
# MiniMax-M2 family checkpoints, all bound to the "minimax2" template.
# Each entry maps a display name to one repository id per download source;
# both sources use the same id here.
register_model_group(
    models={
        "MiniMax-M2-Thinking": {
            DownloadSource.DEFAULT: "MiniMaxAI/MiniMax-M2",
            DownloadSource.MODELSCOPE: "MiniMaxAI/MiniMax-M2",
        },
        "MiniMax-M2.1-Thinking": {
            DownloadSource.DEFAULT: "MiniMaxAI/MiniMax-M2.1",
            DownloadSource.MODELSCOPE: "MiniMaxAI/MiniMax-M2.1",
        },
    },
    template="minimax2",
)
|
||||
|
||||
|
||||
register_model_group(
|
||||
models={
|
||||
"Granite-3.0-1B-A400M-Base": {
|
||||
|
||||
Reference in New Issue
Block a user