diff --git a/README.md b/README.md
index 73c8b724..a08e36a1 100644
--- a/README.md
+++ b/README.md
@@ -283,6 +283,7 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/
 | [LLaVA-1.5](https://huggingface.co/llava-hf) | 7B/13B | llava |
 | [LLaVA-NeXT](https://huggingface.co/llava-hf) | 7B/8B/13B/34B/72B/110B | llava_next |
 | [LLaVA-NeXT-Video](https://huggingface.co/llava-hf) | 7B/34B | llava_next_video |
+| [MiMo](https://huggingface.co/XiaomiMiMo) | 7B | mimo |
 | [MiniCPM](https://huggingface.co/openbmb) | 1B/2B/4B | cpm/cpm3 |
 | [MiniCPM-o-2.6/MiniCPM-V-2.6](https://huggingface.co/openbmb) | 8B | minicpm_o/minicpm_v |
 | [Ministral/Mistral-Nemo](https://huggingface.co/mistralai) | 8B/12B | ministral |
diff --git a/README_zh.md b/README_zh.md
index f02e27b9..f4c1b671 100644
--- a/README_zh.md
+++ b/README_zh.md
@@ -270,6 +270,7 @@ https://github.com/user-attachments/assets/43b700c6-a178-41db-b1f8-8190a5d3fcfc
 | [LLaVA-1.5](https://huggingface.co/llava-hf) | 7B/13B | llava |
 | [LLaVA-NeXT](https://huggingface.co/llava-hf) | 7B/8B/13B/34B/72B/110B | llava_next |
 | [LLaVA-NeXT-Video](https://huggingface.co/llava-hf) | 7B/34B | llava_next_video |
+| [MiMo](https://huggingface.co/XiaomiMiMo) | 7B | mimo |
 | [MiniCPM](https://huggingface.co/openbmb) | 1B/2B/4B | cpm/cpm3 |
 | [MiniCPM-o-2.6/MiniCPM-V-2.6](https://huggingface.co/openbmb) | 8B | minicpm_o/minicpm_v |
 | [Ministral/Mistral-Nemo](https://huggingface.co/mistralai) | 8B/12B | ministral |
diff --git a/src/llamafactory/data/template.py b/src/llamafactory/data/template.py
index a5c5eb2b..debc69da 100644
--- a/src/llamafactory/data/template.py
+++ b/src/llamafactory/data/template.py
@@ -1324,6 +1324,24 @@ register_template(
 )
 
 
+# copied from qwen template
+register_template(
+    name="mimo",
+    format_user=StringFormatter(slots=["<|im_start|>user\n{{content}}<|im_end|>\n<|im_start|>assistant\n"]),
+    format_assistant=StringFormatter(slots=["{{content}}<|im_end|>\n"]),
+    format_system=StringFormatter(slots=["<|im_start|>system\n{{content}}<|im_end|>\n"]),
+    format_function=FunctionFormatter(slots=["{{content}}<|im_end|>\n"], tool_format="qwen"),
+    format_observation=StringFormatter(
+        slots=["<|im_start|>user\n<tool_response>\n{{content}}\n</tool_response><|im_end|>\n<|im_start|>assistant\n"]
+    ),
+    format_tools=ToolFormatter(tool_format="qwen"),
+    default_system="You are a helpful assistant.",
+    stop_words=["<|im_end|>"],
+    replace_eos=True,
+    template_class=ReasoningTemplate,
+)
+
+
 # copied from chatml template
 register_template(
     name="minicpm_v",
diff --git a/src/llamafactory/extras/constants.py b/src/llamafactory/extras/constants.py
index c3bb2e22..073e92f9 100644
--- a/src/llamafactory/extras/constants.py
+++ b/src/llamafactory/extras/constants.py
@@ -1416,6 +1416,29 @@ register_model_group(
 )
 
 
+register_model_group(
+    models={
+        "MiMo-7B-Base": {
+            DownloadSource.DEFAULT: "XiaomiMiMo/MiMo-7B-Base",
+            DownloadSource.MODELSCOPE: "XiaomiMiMo/MiMo-7B-Base",
+        },
+        "MiMo-7B-Instruct": {
+            DownloadSource.DEFAULT: "XiaomiMiMo/MiMo-7B-SFT",
+            DownloadSource.MODELSCOPE: "XiaomiMiMo/MiMo-7B-SFT",
+        },
+        "MiMo-7B-Instruct-RL": {
+            DownloadSource.DEFAULT: "XiaomiMiMo/MiMo-7B-RL",
+            DownloadSource.MODELSCOPE: "XiaomiMiMo/MiMo-7B-RL",
+        },
+        "MiMo-7B-RL-ZERO": {
+            DownloadSource.DEFAULT: "XiaomiMiMo/MiMo-7B-RL-ZERO",
+            DownloadSource.MODELSCOPE: "XiaomiMiMo/MiMo-7B-RL-ZERO",
+        },
+    },
+    template="mimo",
+)
+
+
 register_model_group(
     models={
         "MiniCPM-2B-SFT-Chat": {
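To smoke-test the new `mimo` registration end to end, a minimal LoRA SFT config in the style of the repo's `examples/train_lora/*.yaml` files might look like the sketch below. This is an illustration rather than part of the patch: the dataset names and output path are placeholders, and `trust_remote_code` is assumed to be needed because the MiMo checkpoints ship custom modeling code.

```yaml
### model
# XiaomiMiMo/MiMo-7B-SFT is the instruct checkpoint mapped to "MiMo-7B-Instruct" above
model_name_or_path: XiaomiMiMo/MiMo-7B-SFT
trust_remote_code: true  # assumed requirement: MiMo uses custom modeling code

### method
stage: sft
do_train: true
finetuning_type: lora
lora_target: all

### dataset
# placeholder datasets; any entry registered in data/dataset_info.json works
dataset: identity,alpaca_en_demo
template: mimo  # resolves to the registration added in template.py
cutoff_len: 2048

### train
output_dir: saves/mimo-7b/lora/sft  # illustrative path
per_device_train_batch_size: 1
gradient_accumulation_steps: 8
learning_rate: 1.0e-4
num_train_epochs: 3.0
bf16: true
```

Running it via `llamafactory-cli train mimo_lora_sft.yaml` should apply the Qwen-style `<|im_start|>`/`<|im_end|>` formatting registered above, with `ReasoningTemplate` handling the model's `<think>` reasoning traces.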