diff --git a/README.md b/README.md
index d238101e6..04c372394 100644
--- a/README.md
+++ b/README.md
@@ -313,6 +313,7 @@ Read technical notes:
 | [LLaVA-NeXT](https://huggingface.co/llava-hf) | 7B/8B/13B/34B/72B/110B | llava_next |
 | [LLaVA-NeXT-Video](https://huggingface.co/llava-hf) | 7B/34B | llava_next_video |
 | [MiMo](https://huggingface.co/XiaomiMiMo) | 7B | mimo |
+| [MiMo-v2](https://huggingface.co/XiaomiMiMo) | 309B | mimo_v2 |
 | [MiniCPM 1-4.1](https://huggingface.co/openbmb) | 0.5B/1B/2B/4B/8B | cpm/cpm3/cpm4 |
 | [MiniCPM-o-2.6/MiniCPM-V-2.6](https://huggingface.co/openbmb) | 8B | minicpm_o/minicpm_v |
 | [Ministral(3)/Mistral-Nemo](https://huggingface.co/mistralai) | 3B/8B/12B/14B | ministral/ministral3 |
diff --git a/README_zh.md b/README_zh.md
index 50e805005..fa3ebaab4 100644
--- a/README_zh.md
+++ b/README_zh.md
@@ -315,6 +315,7 @@ https://github.com/user-attachments/assets/43b700c6-a178-41db-b1f8-8190a5d3fcfc
 | [LLaVA-NeXT](https://huggingface.co/llava-hf) | 7B/8B/13B/34B/72B/110B | llava_next |
 | [LLaVA-NeXT-Video](https://huggingface.co/llava-hf) | 7B/34B | llava_next_video |
 | [MiMo](https://huggingface.co/XiaomiMiMo) | 7B | mimo |
+| [MiMo-v2](https://huggingface.co/XiaomiMiMo) | 309B | mimo_v2 |
 | [MiniCPM 1-4.1](https://huggingface.co/openbmb) | 0.5B/1B/2B/4B/8B | cpm/cpm3/cpm4 |
 | [MiniCPM-o-2.6/MiniCPM-V-2.6](https://huggingface.co/openbmb) | 8B | minicpm_o/minicpm_v |
 | [Ministral(3)/Mistral-Nemo](https://huggingface.co/mistralai) | 3B/8B/12B/14B | ministral/ministral3 |
diff --git a/src/llamafactory/data/template.py b/src/llamafactory/data/template.py
index c611632f8..19d354f31 100644
--- a/src/llamafactory/data/template.py
+++ b/src/llamafactory/data/template.py
@@ -1610,6 +1610,26 @@ register_template(
     template_class=ReasoningTemplate,
 )
 
+
+# copied from qwen template
+register_template(
+    name="mimo_v2",
+    format_user=StringFormatter(slots=["<|im_start|>user\n{{content}}<|im_end|>\n<|im_start|>assistant\n"]),
+    format_assistant=StringFormatter(slots=["{{content}}<|im_end|>\n"]),
+    format_system=StringFormatter(slots=["<|im_start|>system\n{{content}}<|im_end|>\n"]),
+    format_function=FunctionFormatter(slots=["{{content}}<|im_end|>\n"], tool_format="qwen"),
+    format_observation=StringFormatter(
+        slots=["<|im_start|>user\n<tool_response>\n{{content}}\n</tool_response><|im_end|>\n<|im_start|>assistant\n"]
+    ),
+    format_tools=ToolFormatter(tool_format="qwen"),
+    default_system="You are MiMo, a helpful AI assistant engineered by Xiaomi.",
+    stop_words=["<|im_end|>"],
+    replace_eos=True,
+    thought_words=("<think>", "</think>"),
+    template_class=ReasoningTemplate,
+)
+
+
 # copied from qwen2vl
 register_template(
     name="mimo_vl",
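Reviewer note, an illustrative sketch rather than code from this patch: the new mimo_v2 template is ChatML-style, so a single system+user exchange renders as shown below. render_single_turn is a hypothetical helper written for this note, not a LLaMA-Factory API; it simply inlines the format_system and format_user slots registered above.

DEFAULT_SYSTEM = "You are MiMo, a helpful AI assistant engineered by Xiaomi."

def render_single_turn(user_message: str, system: str = DEFAULT_SYSTEM) -> str:
    prompt = f"<|im_start|>system\n{system}<|im_end|>\n"  # format_system slot
    # format_user slot; it also opens the assistant turn for generation
    prompt += f"<|im_start|>user\n{user_message}<|im_end|>\n<|im_start|>assistant\n"
    return prompt

print(render_single_turn("Hello!"))  # generation then stops at the <|im_end|> stop word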
"XiaomiMiMo/MiMo-VL-7B-SFT-2508", }, }, template="qwen2_vl", diff --git a/src/llamafactory/model/model_utils/quantization.py b/src/llamafactory/model/model_utils/quantization.py index 59dfc502a..8728ce3ba 100644 --- a/src/llamafactory/model/model_utils/quantization.py +++ b/src/llamafactory/model/model_utils/quantization.py @@ -113,6 +113,7 @@ def configure_quantization( if quant_method == QuantizationMethod.FP8: quant_config = FineGrainedFP8Config(dequantize=True) init_kwargs["quantization_config"] = quant_config + init_kwargs["ignore_mismatched_sizes"] = True quant_bits = quantization_config.get("bits", "?") logger.info_rank0(f"Loading {quant_bits}-bit {quant_method.upper()}-quantized model.")