From 5fb5d7ebd3a8b7b5b4873f18e2b7318cffbec58c Mon Sep 17 00:00:00 2001 From: Jackey <100291372+ctx289@users.noreply.github.com> Date: Fri, 9 Jan 2026 12:24:45 +0800 Subject: [PATCH] [model] support for microsoft's Phi-4-mini (#9734) --- README.md | 2 +- README_zh.md | 2 +- src/llamafactory/data/template.py | 11 +++++++++++ src/llamafactory/extras/constants.py | 9 +++++++++ 4 files changed, 22 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 0add75bb5..9d63b967a 100644 --- a/README.md +++ b/README.md @@ -317,7 +317,7 @@ Read technical notes: | [PaliGemma/PaliGemma2](https://huggingface.co/google) | 3B/10B/28B | paligemma | | [Phi-3/Phi-3.5](https://huggingface.co/microsoft) | 4B/14B | phi | | [Phi-3-small](https://huggingface.co/microsoft) | 7B | phi_small | -| [Phi-4](https://huggingface.co/microsoft) | 14B | phi4 | +| [Phi-4-mini/Phi-4](https://huggingface.co/microsoft) | 3.8B/14B | phi4_mini/phi4 | | [Pixtral](https://huggingface.co/mistralai) | 12B | pixtral | | [Qwen (1-2.5) (Code/Math/MoE/QwQ)](https://huggingface.co/Qwen) | 0.5B/1.5B/3B/7B/14B/32B/72B/110B | qwen | | [Qwen3 (MoE/Instruct/Thinking/Next)](https://huggingface.co/Qwen) | 0.6B/1.7B/4B/8B/14B/32B/80B/235B | qwen3/qwen3_nothink | diff --git a/README_zh.md b/README_zh.md index 72ad732fe..751d14a92 100644 --- a/README_zh.md +++ b/README_zh.md @@ -319,7 +319,7 @@ https://github.com/user-attachments/assets/43b700c6-a178-41db-b1f8-8190a5d3fcfc | [PaliGemma/PaliGemma2](https://huggingface.co/google) | 3B/10B/28B | paligemma | | [Phi-3/Phi-3.5](https://huggingface.co/microsoft) | 4B/14B | phi | | [Phi-3-small](https://huggingface.co/microsoft) | 7B | phi_small | -| [Phi-4](https://huggingface.co/microsoft) | 14B | phi4 | +| [Phi-4-mini/Phi-4](https://huggingface.co/microsoft) | 3.8B/14B | phi4_mini/phi4 | | [Pixtral](https://huggingface.co/mistralai) | 12B | pixtral | | [Qwen (1-2.5) (Code/Math/MoE/QwQ)](https://huggingface.co/Qwen) | 0.5B/1.5B/3B/7B/14B/32B/72B/110B | qwen | | [Qwen3 (MoE/Instruct/Thinking/Next)](https://huggingface.co/Qwen) | 0.6B/1.7B/4B/8B/14B/32B/80B/235B | qwen3/qwen3_nothink | diff --git a/src/llamafactory/data/template.py b/src/llamafactory/data/template.py index ef1c5db69..665372ee8 100644 --- a/src/llamafactory/data/template.py +++ b/src/llamafactory/data/template.py @@ -1910,6 +1910,17 @@ register_template( ) +register_template( + name="phi4_mini", + format_user=StringFormatter(slots=["<|user|>{{content}}<|end|><|assistant|>"]), + format_assistant=StringFormatter(slots=["{{content}}<|end|>"]), + format_system=StringFormatter(slots=["<|system|>{{content}}<|end|>"]), + format_tools=StringFormatter(slots=["<|tool|>{{content}}<|/tool|>"]), + stop_words=["<|end|>"], + replace_eos=True, +) + + # copied from ministral template register_template( name="pixtral", diff --git a/src/llamafactory/extras/constants.py b/src/llamafactory/extras/constants.py index 0b4d35ef1..0208de822 100644 --- a/src/llamafactory/extras/constants.py +++ b/src/llamafactory/extras/constants.py @@ -2443,6 +2443,15 @@ register_model_group( template="phi4", ) +register_model_group( + models={ + "Phi-4-3.8B-instruct": { + DownloadSource.DEFAULT: "microsoft/Phi-4-mini-instruct", + DownloadSource.MODELSCOPE: "LLM-Research/Phi-4-mini-instruct", + }, + }, + template="phi4_mini", +) register_model_group( models={