[feat] support Megatron-LM training via mcore_adapter (#9237)

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Yaowei Zheng <hiyouga@buaa.edu.cn>
2025-12-18 12:50:38 +08:00 · 2025-10-26 16:21:30 +08:00
parent 129e918106
commit 13170577b2
14 changed files with 671 additions and 8 deletions
--- a/src/llamafactory/extras/constants.py
+++ b/src/llamafactory/extras/constants.py
@@ -56,6 +56,8 @@ LAYERNORM_NAMES = {"norm", "ln"}

 LLAMABOARD_CONFIG = "llamaboard_config.yaml"

+MCA_SUPPORTED_MODELS = {"deepseek_v3", "llama", "mistral", "mixtral", "qwen2", "qwen2_vl", "qwen2_5_vl", "qwen3", "qwen3_moe", "qwen3_next"}
+
 METHODS = ["full", "freeze", "lora", "oft"]

 MOD_SUPPORTED_MODELS = {"bloom", "falcon", "gemma", "llama", "mistral", "mixtral", "phi", "starcoder2"}