mirror of
https://github.com/hiyouga/LLaMA-Factory.git
synced 2025-12-18 12:50:38 +08:00
[data] optimize qwen3 loss computation (#7923)
This commit is contained in:
@@ -533,6 +533,17 @@ register_model_group(
|
||||
DownloadSource.DEFAULT: "deepseek-ai/DeepSeek-V3",
|
||||
DownloadSource.MODELSCOPE: "deepseek-ai/DeepSeek-V3",
|
||||
},
|
||||
"DeepSeek-V3-671B-0324-Chat": {
|
||||
DownloadSource.DEFAULT: "deepseek-ai/DeepSeek-V3-0324",
|
||||
DownloadSource.MODELSCOPE: "deepseek-ai/DeepSeek-V3-0324",
|
||||
},
|
||||
},
|
||||
template="deepseek3",
|
||||
)
|
||||
|
||||
|
||||
register_model_group(
|
||||
models={
|
||||
"DeepSeek-R1-1.5B-Distill": {
|
||||
DownloadSource.DEFAULT: "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
|
||||
DownloadSource.MODELSCOPE: "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
|
||||
@@ -566,7 +577,7 @@ register_model_group(
|
||||
DownloadSource.MODELSCOPE: "deepseek-ai/DeepSeek-R1",
|
||||
},
|
||||
},
|
||||
-template="deepseek3",
|
||||
+template="deepseekr1",
|
||||
)
|
||||
|
||||
|
||||
@@ -737,6 +748,13 @@ register_model_group(
|
||||
DownloadSource.DEFAULT: "THUDM/GLM-4-32B-0414",
|
||||
DownloadSource.MODELSCOPE: "ZhipuAI/GLM-4-32B-0414",
|
||||
},
|
||||
},
|
||||
template="glm4",
|
||||
)
|
||||
|
||||
|
||||
register_model_group(
|
||||
models={
|
||||
"GLM-Z1-9B-0414-Chat": {
|
||||
DownloadSource.DEFAULT: "THUDM/GLM-Z1-9B-0414",
|
||||
DownloadSource.MODELSCOPE: "ZhipuAI/GLM-Z1-9B-0414",
|
||||
@@ -746,7 +764,7 @@ register_model_group(
|
||||
DownloadSource.MODELSCOPE: "ZhipuAI/GLM-Z1-32B-0414",
|
||||
},
|
||||
},
|
||||
-template="glm4",
|
||||
+template="glmz1",
|
||||
)
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user