mirror of https://github.com/hiyouga/LLaMA-Factory.git (synced 2025-08-03 04:02:49 +08:00)

commit 57f85add58 (parent 9846071c67)
Former-commit-id: 38d8b2cef8d70ce8c390de0317559df7f04b4a5d

update chatglm3 template

README.md
@@ -43,7 +43,7 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/6ba60acc-e2e2-4bec-b846
 - **Various models**: LLaMA, Mistral, Mixtral-MoE, Qwen, Yi, Gemma, Baichuan, ChatGLM, Phi, etc.
 - **Integrated methods**: (Continuous) pre-training, supervised fine-tuning, reward modeling, PPO and DPO.
 - **Scalable resources**: 32-bit full-tuning, 16-bit freeze tuning, 16-bit LoRA tuning, 2/4/8-bit QLoRA with AQLM/AWQ/GPTQ/LLM.int8.
-- **Advanced algorithms**: DoRA, LongLoRA, LLaMA Pro, agent tuning.
+- **Advanced algorithms**: DoRA, LongLoRA, LLaMA Pro, LoftQ, agent tuning.
 - **Intriguing tricks**: FlashAttention-2, Unsloth, RoPE scaling, NEFTune, rsLoRA.

 ## Benchmark
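
The "Scalable resources" bullet above mentions 2/4/8-bit QLoRA. As a rough illustration of what that means in practice, here is a minimal 4-bit QLoRA setup using the generic Hugging Face `transformers` + `peft` + `bitsandbytes` stack; this is a sketch, not LLaMA-Factory's own training entry point, and the model id is only an example:

```python
# Generic 4-bit QLoRA sketch (transformers + peft + bitsandbytes).
# Not LLaMA-Factory's entry point; the model id is just an example.
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                      # quantize base weights to 4 bits
    bnb_4bit_quant_type="nf4",              # NormalFloat4 quantization
    bnb_4bit_compute_dtype=torch.bfloat16,  # do matmuls in bf16
)
model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-2-7b-hf",
    quantization_config=bnb_config,
    device_map="auto",
)

lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    lora_dropout=0.05,
    target_modules=["q_proj", "v_proj"],    # attach adapters to attention projections
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, lora_config)  # only the LoRA adapters stay trainable
model.print_trainable_parameters()
```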

README_zh.md (Chinese bullets translated into English)
@@ -43,7 +43,7 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/6ba60acc-e2e2-4bec-b846
 - **Various models**: LLaMA, Mistral, Mixtral-MoE, Qwen, Yi, Gemma, Baichuan, ChatGLM, Phi, and more.
 - **Integrated methods**: (incremental) pre-training, supervised instruction fine-tuning, reward model training, PPO training, and DPO training.
 - **Multiple precisions**: 32-bit full-parameter training, 16-bit partial-parameter training, 16-bit LoRA training, and 2/4/8-bit LoRA training based on AQLM/AWQ/GPTQ/LLM.int8.
-- **Advanced algorithms**: DoRA, LongLoRA, LLaMA Pro, agent tuning.
+- **Advanced algorithms**: DoRA, LongLoRA, LLaMA Pro, LoftQ, agent tuning.
 - **Fresh tricks**: FlashAttention-2, Unsloth, RoPE scaling, NEFTune, rsLoRA.

 ## Benchmark

src/llmtuner/data/template.py
@@ -360,6 +360,21 @@ _register_template(
     name="chatglm3",
     format_user=StringFormatter(slots=[{"token": "<|user|>"}, "\n", "{{content}}", {"token": "<|assistant|>"}]),
     format_assistant=StringFormatter(slots=["\n", "{{content}}"]),
+    format_system=StringFormatter(slots=[{"token": "[gMASK]"}, {"token": "sop"}]),
+    format_function=FunctionFormatter(slots=["{{name}}\n{{arguments}}"]),
+    format_observation=StringFormatter(
+        slots=[{"token": "<|observation|>"}, "\n", "{{content}}", {"token": "<|assistant|>"}]
+    ),
+    stop_words=["<|user|>", "<|observation|>"],
+    efficient_eos=True,
+    force_system=True,
+)
+
+
+_register_template(
+    name="chatglm3_system",
+    format_user=StringFormatter(slots=[{"token": "<|user|>"}, "\n", "{{content}}", {"token": "<|assistant|>"}]),
+    format_assistant=StringFormatter(slots=["\n", "{{content}}"]),
     format_system=StringFormatter(
         slots=[{"token": "[gMASK]"}, {"token": "sop"}, {"token": "<|system|>"}, "\n", "{{content}}"]
     ),
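
For readers unfamiliar with the slot syntax in this hunk: each formatter is a list of slots, where plain strings may carry `{{placeholders}}` and dict slots name special tokens. The sketch below is illustrative only; the real `StringFormatter` resolves `{"token": ...}` slots to tokenizer ids rather than literal strings, but it shows how the `chatglm3` template above lays out one exchange:

```python
# Minimal sketch of slot rendering (not LLaMA-Factory's actual classes).
# Dict slots stand for special tokens; here they render as literal strings.

def render_slots(slots, **values):
    out = []
    for slot in slots:
        if isinstance(slot, dict):          # special-token slot
            out.append(slot["token"])
        else:                               # plain string with optional {{placeholders}}
            text = slot
            for key, val in values.items():
                text = text.replace("{{" + key + "}}", val)
            out.append(text)
    return "".join(out)

system_slots = [{"token": "[gMASK]"}, {"token": "sop"}]
user_slots = [{"token": "<|user|>"}, "\n", "{{content}}", {"token": "<|assistant|>"}]
assistant_slots = ["\n", "{{content}}"]

prompt = (
    render_slots(system_slots)
    + render_slots(user_slots, content="What is LoRA?")
    + render_slots(assistant_slots, content="A parameter-efficient fine-tuning method.")
)
print(prompt)
# [gMASK]sop<|user|>
# What is LoRA?<|assistant|>
# A parameter-efficient fine-tuning method.
```

This also makes the `stop_words=["<|user|>", "<|observation|>"]` choice easy to read: generation halts as soon as the model emits the token that would open the next user or observation turn.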

src/llmtuner/extras/constants.py
@@ -328,15 +328,19 @@ register_model_group(
     models={
         "Gemma-2B": {
             DownloadSource.DEFAULT: "google/gemma-2b",
+            DownloadSource.MODELSCOPE: "AI-ModelScope/gemma-2b",
         },
         "Gemma-7B": {
             DownloadSource.DEFAULT: "google/gemma-7b",
+            DownloadSource.MODELSCOPE: "AI-ModelScope/gemma-7b",
         },
         "Gemma-2B-Chat": {
             DownloadSource.DEFAULT: "google/gemma-2b-it",
+            DownloadSource.MODELSCOPE: "AI-ModelScope/gemma-2b-it",
         },
         "Gemma-7B-Chat": {
             DownloadSource.DEFAULT: "google/gemma-7b-it",
+            DownloadSource.MODELSCOPE: "AI-ModelScope/gemma-7b-it",
         },
     },
     template="gemma",

@@ -562,7 +566,10 @@ register_model_group(
             DownloadSource.DEFAULT: "Qwen/Qwen-1_8B-Chat",
             DownloadSource.MODELSCOPE: "qwen/Qwen-1_8B-Chat",
         },
-        "Qwen-7B-Chat": {DownloadSource.DEFAULT: "Qwen/Qwen-7B-Chat", DownloadSource.MODELSCOPE: "qwen/Qwen-7B-Chat"},
+        "Qwen-7B-Chat": {
+            DownloadSource.DEFAULT: "Qwen/Qwen-7B-Chat",
+            DownloadSource.MODELSCOPE: "qwen/Qwen-7B-Chat",
+        },
         "Qwen-14B-Chat": {
             DownloadSource.DEFAULT: "Qwen/Qwen-14B-Chat",
             DownloadSource.MODELSCOPE: "qwen/Qwen-14B-Chat",
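
The registry hunks above map each model name to one repo id per `DownloadSource`, so a loader can prefer a regional hub such as ModelScope and fall back to the default (Hugging Face) id when no mirror is registered. A hedged sketch of that lookup; the helper name and trimmed table are hypothetical, not LLaMA-Factory's actual API:

```python
# Hypothetical sketch of DownloadSource-based repo resolution.
from enum import Enum

class DownloadSource(str, Enum):
    DEFAULT = "hf"        # Hugging Face Hub
    MODELSCOPE = "ms"     # ModelScope mirror

SUPPORTED_MODELS = {
    "Gemma-2B": {
        DownloadSource.DEFAULT: "google/gemma-2b",
        DownloadSource.MODELSCOPE: "AI-ModelScope/gemma-2b",
    },
    "Qwen-7B-Chat": {
        DownloadSource.DEFAULT: "Qwen/Qwen-7B-Chat",
        DownloadSource.MODELSCOPE: "qwen/Qwen-7B-Chat",
    },
}

def resolve_model_path(name: str, use_modelscope: bool = False) -> str:
    """Pick the repo id for the preferred hub, falling back to DEFAULT."""
    sources = SUPPORTED_MODELS[name]
    if use_modelscope and DownloadSource.MODELSCOPE in sources:
        return sources[DownloadSource.MODELSCOPE]
    return sources[DownloadSource.DEFAULT]

print(resolve_model_path("Gemma-2B", use_modelscope=True))  # AI-ModelScope/gemma-2b
print(resolve_model_path("Qwen-7B-Chat"))                   # Qwen/Qwen-7B-Chat
```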