[model] add models (#7054)

* add Qwen2.5-VL AWQ models

* add moonlight

Former-commit-id: ec1a1bc1184d13188029e19c1d4e7de68707aaf6
This commit is contained in:
hoshi-hiyouga 2025-02-24 22:05:13 +08:00 committed by GitHub
parent 9359ee18ad
commit ca78ba964d
7 changed files with 44 additions and 0 deletions

View File

@ -34,6 +34,7 @@ bf16: true
ddp_timeout: 180000000 ddp_timeout: 180000000
### eval ### eval
# eval_dataset: alpaca_en_demo
# val_size: 0.1 # val_size: 0.1
# per_device_eval_batch_size: 1 # per_device_eval_batch_size: 1
# eval_strategy: steps # eval_strategy: steps

View File

@ -37,6 +37,7 @@ bf16: true
ddp_timeout: 180000000 ddp_timeout: 180000000
### eval ### eval
# eval_dataset: dpo_en_demo
# val_size: 0.1 # val_size: 0.1
# per_device_eval_batch_size: 1 # per_device_eval_batch_size: 1
# eval_strategy: steps # eval_strategy: steps

View File

@ -34,6 +34,7 @@ bf16: true
ddp_timeout: 180000000 ddp_timeout: 180000000
### eval ### eval
# eval_dataset: c4_demo
# val_size: 0.1 # val_size: 0.1
# per_device_eval_batch_size: 1 # per_device_eval_batch_size: 1
# eval_strategy: steps # eval_strategy: steps

View File

@ -35,6 +35,7 @@ bf16: true
ddp_timeout: 180000000 ddp_timeout: 180000000
### eval ### eval
# eval_dataset: dpo_en_demo
# val_size: 0.1 # val_size: 0.1
# per_device_eval_batch_size: 1 # per_device_eval_batch_size: 1
# eval_strategy: steps # eval_strategy: steps

View File

@ -35,6 +35,7 @@ bf16: true
ddp_timeout: 180000000 ddp_timeout: 180000000
### eval ### eval
# eval_dataset: alpaca_en_demo
# val_size: 0.1 # val_size: 0.1
# per_device_eval_batch_size: 1 # per_device_eval_batch_size: 1
# eval_strategy: steps # eval_strategy: steps

View File

@ -985,6 +985,18 @@ register_template(
) )
# Registers the chat template named "moonlight".
# Every turn is wrapped as <|im_ROLE|>ROLE<|im_middle|>...<|im_end|>, where ROLE is
# user, assistant, or system.
register_template(
name="moonlight",
# User turn: the wrapped user message, followed directly by the assistant header
# (<|im_assistant|>assistant<|im_middle|>), so the assistant text comes right after it.
format_user=StringFormatter(
slots=["<|im_user|>user<|im_middle|>{{content}}<|im_end|><|im_assistant|>assistant<|im_middle|>"]
),
# Assistant turn: only the content plus the closing <|im_end|>; the header is already
# emitted by format_user above.
format_assistant=StringFormatter(slots=["{{content}}<|im_end|>"]),
# System turn: same wrapping with the system role markers.
format_system=StringFormatter(slots=["<|im_system|>system<|im_middle|>{{content}}<|im_end|>"]),
# NOTE(review): presumably used when the caller supplies no system prompt - confirm
# against register_template.
default_system="You are a helpful assistant provided by Moonshot-AI.",
# The end-of-turn marker is also registered as a stop word.
stop_words=["<|im_end|>"],
)
# copied from vicuna template # copied from vicuna template
register_template( register_template(
name="llava", name="llava",

View File

@ -1315,6 +1315,21 @@ register_model_group(
) )
# Registers the Moonlight-16B-A3B checkpoints (base and Instruct) as one model group.
# Each entry maps a display name to a repository id per download source; here the
# DEFAULT and MODELSCOPE sources use the same "moonshotai/..." id.
register_model_group(
models={
"Moonlight-16B-A3B": {
DownloadSource.DEFAULT: "moonshotai/Moonlight-16B-A3B",
DownloadSource.MODELSCOPE: "moonshotai/Moonlight-16B-A3B",
},
"Moonlight-16B-A3B-Instruct": {
DownloadSource.DEFAULT: "moonshotai/Moonlight-16B-A3B-Instruct",
DownloadSource.MODELSCOPE: "moonshotai/Moonlight-16B-A3B-Instruct",
},
},
# Both checkpoints in this group are bound to the chat template named "moonlight".
# NOTE(review): the base (non-Instruct) checkpoint shares the chat template - confirm
# this is intended.
template="moonlight",
)
register_model_group( register_model_group(
models={ models={
"OLMo-1B": { "OLMo-1B": {
@ -2234,6 +2249,18 @@ register_model_group(
DownloadSource.DEFAULT: "Qwen/Qwen2.5-VL-72B-Instruct", DownloadSource.DEFAULT: "Qwen/Qwen2.5-VL-72B-Instruct",
DownloadSource.MODELSCOPE: "Qwen/Qwen2.5-VL-72B-Instruct", DownloadSource.MODELSCOPE: "Qwen/Qwen2.5-VL-72B-Instruct",
}, },
"Qwen2.5-VL-3B-Instruct-AWQ": {
DownloadSource.DEFAULT: "Qwen/Qwen2.5-VL-3B-Instruct-AWQ",
DownloadSource.MODELSCOPE: "Qwen/Qwen2.5-VL-3B-Instruct-AWQ",
},
"Qwen2.5-VL-7B-Instruct-AWQ": {
DownloadSource.DEFAULT: "Qwen/Qwen2.5-VL-7B-Instruct-AWQ",
DownloadSource.MODELSCOPE: "Qwen/Qwen2.5-VL-7B-Instruct-AWQ",
},
"Qwen2.5-VL-72B-Instruct-AWQ": {
DownloadSource.DEFAULT: "Qwen/Qwen2.5-VL-72B-Instruct-AWQ",
DownloadSource.MODELSCOPE: "Qwen/Qwen2.5-VL-72B-Instruct-AWQ",
},
}, },
template="qwen2_vl", template="qwen2_vl",
multimodal=True, multimodal=True,