mirror of
https://github.com/hiyouga/LLaMA-Factory.git
synced 2025-08-02 03:32:50 +08:00
[model] add models (#7054)
* add qwen25vl awq models
* add moonlight
Former-commit-id: ec1a1bc1184d13188029e19c1d4e7de68707aaf6
This commit is contained in:
parent
9359ee18ad
commit
ca78ba964d
@ -34,6 +34,7 @@ bf16: true
|
|||||||
ddp_timeout: 180000000
|
ddp_timeout: 180000000
|
||||||
|
|
||||||
### eval
|
### eval
|
||||||
|
# eval_dataset: alpaca_en_demo
|
||||||
# val_size: 0.1
|
# val_size: 0.1
|
||||||
# per_device_eval_batch_size: 1
|
# per_device_eval_batch_size: 1
|
||||||
# eval_strategy: steps
|
# eval_strategy: steps
|
||||||
|
@ -37,6 +37,7 @@ bf16: true
|
|||||||
ddp_timeout: 180000000
|
ddp_timeout: 180000000
|
||||||
|
|
||||||
### eval
|
### eval
|
||||||
|
# eval_dataset: dpo_en_demo
|
||||||
# val_size: 0.1
|
# val_size: 0.1
|
||||||
# per_device_eval_batch_size: 1
|
# per_device_eval_batch_size: 1
|
||||||
# eval_strategy: steps
|
# eval_strategy: steps
|
||||||
|
@ -34,6 +34,7 @@ bf16: true
|
|||||||
ddp_timeout: 180000000
|
ddp_timeout: 180000000
|
||||||
|
|
||||||
### eval
|
### eval
|
||||||
|
# eval_dataset: c4_demo
|
||||||
# val_size: 0.1
|
# val_size: 0.1
|
||||||
# per_device_eval_batch_size: 1
|
# per_device_eval_batch_size: 1
|
||||||
# eval_strategy: steps
|
# eval_strategy: steps
|
||||||
|
@ -35,6 +35,7 @@ bf16: true
|
|||||||
ddp_timeout: 180000000
|
ddp_timeout: 180000000
|
||||||
|
|
||||||
### eval
|
### eval
|
||||||
|
# eval_dataset: dpo_en_demo
|
||||||
# val_size: 0.1
|
# val_size: 0.1
|
||||||
# per_device_eval_batch_size: 1
|
# per_device_eval_batch_size: 1
|
||||||
# eval_strategy: steps
|
# eval_strategy: steps
|
||||||
|
@ -35,6 +35,7 @@ bf16: true
|
|||||||
ddp_timeout: 180000000
|
ddp_timeout: 180000000
|
||||||
|
|
||||||
### eval
|
### eval
|
||||||
|
# eval_dataset: alpaca_en_demo
|
||||||
# val_size: 0.1
|
# val_size: 0.1
|
||||||
# per_device_eval_batch_size: 1
|
# per_device_eval_batch_size: 1
|
||||||
# eval_strategy: steps
|
# eval_strategy: steps
|
||||||
|
@ -985,6 +985,18 @@ register_template(
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
register_template(
|
||||||
|
name="moonlight",
|
||||||
|
format_user=StringFormatter(
|
||||||
|
slots=["<|im_user|>user<|im_middle|>{{content}}<|im_end|><|im_assistant|>assistant<|im_middle|>"]
|
||||||
|
),
|
||||||
|
format_assistant=StringFormatter(slots=["{{content}}<|im_end|>"]),
|
||||||
|
format_system=StringFormatter(slots=["<|im_system|>system<|im_middle|>{{content}}<|im_end|>"]),
|
||||||
|
default_system="You are a helpful assistant provided by Moonshot-AI.",
|
||||||
|
stop_words=["<|im_end|>"],
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
# copied from vicuna template
|
# copied from vicuna template
|
||||||
register_template(
|
register_template(
|
||||||
name="llava",
|
name="llava",
|
||||||
|
@ -1315,6 +1315,21 @@ register_model_group(
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
register_model_group(
|
||||||
|
models={
|
||||||
|
"Moonlight-16B-A3B": {
|
||||||
|
DownloadSource.DEFAULT: "moonshotai/Moonlight-16B-A3B",
|
||||||
|
DownloadSource.MODELSCOPE: "moonshotai/Moonlight-16B-A3B",
|
||||||
|
},
|
||||||
|
"Moonlight-16B-A3B-Instruct": {
|
||||||
|
DownloadSource.DEFAULT: "moonshotai/Moonlight-16B-A3B-Instruct",
|
||||||
|
DownloadSource.MODELSCOPE: "moonshotai/Moonlight-16B-A3B-Instruct",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
template="moonlight",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
register_model_group(
|
register_model_group(
|
||||||
models={
|
models={
|
||||||
"OLMo-1B": {
|
"OLMo-1B": {
|
||||||
@ -2234,6 +2249,18 @@ register_model_group(
|
|||||||
DownloadSource.DEFAULT: "Qwen/Qwen2.5-VL-72B-Instruct",
|
DownloadSource.DEFAULT: "Qwen/Qwen2.5-VL-72B-Instruct",
|
||||||
DownloadSource.MODELSCOPE: "Qwen/Qwen2.5-VL-72B-Instruct",
|
DownloadSource.MODELSCOPE: "Qwen/Qwen2.5-VL-72B-Instruct",
|
||||||
},
|
},
|
||||||
|
"Qwen2.5-VL-3B-Instruct-AWQ": {
|
||||||
|
DownloadSource.DEFAULT: "Qwen/Qwen2.5-VL-3B-Instruct-AWQ",
|
||||||
|
DownloadSource.MODELSCOPE: "Qwen/Qwen2.5-VL-3B-Instruct-AWQ",
|
||||||
|
},
|
||||||
|
"Qwen2.5-VL-7B-Instruct-AWQ": {
|
||||||
|
DownloadSource.DEFAULT: "Qwen/Qwen2.5-VL-7B-Instruct-AWQ",
|
||||||
|
DownloadSource.MODELSCOPE: "Qwen/Qwen2.5-VL-7B-Instruct-AWQ",
|
||||||
|
},
|
||||||
|
"Qwen2.5-VL-72B-Instruct-AWQ": {
|
||||||
|
DownloadSource.DEFAULT: "Qwen/Qwen2.5-VL-72B-Instruct-AWQ",
|
||||||
|
DownloadSource.MODELSCOPE: "Qwen/Qwen2.5-VL-72B-Instruct-AWQ",
|
||||||
|
},
|
||||||
},
|
},
|
||||||
template="qwen2_vl",
|
template="qwen2_vl",
|
||||||
multimodal=True,
|
multimodal=True,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user