[model] add models (#7054)

* add qwen25vl awq models
* add moonlight

Former-commit-id: ec1a1bc118
2026-03-07 04:05:58 +08:00 · 2025-02-24 22:05:13 +08:00
parent 9359ee18ad
commit ca78ba964d
7 changed files with 44 additions and 0 deletions
--- a/examples/train_full/llama3_full_sft.yaml
+++ b/examples/train_full/llama3_full_sft.yaml
@@ -34,6 +34,7 @@ bf16: true
 ddp_timeout: 180000000
 ### eval
 # eval_dataset: alpaca_en_demo
 # val_size: 0.1
 # per_device_eval_batch_size: 1
 # eval_strategy: steps
--- a/examples/train_lora/llama3_lora_dpo.yaml
+++ b/examples/train_lora/llama3_lora_dpo.yaml
@@ -37,6 +37,7 @@ bf16: true
 ddp_timeout: 180000000
 ### eval
 # eval_dataset: dpo_en_demo
 # val_size: 0.1
 # per_device_eval_batch_size: 1
 # eval_strategy: steps
--- a/examples/train_lora/llama3_lora_pretrain.yaml
+++ b/examples/train_lora/llama3_lora_pretrain.yaml
@@ -34,6 +34,7 @@ bf16: true
 ddp_timeout: 180000000
 ### eval
 # eval_dataset: c4_demo
 # val_size: 0.1
 # per_device_eval_batch_size: 1
 # eval_strategy: steps
--- a/examples/train_lora/llama3_lora_reward.yaml
+++ b/examples/train_lora/llama3_lora_reward.yaml
@@ -35,6 +35,7 @@ bf16: true
 ddp_timeout: 180000000
 ### eval
 # eval_dataset: dpo_en_demo
 # val_size: 0.1
 # per_device_eval_batch_size: 1
 # eval_strategy: steps
--- a/examples/train_lora/llama3_lora_sft.yaml
+++ b/examples/train_lora/llama3_lora_sft.yaml
@@ -35,6 +35,7 @@ bf16: true
 ddp_timeout: 180000000
 ### eval
 # eval_dataset: alpaca_en_demo
 # val_size: 0.1
 # per_device_eval_batch_size: 1
 # eval_strategy: steps
--- a/src/llamafactory/data/template.py
+++ b/src/llamafactory/data/template.py
@@ -985,6 +985,18 @@ register_template(
 )
 register_template(
    name="moonlight",
    format_user=StringFormatter(
        slots=["<|im_user|>user<|im_middle|>{{content}}<|im_end|><|im_assistant|>assistant<|im_middle|>"]
    ),
    format_assistant=StringFormatter(slots=["{{content}}<|im_end|>"]),
    format_system=StringFormatter(slots=["<|im_system|>system<|im_middle|>{{content}}<|im_end|>"]),
    default_system="You are a helpful assistant provided by Moonshot-AI.",
    stop_words=["<|im_end|>"],
 )
 # copied from vicuna template
 register_template(
    name="llava",
--- a/src/llamafactory/extras/constants.py
+++ b/src/llamafactory/extras/constants.py
@@ -1315,6 +1315,21 @@ register_model_group(
 )
 register_model_group(
    models={
        "Moonlight-16B-A3B": {
            DownloadSource.DEFAULT: "moonshotai/Moonlight-16B-A3B",
            DownloadSource.MODELSCOPE: "moonshotai/Moonlight-16B-A3B",
        },
        "Moonlight-16B-A3B-Instruct": {
            DownloadSource.DEFAULT: "moonshotai/Moonlight-16B-A3B-Instruct",
            DownloadSource.MODELSCOPE: "moonshotai/Moonlight-16B-A3B-Instruct",
        },
    },
    template="moonlight",
 )
 register_model_group(
    models={
        "OLMo-1B": {
@@ -2234,6 +2249,18 @@ register_model_group(
            DownloadSource.DEFAULT: "Qwen/Qwen2.5-VL-72B-Instruct",
            DownloadSource.MODELSCOPE: "Qwen/Qwen2.5-VL-72B-Instruct",
        },
        "Qwen2.5-VL-3B-Instruct-AWQ": {
            DownloadSource.DEFAULT: "Qwen/Qwen2.5-VL-3B-Instruct-AWQ",
            DownloadSource.MODELSCOPE: "Qwen/Qwen2.5-VL-3B-Instruct-AWQ",
        },
        "Qwen2.5-VL-7B-Instruct-AWQ": {
            DownloadSource.DEFAULT: "Qwen/Qwen2.5-VL-7B-Instruct-AWQ",
            DownloadSource.MODELSCOPE: "Qwen/Qwen2.5-VL-7B-Instruct-AWQ",
        },
        "Qwen2.5-VL-72B-Instruct-AWQ": {
            DownloadSource.DEFAULT: "Qwen/Qwen2.5-VL-72B-Instruct-AWQ",
            DownloadSource.MODELSCOPE: "Qwen/Qwen2.5-VL-72B-Instruct-AWQ",
        },
    },
    template="qwen2_vl",
    multimodal=True,