mirror of
				https://github.com/hiyouga/LLaMA-Factory.git
				synced 2025-11-04 18:02:19 +08:00 
			
		
		
		
	[model] add models (#7054)
* add qwen25vl awq models * add moonlight Former-commit-id: ae3be2970fea8a35907202a313ab767381c44916
This commit is contained in:
		
							parent
							
								
									ee46011b34
								
							
						
					
					
						commit
						c1d5073bd3
					
				@ -34,6 +34,7 @@ bf16: true
 | 
			
		||||
ddp_timeout: 180000000
 | 
			
		||||
 | 
			
		||||
### eval
 | 
			
		||||
# eval_dataset: alpaca_en_demo
 | 
			
		||||
# val_size: 0.1
 | 
			
		||||
# per_device_eval_batch_size: 1
 | 
			
		||||
# eval_strategy: steps
 | 
			
		||||
 | 
			
		||||
@ -37,6 +37,7 @@ bf16: true
 | 
			
		||||
ddp_timeout: 180000000
 | 
			
		||||
 | 
			
		||||
### eval
 | 
			
		||||
# eval_dataset: dpo_en_demo
 | 
			
		||||
# val_size: 0.1
 | 
			
		||||
# per_device_eval_batch_size: 1
 | 
			
		||||
# eval_strategy: steps
 | 
			
		||||
 | 
			
		||||
@ -34,6 +34,7 @@ bf16: true
 | 
			
		||||
ddp_timeout: 180000000
 | 
			
		||||
 | 
			
		||||
### eval
 | 
			
		||||
# eval_dataset: c4_demo
 | 
			
		||||
# val_size: 0.1
 | 
			
		||||
# per_device_eval_batch_size: 1
 | 
			
		||||
# eval_strategy: steps
 | 
			
		||||
 | 
			
		||||
@ -35,6 +35,7 @@ bf16: true
 | 
			
		||||
ddp_timeout: 180000000
 | 
			
		||||
 | 
			
		||||
### eval
 | 
			
		||||
# eval_dataset: dpo_en_demo
 | 
			
		||||
# val_size: 0.1
 | 
			
		||||
# per_device_eval_batch_size: 1
 | 
			
		||||
# eval_strategy: steps
 | 
			
		||||
 | 
			
		||||
@ -35,6 +35,7 @@ bf16: true
 | 
			
		||||
ddp_timeout: 180000000
 | 
			
		||||
 | 
			
		||||
### eval
 | 
			
		||||
# eval_dataset: alpaca_en_demo
 | 
			
		||||
# val_size: 0.1
 | 
			
		||||
# per_device_eval_batch_size: 1
 | 
			
		||||
# eval_strategy: steps
 | 
			
		||||
 | 
			
		||||
@ -985,6 +985,18 @@ register_template(
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
register_template(
 | 
			
		||||
    name="moonlight",
 | 
			
		||||
    format_user=StringFormatter(
 | 
			
		||||
        slots=["<|im_user|>user<|im_middle|>{{content}}<|im_end|><|im_assistant|>assistant<|im_middle|>"]
 | 
			
		||||
    ),
 | 
			
		||||
    format_assistant=StringFormatter(slots=["{{content}}<|im_end|>"]),
 | 
			
		||||
    format_system=StringFormatter(slots=["<|im_system|>system<|im_middle|>{{content}}<|im_end|>"]),
 | 
			
		||||
    default_system="You are a helpful assistant provided by Moonshot-AI.",
 | 
			
		||||
    stop_words=["<|im_end|>"],
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# copied from vicuna template
 | 
			
		||||
register_template(
 | 
			
		||||
    name="llava",
 | 
			
		||||
 | 
			
		||||
@ -1315,6 +1315,21 @@ register_model_group(
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
register_model_group(
 | 
			
		||||
    models={
 | 
			
		||||
        "Moonlight-16B-A3B": {
 | 
			
		||||
            DownloadSource.DEFAULT: "moonshotai/Moonlight-16B-A3B",
 | 
			
		||||
            DownloadSource.MODELSCOPE: "moonshotai/Moonlight-16B-A3B",
 | 
			
		||||
        },
 | 
			
		||||
        "Moonlight-16B-A3B-Instruct": {
 | 
			
		||||
            DownloadSource.DEFAULT: "moonshotai/Moonlight-16B-A3B-Instruct",
 | 
			
		||||
            DownloadSource.MODELSCOPE: "moonshotai/Moonlight-16B-A3B-Instruct",
 | 
			
		||||
        },
 | 
			
		||||
    },
 | 
			
		||||
    template="moonlight",
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
register_model_group(
 | 
			
		||||
    models={
 | 
			
		||||
        "OLMo-1B": {
 | 
			
		||||
@ -2234,6 +2249,18 @@ register_model_group(
 | 
			
		||||
            DownloadSource.DEFAULT: "Qwen/Qwen2.5-VL-72B-Instruct",
 | 
			
		||||
            DownloadSource.MODELSCOPE: "Qwen/Qwen2.5-VL-72B-Instruct",
 | 
			
		||||
        },
 | 
			
		||||
        "Qwen2.5-VL-3B-Instruct-AWQ": {
 | 
			
		||||
            DownloadSource.DEFAULT: "Qwen/Qwen2.5-VL-3B-Instruct-AWQ",
 | 
			
		||||
            DownloadSource.MODELSCOPE: "Qwen/Qwen2.5-VL-3B-Instruct-AWQ",
 | 
			
		||||
        },
 | 
			
		||||
        "Qwen2.5-VL-7B-Instruct-AWQ": {
 | 
			
		||||
            DownloadSource.DEFAULT: "Qwen/Qwen2.5-VL-7B-Instruct-AWQ",
 | 
			
		||||
            DownloadSource.MODELSCOPE: "Qwen/Qwen2.5-VL-7B-Instruct-AWQ",
 | 
			
		||||
        },
 | 
			
		||||
        "Qwen2.5-VL-72B-Instruct-AWQ": {
 | 
			
		||||
            DownloadSource.DEFAULT: "Qwen/Qwen2.5-VL-72B-Instruct-AWQ",
 | 
			
		||||
            DownloadSource.MODELSCOPE: "Qwen/Qwen2.5-VL-72B-Instruct-AWQ",
 | 
			
		||||
        },
 | 
			
		||||
    },
 | 
			
		||||
    template="qwen2_vl",
 | 
			
		||||
    multimodal=True,
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user