mirror of
				https://github.com/hiyouga/LLaMA-Factory.git
				synced 2025-11-04 18:02:19 +08:00 
			
		
		
		
	[model] add qwen3vl 4b + 8b (#9275)
This commit is contained in:
		
							parent
							
								
									c867e28093
								
							
						
					
					
						commit
						1037f63311
					
				@ -1,6 +1,6 @@
 | 
				
			|||||||
# core deps
 | 
					# core deps
 | 
				
			||||||
transformers>=4.49.0,<=4.56.2,!=4.52.0; python_version < '3.10'
 | 
					transformers>=4.49.0,<=4.56.2,!=4.52.0; python_version < '3.10'
 | 
				
			||||||
transformers>=4.49.0,<=4.57.0,!=4.52.0; python_version >= '3.10'
 | 
					transformers>=4.49.0,<=4.57.1,!=4.52.0; python_version >= '3.10'
 | 
				
			||||||
datasets>=2.16.0,<=4.0.0
 | 
					datasets>=2.16.0,<=4.0.0
 | 
				
			||||||
accelerate>=1.3.0,<=1.11.0
 | 
					accelerate>=1.3.0,<=1.11.0
 | 
				
			||||||
peft>=0.14.0,<=0.17.1
 | 
					peft>=0.14.0,<=0.17.1
 | 
				
			||||||
 | 
				
			|||||||
@ -3193,14 +3193,22 @@ register_model_group(
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
register_model_group(
 | 
					register_model_group(
 | 
				
			||||||
    models={
 | 
					    models={
 | 
				
			||||||
        "Qwen3-VL-235B-A22B-Instruct": {
 | 
					        "Qwen3-VL-4B-Instruct": {
 | 
				
			||||||
            DownloadSource.DEFAULT: "Qwen/Qwen3-VL-235B-A22B-Instruct",
 | 
					            DownloadSource.DEFAULT: "Qwen/Qwen3-VL-4B-Instruct",
 | 
				
			||||||
            DownloadSource.MODELSCOPE: "Qwen/Qwen3-VL-235B-A22B-Instruct",
 | 
					            DownloadSource.MODELSCOPE: "Qwen/Qwen3-VL-4B-Instruct",
 | 
				
			||||||
 | 
					        },
 | 
				
			||||||
 | 
					        "Qwen3-VL-8B-Instruct": {
 | 
				
			||||||
 | 
					            DownloadSource.DEFAULT: "Qwen/Qwen3-VL-8B-Instruct",
 | 
				
			||||||
 | 
					            DownloadSource.MODELSCOPE: "Qwen/Qwen3-VL-8B-Instruct",
 | 
				
			||||||
        },
 | 
					        },
 | 
				
			||||||
        "Qwen3-VL-30B-A3B-Instruct": {
 | 
					        "Qwen3-VL-30B-A3B-Instruct": {
 | 
				
			||||||
            DownloadSource.DEFAULT: "Qwen/Qwen3-VL-30B-A3B-Instruct",
 | 
					            DownloadSource.DEFAULT: "Qwen/Qwen3-VL-30B-A3B-Instruct",
 | 
				
			||||||
            DownloadSource.MODELSCOPE: "Qwen/Qwen3-VL-30B-A3B-Instruct",
 | 
					            DownloadSource.MODELSCOPE: "Qwen/Qwen3-VL-30B-A3B-Instruct",
 | 
				
			||||||
        },
 | 
					        },
 | 
				
			||||||
 | 
					        "Qwen3-VL-235B-A22B-Instruct": {
 | 
				
			||||||
 | 
					            DownloadSource.DEFAULT: "Qwen/Qwen3-VL-235B-A22B-Instruct",
 | 
				
			||||||
 | 
					            DownloadSource.MODELSCOPE: "Qwen/Qwen3-VL-235B-A22B-Instruct",
 | 
				
			||||||
 | 
					        },
 | 
				
			||||||
    },
 | 
					    },
 | 
				
			||||||
    template="qwen3_vl_nothink",
 | 
					    template="qwen3_vl_nothink",
 | 
				
			||||||
    multimodal=True,
 | 
					    multimodal=True,
 | 
				
			||||||
@ -3209,14 +3217,22 @@ register_model_group(
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
register_model_group(
 | 
					register_model_group(
 | 
				
			||||||
    models={
 | 
					    models={
 | 
				
			||||||
        "Qwen3-VL-235B-A22B-Thinking": {
 | 
					        "Qwen3-VL-4B-Thinking": {
 | 
				
			||||||
            DownloadSource.DEFAULT: "Qwen/Qwen3-VL-235B-A22B-Thinking",
 | 
					            DownloadSource.DEFAULT: "Qwen/Qwen3-VL-4B-Thinking",
 | 
				
			||||||
            DownloadSource.MODELSCOPE: "Qwen/Qwen3-VL-235B-A22B-Thinking",
 | 
					            DownloadSource.MODELSCOPE: "Qwen/Qwen3-VL-4B-Thinking",
 | 
				
			||||||
 | 
					        },
 | 
				
			||||||
 | 
					        "Qwen3-VL-8B-Thinking": {
 | 
				
			||||||
 | 
					            DownloadSource.DEFAULT: "Qwen/Qwen3-VL-8B-Thinking",
 | 
				
			||||||
 | 
					            DownloadSource.MODELSCOPE: "Qwen/Qwen3-VL-8B-Thinking",
 | 
				
			||||||
        },
 | 
					        },
 | 
				
			||||||
        "Qwen3-VL-30B-A3B-Thinking": {
 | 
					        "Qwen3-VL-30B-A3B-Thinking": {
 | 
				
			||||||
            DownloadSource.DEFAULT: "Qwen/Qwen3-VL-30B-A3B-Thinking",
 | 
					            DownloadSource.DEFAULT: "Qwen/Qwen3-VL-30B-A3B-Thinking",
 | 
				
			||||||
            DownloadSource.MODELSCOPE: "Qwen/Qwen3-VL-30B-A3B-Thinking",
 | 
					            DownloadSource.MODELSCOPE: "Qwen/Qwen3-VL-30B-A3B-Thinking",
 | 
				
			||||||
        },
 | 
					        },
 | 
				
			||||||
 | 
					        "Qwen3-VL-235B-A22B-Thinking": {
 | 
				
			||||||
 | 
					            DownloadSource.DEFAULT: "Qwen/Qwen3-VL-235B-A22B-Thinking",
 | 
				
			||||||
 | 
					            DownloadSource.MODELSCOPE: "Qwen/Qwen3-VL-235B-A22B-Thinking",
 | 
				
			||||||
 | 
					        },
 | 
				
			||||||
    },
 | 
					    },
 | 
				
			||||||
    template="qwen3_vl",
 | 
					    template="qwen3_vl",
 | 
				
			||||||
    multimodal=True,
 | 
					    multimodal=True,
 | 
				
			||||||
 | 
				
			|||||||
@ -94,7 +94,7 @@ def check_version(requirement: str, mandatory: bool = False) -> None:
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
def check_dependencies() -> None:
 | 
					def check_dependencies() -> None:
 | 
				
			||||||
    r"""Check the version of the required packages."""
 | 
					    r"""Check the version of the required packages."""
 | 
				
			||||||
    check_version("transformers>=4.49.0,<=4.57.0")
 | 
					    check_version("transformers>=4.49.0,<=4.57.1")
 | 
				
			||||||
    check_version("datasets>=2.16.0,<=4.0.0")
 | 
					    check_version("datasets>=2.16.0,<=4.0.0")
 | 
				
			||||||
    check_version("accelerate>=1.3.0,<=1.11.0")
 | 
					    check_version("accelerate>=1.3.0,<=1.11.0")
 | 
				
			||||||
    check_version("peft>=0.14.0,<=0.17.1")
 | 
					    check_version("peft>=0.14.0,<=0.17.1")
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user