mirror of
				https://github.com/hiyouga/LLaMA-Factory.git
				synced 2025-11-04 18:02:19 +08:00 
			
		
		
		
	[model] add qwen2vl 32b & upgrade peft (#7469)
* add qwen2vl 32b
* fix ci
* upgrade peft to 0.15
* fix ci
* fix ci
This commit is contained in:
		
							parent
							
								
									ec6a261568
								
							
						
					
					
						commit
						0583d06676
					
				
							
								
								
									
										2
									
								
								.github/ISSUE_TEMPLATE/1-bug-report.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								.github/ISSUE_TEMPLATE/1-bug-report.yml
									
									
									
									
										vendored
									
									
								
							@ -12,7 +12,7 @@ body:
 | 
			
		||||
    attributes:
 | 
			
		||||
      value: |
 | 
			
		||||
        Please do not create issues that are not related to framework bugs under this category; use **[Discussions](https://github.com/hiyouga/LLaMA-Factory/discussions/categories/q-a)** instead.
 | 
			
		||||
        请勿在此分类下创建和框架 bug 无关的 issues,请使用 **[讨论区](https://github.com/hiyouga/LLaMA-Factory/discussions/categories/q-a)**。
 | 
			
		||||
        请勿在此分类下创建和框架 bug 无关的 issues,训练问题求助请使用 **[讨论区](https://github.com/hiyouga/LLaMA-Factory/discussions/categories/q-a)**。
 | 
			
		||||
 | 
			
		||||
  - type: checkboxes
 | 
			
		||||
    id: reminder
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										2
									
								
								.github/workflows/tests.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								.github/workflows/tests.yml
									
									
									
									
										vendored
									
									
								
							@ -59,6 +59,7 @@ jobs:
 | 
			
		||||
          python -m pip install ".[torch,dev]"
 | 
			
		||||
 | 
			
		||||
      - name: Cache files
 | 
			
		||||
        id: hf-hub-cache
 | 
			
		||||
        uses: actions/cache@v4
 | 
			
		||||
        with:
 | 
			
		||||
          path: ${{ runner.temp }}/huggingface
 | 
			
		||||
@ -77,3 +78,4 @@ jobs:
 | 
			
		||||
          make test
 | 
			
		||||
        env:
 | 
			
		||||
          HF_HOME: ${{ runner.temp }}/huggingface
 | 
			
		||||
          HF_HUB_OFFLINE: "${{ steps.hf-hub-cache.outputs.cache-hit == 'true' && '1' || '0' }}"
 | 
			
		||||
 | 
			
		||||
@ -261,7 +261,7 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/
 | 
			
		||||
| [Pixtral](https://huggingface.co/mistralai)                       | 12B                              | pixtral             |
 | 
			
		||||
| [Qwen/QwQ (1-2.5) (Code/Math/MoE)](https://huggingface.co/Qwen)   | 0.5B/1.5B/3B/7B/14B/32B/72B/110B | qwen                |
 | 
			
		||||
| [Qwen2-Audio](https://huggingface.co/Qwen)                        | 7B                               | qwen2_audio         |
 | 
			
		||||
| [Qwen2-VL/Qwen2.5-VL/QVQ](https://huggingface.co/Qwen)            | 2B/3B/7B/72B                     | qwen2_vl            |
 | 
			
		||||
| [Qwen2-VL/Qwen2.5-VL/QVQ](https://huggingface.co/Qwen)            | 2B/3B/7B/32B/72B                 | qwen2_vl            |
 | 
			
		||||
| [Skywork o1](https://huggingface.co/Skywork)                      | 8B                               | skywork_o1          |
 | 
			
		||||
| [StarCoder 2](https://huggingface.co/bigcode)                     | 3B/7B/15B                        | -                   |
 | 
			
		||||
| [TeleChat2](https://huggingface.co/Tele-AI)                       | 3B/7B/35B/115B                   | telechat2           |
 | 
			
		||||
@ -399,11 +399,11 @@ huggingface-cli login
 | 
			
		||||
| Mandatory    | Minimum | Recommend |
 | 
			
		||||
| ------------ | ------- | --------- |
 | 
			
		||||
| python       | 3.9     | 3.10      |
 | 
			
		||||
| torch        | 1.13.1  | 2.5.1     |
 | 
			
		||||
| transformers | 4.41.2  | 4.49.0    |
 | 
			
		||||
| torch        | 1.13.1  | 2.6.0     |
 | 
			
		||||
| transformers | 4.41.2  | 4.50.0    |
 | 
			
		||||
| datasets     | 2.16.0  | 3.2.0     |
 | 
			
		||||
| accelerate   | 0.34.0  | 1.2.1     |
 | 
			
		||||
| peft         | 0.11.1  | 0.12.0    |
 | 
			
		||||
| peft         | 0.14.0  | 0.15.0    |
 | 
			
		||||
| trl          | 0.8.6   | 0.9.6     |
 | 
			
		||||
 | 
			
		||||
| Optional     | Minimum | Recommend |
 | 
			
		||||
 | 
			
		||||
@ -263,7 +263,7 @@ https://github.com/user-attachments/assets/43b700c6-a178-41db-b1f8-8190a5d3fcfc
 | 
			
		||||
| [Pixtral](https://huggingface.co/mistralai)                       | 12B                              | pixtral             |
 | 
			
		||||
| [Qwen/QwQ (1-2.5) (Code/Math/MoE)](https://huggingface.co/Qwen)   | 0.5B/1.5B/3B/7B/14B/32B/72B/110B | qwen                |
 | 
			
		||||
| [Qwen2-Audio](https://huggingface.co/Qwen)                        | 7B                               | qwen2_audio         |
 | 
			
		||||
| [Qwen2-VL/Qwen2.5-VL/QVQ](https://huggingface.co/Qwen)            | 2B/3B/7B/72B                     | qwen2_vl            |
 | 
			
		||||
| [Qwen2-VL/Qwen2.5-VL/QVQ](https://huggingface.co/Qwen)            | 2B/3B/7B/32B/72B                 | qwen2_vl            |
 | 
			
		||||
| [Skywork o1](https://huggingface.co/Skywork)                      | 8B                               | skywork_o1          |
 | 
			
		||||
| [StarCoder 2](https://huggingface.co/bigcode)                     | 3B/7B/15B                        | -                   |
 | 
			
		||||
| [TeleChat2](https://huggingface.co/Tele-AI)                       | 3B/7B/35B/115B                   | telechat2           |
 | 
			
		||||
@ -401,11 +401,11 @@ huggingface-cli login
 | 
			
		||||
| 必需项       | 至少     | 推荐      |
 | 
			
		||||
| ------------ | ------- | --------- |
 | 
			
		||||
| python       | 3.9     | 3.10      |
 | 
			
		||||
| torch        | 1.13.1  | 2.5.1     |
 | 
			
		||||
| transformers | 4.41.2  | 4.49.0    |
 | 
			
		||||
| torch        | 1.13.1  | 2.6.0     |
 | 
			
		||||
| transformers | 4.41.2  | 4.50.0    |
 | 
			
		||||
| datasets     | 2.16.0  | 3.2.0     |
 | 
			
		||||
| accelerate   | 0.34.0  | 1.2.1     |
 | 
			
		||||
| peft         | 0.11.1  | 0.12.0    |
 | 
			
		||||
| peft         | 0.14.0  | 0.15.0    |
 | 
			
		||||
| trl          | 0.8.6   | 0.9.6     |
 | 
			
		||||
 | 
			
		||||
| 可选项       | 至少     | 推荐      |
 | 
			
		||||
 | 
			
		||||
@ -1,9 +1,9 @@
 | 
			
		||||
transformers>=4.41.2,<=4.50.0,!=4.46.*,!=4.47.*,!=4.48.*;python_version<'3.10' and sys_platform != 'darwin'
 | 
			
		||||
transformers>=4.41.2,<=4.50.0,!=4.46.*,!=4.47.*,!=4.48.0;python_version>='3.10' and sys_platform != 'darwin'
 | 
			
		||||
transformers>=4.41.2,<=4.49.0,!=4.46.*,!=4.47.*,!=4.48.*;sys_platform == 'darwin'
 | 
			
		||||
datasets>=2.16.0,<=3.3.2
 | 
			
		||||
accelerate>=0.34.0,<=1.4.0
 | 
			
		||||
peft>=0.11.1,<=0.15.0
 | 
			
		||||
datasets>=2.16.0,<=3.4.1
 | 
			
		||||
accelerate>=0.34.0,<=1.5.2
 | 
			
		||||
peft>=0.14.0,<=0.15.0
 | 
			
		||||
trl>=0.8.6,<=0.9.6
 | 
			
		||||
tokenizers>=0.19.0,<=0.21.0
 | 
			
		||||
gradio>=4.38.0,<=5.21.0
 | 
			
		||||
 | 
			
		||||
@ -20,9 +20,9 @@ Level:
 | 
			
		||||
Dependency graph:
 | 
			
		||||
  main:
 | 
			
		||||
    transformers>=4.41.2,<=4.50.0,!=4.46.*,!=4.47.*,!=4.48.0
 | 
			
		||||
    datasets>=2.16.0,<=3.3.2
 | 
			
		||||
    accelerate>=0.34.0,<=1.4.0
 | 
			
		||||
    peft>=0.11.1,<=0.12.0
 | 
			
		||||
    datasets>=2.16.0,<=3.4.1
 | 
			
		||||
    accelerate>=0.34.0,<=1.5.2
 | 
			
		||||
    peft>=0.14.0,<=0.15.0
 | 
			
		||||
    trl>=0.8.6,<=0.9.6
 | 
			
		||||
  attention:
 | 
			
		||||
    transformers>=4.42.4 (gemma+fa2)
 | 
			
		||||
 | 
			
		||||
@ -2346,6 +2346,10 @@ register_model_group(
 | 
			
		||||
            DownloadSource.DEFAULT: "Qwen/Qwen2.5-VL-7B-Instruct",
 | 
			
		||||
            DownloadSource.MODELSCOPE: "Qwen/Qwen2.5-VL-7B-Instruct",
 | 
			
		||||
        },
 | 
			
		||||
        "Qwen2.5-VL-32B-Instruct": {
 | 
			
		||||
            DownloadSource.DEFAULT: "Qwen/Qwen2.5-VL-32B-Instruct",
 | 
			
		||||
            DownloadSource.MODELSCOPE: "Qwen/Qwen2.5-VL-32B-Instruct",
 | 
			
		||||
        },
 | 
			
		||||
        "Qwen2.5-VL-72B-Instruct": {
 | 
			
		||||
            DownloadSource.DEFAULT: "Qwen/Qwen2.5-VL-72B-Instruct",
 | 
			
		||||
            DownloadSource.MODELSCOPE: "Qwen/Qwen2.5-VL-72B-Instruct",
 | 
			
		||||
 | 
			
		||||
@ -89,9 +89,9 @@ def check_version(requirement: str, mandatory: bool = False) -> None:
 | 
			
		||||
def check_dependencies() -> None:
 | 
			
		||||
    r"""Check the version of the required packages."""
 | 
			
		||||
    check_version("transformers>=4.41.2,<=4.50.0,!=4.46.0,!=4.46.1,!=4.46.2,!=4.46.3,!=4.47.0,!=4.47.1,!=4.48.0")
 | 
			
		||||
    check_version("datasets>=2.16.0,<=3.3.2")
 | 
			
		||||
    check_version("accelerate>=0.34.0,<=1.4.0")
 | 
			
		||||
    check_version("peft>=0.11.1,<=0.15.0")
 | 
			
		||||
    check_version("datasets>=2.16.0,<=3.4.1")
 | 
			
		||||
    check_version("accelerate>=0.34.0,<=1.5.2")
 | 
			
		||||
    check_version("peft>=0.14.0,<=0.15.0")
 | 
			
		||||
    check_version("trl>=0.8.6,<=0.9.6")
 | 
			
		||||
    if is_transformers_version_greater_than("4.46.0") and not is_transformers_version_greater_than("4.48.1"):
 | 
			
		||||
        logger.warning_rank0_once("There are known bugs in transformers v4.46.0-v4.48.0, please use other versions.")
 | 
			
		||||
 | 
			
		||||
@ -161,13 +161,12 @@ class PissaConvertCallback(TrainerCallback):
 | 
			
		||||
                model.save_pretrained(pissa_backup_dir, safe_serialization=args.save_safetensors)
 | 
			
		||||
                setattr(model.peft_config["default"], "init_lora_weights", init_lora_weights)
 | 
			
		||||
                model.save_pretrained(
 | 
			
		||||
                    pissa_convert_dir, safe_serialization=args.save_safetensors, convert_pissa_to_lora=pissa_init_dir
 | 
			
		||||
                )  # TODO: use `path_initial_model_for_weight_conversion` (peft>=0.12.0)
 | 
			
		||||
                    pissa_convert_dir,
 | 
			
		||||
                    safe_serialization=args.save_safetensors,
 | 
			
		||||
                    path_initial_model_for_weight_conversion=pissa_init_dir,
 | 
			
		||||
                )
 | 
			
		||||
                model.load_adapter(pissa_backup_dir, "default", is_trainable=True)
 | 
			
		||||
                model.set_adapter("default")
 | 
			
		||||
                if "pissa_init" in model.peft_config.keys():  # backward compatibility (peft<0.12.0)
 | 
			
		||||
                    model.delete_adapter("pissa_init")
 | 
			
		||||
 | 
			
		||||
                setattr(model.peft_config["default"], "init_lora_weights", init_lora_weights)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -48,8 +48,6 @@ INFER_ARGS = {
 | 
			
		||||
    "infer_dtype": "float16",
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
OS_NAME = os.getenv("OS_NAME", "")
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@pytest.mark.xfail(reason="PiSSA initialization is not stable in different platform.")
 | 
			
		||||
def test_pissa_train():
 | 
			
		||||
@ -58,7 +56,7 @@ def test_pissa_train():
 | 
			
		||||
    compare_model(model, ref_model)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@pytest.mark.xfail(OS_NAME.startswith("windows"), reason="Known connection error on Windows.")
 | 
			
		||||
@pytest.mark.xfail(reason="Known connection error.")
 | 
			
		||||
def test_pissa_inference():
 | 
			
		||||
    model = load_infer_model(**INFER_ARGS)
 | 
			
		||||
    ref_model = load_reference_model(TINY_LLAMA_PISSA, TINY_LLAMA_PISSA, use_pissa=True, is_trainable=False)
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user