[model] add qwen2vl 32b & upgrade peft (#7469)

* add qwen2vl 32b

* fix ci

* upgrade peft to 0.15

* fix ci

* fix ci
This commit is contained in:
hoshi-hiyouga 2025-03-25 12:15:58 +08:00 committed by GitHub
parent b6d8749bf3
commit 59e12bffe8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 29 additions and 26 deletions

View File

@ -12,7 +12,7 @@ body:
attributes:
value: |
Please do not create issues that are not related to framework bugs under this category, use **[Discussions](https://github.com/hiyouga/LLaMA-Factory/discussions/categories/q-a)** instead.
请勿在此分类下创建和框架 bug 无关的 issues请使用 **[讨论区](https://github.com/hiyouga/LLaMA-Factory/discussions/categories/q-a)**。
请勿在此分类下创建和框架 bug 无关的 issues训练问题求助请使用 **[讨论区](https://github.com/hiyouga/LLaMA-Factory/discussions/categories/q-a)**。
- type: checkboxes
id: reminder

View File

@ -59,6 +59,7 @@ jobs:
python -m pip install ".[torch,dev]"
- name: Cache files
id: hf-hub-cache
uses: actions/cache@v4
with:
path: ${{ runner.temp }}/huggingface
@ -77,3 +78,4 @@ jobs:
make test
env:
HF_HOME: ${{ runner.temp }}/huggingface
HF_HUB_OFFLINE: "${{ steps.hf-hub-cache.outputs.cache-hit == 'true' && '1' || '0' }}"

View File

@ -261,7 +261,7 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/
| [Pixtral](https://huggingface.co/mistralai) | 12B | pixtral |
| [Qwen/QwQ (1-2.5) (Code/Math/MoE)](https://huggingface.co/Qwen) | 0.5B/1.5B/3B/7B/14B/32B/72B/110B | qwen |
| [Qwen2-Audio](https://huggingface.co/Qwen) | 7B | qwen2_audio |
| [Qwen2-VL/Qwen2.5-VL/QVQ](https://huggingface.co/Qwen) | 2B/3B/7B/72B | qwen2_vl |
| [Qwen2-VL/Qwen2.5-VL/QVQ](https://huggingface.co/Qwen) | 2B/3B/7B/32B/72B | qwen2_vl |
| [Skywork o1](https://huggingface.co/Skywork) | 8B | skywork_o1 |
| [StarCoder 2](https://huggingface.co/bigcode) | 3B/7B/15B | - |
| [TeleChat2](https://huggingface.co/Tele-AI) | 3B/7B/35B/115B | telechat2 |
@ -399,11 +399,11 @@ huggingface-cli login
| Mandatory | Minimum | Recommend |
| ------------ | ------- | --------- |
| python | 3.9 | 3.10 |
| torch | 1.13.1 | 2.5.1 |
| transformers | 4.41.2 | 4.49.0 |
| torch | 1.13.1 | 2.6.0 |
| transformers | 4.41.2 | 4.50.0 |
| datasets | 2.16.0 | 3.2.0 |
| accelerate | 0.34.0 | 1.2.1 |
| peft | 0.11.1 | 0.12.0 |
| peft | 0.14.0 | 0.15.0 |
| trl | 0.8.6 | 0.9.6 |
| Optional | Minimum | Recommend |

View File

@ -263,7 +263,7 @@ https://github.com/user-attachments/assets/43b700c6-a178-41db-b1f8-8190a5d3fcfc
| [Pixtral](https://huggingface.co/mistralai) | 12B | pixtral |
| [Qwen/QwQ (1-2.5) (Code/Math/MoE)](https://huggingface.co/Qwen) | 0.5B/1.5B/3B/7B/14B/32B/72B/110B | qwen |
| [Qwen2-Audio](https://huggingface.co/Qwen) | 7B | qwen2_audio |
| [Qwen2-VL/Qwen2.5-VL/QVQ](https://huggingface.co/Qwen) | 2B/3B/7B/72B | qwen2_vl |
| [Qwen2-VL/Qwen2.5-VL/QVQ](https://huggingface.co/Qwen) | 2B/3B/7B/32B/72B | qwen2_vl |
| [Skywork o1](https://huggingface.co/Skywork) | 8B | skywork_o1 |
| [StarCoder 2](https://huggingface.co/bigcode) | 3B/7B/15B | - |
| [TeleChat2](https://huggingface.co/Tele-AI) | 3B/7B/35B/115B | telechat2 |
@ -401,11 +401,11 @@ huggingface-cli login
| 必需项 | 至少 | 推荐 |
| ------------ | ------- | --------- |
| python | 3.9 | 3.10 |
| torch | 1.13.1 | 2.5.1 |
| transformers | 4.41.2 | 4.49.0 |
| torch | 1.13.1 | 2.6.0 |
| transformers | 4.41.2 | 4.50.0 |
| datasets | 2.16.0 | 3.2.0 |
| accelerate | 0.34.0 | 1.2.1 |
| peft | 0.11.1 | 0.12.0 |
| peft | 0.14.0 | 0.15.0 |
| trl | 0.8.6 | 0.9.6 |
| 可选项 | 至少 | 推荐 |

View File

@ -1,9 +1,9 @@
transformers>=4.41.2,<=4.50.0,!=4.46.*,!=4.47.*,!=4.48.*;python_version<'3.10' and sys_platform != 'darwin'
transformers>=4.41.2,<=4.50.0,!=4.46.*,!=4.47.*,!=4.48.0;python_version>='3.10' and sys_platform != 'darwin'
transformers>=4.41.2,<=4.49.0,!=4.46.*,!=4.47.*,!=4.48.*;sys_platform == 'darwin'
datasets>=2.16.0,<=3.3.2
accelerate>=0.34.0,<=1.4.0
peft>=0.11.1,<=0.15.0
datasets>=2.16.0,<=3.4.1
accelerate>=0.34.0,<=1.5.2
peft>=0.14.0,<=0.15.0
trl>=0.8.6,<=0.9.6
tokenizers>=0.19.0,<=0.21.0
gradio>=4.38.0,<=5.21.0

View File

@ -20,9 +20,9 @@ Level:
Dependency graph:
main:
transformers>=4.41.2,<=4.50.0,!=4.46.*,!=4.47.*,!=4.48.0
datasets>=2.16.0,<=3.3.2
accelerate>=0.34.0,<=1.4.0
peft>=0.11.1,<=0.12.0
datasets>=2.16.0,<=3.4.1
accelerate>=0.34.0,<=1.5.2
peft>=0.14.0,<=0.15.0
trl>=0.8.6,<=0.9.6
attention:
transformers>=4.42.4 (gemma+fa2)

View File

@ -2346,6 +2346,10 @@ register_model_group(
DownloadSource.DEFAULT: "Qwen/Qwen2.5-VL-7B-Instruct",
DownloadSource.MODELSCOPE: "Qwen/Qwen2.5-VL-7B-Instruct",
},
"Qwen2.5-VL-32B-Instruct": {
DownloadSource.DEFAULT: "Qwen/Qwen2.5-VL-32B-Instruct",
DownloadSource.MODELSCOPE: "Qwen/Qwen2.5-VL-32B-Instruct",
},
"Qwen2.5-VL-72B-Instruct": {
DownloadSource.DEFAULT: "Qwen/Qwen2.5-VL-72B-Instruct",
DownloadSource.MODELSCOPE: "Qwen/Qwen2.5-VL-72B-Instruct",

View File

@ -89,9 +89,9 @@ def check_version(requirement: str, mandatory: bool = False) -> None:
def check_dependencies() -> None:
r"""Check the version of the required packages."""
check_version("transformers>=4.41.2,<=4.50.0,!=4.46.0,!=4.46.1,!=4.46.2,!=4.46.3,!=4.47.0,!=4.47.1,!=4.48.0")
check_version("datasets>=2.16.0,<=3.3.2")
check_version("accelerate>=0.34.0,<=1.4.0")
check_version("peft>=0.11.1,<=0.15.0")
check_version("datasets>=2.16.0,<=3.4.1")
check_version("accelerate>=0.34.0,<=1.5.2")
check_version("peft>=0.14.0,<=0.15.0")
check_version("trl>=0.8.6,<=0.9.6")
if is_transformers_version_greater_than("4.46.0") and not is_transformers_version_greater_than("4.48.1"):
logger.warning_rank0_once("There are known bugs in transformers v4.46.0-v4.48.0, please use other versions.")

View File

@ -161,13 +161,12 @@ class PissaConvertCallback(TrainerCallback):
model.save_pretrained(pissa_backup_dir, safe_serialization=args.save_safetensors)
setattr(model.peft_config["default"], "init_lora_weights", init_lora_weights)
model.save_pretrained(
pissa_convert_dir, safe_serialization=args.save_safetensors, convert_pissa_to_lora=pissa_init_dir
) # TODO: use `path_initial_model_for_weight_conversion` (peft>=0.12.0)
pissa_convert_dir,
safe_serialization=args.save_safetensors,
path_initial_model_for_weight_conversion=pissa_init_dir,
)
model.load_adapter(pissa_backup_dir, "default", is_trainable=True)
model.set_adapter("default")
if "pissa_init" in model.peft_config.keys(): # backward compatibility (peft<0.12.0)
model.delete_adapter("pissa_init")
setattr(model.peft_config["default"], "init_lora_weights", init_lora_weights)

View File

@ -48,8 +48,6 @@ INFER_ARGS = {
"infer_dtype": "float16",
}
OS_NAME = os.getenv("OS_NAME", "")
@pytest.mark.xfail(reason="PiSSA initialization is not stable in different platform.")
def test_pissa_train():
@ -58,7 +56,7 @@ def test_pissa_train():
compare_model(model, ref_model)
@pytest.mark.xfail(OS_NAME.startswith("windows"), reason="Known connection error on Windows.")
@pytest.mark.xfail(reason="Known connection error.")
def test_pissa_inference():
model = load_infer_model(**INFER_ARGS)
ref_model = load_reference_model(TINY_LLAMA_PISSA, TINY_LLAMA_PISSA, use_pissa=True, is_trainable=False)