diff --git a/.github/ISSUE_TEMPLATE/1-bug-report.yml b/.github/ISSUE_TEMPLATE/1-bug-report.yml index 4645bac9..a08596fa 100644 --- a/.github/ISSUE_TEMPLATE/1-bug-report.yml +++ b/.github/ISSUE_TEMPLATE/1-bug-report.yml @@ -12,7 +12,7 @@ body: attributes: value: | Please do not create issues that are not related to framework bugs under this category, use **[Discussions](https://github.com/hiyouga/LLaMA-Factory/discussions/categories/q-a)** instead. - 请勿在此分类下创建和框架 bug 无关的 issues,请使用 **[讨论区](https://github.com/hiyouga/LLaMA-Factory/discussions/categories/q-a)**。 + 请勿在此分类下创建和框架 bug 无关的 issues,训练问题求助请使用 **[讨论区](https://github.com/hiyouga/LLaMA-Factory/discussions/categories/q-a)**。 - type: checkboxes id: reminder diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 37848158..158fddf3 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -59,6 +59,7 @@ jobs: python -m pip install ".[torch,dev]" - name: Cache files + id: hf-hub-cache uses: actions/cache@v4 with: path: ${{ runner.temp }}/huggingface @@ -77,3 +78,4 @@ jobs: make test env: HF_HOME: ${{ runner.temp }}/huggingface + HF_HUB_OFFLINE: "${{ steps.hf-hub-cache.outputs.cache-hit == 'true' && '1' || '0' }}" diff --git a/README.md b/README.md index 4b20cbc0..5b34b915 100644 --- a/README.md +++ b/README.md @@ -261,7 +261,7 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/ | [Pixtral](https://huggingface.co/mistralai) | 12B | pixtral | | [Qwen/QwQ (1-2.5) (Code/Math/MoE)](https://huggingface.co/Qwen) | 0.5B/1.5B/3B/7B/14B/32B/72B/110B | qwen | | [Qwen2-Audio](https://huggingface.co/Qwen) | 7B | qwen2_audio | -| [Qwen2-VL/Qwen2.5-VL/QVQ](https://huggingface.co/Qwen) | 2B/3B/7B/72B | qwen2_vl | +| [Qwen2-VL/Qwen2.5-VL/QVQ](https://huggingface.co/Qwen) | 2B/3B/7B/32B/72B | qwen2_vl | | [Skywork o1](https://huggingface.co/Skywork) | 8B | skywork_o1 | | [StarCoder 2](https://huggingface.co/bigcode) | 3B/7B/15B | - | | [TeleChat2](https://huggingface.co/Tele-AI) | 3B/7B/35B/115B | telechat2 | @@ -399,11 +399,11 @@ huggingface-cli login | Mandatory | Minimum | Recommend | | ------------ | ------- | --------- | | python | 3.9 | 3.10 | -| torch | 1.13.1 | 2.5.1 | -| transformers | 4.41.2 | 4.49.0 | +| torch | 1.13.1 | 2.6.0 | +| transformers | 4.41.2 | 4.50.0 | | datasets | 2.16.0 | 3.2.0 | | accelerate | 0.34.0 | 1.2.1 | -| peft | 0.11.1 | 0.12.0 | +| peft | 0.14.0 | 0.15.0 | | trl | 0.8.6 | 0.9.6 | | Optional | Minimum | Recommend | diff --git a/README_zh.md b/README_zh.md index ed67e804..344ac488 100644 --- a/README_zh.md +++ b/README_zh.md @@ -263,7 +263,7 @@ https://github.com/user-attachments/assets/43b700c6-a178-41db-b1f8-8190a5d3fcfc | [Pixtral](https://huggingface.co/mistralai) | 12B | pixtral | | [Qwen/QwQ (1-2.5) (Code/Math/MoE)](https://huggingface.co/Qwen) | 0.5B/1.5B/3B/7B/14B/32B/72B/110B | qwen | | [Qwen2-Audio](https://huggingface.co/Qwen) | 7B | qwen2_audio | -| [Qwen2-VL/Qwen2.5-VL/QVQ](https://huggingface.co/Qwen) | 2B/3B/7B/72B | qwen2_vl | +| [Qwen2-VL/Qwen2.5-VL/QVQ](https://huggingface.co/Qwen) | 2B/3B/7B/32B/72B | qwen2_vl | | [Skywork o1](https://huggingface.co/Skywork) | 8B | skywork_o1 | | [StarCoder 2](https://huggingface.co/bigcode) | 3B/7B/15B | - | | [TeleChat2](https://huggingface.co/Tele-AI) | 3B/7B/35B/115B | telechat2 | @@ -401,11 +401,11 @@ huggingface-cli login | 必需项 | 至少 | 推荐 | | ------------ | ------- | --------- | | python | 3.9 | 3.10 | -| torch | 1.13.1 | 2.5.1 | -| transformers | 4.41.2 | 4.49.0 | +| torch | 1.13.1 | 2.6.0 | +| transformers | 4.41.2 | 4.50.0 | | datasets | 2.16.0 | 3.2.0 | | accelerate | 0.34.0 | 1.2.1 | -| peft | 0.11.1 | 0.12.0 | +| peft | 0.14.0 | 0.15.0 | | trl | 0.8.6 | 0.9.6 | | 可选项 | 至少 | 推荐 | diff --git a/requirements.txt b/requirements.txt index 204e7534..df0af999 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,9 +1,9 @@ transformers>=4.41.2,<=4.50.0,!=4.46.*,!=4.47.*,!=4.48.*;python_version<'3.10' and sys_platform != 'darwin' transformers>=4.41.2,<=4.50.0,!=4.46.*,!=4.47.*,!=4.48.0;python_version>='3.10' and sys_platform != 'darwin' transformers>=4.41.2,<=4.49.0,!=4.46.*,!=4.47.*,!=4.48.*;sys_platform == 'darwin' -datasets>=2.16.0,<=3.3.2 -accelerate>=0.34.0,<=1.4.0 -peft>=0.11.1,<=0.15.0 +datasets>=2.16.0,<=3.4.1 +accelerate>=0.34.0,<=1.5.2 +peft>=0.14.0,<=0.15.0 trl>=0.8.6,<=0.9.6 tokenizers>=0.19.0,<=0.21.0 gradio>=4.38.0,<=5.21.0 diff --git a/src/llamafactory/__init__.py b/src/llamafactory/__init__.py index b23f3120..daf58b97 100644 --- a/src/llamafactory/__init__.py +++ b/src/llamafactory/__init__.py @@ -20,9 +20,9 @@ Level: Dependency graph: main: transformers>=4.41.2,<=4.50.0,!=4.46.*,!=4.47.*,!=4.48.0 - datasets>=2.16.0,<=3.3.2 - accelerate>=0.34.0,<=1.4.0 - peft>=0.11.1,<=0.12.0 + datasets>=2.16.0,<=3.4.1 + accelerate>=0.34.0,<=1.5.2 + peft>=0.14.0,<=0.15.0 trl>=0.8.6,<=0.9.6 attention: transformers>=4.42.4 (gemma+fa2) diff --git a/src/llamafactory/extras/constants.py b/src/llamafactory/extras/constants.py index ee2b9e66..d0cd4891 100644 --- a/src/llamafactory/extras/constants.py +++ b/src/llamafactory/extras/constants.py @@ -2346,6 +2346,10 @@ register_model_group( DownloadSource.DEFAULT: "Qwen/Qwen2.5-VL-7B-Instruct", DownloadSource.MODELSCOPE: "Qwen/Qwen2.5-VL-7B-Instruct", }, + "Qwen2.5-VL-32B-Instruct": { + DownloadSource.DEFAULT: "Qwen/Qwen2.5-VL-32B-Instruct", + DownloadSource.MODELSCOPE: "Qwen/Qwen2.5-VL-32B-Instruct", + }, "Qwen2.5-VL-72B-Instruct": { DownloadSource.DEFAULT: "Qwen/Qwen2.5-VL-72B-Instruct", DownloadSource.MODELSCOPE: "Qwen/Qwen2.5-VL-72B-Instruct", diff --git a/src/llamafactory/extras/misc.py b/src/llamafactory/extras/misc.py index f5156d08..b87829ea 100644 --- a/src/llamafactory/extras/misc.py +++ b/src/llamafactory/extras/misc.py @@ -89,9 +89,9 @@ def check_version(requirement: str, mandatory: bool = False) -> None: def check_dependencies() -> None: r"""Check the version of the required packages.""" check_version("transformers>=4.41.2,<=4.50.0,!=4.46.0,!=4.46.1,!=4.46.2,!=4.46.3,!=4.47.0,!=4.47.1,!=4.48.0") - check_version("datasets>=2.16.0,<=3.3.2") - check_version("accelerate>=0.34.0,<=1.4.0") - check_version("peft>=0.11.1,<=0.15.0") + check_version("datasets>=2.16.0,<=3.4.1") + check_version("accelerate>=0.34.0,<=1.5.2") + check_version("peft>=0.14.0,<=0.15.0") check_version("trl>=0.8.6,<=0.9.6") if is_transformers_version_greater_than("4.46.0") and not is_transformers_version_greater_than("4.48.1"): logger.warning_rank0_once("There are known bugs in transformers v4.46.0-v4.48.0, please use other versions.") diff --git a/src/llamafactory/train/callbacks.py b/src/llamafactory/train/callbacks.py index d6d8ecf7..02c1cf2d 100644 --- a/src/llamafactory/train/callbacks.py +++ b/src/llamafactory/train/callbacks.py @@ -161,13 +161,12 @@ class PissaConvertCallback(TrainerCallback): model.save_pretrained(pissa_backup_dir, safe_serialization=args.save_safetensors) setattr(model.peft_config["default"], "init_lora_weights", init_lora_weights) model.save_pretrained( - pissa_convert_dir, safe_serialization=args.save_safetensors, convert_pissa_to_lora=pissa_init_dir - ) # TODO: use `path_initial_model_for_weight_conversion` (peft>=0.12.0) + pissa_convert_dir, + safe_serialization=args.save_safetensors, + path_initial_model_for_weight_conversion=pissa_init_dir, + ) model.load_adapter(pissa_backup_dir, "default", is_trainable=True) model.set_adapter("default") - if "pissa_init" in model.peft_config.keys(): # backward compatibility (peft<0.12.0) - model.delete_adapter("pissa_init") - setattr(model.peft_config["default"], "init_lora_weights", init_lora_weights) diff --git a/tests/model/test_pissa.py b/tests/model/test_pissa.py index 875a3bf4..08863a07 100644 --- a/tests/model/test_pissa.py +++ b/tests/model/test_pissa.py @@ -48,8 +48,6 @@ INFER_ARGS = { "infer_dtype": "float16", } -OS_NAME = os.getenv("OS_NAME", "") - @pytest.mark.xfail(reason="PiSSA initialization is not stable in different platform.") def test_pissa_train(): @@ -58,7 +56,7 @@ def test_pissa_train(): compare_model(model, ref_model) -@pytest.mark.xfail(OS_NAME.startswith("windows"), reason="Known connection error on Windows.") +@pytest.mark.xfail(reason="Known connection error.") def test_pissa_inference(): model = load_infer_model(**INFER_ARGS) ref_model = load_reference_model(TINY_LLAMA_PISSA, TINY_LLAMA_PISSA, use_pissa=True, is_trainable=False)