From bff1b945838704a10f47c435242edc9792e17e06 Mon Sep 17 00:00:00 2001 From: hiyouga Date: Tue, 17 Dec 2024 10:26:19 +0000 Subject: [PATCH] generalized packing & fix #6343 Former-commit-id: 2d107d3aefd5af61163056634c8b91fe3cb3e77c --- src/llamafactory/__init__.py | 2 +- src/llamafactory/extras/constants.py | 13 ----- src/llamafactory/model/model_utils/packing.py | 50 +++---------------- src/llamafactory/model/patcher.py | 2 +- 4 files changed, 10 insertions(+), 57 deletions(-) diff --git a/src/llamafactory/__init__.py b/src/llamafactory/__init__.py index 9bc4a0db..1b0d8ed0 100644 --- a/src/llamafactory/__init__.py +++ b/src/llamafactory/__init__.py @@ -30,7 +30,7 @@ Dependency graph: longlora: transformers>=4.41.2,<=4.46.1 packing: - transformers>=4.41.2,<=4.46.1 + transformers>=4.43.0,<=4.46.1 Disable version checking: DISABLE_VERSION_CHECK=1 Enable VRAM recording: RECORD_VRAM=1 diff --git a/src/llamafactory/extras/constants.py b/src/llamafactory/extras/constants.py index 39b02cf4..c4d0fd84 100644 --- a/src/llamafactory/extras/constants.py +++ b/src/llamafactory/extras/constants.py @@ -81,19 +81,6 @@ TRAINING_STAGES = { STAGES_USE_PAIR_DATA = {"rm", "dpo"} -SUPPORTED_CLASS_FOR_BLOCK_DIAG_ATTN = { - "cohere", - "falcon", - "gemma", - "gemma2", - "llama", - "mistral", - "phi", - "phi3", - "qwen2", - "starcoder2", -} - SUPPORTED_CLASS_FOR_S2ATTN = {"llama"} VIDEO_PLACEHOLDER = os.environ.get("VIDEO_PLACEHOLDER", "