From 995491594d613a33bc116cc5011c18f5e1f0027b Mon Sep 17 00:00:00 2001
From: hiyouga <467089858@qq.com>
Date: Thu, 5 Sep 2024 23:41:16 +0800
Subject: [PATCH] tiny fix

Former-commit-id: 76f2e5950483c669a15a961f0554442b6eb5c4a6
---
 src/llamafactory/__init__.py                   | 4 ++--
 src/llamafactory/data/collator.py              | 4 ++--
 src/llamafactory/data/processors/pretrain.py   | 2 +-
 src/llamafactory/model/model_utils/longlora.py | 2 +-
 src/llamafactory/model/model_utils/packing.py  | 2 +-
 5 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/llamafactory/__init__.py b/src/llamafactory/__init__.py
index 18f04b0a..07b6d0ad 100644
--- a/src/llamafactory/__init__.py
+++ b/src/llamafactory/__init__.py
@@ -28,9 +28,9 @@ Dependency graph:
   attention:
     transformers>=4.42.4 (gemma+fa2)
   longlora:
-    transformers>=4.41.2,<=4.44.3
+    transformers>=4.41.2,<=4.45.0
   packing:
-    transformers>=4.41.2,<=4.44.3
+    transformers>=4.41.2,<=4.45.0
 
 Disable version checking: DISABLE_VERSION_CHECK=1
 Enable VRAM recording: RECORD_VRAM=1
diff --git a/src/llamafactory/data/collator.py b/src/llamafactory/data/collator.py
index d86c5c43..92d86cc7 100644
--- a/src/llamafactory/data/collator.py
+++ b/src/llamafactory/data/collator.py
@@ -81,8 +81,8 @@ class MultiModalDataCollatorForSeq2Seq(DataCollatorForSeq2Seq):
     def __call__(self, features: Sequence[Dict[str, Any]]) -> Dict[str, "torch.Tensor"]:
         batch_images, batch_videos, batch_imglens, batch_vidlens, batch_seqlens = [], [], [], [], []
         for feature in features:
-            images = feature.pop("images") or []  # avoid NoneType
-            videos = feature.pop("videos") or []
+            images = feature.pop("images", None) or []
+            videos = feature.pop("videos", None) or []
             batch_images.extend(images)
             batch_videos.extend(videos)
             batch_imglens.append(len(images))
diff --git a/src/llamafactory/data/processors/pretrain.py b/src/llamafactory/data/processors/pretrain.py
index 77282bad..6d6b98d6 100644
--- a/src/llamafactory/data/processors/pretrain.py
+++ b/src/llamafactory/data/processors/pretrain.py
@@ -36,7 +36,7 @@ def preprocess_pretrain_dataset(
         if data_args.template == "gemma":
             text_examples = [tokenizer.bos_token + example for example in text_examples]
 
-        result = tokenizer(text_examples, add_special_tokens=False, max_length=data_args.cutoff_len, truncation=True)
+        result = tokenizer(text_examples, add_special_tokens=False, truncation=True, max_length=data_args.cutoff_len)
     else:
         tokenized_examples = tokenizer(text_examples, add_special_tokens=False)
         concatenated_examples = {k: list(chain(*tokenized_examples[k])) for k in tokenized_examples.keys()}
diff --git a/src/llamafactory/model/model_utils/longlora.py b/src/llamafactory/model/model_utils/longlora.py
index ef39bcd9..b341653a 100644
--- a/src/llamafactory/model/model_utils/longlora.py
+++ b/src/llamafactory/model/model_utils/longlora.py
@@ -353,7 +353,7 @@ def llama_sdpa_attention_forward(
 
 
 def _apply_llama_patch() -> None:
-    require_version("transformers>=4.41.2,<=4.44.3", "To fix: pip install transformers>=4.41.2,<=4.44.3")
+    require_version("transformers>=4.41.2,<=4.45.0", "To fix: pip install transformers>=4.41.2,<=4.45.0")
     LlamaAttention.forward = llama_attention_forward
     LlamaFlashAttention2.forward = llama_flash_attention_2_forward
     LlamaSdpaAttention.forward = llama_sdpa_attention_forward
diff --git a/src/llamafactory/model/model_utils/packing.py b/src/llamafactory/model/model_utils/packing.py
index 3d7f2dad..d52731b8 100644
--- a/src/llamafactory/model/model_utils/packing.py
+++ b/src/llamafactory/model/model_utils/packing.py
@@ -114,7 +114,7 @@ def get_unpad_data(attention_mask: "torch.Tensor") -> Tuple["torch.Tensor", "tor
 
 
 def _patch_for_block_diag_attn(model_type: str) -> None:
-    require_version("transformers>=4.41.2,<=4.44.3", "To fix: pip install transformers>=4.41.2,<=4.44.3")
+    require_version("transformers>=4.41.2,<=4.45.0", "To fix: pip install transformers>=4.41.2,<=4.45.0")
     if is_transformers_version_greater_than_4_43():
         import transformers.modeling_flash_attention_utils
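
A note on the collator.py hunk: feature.pop("images") raises KeyError for text-only features that never set the key, while the "or []" guard alone only covers an explicit None value. The new feature.pop("images", None) or [] form handles both cases. The sketch below is only an illustration of that pattern; pop_media is a hypothetical helper name, not part of the repository.

    from typing import Any, Dict, List

    def pop_media(feature: Dict[str, Any], key: str) -> List[Any]:
        # pop(key, None) returns None for a missing key instead of raising KeyError;
        # "or []" then normalizes both a missing key and an explicit None to [].
        return feature.pop(key, None) or []

    assert pop_media({"input_ids": [1, 2, 3]}, "images") == []            # key absent
    assert pop_media({"images": None}, "images") == []                    # explicit None
    assert pop_media({"images": ["img0.png"]}, "images") == ["img0.png"]  # regular case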
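
A note on the packing.py hunk: _patch_for_block_diag_attn swaps the unpad helper used by the flash-attention code path in transformers so that sequences packed into one row attend only within their own segment (block-diagonal attention). The sketch below is only an illustration of that idea, assuming the attention mask stores per-token segment ids (1, 2, 3, ..., with 0 for padding); unpad_data_sketch and its exact logic are assumptions for illustration, not the repository's implementation.

    import torch
    import torch.nn.functional as F

    def unpad_data_sketch(attention_mask: torch.Tensor):
        # Assumed mask format: each row labels tokens with a segment id, e.g.
        # [[1, 1, 2, 2, 2, 0]] packs a length-2 and a length-3 sequence plus padding.
        seqlens = []
        for row in attention_mask:
            for seg_id in range(1, int(row.max().item()) + 1):
                seqlens.append(int((row == seg_id).sum().item()))
        seqlens = torch.tensor(seqlens, dtype=torch.int32)

        # Varlen flash-attention kernels consume the flat indices of real tokens plus
        # cumulative segment lengths; each segment then attends only within its block.
        indices = torch.nonzero(attention_mask.flatten() != 0).flatten()
        cu_seqlens = F.pad(torch.cumsum(seqlens, dim=0, dtype=torch.int32), (1, 0))
        max_seqlen = int(seqlens.max().item())
        return indices, cu_seqlens, max_seqlen

    mask = torch.tensor([[1, 1, 2, 2, 2, 0]])
    _, cu_seqlens, max_seqlen = unpad_data_sketch(mask)
    assert cu_seqlens.tolist() == [0, 2, 5] and max_seqlen == 3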