From 62c12a133e8cb47a5004c83c58164ff72b7191a1 Mon Sep 17 00:00:00 2001 From: fzc8578 <1428195643@qq.com> Date: Sat, 11 Jan 2025 01:10:24 +0800 Subject: [PATCH] add some Former-commit-id: a650e114e907278ece188922467c2514de544eeb --- src/llamafactory/data/collator.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/llamafactory/data/collator.py b/src/llamafactory/data/collator.py index 1c422ebf..5e599653 100644 --- a/src/llamafactory/data/collator.py +++ b/src/llamafactory/data/collator.py @@ -152,14 +152,11 @@ class MultiModalDataCollatorForSeq2Seq(DataCollatorForSeq2Seq): features.update(mm_inputs) if isinstance(features.get("pixel_values"), list): # for pixtral inputs features = features.data # use default_collate() instead of BatchEncoding.to() - + if "image_bound" in features: # for minicpmv inputs features["position_ids"] = [torch.arange(input_ids.size(0)).long() for input_ids in features["input_ids"]] features["position_ids"] = pad_sequence(features["position_ids"], batch_first=True, padding_value=0) - features["labels"] = pad_sequence(features["labels"], batch_first=True, padding_value=-100) - features["attention_mask"] = pad_sequence(features["attention_mask"], batch_first=True, padding_value=0) - new_features = {} - new_features.update({"data": features}) + new_features = {"data": features} new_features.update(features) features = new_features