From bd7bc31c797dcbfe83e333e0e29a4091e10e58b7 Mon Sep 17 00:00:00 2001
From: Changrui Chen
Date: Mon, 21 Apr 2025 16:19:36 +0100
Subject: [PATCH] [data] Fix wrong position ids with packed attention masks
 (#7754)

Co-authored-by: hoshi-hiyouga
---
 src/llamafactory/data/collator.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/llamafactory/data/collator.py b/src/llamafactory/data/collator.py
index db0562d9..3fb08f4b 100644
--- a/src/llamafactory/data/collator.py
+++ b/src/llamafactory/data/collator.py
@@ -176,7 +176,7 @@ class MultiModalDataCollatorForSeq2Seq(DataCollatorForSeq2Seq):
                 "input_ids": features["input_ids"],
                 "image_grid_thw": mm_inputs.get("image_grid_thw"),
                 "video_grid_thw": mm_inputs.get("video_grid_thw"),
-                "attention_mask": features["attention_mask"],
+                "attention_mask": (features["attention_mask"] >= 1).float(),
             }
             if "second_per_grid_ts" in mm_inputs:  # for qwen2vl
                 rope_index_kwargs["second_per_grid_ts"] = mm_inputs.get("second_per_grid_ts")
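
Note on the one-line change (not part of the patch itself): a minimal sketch of
why binarizing the mask matters, assuming (as in LLaMA-Factory's packed-sequence
collation) that the attention mask tags each token with a per-sequence index
(1, 2, 3, ...) and uses 0 for padding, while Qwen2-VL's get_rope_index expects a
plain 0/1 mask. The tensor values below are illustrative only.

    import torch

    # Hypothetical packed batch: three sequences packed into one row.
    # The packed mask stores each token's sequence index; 0 marks padding.
    packed_mask = torch.tensor([[1, 1, 1, 2, 2, 3, 0, 0]])

    # Before the fix, tokens whose mask value was 2 or 3 were not treated
    # as valid by the binary-mask-expecting rope-index logic, yielding
    # wrong position ids. Binarizing keeps every real token:
    binary_mask = (packed_mask >= 1).float()
    print(binary_mask)  # tensor([[1., 1., 1., 1., 1., 1., 0., 0.]])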