[data] Fix wrong position ids with packed attention masks (#7754)

Co-authored-by: hoshi-hiyouga <hiyouga@buaa.edu.cn>
2025-11-08 14:24:47 +08:00 · 2025-04-21 16:19:36 +01:00 · 2025-04-21 16:19:36 +01:00 · 81768df04c
commit 81768df04c
parent 1302ca39f6
1 changed file with 1 addition and 1 deletion
--- a/src/llamafactory/data/collator.py
+++ b/src/llamafactory/data/collator.py
@@ -176,7 +176,7 @@ class MultiModalDataCollatorForSeq2Seq(DataCollatorForSeq2Seq):
                "input_ids": features["input_ids"],
                "image_grid_thw": mm_inputs.get("image_grid_thw"),
                "video_grid_thw": mm_inputs.get("video_grid_thw"),
-                "attention_mask": features["attention_mask"],
+                "attention_mask": (features["attention_mask"] >= 1).float(),
            }
            if "second_per_grid_ts" in mm_inputs:  # for qwen2vl
                rope_index_kwargs["second_per_grid_ts"] = mm_inputs.get("second_per_grid_ts")