fix #6482

2025-12-15 19:30:36 +08:00 · 2024-12-30 05:55:15 +00:00
parent b55890291b
commit 6f5bb3b8e5
7 changed files with 26 additions and 11 deletions
--- a/src/llamafactory/data/collator.py
+++ b/src/llamafactory/data/collator.py
@@ -168,6 +168,10 @@ class SFTDataCollatorWith4DAttentionMask(MultiModalDataCollatorForSeq2Seq):
        if self.block_diag_attn and self.attn_implementation != "flash_attention_2":
            features["attention_mask"] = prepare_4d_attention_mask(features["attention_mask"], self.compute_dtype)

+        for key, value in features.items():  # cast data dtype for paligemma
+            if torch.is_tensor(value) and torch.is_floating_point(value):
+                features[key] = value.to(self.compute_dtype)
+
        return features