fix gemma2 attention

Former-commit-id: 2f6af73da2
hiyouga
2024-07-13 23:33:45 +08:00
parent fb387ae1c3
commit 0b26011181
7 changed files with 53 additions and 26 deletions

@@ -28,11 +28,10 @@ def prepare_4d_attention_mask(attention_mask_with_indices: "torch.Tensor", dtype
 while handles packed sequences and transforms the mask to lower triangular form to prevent future peeking.
 e.g.
-```
+```python
+# input
 [[1, 1, 2, 2, 2, 0]]
-```
-->
-```
+# output
 [
     [
         [
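
For context, below is a minimal sketch of how such a packed-sequence 4D attention mask can be built in PyTorch. This is an illustrative reconstruction under assumptions (the helper name `build_4d_packed_attention_mask` and the exact masking steps are mine), not necessarily the repository's actual implementation of `prepare_4d_attention_mask`:

```python
import torch


def build_4d_packed_attention_mask(mask_with_indices: torch.Tensor, dtype: torch.dtype) -> torch.Tensor:
    """Sketch: expand a (batch_size, seq_len) packed-sequence index mask to (batch_size, 1, seq_len, seq_len)."""
    bsz, seq_len = mask_with_indices.size()
    min_dtype = torch.finfo(dtype).min
    device = mask_with_indices.device

    # Broadcast the per-token sequence indices along a new query dimension.
    expanded = mask_with_indices[:, None, None, :].expand(bsz, 1, seq_len, seq_len)
    # Padding tokens carry index 0 and must never be attended to.
    padding = (expanded != 0).long()
    # A token may only attend to tokens of the same packed sequence (block-diagonal structure).
    allowed = torch.eq(expanded, expanded.transpose(-1, -2)).long() * padding
    # Lower-triangular (causal) constraint to prevent attending to future positions.
    allowed = allowed * torch.tril(torch.ones((seq_len, seq_len), dtype=torch.long, device=device))
    # Allowed positions become 0.0 ("o" in the docstring), disallowed ones the dtype minimum ("x").
    return torch.where(
        allowed.bool(),
        torch.tensor(0.0, dtype=dtype, device=device),
        torch.tensor(min_dtype, dtype=dtype, device=device),
    )


# Example matching the docstring: two packed sequences of lengths 2 and 3, plus one padding token.
mask = torch.tensor([[1, 1, 2, 2, 2, 0]])
print(build_4d_packed_attention_mask(mask, torch.float32).shape)  # torch.Size([1, 1, 6, 6])
```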