[v1] Add FlashAttention selection and implement normal / padding-free / dynamic batching (#10469)

2026-06-18 13:18:57 +08:00 · 2026-05-21 17:14:19 +08:00
parent 7e20db5735
commit bdcb92d035
23 changed files with 507 additions and 105 deletions
--- a/src/llamafactory/v1/utils/objects.py
+++ b/src/llamafactory/v1/utils/objects.py
@@ -33,6 +33,10 @@ class StatefulBuffer:
    def size(self) -> int:
        return self._buffer_size

+    @property
+    def samples(self) -> list[ModelInput]:
+        return self._buffer  # the internal list itself, not a copy
+
    def put(self, samples: list[ModelInput]) -> None:
        """Add samples to the buffer."""
        num_tokens = sum(len(sample["input_ids"]) for sample in samples)