[v1] Add FlashAttention selection and implement normal / padding-free / dynamic batching (#10469)

2026-06-17 12:48:55 +08:00 · 2026-05-21 17:14:19 +08:00
parent 7e20db5735
commit bdcb92d035
23 changed files with 507 additions and 105 deletions
--- a/src/llamafactory/v1/config/__init__.py
+++ b/src/llamafactory/v1/config/__init__.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+from ..utils.types import AttentionFunction
 from .arg_parser import InputArgument, get_args
 from .arg_utils import BatchingStrategy, ModelClass, SampleBackend
 from .data_args import DataArguments
@@ -21,6 +22,7 @@ from .training_args import TrainingArguments


 __all__ = [
+    "AttentionFunction",
    "BatchingStrategy",
    "DataArguments",
    "InputArgument",