Implement efficient sequence packing without cross-contamination in attention

This commit is contained in:
ancv
2024-06-12 11:56:01 +07:00
parent 972ec9c668
commit b2c367bc61
9 changed files with 287 additions and 8 deletions

View File

@@ -84,6 +84,10 @@ class DataArguments:
"help": "Whether or not to pack the sequences in training. Will automatically enable in pre-training."
},
)
# Opt-in switch for packing sequences without cross-contamination attention.
# Left as None (unset) unless the user supplies a value.
efficient_packing: Optional[bool] = field(
    metadata={
        "help": "Whether or not to pack the sequences without cross-contamination attention for efficient training."
    },
    default=None,
)
tokenized_path: Optional[str] = field(
default=None,
metadata={"help": "Path to save or load the tokenized datasets."},