support LongLoRA

Former-commit-id: 0832ed37e7947d699f17375648a52f80752c2b6b
This commit is contained in:
hiyouga
2023-09-27 21:55:50 +08:00
parent 73c48d0463
commit 20130b486c
8 changed files with 313 additions and 329 deletions

View File

@@ -45,7 +45,11 @@ class ModelArguments:
     )
     flash_attn: Optional[bool] = field(
         default=False,
-        metadata={"help": "Enable flash attention for faster training."}
+        metadata={"help": "Enable FlashAttention-2 for faster training."}
+    )
+    shift_attn: Optional[bool] = field(
+        default=False,
+        metadata={"help": "Enable shift short attention (S^2-Attn) proposed by LongLoRA."}
     )
     checkpoint_dir: Optional[str] = field(
         default=None,