Added Mixture of Depths

This commit is contained in:
Marco
2024-04-18 20:31:24 +02:00
parent 2aaaede247
commit 620add7b9f
10 changed files with 103 additions and 6 deletions

View File

@@ -69,6 +69,10 @@ class ModelArguments:
default=False,
metadata={"help": "Enable shift short attention (S^2-Attn) proposed by LongLoRA."},
)
# Mixture-of-Depths (MoD) mode selector. Optional: defaults to None, otherwise
# the value must be one of the literal strings "convert" or "continue".
# NOTE(review): the help text below reads like an on/off flag and does not
# explain the two modes -- presumably "convert" turns a model into MoD and
# "continue" resumes with an already-converted one; confirm against the code
# that consumes this field and document it.
mixture_of_depths: Optional[Literal["convert", "continue"]] = field(
default=None,
metadata={"help": "Whether or not to use MoD in the model."},
)
use_unsloth: bool = field(
default=False,
metadata={"help": "Whether or not to use unsloth's optimization for the LoRA training."},