mirror of
https://github.com/hiyouga/LLaMA-Factory.git
synced 2025-12-16 20:00:36 +08:00
move efficient_packing from data_args to model_args
This commit is contained in:
@@ -97,12 +97,6 @@ class DataArguments:
|
||||
"help": "Whether or not to pack the sequences in training. Will automatically enable in pre-training."
|
||||
},
|
||||
)
|
||||
efficient_packing: Optional[bool] = field(
|
||||
default=None,
|
||||
metadata={
|
||||
"help": "Whether or not to pack the sequences without cross-contamination attention for efficient training."
|
||||
},
|
||||
)
|
||||
tool_format: Optional[str] = field(
|
||||
default=None,
|
||||
metadata={"help": "Tool format to use for constructing function calling examples."},
|
||||
|
||||
@@ -109,6 +109,12 @@ class ModelArguments:
|
||||
default=False,
|
||||
metadata={"help": "Enable shift short attention (S^2-Attn) proposed by LongLoRA."},
|
||||
)
|
||||
efficient_packing: Optional[bool] = field(
|
||||
default=None,
|
||||
metadata={
|
||||
"help": "Whether or not to pack the sequences without cross-contamination attention for efficient training."
|
||||
},
|
||||
)
|
||||
mixture_of_depths: Optional[Literal["convert", "load"]] = field(
|
||||
default=None,
|
||||
metadata={"help": "Convert the model to mixture-of-depths (MoD) or load the MoD model."},
|
||||
|
||||
@@ -170,6 +170,9 @@ def get_train_args(args: Optional[Dict[str, Any]] = None) -> _TRAIN_CLS:
|
||||
if finetuning_args.stage == "ppo" and model_args.shift_attn:
|
||||
raise ValueError("PPO training is incompatible with S^2-Attn.")
|
||||
|
||||
if finetuning_args.stage != "sft" and model_args.efficient_packing:
|
||||
raise ValueError("`efficient_packing` cannot be set as True except SFT.")
|
||||
|
||||
if finetuning_args.stage == "ppo" and finetuning_args.reward_model_type == "lora" and model_args.use_unsloth:
|
||||
raise ValueError("Unsloth does not support lora reward model.")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user