mirror of
https://github.com/hiyouga/LLaMA-Factory.git
synced 2026-05-28 19:08:57 +08:00
[train] add qwen35 patch for neat_packing (#10436)
This commit is contained in:
@@ -471,8 +471,8 @@ class SFTDataCollatorWith4DAttentionMask(MultiModalDataCollatorForSeq2Seq):
|
||||
def __post_init__(self):
|
||||
super().__post_init__()
|
||||
if self.neat_packing and self.attn_implementation == "flash_attention_2":
|
||||
if self.model is not None and getattr(self.model.config, "model_type", None) in ["qwen3_5", "qwen3_5_moe", "gpt_oss"]:
|
||||
raise ValueError("Neat packing is not supported for qwen3_5, qwen3_5_moe, gpt_oss models for now.")
|
||||
if self.model is not None and getattr(self.model.config, "model_type", None) in ["gemma4", "gpt_oss"]:
|
||||
raise ValueError("Neat packing is not supported for gemma4, gpt_oss models for now.")
|
||||
|
||||
@staticmethod
|
||||
def _unpad_packed_features(features: dict[str, Any]) -> None:
|
||||
|
||||
Reference in New Issue
Block a user