fix shift short attention

2025-12-16 20:00:36 +08:00 · 2023-10-09 17:07:46 +08:00
parent b8dbec086e
commit ab65c3063b
6 changed files with 46 additions and 52 deletions
--- a/src/llmtuner/tuner/core/parser.py
+++ b/src/llmtuner/tuner/core/parser.py
@@ -149,6 +149,9 @@ def get_train_args(
    if general_args.stage == "ppo" and data_args.streaming:
        raise ValueError("Streaming mode does not suppport PPO training currently.")

+    if general_args.stage == "ppo" and model_args.shift_attn:
+        raise ValueError("PPO training is incompatible with S^2-Attn.")
+
    if training_args.max_steps == -1 and data_args.streaming:
        raise ValueError("Please specify `max_steps` in streaming mode.")