Verify that the `mask_history` argument is used in a valid configuration

Former-commit-id: 2f8388b4f4195d934400ad9267d72e10ca4105a3
2025-10-16 16:48:11 +08:00 · 2024-08-08 10:12:01 +08:00 · 2024-08-08 10:12:01 +08:00 · 6ec64a7e56
commit 6ec64a7e56
parent d71446e387
2 changed files with 5 additions and 0 deletions
--- a/src/llamafactory/data/loader.py
+++ b/src/llamafactory/data/loader.py
@@ -206,6 +206,8 @@ def get_dataset(
    template = get_template_and_fix_tokenizer(tokenizer, data_args.template, data_args.tool_format)
    if data_args.train_on_prompt and template.efficient_eos:
        raise ValueError("Current template does not support `train_on_prompt`.")
    if stage!="sft" and data_args.mask_history:
        raise ValueError("`Train on the last turn only` is only valid for sft training.")
    # Load tokenized dataset
    if data_args.tokenized_path is not None:
--- a/src/llamafactory/hparams/data_args.py
+++ b/src/llamafactory/hparams/data_args.py
@@ -141,3 +141,6 @@ class DataArguments:
        if self.streaming and self.max_samples is not None:
            raise ValueError("`max_samples` is incompatible with `streaming`.")
        if self.mask_history and self.train_on_prompt:
            raise ValueError("`Train on the last turn only` does not support `train_on_prompt`.")