Update pretrain.py

Former-commit-id: 0c292332374fb96c3fc753abde42d070a0c1dca8
2025-08-23 22:32:54 +08:00 · 2024-06-11 17:02:14 +08:00 · 2024-06-11 17:02:14 +08:00 · bf3de9bfe8
commit bf3de9bfe8
parent da39715085
1 changed file with 1 addition and 1 deletion
--- a/src/llamafactory/data/processors/pretrain.py
+++ b/src/llamafactory/data/processors/pretrain.py
@@ -12,7 +12,7 @@ def preprocess_pretrain_dataset(
    examples: Dict[str, List[Any]], tokenizer: "PreTrainedTokenizer", data_args: "DataArguments"
 ) -> Dict[str, List[List[int]]]:
    # build grouped texts with format `X1 X2 X3 ...` if packing is enabled
-    eos_token = '<|end_of_text|>' if data_args.template == 'llama3' else  tokenizer.eos_token
+    eos_token = "<|end_of_text|>" if data_args.template == "llama3" else tokenizer.eos_token
    text_examples = [messages[0]["content"] + eos_token for messages in examples["prompt"]]
    if not data_args.packing: