From bf3de9bfe851916a7cfae9c267a2a61b276951f2 Mon Sep 17 00:00:00 2001
From: hoshi-hiyouga
Date: Tue, 11 Jun 2024 17:02:14 +0800
Subject: [PATCH] Update pretrain.py

Former-commit-id: 0c292332374fb96c3fc753abde42d070a0c1dca8
---
 src/llamafactory/data/processors/pretrain.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/llamafactory/data/processors/pretrain.py b/src/llamafactory/data/processors/pretrain.py
index 4050f74c..832c987e 100644
--- a/src/llamafactory/data/processors/pretrain.py
+++ b/src/llamafactory/data/processors/pretrain.py
@@ -12,7 +12,7 @@ def preprocess_pretrain_dataset(
     examples: Dict[str, List[Any]], tokenizer: "PreTrainedTokenizer", data_args: "DataArguments"
 ) -> Dict[str, List[List[int]]]:
     # build grouped texts with format `X1 X2 X3 ...` if packing is enabled
-    eos_token = '<|end_of_text|>' if data_args.template == 'llama3' else tokenizer.eos_token
+    eos_token = "<|end_of_text|>" if data_args.template == "llama3" else tokenizer.eos_token
     text_examples = [messages[0]["content"] + eos_token for messages in examples["prompt"]]
     if not data_args.packing:
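
The change above only normalizes quote style; the underlying logic is to pick "<|end_of_text|>" as the document terminator when the llama3 template is used and fall back to the tokenizer's own eos_token otherwise, then append it to each prompt text. Below is a minimal, self-contained sketch of that behavior under stated assumptions: SimpleDataArgs and SimpleTokenizer are hypothetical stand-ins for LLaMA-Factory's DataArguments and a Hugging Face tokenizer, not the project's actual classes.

```python
# Sketch only: mirrors the eos_token selection and text_examples construction
# shown in the hunk, using stand-in types instead of the real library classes.
from dataclasses import dataclass
from typing import Any, Dict, List


@dataclass
class SimpleDataArgs:  # hypothetical stand-in for DataArguments
    template: str
    packing: bool = False


@dataclass
class SimpleTokenizer:  # hypothetical stand-in for a Hugging Face tokenizer
    eos_token: str = "</s>"


def build_text_examples(
    examples: Dict[str, List[Any]], tokenizer: SimpleTokenizer, data_args: SimpleDataArgs
) -> List[str]:
    # Llama 3 terminates documents with "<|end_of_text|>", so that token is
    # hard-coded for the llama3 template; other templates use the tokenizer's
    # configured eos_token (the logic of the "+" line in the diff).
    eos_token = "<|end_of_text|>" if data_args.template == "llama3" else tokenizer.eos_token
    return [messages[0]["content"] + eos_token for messages in examples["prompt"]]


if __name__ == "__main__":
    examples = {"prompt": [[{"content": "hello world"}], [{"content": "foo bar"}]]}
    print(build_text_examples(examples, SimpleTokenizer(), SimpleDataArgs(template="llama3")))
    # ['hello world<|end_of_text|>', 'foo bar<|end_of_text|>']
```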