fix tokenizer

Former-commit-id: 32fa5e8d706050a30a3eb49f9a6bc2591f9c21ea
Author: hoshi-hiyouga (committed by GitHub)
Date: 2023-08-09 00:54:54 +08:00
parent 4f714ba314
commit a37e1c11c9


@@ -67,12 +67,12 @@ class Template:
         self,
         tokenizer: "PreTrainedTokenizer"
     ) -> Tuple[List[int], List[int]]:
-        if tokenizer.bos_token_id and getattr(tokenizer, "add_bos_token", True):
+        if tokenizer.bos_token_id:
             bos_ids = [tokenizer.bos_token_id]
         else:
             bos_ids = [] # bos token is optional
-        if tokenizer.eos_token_id and getattr(tokenizer, "add_eos_token", True):
+        if tokenizer.eos_token_id:
             eos_ids = [tokenizer.eos_token_id]
         else:
             raise ValueError("EOS token is required.")
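
The likely motivation, inferred from the diff rather than stated in the commit message: LLaMA-style tokenizers define an eos_token_id but ship with add_eos_token=False, so gating the EOS id on getattr(tokenizer, "add_eos_token", True) made the old code raise "EOS token is required." even though an EOS token exists. The sketch below illustrates the before/after behavior with a SimpleNamespace stand-in for such a tokenizer (the attribute values are illustrative assumptions, not taken from the repository).

# Minimal sketch, not part of the commit: compares the pre-fix and post-fix checks
# on a hypothetical LLaMA-like tokenizer object.
from types import SimpleNamespace
from typing import List, Tuple

# Stand-in for a LLaMA-2-style tokenizer: has bos/eos ids, but add_eos_token is False.
llama_like = SimpleNamespace(
    bos_token_id=1, eos_token_id=2, add_bos_token=True, add_eos_token=False
)

def special_ids_old(tokenizer) -> Tuple[List[int], List[int]]:
    """Pre-fix logic: also requires the add_bos_token/add_eos_token flags."""
    if tokenizer.bos_token_id and getattr(tokenizer, "add_bos_token", True):
        bos_ids = [tokenizer.bos_token_id]
    else:
        bos_ids = []  # bos token is optional
    if tokenizer.eos_token_id and getattr(tokenizer, "add_eos_token", True):
        return bos_ids, [tokenizer.eos_token_id]
    raise ValueError("EOS token is required.")

def special_ids_new(tokenizer) -> Tuple[List[int], List[int]]:
    """Post-fix logic: only requires the ids themselves to exist."""
    bos_ids = [tokenizer.bos_token_id] if tokenizer.bos_token_id else []
    if tokenizer.eos_token_id:
        return bos_ids, [tokenizer.eos_token_id]
    raise ValueError("EOS token is required.")

try:
    special_ids_old(llama_like)
except ValueError as err:
    print("old logic:", err)                       # old logic: EOS token is required.
print("new logic:", special_ids_new(llama_like))   # new logic: ([1], [2])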