Fix bug where a bos_token_id/eos_token_id of 0 is treated as missing

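When eos_token_id=0, the truthiness check `tokenizer.eos_token_id` evaluates to False, so the eos_token is never added. Compare against None explicitly (`is not None`) instead; the same applies to bos_token_id.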

Former-commit-id: 3399c0d6459e797946daf966e32134a12f8e48f3
This commit is contained in:
Fallen Angel 2024-02-02 17:34:48 +08:00 committed by GitHub
parent c3dd4924cc
commit b4c776fa93

View File

@ -117,9 +117,9 @@ class Template:
elif isinstance(elem, dict): elif isinstance(elem, dict):
token_ids += [tokenizer.convert_tokens_to_ids(elem.get("token"))] token_ids += [tokenizer.convert_tokens_to_ids(elem.get("token"))]
elif isinstance(elem, set): elif isinstance(elem, set):
if "bos_token" in elem and tokenizer.bos_token_id: if "bos_token" in elem and tokenizer.bos_token_id is not None:
token_ids += [tokenizer.bos_token_id] token_ids += [tokenizer.bos_token_id]
elif "eos_token" in elem and tokenizer.eos_token_id: elif "eos_token" in elem and tokenizer.eos_token_id is not None:
token_ids += [tokenizer.eos_token_id] token_ids += [tokenizer.eos_token_id]
else: else:
raise ValueError("Input must be string, set[str] or dict[str, str], got {}".format(type(elem))) raise ValueError("Input must be string, set[str] or dict[str, str], got {}".format(type(elem)))