Merge pull request #2411 from lxsyz/main

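Fix bug where a token id of 0 was treated as missing: the bos_token_id / eos_token_id checks now use `is not None` instead of truthiness, so eos_token_id=0 is no longer skipped.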

Former-commit-id: 2eaaa9522f458eb9f81eddf130f3d4d04a9f3995
This commit is contained in:
hoshi-hiyouga 2024-02-02 17:38:16 +08:00 committed by GitHub
commit fcf1a49e4d

View File

@@ -117,9 +117,9 @@ class Template:
elif isinstance(elem, dict): elif isinstance(elem, dict):
token_ids += [tokenizer.convert_tokens_to_ids(elem.get("token"))] token_ids += [tokenizer.convert_tokens_to_ids(elem.get("token"))]
elif isinstance(elem, set): elif isinstance(elem, set):
if "bos_token" in elem and tokenizer.bos_token_id: if "bos_token" in elem and tokenizer.bos_token_id is not None:
token_ids += [tokenizer.bos_token_id] token_ids += [tokenizer.bos_token_id]
elif "eos_token" in elem and tokenizer.eos_token_id: elif "eos_token" in elem and tokenizer.eos_token_id is not None:
token_ids += [tokenizer.eos_token_id] token_ids += [tokenizer.eos_token_id]
else: else:
raise ValueError("Input must be string, set[str] or dict[str, str], got {}".format(type(elem))) raise ValueError("Input must be string, set[str] or dict[str, str], got {}".format(type(elem)))