From b4c776fa934a9a7985dfd9cdb5953177646fe2a6 Mon Sep 17 00:00:00 2001 From: Fallen Angel Date: Fri, 2 Feb 2024 17:34:48 +0800 Subject: [PATCH] fix eos_token_id=0 bug When eos_token_id=0, the eos_token was never added because the truthiness check treated a token id of 0 as missing; compare against None instead (the same fix is applied to bos_token_id). Former-commit-id: 3399c0d6459e797946daf966e32134a12f8e48f3 --- src/llmtuner/data/template.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/llmtuner/data/template.py b/src/llmtuner/data/template.py index 2d89c3fa..a3b23be9 100644 --- a/src/llmtuner/data/template.py +++ b/src/llmtuner/data/template.py @@ -117,9 +117,9 @@ class Template: elif isinstance(elem, dict): token_ids += [tokenizer.convert_tokens_to_ids(elem.get("token"))] elif isinstance(elem, set): - if "bos_token" in elem and tokenizer.bos_token_id: + if "bos_token" in elem and tokenizer.bos_token_id is not None: token_ids += [tokenizer.bos_token_id] - elif "eos_token" in elem and tokenizer.eos_token_id: + elif "eos_token" in elem and tokenizer.eos_token_id is not None: token_ids += [tokenizer.eos_token_id] else: raise ValueError("Input must be string, set[str] or dict[str, str], got {}".format(type(elem)))