Merge pull request #5907 from hiyouga/hiyouga/dev

[data] fix template replace behavior

Former-commit-id: 8408339d8388a610a4e5fe303086d7536d1e72d5
This commit is contained in:
hoshi-hiyouga 2024-11-02 13:42:53 +08:00 committed by GitHub
commit 33e8bfc3ae
2 changed files with 4 additions and 4 deletions

View File

@@ -398,11 +398,11 @@ def get_template_and_fix_tokenizer(tokenizer: "PreTrainedTokenizer", data_args:
 if num_added_tokens > 0:
 logger.warning("New tokens have been added, make sure `resize_vocab` is True.")
-if template.replace_jinja_template:
+if tokenizer.chat_template is None or template.replace_jinja_template:
 try:
 tokenizer.chat_template = _get_jinja_template(template, tokenizer)
-except ValueError:
-logger.info("Cannot add this chat template to tokenizer.")
+except ValueError as e:
+logger.info(f"Cannot add this chat template to tokenizer: {e}.")
 return template

View File

@@ -100,7 +100,7 @@ def load_tokenizer(model_args: "ModelArguments") -> "TokenizerModule":
 processor = AutoProcessor.from_pretrained(model_args.model_name_or_path, **init_kwargs)
 patch_processor(processor, config, tokenizer, model_args)
 except Exception as e:
-logger.warning(f"Processor was not found: {e}.")
+logger.debug(f"Processor was not found: {e}.")
 processor = None
 # Avoid load tokenizer, see: