Mirror of https://github.com/hiyouga/LLaMA-Factory.git (synced 2025-08-23 14:22:51 +08:00)
Merge pull request #5907 from hiyouga/hiyouga/dev
[data] fix template replace behavior

Former-commit-id: 8408339d8388a610a4e5fe303086d7536d1e72d5
This commit is contained in:
commit
33e8bfc3ae
@ -398,11 +398,11 @@ def get_template_and_fix_tokenizer(tokenizer: "PreTrainedTokenizer", data_args:
|
|||||||
if num_added_tokens > 0:
|
if num_added_tokens > 0:
|
||||||
logger.warning("New tokens have been added, make sure `resize_vocab` is True.")
|
logger.warning("New tokens have been added, make sure `resize_vocab` is True.")
|
||||||
|
|
||||||
if template.replace_jinja_template:
|
if tokenizer.chat_template is None or template.replace_jinja_template:
|
||||||
try:
|
try:
|
||||||
tokenizer.chat_template = _get_jinja_template(template, tokenizer)
|
tokenizer.chat_template = _get_jinja_template(template, tokenizer)
|
||||||
except ValueError:
|
except ValueError as e:
|
||||||
logger.info("Cannot add this chat template to tokenizer.")
|
logger.info(f"Cannot add this chat template to tokenizer: {e}.")
|
||||||
|
|
||||||
return template
|
return template
|
||||||
|
|
||||||
|
@ -100,7 +100,7 @@ def load_tokenizer(model_args: "ModelArguments") -> "TokenizerModule":
|
|||||||
processor = AutoProcessor.from_pretrained(model_args.model_name_or_path, **init_kwargs)
|
processor = AutoProcessor.from_pretrained(model_args.model_name_or_path, **init_kwargs)
|
||||||
patch_processor(processor, config, tokenizer, model_args)
|
patch_processor(processor, config, tokenizer, model_args)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning(f"Processor was not found: {e}.")
|
logger.debug(f"Processor was not found: {e}.")
|
||||||
processor = None
|
processor = None
|
||||||
|
|
||||||
# Avoid load tokenizer, see:
|
# Avoid load tokenizer, see:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user