fix chatglm2 tokenizer

Former-commit-id: d8d82ca281
This commit is contained in:
hiyouga
2023-09-09 13:50:29 +08:00
parent f803c7c309
commit c6265e6969
4 changed files with 17 additions and 16 deletions

View File

@@ -72,6 +72,10 @@ def load_model_and_tokenizer(
**config_kwargs
)
# Fix tokenizer (for ChatGLM2): if the tokenizer overrides `_pad`, rebind the base PreTrainedTokenizerBase._pad onto it
if "PreTrainedTokenizerBase" not in str(tokenizer._pad.__func__):
tokenizer._pad = MethodType(PreTrainedTokenizerBase._pad, tokenizer)
if finetuning_args.finetuning_type == "full" and model_args.checkpoint_dir is not None:
model_to_load = model_args.checkpoint_dir[0]
else: