Fix tokenizer max length (#6632)

Former-commit-id: 1807c7ba033985490aa7c8c39d880da6af983b92
Xiaosu Zhu 2025-01-14 17:35:54 +08:00 committed by GitHub
parent f7857c83e1
commit 381f7120e6


@@ -85,6 +85,9 @@ def load_tokenizer(model_args: "ModelArguments") -> "TokenizerModule":
         )
     except Exception as e:
         raise OSError("Failed to load tokenizer.") from e
 
+    if model_args.model_max_length is not None and tokenizer.model_max_length != model_args.model_max_length:
+        tokenizer.model_max_length = model_args.model_max_length
+
     if model_args.new_special_tokens is not None:
         num_added_tokens = tokenizer.add_special_tokens(
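
For context, a minimal sketch of the behavior this change enables, using the Hugging Face `AutoTokenizer` API. The checkpoint name and the hard-coded cutoff below are placeholders standing in for `model_args`, not values from this commit: once `tokenizer.model_max_length` is overridden with the user-configured `model_max_length`, truncation with no explicit `max_length` falls back to that cutoff instead of the value declared in the tokenizer config.

```python
from transformers import AutoTokenizer

# Placeholder values standing in for model_args (not part of this commit).
model_name_or_path = "gpt2"
model_max_length = 512

tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

# Mirror the added check: only override when the user supplied a value and it
# differs from what the tokenizer config declared.
if model_max_length is not None and tokenizer.model_max_length != model_max_length:
    tokenizer.model_max_length = model_max_length

# With truncation enabled and no explicit max_length, the overridden value applies.
ids = tokenizer("lorem ipsum " * 1000, truncation=True)["input_ids"]
assert len(ids) <= tokenizer.model_max_length  # 512 rather than gpt2's default 1024
```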