Fix tokenizer max length (#6632)

Former-commit-id: 58d029f3212dba1808e63cc8875022f6d741bd63
Author: Xiaosu Zhu
Date: 2025-01-14 17:35:54 +08:00
Committed by: GitHub
parent ad119afc58
commit a52496cc09


@@ -86,6 +86,9 @@ def load_tokenizer(model_args: "ModelArguments") -> "TokenizerModule":
     except Exception as e:
         raise OSError("Failed to load tokenizer.") from e
 
+    if model_args.model_max_length is not None and tokenizer.model_max_length != model_args.model_max_length:
+        tokenizer.model_max_length = model_args.model_max_length
+
     if model_args.new_special_tokens is not None:
         num_added_tokens = tokenizer.add_special_tokens(
             dict(additional_special_tokens=model_args.new_special_tokens),
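For context, a minimal standalone sketch of the behavior this change introduces. This is not the project's code: the model id and the desired_max_length value below are placeholders standing in for model_args fields. The idea is that the tokenizer's built-in model_max_length (often read from tokenizer_config.json) is overridden only when the user supplies a different value.

from transformers import AutoTokenizer

# Hypothetical model id and cutoff, playing the role of model_args.model_name_or_path
# and model_args.model_max_length.
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")
desired_max_length = 4096

if desired_max_length is not None and tokenizer.model_max_length != desired_max_length:
    # Override the value shipped with the tokenizer so that downstream
    # truncation and packing respect the user-specified sequence length.
    tokenizer.model_max_length = desired_max_length

print(tokenizer.model_max_length)  # 4096

Guarding on both "a value was provided" and "it differs from the current setting" keeps the default untouched when the user does not ask for a change.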