Fix tokenizer max length (#6632)

Xiaosu Zhu authored on 2025-01-14 17:35:54 +08:00, committed by GitHub
parent 158a127d34
commit 58d029f321

@@ -85,6 +85,9 @@ def load_tokenizer(model_args: "ModelArguments") -> "TokenizerModule":
         )
     except Exception as e:
         raise OSError("Failed to load tokenizer.") from e
+    if model_args.model_max_length is not None and tokenizer.model_max_length != model_args.model_max_length:
+        tokenizer.model_max_length = model_args.model_max_length
+
     if model_args.new_special_tokens is not None:
         num_added_tokens = tokenizer.add_special_tokens(
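
The added lines override the tokenizer's built-in model_max_length with the user-configured value whenever the two differ, so later tokenization calls that truncate without an explicit max_length honor the configured limit. Below is a minimal sketch of that effect, not part of the commit; it assumes a Hugging Face AutoTokenizer and uses "gpt2" and a hard-coded limit of 512 purely as stand-ins for model_args.model_max_length.

    from transformers import AutoTokenizer

    # Load a tokenizer; its config carries a default limit (1024 for gpt2).
    tokenizer = AutoTokenizer.from_pretrained("gpt2")
    print(tokenizer.model_max_length)

    # Stand-in for model_args.model_max_length from the diff above.
    model_max_length = 512

    # Mirror the added check: override only when a limit is configured
    # and it differs from the value baked into the tokenizer.
    if model_max_length is not None and tokenizer.model_max_length != model_max_length:
        tokenizer.model_max_length = model_max_length

    # With truncation=True and no explicit max_length, the tokenizer now
    # truncates to the overridden limit instead of the original config value.
    ids = tokenizer("long text " * 1000, truncation=True)["input_ids"]
    assert len(ids) <= 512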