From a52496cc093d9093d3dc78b5ee4e4c8c480f298b Mon Sep 17 00:00:00 2001
From: Xiaosu Zhu
Date: Tue, 14 Jan 2025 17:35:54 +0800
Subject: [PATCH] Fix tokenizer max length (#6632)

Former-commit-id: 58d029f3212dba1808e63cc8875022f6d741bd63
---
 src/llamafactory/model/loader.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/llamafactory/model/loader.py b/src/llamafactory/model/loader.py
index 52815bb2..4d6b5a99 100644
--- a/src/llamafactory/model/loader.py
+++ b/src/llamafactory/model/loader.py
@@ -85,6 +85,9 @@ def load_tokenizer(model_args: "ModelArguments") -> "TokenizerModule":
         )
     except Exception as e:
         raise OSError("Failed to load tokenizer.") from e
+
+    if model_args.model_max_length is not None and tokenizer.model_max_length != model_args.model_max_length:
+        tokenizer.model_max_length = model_args.model_max_length
 
     if model_args.new_special_tokens is not None:
         num_added_tokens = tokenizer.add_special_tokens(