support cohere commandR #3184

This commit is contained in:
hiyouga
2024-04-15 23:26:42 +08:00
parent 7a8ae3f4ac
commit e0dbac2845
7 changed files with 34 additions and 32 deletions

View File

@@ -36,13 +36,23 @@ def load_tokenizer(model_args: "ModelArguments") -> "PreTrainedTokenizer":
Note: including inplace operation of model_args.
"""
init_kwargs = _get_init_kwargs(model_args)
tokenizer = AutoTokenizer.from_pretrained(
model_args.model_name_or_path,
use_fast=model_args.use_fast_tokenizer,
split_special_tokens=model_args.split_special_tokens,
padding_side="right",
**init_kwargs,
)
try:
tokenizer = AutoTokenizer.from_pretrained(
model_args.model_name_or_path,
use_fast=model_args.use_fast_tokenizer,
split_special_tokens=model_args.split_special_tokens,
padding_side="right",
**init_kwargs,
)
except ValueError: # try the fast one
tokenizer = AutoTokenizer.from_pretrained(
model_args.model_name_or_path,
use_fast=True,
split_special_tokens=model_args.split_special_tokens,
padding_side="right",
**init_kwargs,
)
patch_tokenizer(tokenizer)
return tokenizer